Browse Source

Reimplemented the software SPI for the DUE in assembler. This allows the SPI clock to reach 12 MHz and makes transfers to the SD card about 4x faster. This is REQUIRED for SD card access from USB to be usable (it allows transfer speeds of about 600 KB/s).

etagle 6 years ago
parent
commit
d8a4db72ac
1 changed file with 192 additions and 49 deletions
  1. 192
    49
      Marlin/src/HAL/HAL_DUE/HAL_spi_Due.cpp

+ 192
- 49
Marlin/src/HAL/HAL_DUE/HAL_spi_Due.cpp View File

23
 /**
23
 /**
24
  * Software SPI functions originally from Arduino Sd2Card Library
24
  * Software SPI functions originally from Arduino Sd2Card Library
25
  * Copyright (C) 2009 by William Greiman
25
  * Copyright (C) 2009 by William Greiman
26
+ *
27
+ * Completely rewritten and tuned by Eduardo José Tagle in 2017/2018
28
+ * in ARM Thumb-2 inline assembler for maximum speed and performance,
29
+ * allowing SPI clocks of up to 12 MHz to increase SD card read/write performance
26
  */
30
  */
27
 
31
 
28
 /**
32
 /**
53
   // software SPI
57
   // software SPI
54
   // --------------------------------------------------------------------------
58
   // --------------------------------------------------------------------------
55
 
59
 
60
+  // set optimization so ARDUINO optimizes this file
61
+  #pragma GCC optimize (3)
62
+
56
   /* ---------------- Delay Cycles routine -------------- */
63
   /* ---------------- Delay Cycles routine -------------- */
57
 
64
 
58
   /* https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles */
65
   /* https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles */
105
 
112
 
106
   typedef uint8_t (*pfnSpiTransfer) (uint8_t b);
113
   typedef uint8_t (*pfnSpiTransfer) (uint8_t b);
107
 
114
 
108
-  // bitbanging transfer
109
-  #define SWSPI_BIT_XFER(n) \
110
-      WRITE(MOSI_PIN, bout & (1 << n)); \
111
-      WRITE(SCK_PIN, HIGH); /* Sampling point */\
112
-      /* (implicit by overhead) DELAY_NS(63); 5.3 cycles @ 84mhz */ \
113
-      bin |= (READ(MISO_PIN) != 0) << n; \
114
-      WRITE(SCK_PIN, LOW); /* Toggling point*/ \
115
-      /* (implicit by overhead) DELAY_NS(63); 5.3 cycles @ 84mhz */
116
-
117
-  // run at ~8 .. ~10Mhz
118
-  static uint8_t spiTransfer0(uint8_t bout) { // using Mode 0
119
-    volatile uint8_t bin = 0; /* volatile to disable deferred processing */
120
-    SWSPI_BIT_XFER(7);
121
-    SWSPI_BIT_XFER(6);
122
-    SWSPI_BIT_XFER(5);
123
-    SWSPI_BIT_XFER(4);
124
-    SWSPI_BIT_XFER(3);
125
-    SWSPI_BIT_XFER(2);
126
-    SWSPI_BIT_XFER(1);
127
-    SWSPI_BIT_XFER(0);
128
-    return bin;
115
+  /* ---------------- Macros to be able to access definitions from asm */
116
+
117
+  #define _PORT(IO) DIO ##  IO ## _WPORT
118
+  #define _PIN_MASK(IO) MASK(DIO ## IO ## _PIN)
119
+  #define _PIN_SHIFT(IO) DIO ## IO ## _PIN
120
+  #define PORT(IO) _PORT(IO)
121
+  #define PIN_MASK(IO) _PIN_MASK(IO)
122
+  #define PIN_SHIFT(IO) _PIN_SHIFT(IO)
123
+
124
+  // run at ~8 .. ~10Mhz - Tx version (Rx data discarded)
125
+  static uint8_t spiTransferTx0(uint8_t bout) { // using Mode 0
126
+    register uint32_t MOSI_PORT_PLUS30 = ((uint32_t) PORT(MOSI_PIN)) + 0x30;  /* SODR of port */
127
+    register uint32_t MOSI_MASK = PIN_MASK(MOSI_PIN);
128
+    register uint32_t SCK_PORT_PLUS30 = ((uint32_t) PORT(SCK_PIN)) + 0x30;    /* SODR of port */
129
+    register uint32_t SCK_MASK = PIN_MASK(SCK_PIN);
130
+    register uint32_t idx;
131
+
132
+    /* Negate bout, as the assembler requires a negated value */
133
+    bout = ~bout;
134
+
135
+    /* The software SPI routine */
136
+    __asm__ __volatile__(
137
+      ".syntax unified" "\n\t" // is to prevent CM0,CM1 non-unified syntax
138
+
139
+      /* Bit 7 */
140
+      " ubfx %[idx],%[txval],#7,#1" "\n\t"                      /* Place bit 7 in bit 0 of idx*/
141
+
142
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
143
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
144
+      " ubfx %[idx],%[txval],#6,#1" "\n\t"                      /* Place bit 6 in bit 0 of idx*/
145
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
146
+
147
+      /* Bit 6 */
148
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
149
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
150
+      " ubfx %[idx],%[txval],#5,#1" "\n\t"                      /* Place bit 5 in bit 0 of idx*/
151
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
152
+
153
+      /* Bit 5 */
154
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
155
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
156
+      " ubfx %[idx],%[txval],#4,#1" "\n\t"                      /* Place bit 4 in bit 0 of idx*/
157
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
158
+
159
+      /* Bit 4 */
160
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
161
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
162
+      " ubfx %[idx],%[txval],#3,#1" "\n\t"                      /* Place bit 3 in bit 0 of idx*/
163
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
164
+
165
+      /* Bit 3 */
166
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
167
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
168
+      " ubfx %[idx],%[txval],#2,#1" "\n\t"                      /* Place bit 2 in bit 0 of idx*/
169
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
170
+
171
+      /* Bit 2 */
172
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
173
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
174
+      " ubfx %[idx],%[txval],#1,#1" "\n\t"                      /* Place bit 1 in bit 0 of idx*/
175
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
176
+
177
+      /* Bit 1 */
178
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
179
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
180
+      " ubfx %[idx],%[txval],#0,#1" "\n\t"                      /* Place bit 0 in bit 0 of idx*/
181
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
182
+
183
+      /* Bit 0 */
184
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
185
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
186
+      " nop"  "\n\t"
187
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
188
+
189
+      : [mosi_mask]"+r"( MOSI_MASK ),
190
+        [mosi_port]"+r"( MOSI_PORT_PLUS30 ),
191
+        [sck_mask]"+r"( SCK_MASK ),
192
+        [sck_port]"+r"( SCK_PORT_PLUS30 ),
193
+        [idx]"+r"( idx ),
194
+        [txval]"+r"( bout )
195
+      :
196
+      : "cc"
197
+    );
198
+
199
+    return 0;
200
+  }
201
+
202
+  // run at ~8 .. ~10Mhz - Rx version (Tx line not altered)
203
+  static uint8_t spiTransferRx0(uint8_t bout) { // using Mode 0
204
+    int bin = 0, work = 0;
205
+    register uint32_t MISO_PORT_PLUS3C = ((uint32_t) PORT(MISO_PIN)) + 0x3C;  /* PDSR of port */
206
+    register uint32_t SCK_PORT_PLUS30 = ((uint32_t) PORT(SCK_PIN)) + 0x30;    /* SODR of port */
207
+    register uint32_t SCK_MASK = PIN_MASK(SCK_PIN);
208
+    UNUSED(bout);
209
+
210
+    /* The software SPI routine */
211
+    __asm__ __volatile__(
212
+      ".syntax unified" "\n\t" // is to prevent CM0,CM1 non-unified syntax
213
+
214
+      /* bit 7 */
215
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
216
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
217
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
218
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
219
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
220
+
221
+      /* bit 6 */
222
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
223
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
224
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
225
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
226
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
227
+
228
+      /* bit 5 */
229
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
230
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
231
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
232
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
233
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
234
+
235
+      /* bit 4 */
236
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
237
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
238
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
239
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
240
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
241
+
242
+      /* bit 3 */
243
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
244
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
245
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
246
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
247
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
248
+
249
+      /* bit 2 */
250
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
251
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
252
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
253
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
254
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
255
+
256
+      /* bit 1 */
257
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
258
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
259
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
260
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
261
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
262
+
263
+      /* bit 0 */
264
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
265
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
266
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
267
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
268
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
269
+
270
+      : [miso_port]"+r"( MISO_PORT_PLUS3C ),
271
+        [sck_mask]"+r"( SCK_MASK ),
272
+        [sck_port]"+r"( SCK_PORT_PLUS30 ),
273
+        [bin]"+r"(bin),
274
+        [work]"+r"(work)
275
+      : [miso_shift]"M"( PIN_SHIFT(MISO_PIN) + 1 )      /* So we move to the carry */
276
+      : "cc"
277
+    );
278
+
279
+    return (uint8_t)bin;
129
   }
280
   }
130
 
281
 
131
   // run at ~4Mhz
282
   // run at ~4Mhz
133
     int bits = 8;
284
     int bits = 8;
134
     do {
285
     do {
135
       WRITE(MOSI_PIN, b & 0x80);
286
       WRITE(MOSI_PIN, b & 0x80);
136
-      b <<= 1; // little setup time
287
+      b <<= 1;        // little setup time
137
 
288
 
138
       WRITE(SCK_PIN, HIGH);
289
       WRITE(SCK_PIN, HIGH);
139
-      DELAY_NS(125); // 10 cycles @ 84mhz
290
+      DELAY_NS(125);  // 10 cycles @ 84mhz
140
 
291
 
141
       b |= (READ(MISO_PIN) != 0);
292
       b |= (READ(MISO_PIN) != 0);
142
 
293
 
143
       WRITE(SCK_PIN, LOW);
294
       WRITE(SCK_PIN, LOW);
144
-      DELAY_NS(125); // 10 cycles @ 84mhz
295
+      DELAY_NS(125);  // 10 cycles @ 84mhz
145
     } while (--bits);
296
     } while (--bits);
146
     return b;
297
     return b;
147
   }
298
   }
166
     return b;
317
     return b;
167
   }
318
   }
168
 
319
 
169
-  // Use the generic one
170
-  static pfnSpiTransfer spiTransfer = spiTransferX;
320
+  // Pointers to generic functions
321
+  static pfnSpiTransfer spiTransferTx = spiTransferX;
322
+  static pfnSpiTransfer spiTransferRx = spiTransferX;
171
 
323
 
172
   void spiBegin() {
324
   void spiBegin() {
173
     SET_OUTPUT(SS_PIN);
325
     SET_OUTPUT(SS_PIN);
190
   void spiInit(uint8_t spiRate) {
342
   void spiInit(uint8_t spiRate) {
191
     switch (spiRate) {
343
     switch (spiRate) {
192
       case 0:
344
       case 0:
193
-        spiTransfer = spiTransfer0;
345
+        spiTransferTx = spiTransferTx0;
346
+        spiTransferRx = spiTransferRx0;
194
         break;
347
         break;
195
       case 1:
348
       case 1:
196
-        spiTransfer = spiTransfer1;
349
+        spiTransferTx = spiTransfer1;
350
+        spiTransferRx = spiTransfer1;
197
         break;
351
         break;
198
       default:
352
       default:
199
         spiDelayCyclesX4 = (F_CPU/1000000) >> (6 - spiRate);
353
         spiDelayCyclesX4 = (F_CPU/1000000) >> (6 - spiRate);
200
-        spiTransfer = spiTransferX;
354
+        spiTransferTx = spiTransferX;
355
+        spiTransferRx = spiTransferX;
201
         break;
356
         break;
202
     }
357
     }
203
 
358
 
208
 
363
 
209
   uint8_t spiRec() {
364
   uint8_t spiRec() {
210
     WRITE(SS_PIN, LOW);
365
     WRITE(SS_PIN, LOW);
211
-    uint8_t b = spiTransfer(0xff);
366
+    WRITE(MOSI_PIN, 1); /* Output 1s 1*/
367
+    uint8_t b = spiTransferRx(0xFF);
212
     WRITE(SS_PIN, HIGH);
368
     WRITE(SS_PIN, HIGH);
213
     return b;
369
     return b;
214
   }
370
   }
215
 
371
 
216
-  void spiRead(uint8_t*buf, uint16_t nbyte) {
372
+  void spiRead(uint8_t* buf, uint16_t nbyte) {
217
     if (nbyte == 0) return;
373
     if (nbyte == 0) return;
218
     WRITE(SS_PIN, LOW);
374
     WRITE(SS_PIN, LOW);
375
+    WRITE(MOSI_PIN, 1); /* Output 1s 1*/
219
     for (int i = 0; i < nbyte; i++) {
376
     for (int i = 0; i < nbyte; i++) {
220
-      buf[i] = spiTransfer(0xff);
377
+      buf[i] = spiTransferRx(0xff);
221
     }
378
     }
222
     WRITE(SS_PIN, HIGH);
379
     WRITE(SS_PIN, HIGH);
223
   }
380
   }
224
 
381
 
225
   void spiSend(uint8_t b) {
382
   void spiSend(uint8_t b) {
226
     WRITE(SS_PIN, LOW);
383
     WRITE(SS_PIN, LOW);
227
-    uint8_t response = spiTransfer(b);
228
-    UNUSED(response);
229
-    WRITE(SS_PIN, HIGH);
230
-  }
231
-
232
-  static void spiSend(const uint8_t* buf, size_t n) {
233
-    uint8_t response;
234
-    if (n == 0) return;
235
-    WRITE(SS_PIN, LOW);
236
-    for (uint16_t i = 0; i < n; i++) {
237
-      response = spiTransfer(buf[i]);
238
-    }
239
-    UNUSED(response);
384
+    (void) spiTransferTx(b);
240
     WRITE(SS_PIN, HIGH);
385
     WRITE(SS_PIN, HIGH);
241
   }
386
   }
242
 
387
 
243
   void spiSendBlock(uint8_t token, const uint8_t* buf) {
388
   void spiSendBlock(uint8_t token, const uint8_t* buf) {
244
-    uint8_t response;
245
 
389
 
246
     WRITE(SS_PIN, LOW);
390
     WRITE(SS_PIN, LOW);
247
-    response = spiTransfer(token);
391
+    (void) spiTransferTx(token);
248
 
392
 
249
     for (uint16_t i = 0; i < 512; i++) {
393
     for (uint16_t i = 0; i < 512; i++) {
250
-      response = spiTransfer(buf[i]);
394
+      (void) spiTransferTx(buf[i]);
251
     }
395
     }
252
-    UNUSED(response);
253
     WRITE(SS_PIN, HIGH);
396
     WRITE(SS_PIN, HIGH);
254
   }
397
   }
255
 
398
 

Loading…
Cancel
Save