Browse Source

Reimplemented the software SPI for the DUE in assembler. This allows the SPI clock to reach 12 MHz and makes transfers to the SD card 4x faster. This is REQUIRED for access to the SD card from USB to be usable (it allows transfer speeds of 600 KB/s).

etagle 6 years ago
parent
commit
d8a4db72ac
1 changed file with 192 additions and 49 deletions
  1. 192
    49
      Marlin/src/HAL/HAL_DUE/HAL_spi_Due.cpp

+ 192
- 49
Marlin/src/HAL/HAL_DUE/HAL_spi_Due.cpp View File

@@ -23,6 +23,10 @@
23 23
 /**
24 24
  * Software SPI functions originally from Arduino Sd2Card Library
25 25
  * Copyright (C) 2009 by William Greiman
26
+ *
27
+ * Completely rewritten and tuned by Eduardo José Tagle in 2017/2018
28
+ * in ARM thumb2 inline assembler and tuned for maximum speed and performance
29
+ * allowing SPI clocks of up to 12 Mhz to increase SD card read/write performance
26 30
  */
27 31
 
28 32
 /**
@@ -53,6 +57,9 @@
53 57
   // software SPI
54 58
   // --------------------------------------------------------------------------
55 59
 
60
+  // set optimization so ARDUINO optimizes this file
61
+  #pragma GCC optimize (3)
62
+
56 63
   /* ---------------- Delay Cycles routine -------------- */
57 64
 
58 65
   /* https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles */
@@ -105,27 +112,171 @@
105 112
 
106 113
   typedef uint8_t (*pfnSpiTransfer) (uint8_t b);
107 114
 
108
-  // bitbanging transfer
109
-  #define SWSPI_BIT_XFER(n) \
110
-      WRITE(MOSI_PIN, bout & (1 << n)); \
111
-      WRITE(SCK_PIN, HIGH); /* Sampling point */\
112
-      /* (implicit by overhead) DELAY_NS(63); 5.3 cycles @ 84mhz */ \
113
-      bin |= (READ(MISO_PIN) != 0) << n; \
114
-      WRITE(SCK_PIN, LOW); /* Toggling point*/ \
115
-      /* (implicit by overhead) DELAY_NS(63); 5.3 cycles @ 84mhz */
116
-
117
-  // run at ~8 .. ~10Mhz
118
-  static uint8_t spiTransfer0(uint8_t bout) { // using Mode 0
119
-    volatile uint8_t bin = 0; /* volatile to disable deferred processing */
120
-    SWSPI_BIT_XFER(7);
121
-    SWSPI_BIT_XFER(6);
122
-    SWSPI_BIT_XFER(5);
123
-    SWSPI_BIT_XFER(4);
124
-    SWSPI_BIT_XFER(3);
125
-    SWSPI_BIT_XFER(2);
126
-    SWSPI_BIT_XFER(1);
127
-    SWSPI_BIT_XFER(0);
128
-    return bin;
115
+  /* ---------------- Macros to be able to access definitions from asm */
116
+
117
+  #define _PORT(IO) DIO ##  IO ## _WPORT
118
+  #define _PIN_MASK(IO) MASK(DIO ## IO ## _PIN)
119
+  #define _PIN_SHIFT(IO) DIO ## IO ## _PIN
120
+  #define PORT(IO) _PORT(IO)
121
+  #define PIN_MASK(IO) _PIN_MASK(IO)
122
+  #define PIN_SHIFT(IO) _PIN_SHIFT(IO)
123
+
124
+  // run at ~8 .. ~10Mhz - Tx version (Rx data discarded)
125
+  static uint8_t spiTransferTx0(uint8_t bout) { // using Mode 0
126
+    register uint32_t MOSI_PORT_PLUS30 = ((uint32_t) PORT(MOSI_PIN)) + 0x30;  /* SODR of port */
127
+    register uint32_t MOSI_MASK = PIN_MASK(MOSI_PIN);
128
+    register uint32_t SCK_PORT_PLUS30 = ((uint32_t) PORT(SCK_PIN)) + 0x30;    /* SODR of port */
129
+    register uint32_t SCK_MASK = PIN_MASK(SCK_PIN);
130
+    register uint32_t idx;
131
+
132
+    /* Negate bout, as the assembler requires a negated value */
133
+    bout = ~bout;
134
+
135
+    /* The software SPI routine */
136
+    __asm__ __volatile__(
137
+      ".syntax unified" "\n\t" // is to prevent CM0,CM1 non-unified syntax
138
+
139
+      /* Bit 7 */
140
+      " ubfx %[idx],%[txval],#7,#1" "\n\t"                      /* Place bit 7 in bit 0 of idx*/
141
+
142
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
143
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
144
+      " ubfx %[idx],%[txval],#6,#1" "\n\t"                      /* Place bit 6 in bit 0 of idx*/
145
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
146
+
147
+      /* Bit 6 */
148
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
149
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
150
+      " ubfx %[idx],%[txval],#5,#1" "\n\t"                      /* Place bit 5 in bit 0 of idx*/
151
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
152
+
153
+      /* Bit 5 */
154
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
155
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
156
+      " ubfx %[idx],%[txval],#4,#1" "\n\t"                      /* Place bit 4 in bit 0 of idx*/
157
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
158
+
159
+      /* Bit 4 */
160
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
161
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
162
+      " ubfx %[idx],%[txval],#3,#1" "\n\t"                      /* Place bit 3 in bit 0 of idx*/
163
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
164
+
165
+      /* Bit 3 */
166
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
167
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
168
+      " ubfx %[idx],%[txval],#2,#1" "\n\t"                      /* Place bit 2 in bit 0 of idx*/
169
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
170
+
171
+      /* Bit 2 */
172
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
173
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
174
+      " ubfx %[idx],%[txval],#1,#1" "\n\t"                      /* Place bit 1 in bit 0 of idx*/
175
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
176
+
177
+      /* Bit 1 */
178
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
179
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
180
+      " ubfx %[idx],%[txval],#0,#1" "\n\t"                      /* Place bit 0 in bit 0 of idx*/
181
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
182
+
183
+      /* Bit 0 */
184
+      " str %[mosi_mask],[%[mosi_port], %[idx],LSL #2]" "\n\t"  /* Access the proper SODR or CODR registers based on that bit */
185
+      " str %[sck_mask],[%[sck_port]]" "\n\t"                   /* SODR */
186
+      " nop"  "\n\t"
187
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"              /* CODR */
188
+
189
+      : [mosi_mask]"+r"( MOSI_MASK ),
190
+        [mosi_port]"+r"( MOSI_PORT_PLUS30 ),
191
+        [sck_mask]"+r"( SCK_MASK ),
192
+        [sck_port]"+r"( SCK_PORT_PLUS30 ),
193
+        [idx]"+r"( idx ),
194
+        [txval]"+r"( bout )
195
+      :
196
+      : "cc"
197
+    );
198
+
199
+    return 0;
200
+  }
201
+
202
+  // run at ~8 .. ~10Mhz - Rx version (Tx line not altered)
203
+  static uint8_t spiTransferRx0(uint8_t bout) { // using Mode 0
204
+    int bin = 0, work = 0;
205
+    register uint32_t MISO_PORT_PLUS3C = ((uint32_t) PORT(MISO_PIN)) + 0x3C;  /* PDSR of port */
206
+    register uint32_t SCK_PORT_PLUS30 = ((uint32_t) PORT(SCK_PIN)) + 0x30;    /* SODR of port */
207
+    register uint32_t SCK_MASK = PIN_MASK(SCK_PIN);
208
+    UNUSED(bout);
209
+
210
+    /* The software SPI routine */
211
+    __asm__ __volatile__(
212
+      ".syntax unified" "\n\t" // is to prevent CM0,CM1 non-unified syntax
213
+
214
+      /* bit 7 */
215
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
216
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
217
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
218
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
219
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
220
+
221
+      /* bit 6 */
222
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
223
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
224
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
225
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
226
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
227
+
228
+      /* bit 5 */
229
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
230
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
231
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
232
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
233
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
234
+
235
+      /* bit 4 */
236
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
237
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
238
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
239
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
240
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
241
+
242
+      /* bit 3 */
243
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
244
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
245
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
246
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
247
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
248
+
249
+      /* bit 2 */
250
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
251
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
252
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
253
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
254
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
255
+
256
+      /* bit 1 */
257
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
258
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
259
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
260
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
261
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
262
+
263
+      /* bit 0 */
264
+      " str %[sck_mask],[%[sck_port]]" "\n\t"           /* SODR */
265
+      " ldr %[work],[%[miso_port]]" "\n\t"              /* PDSR */
266
+      " str %[sck_mask],[%[sck_port],#0x4]" "\n\t"      /* CODR */
267
+      " lsrs %[work],%[work],%[miso_shift]" "\n\t"      /* Isolate input into carry */
268
+      " adc %[bin],%[bin],%[bin]" "\n\t"                /* Shift left result and add the carry */
269
+
270
+      : [miso_port]"+r"( MISO_PORT_PLUS3C ),
271
+        [sck_mask]"+r"( SCK_MASK ),
272
+        [sck_port]"+r"( SCK_PORT_PLUS30 ),
273
+        [bin]"+r"(bin),
274
+        [work]"+r"(work)
275
+      : [miso_shift]"M"( PIN_SHIFT(MISO_PIN) + 1 )      /* So we move to the carry */
276
+      : "cc"
277
+    );
278
+
279
+    return (uint8_t)bin;
129 280
   }
130 281
 
131 282
   // run at ~4Mhz
@@ -133,15 +284,15 @@
133 284
     int bits = 8;
134 285
     do {
135 286
       WRITE(MOSI_PIN, b & 0x80);
136
-      b <<= 1; // little setup time
287
+      b <<= 1;        // little setup time
137 288
 
138 289
       WRITE(SCK_PIN, HIGH);
139
-      DELAY_NS(125); // 10 cycles @ 84mhz
290
+      DELAY_NS(125);  // 10 cycles @ 84mhz
140 291
 
141 292
       b |= (READ(MISO_PIN) != 0);
142 293
 
143 294
       WRITE(SCK_PIN, LOW);
144
-      DELAY_NS(125); // 10 cycles @ 84mhz
295
+      DELAY_NS(125);  // 10 cycles @ 84mhz
145 296
     } while (--bits);
146 297
     return b;
147 298
   }
@@ -166,8 +317,9 @@
166 317
     return b;
167 318
   }
168 319
 
169
-  // Use the generic one
170
-  static pfnSpiTransfer spiTransfer = spiTransferX;
320
+  // Pointers to generic functions
321
+  static pfnSpiTransfer spiTransferTx = spiTransferX;
322
+  static pfnSpiTransfer spiTransferRx = spiTransferX;
171 323
 
172 324
   void spiBegin() {
173 325
     SET_OUTPUT(SS_PIN);
@@ -190,14 +342,17 @@
190 342
   void spiInit(uint8_t spiRate) {
191 343
     switch (spiRate) {
192 344
       case 0:
193
-        spiTransfer = spiTransfer0;
345
+        spiTransferTx = spiTransferTx0;
346
+        spiTransferRx = spiTransferRx0;
194 347
         break;
195 348
       case 1:
196
-        spiTransfer = spiTransfer1;
349
+        spiTransferTx = spiTransfer1;
350
+        spiTransferRx = spiTransfer1;
197 351
         break;
198 352
       default:
199 353
         spiDelayCyclesX4 = (F_CPU/1000000) >> (6 - spiRate);
200
-        spiTransfer = spiTransferX;
354
+        spiTransferTx = spiTransferX;
355
+        spiTransferRx = spiTransferX;
201 356
         break;
202 357
     }
203 358
 
@@ -208,48 +363,36 @@
208 363
 
209 364
   uint8_t spiRec() {
210 365
     WRITE(SS_PIN, LOW);
211
-    uint8_t b = spiTransfer(0xff);
366
+    WRITE(MOSI_PIN, 1); /* Output 1s 1*/
367
+    uint8_t b = spiTransferRx(0xFF);
212 368
     WRITE(SS_PIN, HIGH);
213 369
     return b;
214 370
   }
215 371
 
216
-  void spiRead(uint8_t*buf, uint16_t nbyte) {
372
+  void spiRead(uint8_t* buf, uint16_t nbyte) {
217 373
     if (nbyte == 0) return;
218 374
     WRITE(SS_PIN, LOW);
375
+    WRITE(MOSI_PIN, 1); /* Output 1s 1*/
219 376
     for (int i = 0; i < nbyte; i++) {
220
-      buf[i] = spiTransfer(0xff);
377
+      buf[i] = spiTransferRx(0xff);
221 378
     }
222 379
     WRITE(SS_PIN, HIGH);
223 380
   }
224 381
 
225 382
   void spiSend(uint8_t b) {
226 383
     WRITE(SS_PIN, LOW);
227
-    uint8_t response = spiTransfer(b);
228
-    UNUSED(response);
229
-    WRITE(SS_PIN, HIGH);
230
-  }
231
-
232
-  static void spiSend(const uint8_t* buf, size_t n) {
233
-    uint8_t response;
234
-    if (n == 0) return;
235
-    WRITE(SS_PIN, LOW);
236
-    for (uint16_t i = 0; i < n; i++) {
237
-      response = spiTransfer(buf[i]);
238
-    }
239
-    UNUSED(response);
384
+    (void) spiTransferTx(b);
240 385
     WRITE(SS_PIN, HIGH);
241 386
   }
242 387
 
243 388
   void spiSendBlock(uint8_t token, const uint8_t* buf) {
244
-    uint8_t response;
245 389
 
246 390
     WRITE(SS_PIN, LOW);
247
-    response = spiTransfer(token);
391
+    (void) spiTransferTx(token);
248 392
 
249 393
     for (uint16_t i = 0; i < 512; i++) {
250
-      response = spiTransfer(buf[i]);
394
+      (void) spiTransferTx(buf[i]);
251 395
     }
252
-    UNUSED(response);
253 396
     WRITE(SS_PIN, HIGH);
254 397
   }
255 398
 

Loading…
Cancel
Save