Explorar el Código

Convert custom maths to inlines (#10728)

Scott Lahteine hace 6 años
padre
commit
883b0c9880
No account linked to committer's email address

+ 76
- 73
Marlin/src/HAL/HAL_AVR/math_AVR.h Ver fichero

@@ -23,92 +23,95 @@
23 23
 #ifndef _MATH_AVR_H_
24 24
 #define _MATH_AVR_H_
25 25
 
26
-#define a(CODE) " " CODE "\n\t"
27
-
28 26
 /**
29 27
  * Optimized math functions for AVR
30 28
  */
31 29
 
32 30
 // intRes = longIn1 * longIn2 >> 24
33 31
 // uses:
34
-// r26 to store 0
35
-// r27 to store bits 16-23 of the 48bit result. The top bit is used to round the two byte result.
32
+// A[tmp] to store 0
33
+// B[tmp] to store bits 16-23 of the 48bit result. The top bit is used to round the two byte result.
36 34
 // note that the lower two bytes and the upper byte of the 48bit result are not calculated.
37 35
 // this can cause the result to be out by one as the lower bytes may cause carries into the upper ones.
38
-// B0 A0 are bits 24-39 and are the returned value
39
-// C1 B1 A1 is longIn1
40
-// D2 C2 B2 A2 is longIn2
36
+// B A are bits 24-39 and are the returned value
37
+// C B A is longIn1
38
+// D C B A is longIn2
41 39
 //
42
-#define MultiU24X32toH16(intRes, longIn1, longIn2) \
43
-  asm volatile ( \
44
-                 A("clr r26")      \
45
-                 A("mul %A1, %B2") \
46
-                 A("mov r27, r1")  \
47
-                 A("mul %B1, %C2") \
48
-                 A("movw %A0, r0") \
49
-                 A("mul %C1, %C2") \
50
-                 A("add %B0, r0")  \
51
-                 A("mul %C1, %B2") \
52
-                 A("add %A0, r0")  \
53
-                 A("adc %B0, r1")  \
54
-                 A("mul %A1, %C2") \
55
-                 A("add r27, r0")  \
56
-                 A("adc %A0, r1")  \
57
-                 A("adc %B0, r26") \
58
-                 A("mul %B1, %B2") \
59
-                 A("add r27, r0")  \
60
-                 A("adc %A0, r1")  \
61
-                 A("adc %B0, r26") \
62
-                 A("mul %C1, %A2") \
63
-                 A("add r27, r0")  \
64
-                 A("adc %A0, r1")  \
65
-                 A("adc %B0, r26") \
66
-                 A("mul %B1, %A2") \
67
-                 A("add r27, r1")  \
68
-                 A("adc %A0, r26") \
69
-                 A("adc %B0, r26") \
70
-                 A("lsr r27")      \
71
-                 A("adc %A0, r26") \
72
-                 A("adc %B0, r26") \
73
-                 A("mul %D2, %A1") \
74
-                 A("add %A0, r0")  \
75
-                 A("adc %B0, r1")  \
76
-                 A("mul %D2, %B1") \
77
-                 A("add %B0, r0")  \
78
-                 A("clr r1")       \
79
-                 : \
80
-                 "=&r" (intRes) \
81
-                 : \
82
-                 "d" (longIn1), \
83
-                 "d" (longIn2) \
84
-                 : \
85
-                 "r26" , "r27" \
86
-               )
40
+static FORCE_INLINE uint16_t MultiU24X32toH16(uint32_t longIn1, uint32_t longIn2) {
41
+  register uint8_t tmp1;
42
+  register uint8_t tmp2;
43
+  register uint16_t intRes;
44
+  __asm__ __volatile__(
45
+    A("clr %[tmp1]")
46
+    A("mul %A[longIn1], %B[longIn2]")
47
+    A("mov %[tmp2], r1")
48
+    A("mul %B[longIn1], %C[longIn2]")
49
+    A("movw %A[intRes], r0")
50
+    A("mul %C[longIn1], %C[longIn2]")
51
+    A("add %B[intRes], r0")
52
+    A("mul %C[longIn1], %B[longIn2]")
53
+    A("add %A[intRes], r0")
54
+    A("adc %B[intRes], r1")
55
+    A("mul %A[longIn1], %C[longIn2]")
56
+    A("add %[tmp2], r0")
57
+    A("adc %A[intRes], r1")
58
+    A("adc %B[intRes], %[tmp1]")
59
+    A("mul %B[longIn1], %B[longIn2]")
60
+    A("add %[tmp2], r0")
61
+    A("adc %A[intRes], r1")
62
+    A("adc %B[intRes], %[tmp1]")
63
+    A("mul %C[longIn1], %A[longIn2]")
64
+    A("add %[tmp2], r0")
65
+    A("adc %A[intRes], r1")
66
+    A("adc %B[intRes], %[tmp1]")
67
+    A("mul %B[longIn1], %A[longIn2]")
68
+    A("add %[tmp2], r1")
69
+    A("adc %A[intRes], %[tmp1]")
70
+    A("adc %B[intRes], %[tmp1]")
71
+    A("lsr %[tmp2]")
72
+    A("adc %A[intRes], %[tmp1]")
73
+    A("adc %B[intRes], %[tmp1]")
74
+    A("mul %D[longIn2], %A[longIn1]")
75
+    A("add %A[intRes], r0")
76
+    A("adc %B[intRes], r1")
77
+    A("mul %D[longIn2], %B[longIn1]")
78
+    A("add %B[intRes], r0")
79
+    A("clr r1")
80
+      : [intRes] "=&r" (intRes),
81
+        [tmp1] "=&r" (tmp1),
82
+        [tmp2] "=&r" (tmp2)
83
+      : [longIn1] "d" (longIn1),
84
+        [longIn2] "d" (longIn2)
85
+      : "cc"
86
+  );
87
+  return intRes;
88
+}
87 89
 
88 90
 // intRes = intIn1 * intIn2 >> 16
89 91
 // uses:
90 92
 // r26 to store 0
91 93
 // r27 to store the byte 1 of the 24 bit result
92
-#define MultiU16X8toH16(intRes, charIn1, intIn2) \
93
-  asm volatile ( \
94
-                 A("clr r26")      \
95
-                 A("mul %A1, %B2") \
96
-                 A("movw %A0, r0") \
97
-                 A("mul %A1, %A2") \
98
-                 A("add %A0, r1")  \
99
-                 A("adc %B0, r26") \
100
-                 A("lsr r0")       \
101
-                 A("adc %A0, r26") \
102
-                 A("adc %B0, r26") \
103
-                 A("clr r1")       \
104
-                 : \
105
-                 "=&r" (intRes) \
106
-                 : \
107
-                 "d" (charIn1), \
108
-                 "d" (intIn2) \
109
-                 : \
110
-                 "r26" \
111
-               )
112
-
94
+static FORCE_INLINE uint16_t MultiU16X8toH16(uint8_t charIn1, uint16_t intIn2) {
95
+  register uint8_t tmp;
96
+  register uint16_t intRes;
97
+  __asm__ __volatile__ (
98
+    A("clr %[tmp]")
99
+    A("mul %[charIn1], %B[intIn2]")
100
+    A("movw %A[intRes], r0")
101
+    A("mul %[charIn1], %A[intIn2]")
102
+    A("add %A[intRes], r1")
103
+    A("adc %B[intRes], %[tmp]")
104
+    A("lsr r0")
105
+    A("adc %A[intRes], %[tmp]")
106
+    A("adc %B[intRes], %[tmp]")
107
+    A("clr r1")
108
+      : [intRes] "=&r" (intRes),
109
+        [tmp] "=&r" (tmp)
110
+      : [charIn1] "d" (charIn1),
111
+        [intIn2] "d" (intIn2)
112
+      : "cc"
113
+  );
114
+  return intRes;
115
+}
113 116
 
114 117
 #endif // _MATH_AVR_H_

+ 5
- 3
Marlin/src/HAL/math_32bit.h Ver fichero

@@ -23,11 +23,13 @@
23 23
 #ifndef MATH_32BIT_H
24 24
 #define MATH_32BIT_H
25 25
 
26
+#include "../core/macros.h"
27
+
26 28
 /**
27 29
  * Math helper functions for 32 bit CPUs
28 30
  */
29
-
30
-#define MultiU32X32toH32(intRes, longIn1, longIn2) intRes = ((uint64_t)longIn1 * longIn2 + 0x80000000) >> 32
31
-#define MultiU32X24toH32(intRes, longIn1, longIn2) intRes = ((uint64_t)longIn1 * longIn2 + 0x00800000) >> 24
31
+static FORCE_INLINE uint32_t MultiU32X24toH32(uint32_t longIn1, uint32_t longIn2) {
32
+  return ((uint64_t)longIn1 * longIn2 + 0x00800000) >> 24;
33
+}
32 34
 
33 35
 #endif // MATH_32BIT_H

+ 9
- 14
Marlin/src/module/stepper.cpp Ver fichero

@@ -1158,6 +1158,12 @@ HAL_STEP_TIMER_ISR {
1158 1158
   HAL_timer_isr_epilogue(STEP_TIMER_NUM);
1159 1159
 }
1160 1160
 
1161
+#ifdef CPU_32_BIT
1162
+  #define STEP_MULTIPLY(A,B) MultiU32X24toH32(A, B);
1163
+#else
1164
+  #define STEP_MULTIPLY(A,B) MultiU24X32toH16(A, B);
1165
+#endif
1166
+
1161 1167
 void Stepper::isr() {
1162 1168
 
1163 1169
   #define ENDSTOP_NOMINAL_OCR_VAL 1500 * HAL_TICKS_PER_US // Check endstops every 1.5ms to guarantee two stepper ISRs within 5ms for BLTouch
@@ -1525,14 +1531,7 @@ void Stepper::isr() {
1525 1531
           ? _eval_bezier_curve(acceleration_time)
1526 1532
           : current_block->cruise_rate;
1527 1533
     #else
1528
-      #ifdef CPU_32_BIT
1529
-        MultiU32X24toH32(acc_step_rate, acceleration_time, current_block->acceleration_rate);
1530
-      #else
1531
-        MultiU24X32toH16(acc_step_rate, acceleration_time, current_block->acceleration_rate);
1532
-      #endif
1533
-      acc_step_rate += current_block->initial_rate;
1534
-
1535
-      // upper limit
1534
+      acc_step_rate = STEP_MULTIPLY(acceleration_time, current_block->acceleration_rate) + current_block->initial_rate;
1536 1535
       NOMORE(acc_step_rate, current_block->nominal_rate);
1537 1536
     #endif
1538 1537
 
@@ -1576,18 +1575,14 @@ void Stepper::isr() {
1576 1575
     #else
1577 1576
 
1578 1577
       // Using the old trapezoidal control
1579
-      #ifdef CPU_32_BIT
1580
-        MultiU32X24toH32(step_rate, deceleration_time, current_block->acceleration_rate);
1581
-      #else
1582
-        MultiU24X32toH16(step_rate, deceleration_time, current_block->acceleration_rate);
1583
-      #endif
1584
-
1578
+      step_rate = STEP_MULTIPLY(deceleration_time, current_block->acceleration_rate);
1585 1579
       if (step_rate < acc_step_rate) { // Still decelerating?
1586 1580
         step_rate = acc_step_rate - step_rate;
1587 1581
         NOLESS(step_rate, current_block->final_rate);
1588 1582
       }
1589 1583
       else
1590 1584
         step_rate = current_block->final_rate;
1585
+
1591 1586
     #endif
1592 1587
 
1593 1588
     // step_rate to timer interval

+ 10
- 10
Marlin/src/module/stepper.h Ver fichero

@@ -340,24 +340,24 @@ class Stepper {
340 340
 
341 341
       #ifdef CPU_32_BIT
342 342
         // In case of high-performance processor, it is able to calculate in real-time
343
-        const uint32_t MIN_TIME_PER_STEP = (HAL_STEPPER_TIMER_RATE) / ((STEP_DOUBLER_FREQUENCY) * 2);
343
+        const uint32_t min_time_per_step = (HAL_STEPPER_TIMER_RATE) / ((STEP_DOUBLER_FREQUENCY) * 2);
344 344
         timer = uint32_t(HAL_STEPPER_TIMER_RATE) / step_rate;
345
-        NOLESS(timer, MIN_TIME_PER_STEP); // (STEP_DOUBLER_FREQUENCY * 2 kHz - this should never happen)
345
+        NOLESS(timer, min_time_per_step); // (STEP_DOUBLER_FREQUENCY * 2 kHz - this should never happen)
346 346
       #else
347 347
         NOLESS(step_rate, F_CPU / 500000);
348 348
         step_rate -= F_CPU / 500000; // Correct for minimal speed
349 349
         if (step_rate >= (8 * 256)) { // higher step rate
350
-          unsigned short table_address = (unsigned short)&speed_lookuptable_fast[(unsigned char)(step_rate >> 8)][0];
351
-          unsigned char tmp_step_rate = (step_rate & 0x00FF);
352
-          unsigned short gain = (unsigned short)pgm_read_word_near(table_address + 2);
353
-          MultiU16X8toH16(timer, tmp_step_rate, gain);
354
-          timer = (unsigned short)pgm_read_word_near(table_address) - timer;
350
+          uint8_t tmp_step_rate = (step_rate & 0x00FF);
351
+          uint16_t table_address = (uint16_t)&speed_lookuptable_fast[(uint8_t)(step_rate >> 8)][0];
352
+          uint16_t gain = (uint16_t)pgm_read_word_near(table_address + 2);
353
+          timer = MultiU16X8toH16(tmp_step_rate, gain);
354
+          timer = (uint16_t)pgm_read_word_near(table_address) - timer;
355 355
         }
356 356
         else { // lower step rates
357
-          unsigned short table_address = (unsigned short)&speed_lookuptable_slow[0][0];
357
+          uint16_t table_address = (uint16_t)&speed_lookuptable_slow[0][0];
358 358
           table_address += ((step_rate) >> 1) & 0xFFFC;
359
-          timer = (unsigned short)pgm_read_word_near(table_address);
360
-          timer -= (((unsigned short)pgm_read_word_near(table_address + 2) * (unsigned char)(step_rate & 0x0007)) >> 3);
359
+          timer = (uint16_t)pgm_read_word_near(table_address);
360
+          timer -= (((uint16_t)pgm_read_word_near(table_address + 2) * (uint8_t)(step_rate & 0x0007)) >> 3);
361 361
         }
362 362
         if (timer < 100) { // (20kHz - this should never happen)
363 363
           timer = 100;

Loading…
Cancelar
Guardar