@@ -23,92 +23,95 @@
 #ifndef _MATH_AVR_H_
 #define _MATH_AVR_H_
 
-#define A(CODE) " " CODE "\n\t"
-
 /**
  * Optimized math functions for AVR
  */
 
 // intRes = longIn1 * longIn2 >> 24
 // uses:
-// r26 to store 0
-// r27 to store bits 16-23 of the 48bit result. The top bit is used to round the two byte result.
+// A[tmp] to store 0
+// B[tmp] to store bits 16-23 of the 48bit result. The top bit is used to round the two byte result.
 // note that the lower two bytes and the upper byte of the 48bit result are not calculated.
 // this can cause the result to be out by one as the lower bytes may cause carries into the upper ones.
-// B0 A0 are bits 24-39 and are the returned value
-// C1 B1 A1 is longIn1
-// D2 C2 B2 A2 is longIn2
+// B A are bits 24-39 and are the returned value
+// C B A is longIn1
+// D C B A is longIn2
 //
-#define MultiU24X32toH16(intRes, longIn1, longIn2) \
-  asm volatile ( \
-    A("clr r26") \
-    A("mul %A1, %B2") \
-    A("mov r27, r1") \
-    A("mul %B1, %C2") \
-    A("movw %A0, r0") \
-    A("mul %C1, %C2") \
-    A("add %B0, r0") \
-    A("mul %C1, %B2") \
-    A("add %A0, r0") \
-    A("adc %B0, r1") \
-    A("mul %A1, %C2") \
-    A("add r27, r0") \
-    A("adc %A0, r1") \
-    A("adc %B0, r26") \
-    A("mul %B1, %B2") \
-    A("add r27, r0") \
-    A("adc %A0, r1") \
-    A("adc %B0, r26") \
-    A("mul %C1, %A2") \
-    A("add r27, r0") \
-    A("adc %A0, r1") \
-    A("adc %B0, r26") \
-    A("mul %B1, %A2") \
-    A("add r27, r1") \
-    A("adc %A0, r26") \
-    A("adc %B0, r26") \
-    A("lsr r27") \
-    A("adc %A0, r26") \
-    A("adc %B0, r26") \
-    A("mul %D2, %A1") \
-    A("add %A0, r0") \
-    A("adc %B0, r1") \
-    A("mul %D2, %B1") \
-    A("add %B0, r0") \
-    A("clr r1") \
-    : \
-    "=&r" (intRes) \
-    : \
-    "d" (longIn1), \
-    "d" (longIn2) \
-    : \
-    "r26" , "r27" \
-  )
+static FORCE_INLINE uint16_t MultiU24X32toH16(uint32_t longIn1, uint32_t longIn2) {
+  register uint8_t tmp1;
+  register uint8_t tmp2;
+  register uint16_t intRes;
+  __asm__ __volatile__(
+    A("clr %[tmp1]")
+    A("mul %A[longIn1], %B[longIn2]")
+    A("mov %[tmp2], r1")
+    A("mul %B[longIn1], %C[longIn2]")
+    A("movw %A[intRes], r0")
+    A("mul %C[longIn1], %C[longIn2]")
+    A("add %B[intRes], r0")
+    A("mul %C[longIn1], %B[longIn2]")
+    A("add %A[intRes], r0")
+    A("adc %B[intRes], r1")
+    A("mul %A[longIn1], %C[longIn2]")
+    A("add %[tmp2], r0")
+    A("adc %A[intRes], r1")
+    A("adc %B[intRes], %[tmp1]")
+    A("mul %B[longIn1], %B[longIn2]")
+    A("add %[tmp2], r0")
+    A("adc %A[intRes], r1")
+    A("adc %B[intRes], %[tmp1]")
+    A("mul %C[longIn1], %A[longIn2]")
+    A("add %[tmp2], r0")
+    A("adc %A[intRes], r1")
+    A("adc %B[intRes], %[tmp1]")
+    A("mul %B[longIn1], %A[longIn2]")
+    A("add %[tmp2], r1")
+    A("adc %A[intRes], %[tmp1]")
+    A("adc %B[intRes], %[tmp1]")
+    A("lsr %[tmp2]")
+    A("adc %A[intRes], %[tmp1]")
+    A("adc %B[intRes], %[tmp1]")
+    A("mul %D[longIn2], %A[longIn1]")
+    A("add %A[intRes], r0")
+    A("adc %B[intRes], r1")
+    A("mul %D[longIn2], %B[longIn1]")
+    A("add %B[intRes], r0")
+    A("clr r1")
+    : [intRes] "=&r" (intRes),
+      [tmp1] "=&r" (tmp1),
+      [tmp2] "=&r" (tmp2)
+    : [longIn1] "d" (longIn1),
+      [longIn2] "d" (longIn2)
+    : "cc"
+  );
+  return intRes;
+}
 
 // intRes = intIn1 * intIn2 >> 16
 // uses:
 // r26 to store 0
 // r27 to store the byte 1 of the 24 bit result
-#define MultiU16X8toH16(intRes, charIn1, intIn2) \
-  asm volatile ( \
-    A("clr r26") \
-    A("mul %A1, %B2") \
-    A("movw %A0, r0") \
-    A("mul %A1, %A2") \
-    A("add %A0, r1") \
-    A("adc %B0, r26") \
-    A("lsr r0") \
-    A("adc %A0, r26") \
-    A("adc %B0, r26") \
-    A("clr r1") \
-    : \
-    "=&r" (intRes) \
-    : \
-    "d" (charIn1), \
-    "d" (intIn2) \
-    : \
-    "r26" \
-  )
-
+static FORCE_INLINE uint16_t MultiU16X8toH16(uint8_t charIn1, uint16_t intIn2) {
+  register uint8_t tmp;
+  register uint16_t intRes;
+  __asm__ __volatile__ (
+    A("clr %[tmp]")
+    A("mul %[charIn1], %B[intIn2]")
+    A("movw %A[intRes], r0")
+    A("mul %[charIn1], %A[intIn2]")
+    A("add %A[intRes], r1")
+    A("adc %B[intRes], %[tmp]")
+    A("lsr r0")
+    A("adc %A[intRes], %[tmp]")
+    A("adc %B[intRes], %[tmp]")
+    A("clr r1")
+    : [intRes] "=&r" (intRes),
+      [tmp] "=&r" (tmp)
+    : [charIn1] "d" (charIn1),
+      [intIn2] "d" (intIn2)
+    : "cc"
+  );
+  return intRes;
+}
 
 #endif // _MATH_AVR_H_
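One practical note on the conversion: the old macros wrote their result into an `intRes` out-parameter, while the new inline functions return it, so call sites change from `MultiU24X32toH16(intRes, a, b);` to `intRes = MultiU24X32toH16(a, b);`. The `A()` stringizing macro (its local definition is removed here) and `FORCE_INLINE` are assumed to be provided by another header included before this one.

For checking the routines on a host machine, the following is a minimal portable-C sketch of what the two functions compute. It is not part of this change; the `_ref` names and the test values are hypothetical. These models truncate exactly, while the asm applies a one-bit rounding step and skips the low-order partial products, so the asm result may differ from them by one, as the comments in the header warn.

#include <assert.h>
#include <stdint.h>

/* Hypothetical reference model: bits 24..39 of the product of a
 * 24-bit longIn1 ("C B A") and a 32-bit longIn2 ("D C B A"),
 * i.e. longIn1 * longIn2 >> 24, truncated to 16 bits. The mask
 * models the asm reading only the low three bytes of longIn1. */
static uint16_t MultiU24X32toH16_ref(uint32_t longIn1, uint32_t longIn2) {
  return (uint16_t)(((uint64_t)(longIn1 & 0xFFFFFFUL) * longIn2) >> 24);
}

/* Hypothetical reference model: charIn1 acts as the high byte of a
 * 16-bit operand, so "intIn1 * intIn2 >> 16" reduces to
 * (charIn1 * intIn2) >> 8; the 24-bit product always fits in
 * 16 bits after the shift. */
static uint16_t MultiU16X8toH16_ref(uint8_t charIn1, uint16_t intIn2) {
  return (uint16_t)(((uint32_t)charIn1 * intIn2) >> 8);
}

int main(void) {
  /* 512 * 2^28 = 2^37; >> 24 gives 2^13 = 8192. */
  assert(MultiU24X32toH16_ref(0x000200UL, 0x10000000UL) == 8192);
  /* 128 * 512 = 65536; >> 8 gives 256. */
  assert(MultiU16X8toH16_ref(0x80, 0x0200) == 256);
  return 0;
}

The hand-written asm keeps only the partial products that can reach the returned 16 bits, which is what makes it cheaper than a full-width multiply on the AVR and also why, as noted above, the result can be out by one.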