My Marlin configs for Fabrikator Mini and CTC i3 Pro B
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

Delay.h 7.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. /**
  2. * Marlin 3D Printer Firmware
  3. * Copyright (c) 2020 MarlinFirmware [https://github.com/MarlinFirmware/Marlin]
  4. *
  5. * Based on Sprinter and grbl.
  6. * Copyright (c) 2011 Camiel Gubbels / Erik van der Zalm
  7. *
  8. * This program is free software: you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation, either version 3 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with this program. If not, see <https://www.gnu.org/licenses/>.
  20. *
  21. */
  22. #pragma once
  23. #include "../../inc/MarlinConfigPre.h"
  24. /**
  25. * Busy wait delay cycles routines:
  26. *
  27. * DELAY_CYCLES(count): Delay execution in cycles
  28. * DELAY_NS(count): Delay execution in nanoseconds
  29. * DELAY_US(count): Delay execution in microseconds
  30. */
  31. #include "../../core/macros.h"
  32. void calibrate_delay_loop();
  33. #if defined(__arm__) || defined(__thumb__)
  // The cycle-delay routine is reached through a function pointer so that the
  // implementation with the lowest overhead for the running CPU can be chosen
  // at runtime.
  using DelayImpl = void (*)(uint32_t);
  extern DelayImpl DelayCycleFnc;

  // Cycle count from which a compile-time delay calls DelayCycleFnc instead of
  // emitting inline NOPs. Calling the cycle-waiting routine was measured at
  // about 36 cycles; the extra margin only costs a little more flash.
  #define TRIP_POINT_FOR_CALLING_FUNCTION 40
  39. // A simple recursive template class that output exactly one 'nop' of code per recursion
  40. template <int N> struct NopWriter {
  41. FORCE_INLINE static void build() {
  42. __asm__ __volatile__("nop");
  43. NopWriter<N-1>::build();
  44. }
  45. };
  46. // End the loop
  47. template <> struct NopWriter<0> { FORCE_INLINE static void build() {} };
  48. namespace Private {
  49. // Split recursing template in 2 different class so we don't reach the maximum template instantiation depth limit
  50. template <bool belowTP, int N> struct Helper {
  51. FORCE_INLINE static void build() {
  52. DelayCycleFnc(N - 2); // Approximative cost of calling the function (might be off by one or 2 cycles)
  53. }
  54. };
  55. template <int N> struct Helper<true, N> {
  56. FORCE_INLINE static void build() {
  57. NopWriter<N - 1>::build();
  58. }
  59. };
  60. template <> struct Helper<true, 0> {
  61. FORCE_INLINE static void build() {}
  62. };
  63. }
  64. // Select a behavior based on the constexpr'ness of the parameter
  65. // If called with a compile-time parameter, then write as many NOP as required to reach the asked cycle count
  66. // (there is some tripping point here to start looping when it's more profitable than gruntly executing NOPs)
  67. // If not called from a compile-time parameter, fallback to a runtime loop counting version instead
  68. template <bool compileTime, int Cycles>
  69. struct SmartDelay {
  70. FORCE_INLINE SmartDelay(int) {
  71. if (Cycles == 0) return;
  72. Private::Helper<Cycles < TRIP_POINT_FOR_CALLING_FUNCTION, Cycles>::build();
  73. }
  74. };
  75. // Runtime version below. There is no way this would run under less than ~TRIP_POINT_FOR_CALLING_FUNCTION cycles
  76. template <int T>
  77. struct SmartDelay<false, T> {
  78. FORCE_INLINE SmartDelay(int v) { DelayCycleFnc(v); }
  79. };
  // Busy-wait for X CPU cycles.
  // When IS_CONSTEXPR(X) is true, X is forwarded as the Cycles template argument
  // (compile-time path); otherwise SmartDelay<false, 0> is selected and X is
  // passed to the constructor at runtime.
  // X is parenthesized inside the template argument list so that an argument
  // containing '>' or '>>' (e.g. DELAY_CYCLES(n >> 1)) cannot end the list early.
  #define DELAY_CYCLES(X) do { SmartDelay<IS_CONSTEXPR(X), (IS_CONSTEXPR(X) ? (X) : 0)> _smrtdly_X(X); } while(0)

  // Delay in microseconds: no smart delay selection is required, so the delay
  // function is called directly with x scaled by the whole number of clock
  // cycles per microsecond (F_CPU / 1000000).
  // The Teensy compiler is too old to accept the smart delay compile-time /
  // run-time selection correctly.
  #define DELAY_US(x) DelayCycleFnc((x) * ((F_CPU) / 1000000UL))
  84. #elif defined(__AVR__)
  85. FORCE_INLINE static void __delay_up_to_3c(uint8_t cycles) {
  86. switch (cycles) {
  87. case 3:
  88. __asm__ __volatile__(A("RJMP .+0") A("NOP"));
  89. break;
  90. case 2:
  91. __asm__ __volatile__(A("RJMP .+0"));
  92. break;
  93. case 1:
  94. __asm__ __volatile__(A("NOP"));
  95. break;
  96. }
  97. }
  98. // Delay in cycles
  99. FORCE_INLINE static void DELAY_CYCLES(uint16_t cycles) {
  100. if (__builtin_constant_p(cycles)) {
  101. if (cycles <= 3) {
  102. __delay_up_to_3c(cycles);
  103. }
  104. else if (cycles == 4) {
  105. __delay_up_to_3c(2);
  106. __delay_up_to_3c(2);
  107. }
  108. else {
  109. cycles -= 1 + 4; // Compensate for the first LDI (1) and the first round (4)
  110. __delay_up_to_3c(cycles % 4);
  111. cycles /= 4;
  112. // The following code burns [1 + 4 * (rounds+1)] cycles
  113. uint16_t dummy;
  114. __asm__ __volatile__(
  115. // "manually" load counter from constants, otherwise the compiler may optimize this part away
  116. A("LDI %A[rounds], %[l]") // 1c
  117. A("LDI %B[rounds], %[h]") // 1c (compensating the non branching BRCC)
  118. L("1")
  119. A("SBIW %[rounds], 1") // 2c
  120. A("BRCC 1b") // 2c when branching, else 1c (end of loop)
  121. : // Outputs ...
  122. [rounds] "=w" (dummy) // Restrict to a wo (=) 16 bit register pair (w)
  123. : // Inputs ...
  124. [l] "M" (cycles%256), // Restrict to 0..255 constant (M)
  125. [h] "M" (cycles/256) // Restrict to 0..255 constant (M)
  126. :// Clobbers ...
  127. "cc" // Indicate we are modifying flags like Carry (cc)
  128. );
  129. }
  130. }
  131. else {
  132. __asm__ __volatile__(
  133. L("1")
  134. A("SBIW %[cycles], 4") // 2c
  135. A("BRCC 1b") // 2c when branching, else 1c (end of loop)
  136. : [cycles] "+w" (cycles) // output: Restrict to a rw (+) 16 bit register pair (w)
  137. : // input: -
  138. : "cc" // clobbers: We are modifying flags like Carry (cc)
  139. );
  140. }
  141. }
  // Delay in microseconds: x is scaled by the whole number of clock cycles per
  // microsecond (F_CPU / 1000000) and handed to DELAY_CYCLES.
  #define DELAY_US(x) DELAY_CYCLES((x) * ((F_CPU) / 1000000UL))
  144. #elif defined(ESP32) || defined(__PLAT_LINUX__) || defined(__PLAT_NATIVE_SIM__)
  145. // DELAY_CYCLES specified inside platform
  146. // Delay in microseconds
  147. #define DELAY_US(x) DELAY_CYCLES((x) * ((F_CPU) / 1000000UL))
  148. #else
  149. #error "Unsupported MCU architecture"
  150. #endif
  151. /**************************************************************
  152. * Delay in nanoseconds. Requires the F_CPU macro.
  153. * These macros follow avr-libc delay conventions.
  154. *
  155. * For AVR there are three possible operation modes, due to its
  156. * slower clock speeds and thus coarser delay resolution. For
  157. * example, when F_CPU = 16000000 the resolution is 62.5ns.
  158. *
  159. * Round up (default)
  160. * Round up the delay according to the CPU clock resolution.
  161. * e.g., 100 will give a delay of 2 cycles (125ns).
  162. *
  163. * Round down (DELAY_NS_ROUND_DOWN)
  164. * Round down the delay according to the CPU clock resolution.
  165. * e.g., 100 will be rounded down to 1 cycle (62.5ns).
  166. *
  167. * Nearest (DELAY_NS_ROUND_CLOSEST)
  168. * Round the delay to the nearest number of clock cycles.
  169. * e.g., 165 will be rounded up to 3 cycles (187.5ns) because
  170. * it's closer to the requested delay than 2 cycles (125ns).
  171. */
  172. #ifndef __AVR__
  173. #undef DELAY_NS_ROUND_DOWN
  174. #undef DELAY_NS_ROUND_CLOSEST
  175. #endif
  176. #if ENABLED(DELAY_NS_ROUND_DOWN)
  177. #define DELAY_NS(x) DELAY_CYCLES((x) * ((F_CPU) / 1000000UL) / 1000UL) // floor
  178. #elif ENABLED(DELAY_NS_ROUND_CLOSEST)
  179. #define DELAY_NS(x) DELAY_CYCLES(((x) * ((F_CPU) / 1000000UL) + 500) / 1000UL) // round
  180. #else
  181. #define DELAY_NS(x) DELAY_CYCLES(((x) * ((F_CPU) / 1000000UL) + 999) / 1000UL) // "ceil"
  182. #endif