|
@@ -75,6 +75,11 @@ int pf_bsearch_r(void *userdata, size_t num_data, pf_bsearch_cb_comp_t cb_comp,
|
75
|
75
|
return -1;
|
76
|
76
|
}
|
77
|
77
|
|
|
78
|
/**
 * Tell whether a byte begins a UTF-8 character sequence.
 *
 * UTF-8 continuation bytes have the bit pattern 10xxxxxx; every other byte
 * value (single-byte/ASCII values, multi-byte lead bytes, and also 0x00) is
 * reported as a start byte.
 *
 * @param b  The byte to classify.
 * @return   true unless the two top bits of b are exactly 10.
 */
static inline bool utf8_is_start_byte_of_char(const uint8_t b) {
  return 0x80 != (b & 0xC0);
}
|
|
82
|
+
|
78
|
83
|
/* This function gets the character at the pstart position, interpreting UTF8 multibyte sequences
|
79
|
84
|
and returns the pointer to the next character */
|
80
|
85
|
uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t *pval) {
|
|
@@ -131,8 +136,8 @@ uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t
|
131
|
136
|
p++;
|
132
|
137
|
}
|
133
|
138
|
#endif
|
134
|
|
- else if (0x80 == (0xC0 & valcur))
|
135
|
|
- for (; 0x80 == (0xC0 & valcur); ) { p++; valcur = cb_read_byte(p); }
|
|
139
|
+ else if (!utf8_is_start_byte_of_char(valcur))
|
|
140
|
+ for (; !utf8_is_start_byte_of_char(valcur); ) { p++; valcur = cb_read_byte(p); }
|
136
|
141
|
else
|
137
|
142
|
for (; 0xFC < (0xFE & valcur); ) { p++; valcur = cb_read_byte(p); }
|
138
|
143
|
|
|
@@ -143,12 +148,12 @@ uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t
|
143
|
148
|
|
144
|
149
|
/**
 * Count the UTF-8 characters in a zero-terminated string.
 *
 * The string is walked one byte at a time through cb_read_byte. Each byte
 * that utf8_is_start_byte_of_char() accepts adds one to the count, so the
 * continuation bytes of a multi-byte sequence are skipped without decoding.
 *
 * @param pstart        Address of the first byte of the string.
 * @param cb_read_byte  Callback used to fetch the byte stored at an address.
 * @return              Number of start bytes seen before the terminating
 *                      zero byte. The counter is a uint8_t, so the result
 *                      wraps modulo 256 for longer strings.
 */
static inline uint8_t utf8_strlen_cb(const char *pstart, read_byte_cb_t cb_read_byte) {
  uint8_t cnt = 0;
  uint8_t *p = (uint8_t *)pstart;
  for (;;) {
    const uint8_t b = cb_read_byte(p);
    if (!b) break; // Termination byte of string
    if (utf8_is_start_byte_of_char(b)) cnt++;
    p++;
  }
  return cnt;
}
|
|
@@ -160,3 +165,26 @@ uint8_t utf8_strlen(const char *pstart) {
|
160
|
165
|
/**
 * Count the UTF-8 characters in a zero-terminated string whose bytes are
 * fetched with read_byte_rom.
 *
 * NOTE(review): the PGM_P type and the _P suffix suggest the string lives in
 * program memory - confirm against the definition of read_byte_rom.
 *
 * @param pstart  Address of the first byte of the string.
 * @return        Character count as computed by utf8_strlen_cb().
 */
uint8_t utf8_strlen_P(PGM_P pstart) {
  return utf8_strlen_cb(pstart, read_byte_rom);
}
|
|
168
|
+
|
|
169
|
+static inline uint8_t utf8_byte_pos_by_char_num_cb(const char *pstart, read_byte_cb_t cb_read_byte, const uint8_t charnum) {
|
|
170
|
+ uint8_t *p = (uint8_t *)pstart;
|
|
171
|
+ uint8_t char_idx = 0;
|
|
172
|
+ uint8_t byte_idx = 0;
|
|
173
|
+ for (;;) {
|
|
174
|
+ const uint8_t b = cb_read_byte(p + byte_idx);
|
|
175
|
+ if (!b) return byte_idx; // Termination byte of string
|
|
176
|
+ if (utf8_is_start_byte_of_char(b)) {
|
|
177
|
+ char_idx++;
|
|
178
|
+ if (char_idx == charnum + 1) return byte_idx;
|
|
179
|
+ }
|
|
180
|
+ byte_idx++;
|
|
181
|
+ }
|
|
182
|
+}
|
|
183
|
+
|
|
184
|
+uint8_t utf8_byte_pos_by_char_num(const char *pstart, const uint8_t charnum) {
|
|
185
|
+ return utf8_byte_pos_by_char_num_cb(pstart, read_byte_ram, charnum);
|
|
186
|
+}
|
|
187
|
+
|
|
188
|
+uint8_t utf8_byte_pos_by_char_num_P(PGM_P pstart, const uint8_t charnum) {
|
|
189
|
+ return utf8_byte_pos_by_char_num_cb(pstart, read_byte_rom, charnum);
|
|
190
|
+}
|