123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- /**
- * @file genpages.c
- * @brief generate required font page files
- * @author Yunhui Fu (yhfudev@gmail.com)
- * @version 1.0
- * @date 2015-02-19
- * @copyright Yunhui Fu (2015)
- */
-
- #include <stdio.h>
- #include <stdint.h> /* uint8_t */
- #include <stdlib.h> /* size_t */
- #include <string.h>
- #include <assert.h>
- #include "getline.h"
-
/**
 * @brief Decode the UTF-8 sequence starting at pstart into a code point.
 *
 * @param pstart : pointer to the lead byte of the sequence
 *
 * @return the decoded value; 0 when the lead byte is not a valid lead
 *         (a continuation byte 10xxxxxx, or 0xFE / 0xFF)
 *
 * The lead byte alone decides how many trailing bytes are read (1 to 5);
 * the trailing bytes are not checked, only their low 6 bits are used.
 */
wchar_t get_val_utf82uni(uint8_t *pstart) {
    const uint8_t lead = pstart[0];
    unsigned mask = 0xE0;   /* prefix bits examined for the 2-byte form */
    size_t ntrail;
    size_t i;
    wchar_t code;

    /* 0xxxxxxx: single byte, the value is the byte itself */
    if (lead < 0x80) return lead;

    /*
     * Slide the prefix mask one bit at a time: 0xE0, 0xF0, 0xF8, 0xFC, 0xFE.
     * A lead byte for (ntrail + 1) bytes has all mask bits set except the
     * lowest one, i.e. it equals the mask shifted left by one (within 8 bits).
     */
    for (ntrail = 1; ntrail <= 5; ntrail++) {
        if ((lead & mask) == ((mask << 1) & 0xFF)) break;
        mask = (mask >> 1) | 0x80;
    }

    /* encoding error: no lead-byte pattern matched */
    if (ntrail > 5) return 0;

    /* payload bits of the lead byte are the ones outside the prefix mask */
    code = (wchar_t)(lead & ~mask & 0xFF);
    for (i = 1; i <= ntrail; i++) {
        code <<= 6;
        code |= pstart[i] & 0x3F;
    }
    return code;
}
-
/**
 * @brief Convert one UTF-8 encoded character to a native Unicode character (wchar_t)
 *
 * @param pstart : pointer to the UTF-8 bytes to decode; must not be NULL
 * @param pval : address that receives the decoded Unicode character; may be NULL
 *
 * @return the position of the next UTF-8 character (always past pstart)
 *
 * The 1- to 6-byte forms are accepted. Every trailing byte must be a
 * continuation byte (10xxxxxx); when one is not, decoding stops AT that byte
 * (it is not consumed) and the value is reported as 0. Because a NUL
 * terminator is never a continuation byte, a sequence truncated at the end of
 * a string cannot make the decoder read or advance past the terminator.
 *
 * A run of stray continuation bytes, or a run of 0xFE / 0xFF bytes, is skipped
 * as a whole and reported as 0. Overlong encodings and surrogate values are
 * not rejected.
 */
uint8_t* get_utf8_value(uint8_t *pstart, wchar_t *pval) {
    uint32_t val = 0;
    uint8_t *p = pstart;
    size_t ntrail = 0;

    assert(NULL != pstart);

    if (0 == (0x80 & *p)) {
        /* 0xxxxxxx: plain single byte (a NUL byte decodes as 0 and is consumed) */
        val = *p;
        p++;
    }
    else if (0x80 == (0xC0 & *p)) {
        /* error: continuation byte without a lead byte, skip the whole run */
        for (; 0x80 == (0xC0 & *p); p++);
    }
    else if (0xFE == (0xFE & *p)) {
        /* error: 0xFE / 0xFF never appear in UTF-8, skip the whole run */
        for (; 0xFE == (0xFE & *p); p++);
    }
    else {
        /* lead byte: pick the number of trailing bytes and the payload bits */
        if (0xC0 == (0xE0 & *p)) {
            ntrail = 1;
            val = *p & 0x1F;
        }
        else if (0xE0 == (0xF0 & *p)) {
            ntrail = 2;
            val = *p & 0x0F;
        }
        else if (0xF0 == (0xF8 & *p)) {
            ntrail = 3;
            val = *p & 0x07;
        }
        else if (0xF8 == (0xFC & *p)) {
            ntrail = 4;
            val = *p & 0x03;
        }
        else {
            /* only 1111110x is left at this point */
            ntrail = 5;
            val = *p & 0x01;
        }
        p++;

        for (; ntrail > 0; ntrail--) {
            if (0x80 != (0xC0 & *p)) {
                /* truncated or malformed sequence: leave the offending byte
                 * for the next call and report no character */
                val = 0;
                break;
            }
            val = (val << 6) | (uint32_t)(*p & 0x3F);
            p++;
        }
    }

    if (pval) *pval = (wchar_t)val;

    return p;
}
-
/**
 * @brief Print a short usage message to stderr.
 *
 * @param progname : program name shown in the usage line
 */
void usage(char* progname) {
    fprintf(stderr, "usage: %s\n", progname);
    fputs(" read data from stdin\n", stderr);
}
-
/**
 * @brief Scan a UTF-8 buffer and report every character whose value is >= 256.
 *
 * @param msg : start of the UTF-8 bytes
 * @param len : number of bytes to scan
 *
 * For each reported character one line is written to stdout:
 * "<value / 128> <value % 128> <the original UTF-8 bytes>".
 * Characters below 256 produce no output.
 */
void utf8_parse(const char* msg, unsigned int len) {
    uint8_t *pend = (uint8_t *)msg + len;
    uint8_t *pre = (uint8_t *)msg;

    while (pre < pend) {
        wchar_t val = 0;
        uint8_t *p = get_utf8_value(pre, &val);

        /* stop on decoder failure, and never loop without making progress */
        if (NULL == p || p <= pre) break;

        /* a sequence truncated at the end of the buffer may make the decoder
         * step beyond it; never echo bytes outside [msg, msg + len) */
        if (p > pend) p = pend;

        if (val >= 256) {
            /* the type behind wchar_t is implementation-defined, so cast to
             * int to match the %d conversions */
            fprintf(stdout, "%d %d ", (int)(val / 128), (int)(val % 128));
            fwrite(pre, 1, (size_t)(p - pre), stdout);
            fprintf(stdout, "\n");
        }
        pre = p;
    }
}
-
/**
 * @brief Read a stream line by line and run utf8_parse() on each line.
 *
 * @param fp : stream to read from
 *
 * @return 0 when the stream was read to the end,
 *         -1 when the line buffer cannot be allocated or the stream reports
 *         a read error
 */
int load_file(FILE *fp) {
    size_t szbuf = 10000;
    char *buffer = malloc(szbuf);
    int rc;

    if (NULL == buffer) return -1;

    /* buffer and szbuf are passed by address; presumably getline() may
     * reallocate the buffer for long lines, as POSIX getline does */
    while (getline(&buffer, &szbuf, fp) > 0) {
        utf8_parse((const char*)buffer, (unsigned int)strlen(buffer));
    }

    /* the loop ends on end-of-file and on failure alike; tell them apart */
    rc = ferror(fp) ? -1 : 0;

    free(buffer);
    return rc;
}
-
/**
 * @brief Program entry: takes no arguments and processes stdin.
 *
 * @return EXIT_SUCCESS when stdin was processed, EXIT_FAILURE when
 *         load_file() reports an error; exits with status 1 after printing
 *         the usage text when any argument is given
 */
int main(int argc, char * argv[]) {
    if (argc > 1) {
        usage(argv[0]);
        exit(1);
    }
    /* surface load_file() failures through the exit status */
    return (0 == load_file(stdin)) ? EXIT_SUCCESS : EXIT_FAILURE;
}
|