Browse Source

Console using std::string. Added utf8-cpp dependency.

Thomas Buck 9 years ago
parent
commit
f39e9a85fe
10 changed files with 993 additions and 132 deletions
  1. 1
    1
      CMakeLists.txt
  2. 2
    0
      ChangeLog.md
  3. 2
    0
      README.md
  4. 0
    2
      TODO.md
  5. 6
    11
      include/Console.h
  6. 68
    118
      src/Console.cpp
  7. 34
    0
      src/deps/utf8-cpp/utf8.h
  8. 323
    0
      src/deps/utf8-cpp/utf8/checked.h
  9. 329
    0
      src/deps/utf8-cpp/utf8/core.h
  10. 228
    0
      src/deps/utf8-cpp/utf8/unchecked.h

+ 1
- 1
CMakeLists.txt View File

@@ -23,7 +23,7 @@ endif()
23 23
 
24 24
 # Include directories
25 25
 include_directories ("${PROJECT_SOURCE_DIR}/include")
26
-include_directories ("${PROJECT_SOURCE_DIR}/src/deps")
26
+include_directories (SYSTEM "${PROJECT_SOURCE_DIR}/src/deps")
27 27
 include_directories ("${PROJECT_BINARY_DIR}")
28 28
 
29 29
 # Include External Modules

+ 2
- 0
ChangeLog.md View File

@@ -10,6 +10,8 @@
10 10
     * Added API to Font to draw strings centered
11 11
     * Folder sorts its items alphabetically
12 12
     * Press dot key in Menu to see hidden files and folders
13
+    * Console is now using std::string instead of char *
14
+    * Added utf8-cpp dependency to allow Console to delete multi-byte chars
13 15
 
14 16
     [ 20140809 ]
15 17
     * Script Unit Test brings its own scripts to test

+ 2
- 0
README.md View File

@@ -177,3 +177,5 @@ See the respective files in `cmake` for their licensing.
177 177
 
178 178
 The [clibs/commander](https://github.com/clibs/commander) dependency is Copyright (c) 2012 TJ Holowaychuk (tj@vision-media.ca) and licensed under the [MIT License](http://opensource.org/licenses/MIT).
179 179
 
180
+The included [utf8-cpp](http://utfcpp.sourceforge.net) headers are Copyright (c) 2006 Nemanja Trifunovic. See the files in src/deps/utf8-cpp/ for information about the license used.
181
+

+ 0
- 2
TODO.md View File

@@ -21,9 +21,7 @@
21 21
 
22 22
 ## Future Features
23 23
 
24
-* File system utility layer, so mixed-case on case-sensitive filesystems is working transparently
25 24
 * Add ability to play the FMVs. Format? VLC can play them!
26 25
 * Cut TGA image reader, currently only used for menu background?!
27 26
     * Need useful, always available image writer alternative for screenshots then
28
-* When cutscene rendering is working, use TR4/5 style menu?
29 27
 

+ 6
- 11
include/Console.h View File

@@ -8,6 +8,7 @@
8 8
 #ifndef _CONSOLE_H_
9 9
 #define _CONSOLE_H_
10 10
 
11
+#include <string>
11 12
 #include <vector>
12 13
 
13 14
 /*!
@@ -21,11 +22,6 @@ public:
21 22
      */
22 23
     Console();
23 24
 
24
-    /*!
25
-     * \brief Deconstructs an object of Console
26
-     */
27
-    ~Console();
28
-
29 25
     void setVisible(bool visible);
30 26
 
31 27
     bool isVisible();
@@ -45,14 +41,13 @@ private:
45 41
     void moveInHistory(bool up);
46 42
 
47 43
     bool mVisible;
48
-    char *mInputBuffer;
49
-    size_t mInputBufferPointer;
50
-    char *mPartialInput;
51
-    std::vector<char *> mHistory;
44
+    std::string mInputBuffer;
45
+    std::string mPartialInput;
46
+    std::vector<std::string> mHistory;
52 47
 
53 48
     size_t mHistoryPointer;
54
-    std::vector<char *> mCommandHistory;
55
-    char *mUnfinishedInput;
49
+    std::vector<std::string> mCommandHistory;
50
+    std::string mUnfinishedInput;
56 51
 
57 52
     unsigned int mLineOffset;
58 53
 };

+ 68
- 118
src/Console.cpp View File

@@ -6,50 +6,22 @@
6 6
  */
7 7
 
8 8
 #include <iostream>
9
-#include <cstring>
10 9
 
11 10
 #include "global.h"
12 11
 #include "Font.h"
13 12
 #include "OpenRaider.h"
13
+#include "utf8-cpp/utf8.h"
14 14
 #include "utils/strings.h"
15 15
 #include "utils/time.h"
16 16
 #include "Window.h"
17 17
 #include "Console.h"
18 18
 
19
-#define INPUT_BUFFER_SIZE 255
20
-
21 19
 Console::Console() {
22 20
     mVisible = false;
23
-    mInputBuffer = new char[INPUT_BUFFER_SIZE + 1];
24
-    mInputBuffer[INPUT_BUFFER_SIZE] = '\0';
25
-    mInputBufferPointer = 0;
26
-    mPartialInput = NULL;
27 21
     mHistoryPointer = 0;
28
-    mUnfinishedInput = NULL;
29 22
     mLineOffset = 0;
30 23
 }
31 24
 
32
-Console::~Console() {
33
-    delete [] mInputBuffer;
34
-    mInputBuffer = NULL;
35
-
36
-    delete [] mPartialInput;
37
-    mPartialInput = NULL;
38
-
39
-    delete [] mUnfinishedInput;
40
-    mUnfinishedInput = NULL;
41
-
42
-    while (mHistory.size() > 0) {
43
-        delete [] mHistory.back();
44
-        mHistory.pop_back();
45
-    }
46
-
47
-    while (mCommandHistory.size() > 0) {
48
-        delete [] mCommandHistory.back();
49
-        mCommandHistory.pop_back();
50
-    }
51
-}
52
-
53 25
 void Console::setVisible(bool visible) {
54 26
     mVisible = visible;
55 27
     getWindow().setTextInput(mVisible);
@@ -66,78 +38,76 @@ void Console::print(const char *s, ...) {
66 38
     va_end(args);
67 39
 
68 40
     if (tmp != NULL) {
69
-        mHistory.push_back(tmp);
41
+        mHistory.push_back(std::string(tmp));
70 42
 #ifdef DEBUG
71 43
         std::cout << tmp << std::endl;
72 44
 #endif
73 45
     }
46
+
47
+    delete [] tmp;
74 48
 }
75 49
 
76
-#define LINE_GEOMETRY(window) unsigned int firstLine = 35; \
77
-        unsigned int lastLine = (window.getHeight() / 2) - 55; \
78
-        unsigned int inputLine = (window.getHeight() / 2) - 30; \
79
-        unsigned int lineSteps = 20; \
80
-        unsigned int lineCount = (lastLine - firstLine + lineSteps) / lineSteps; \
81
-        while (((lineCount * lineSteps) + firstLine) < inputLine) { \
82
-            lineSteps++; \
83
-            lineCount = (lastLine - firstLine + lineSteps) / lineSteps; \
84
-        }
50
+#define LINE_GEOMETRY(window) \
51
+    unsigned int firstLine = 35; \
52
+    unsigned int lastLine = (window.getHeight() / 2) - 55; \
53
+    unsigned int inputLine = (window.getHeight() / 2) - 30; \
54
+    unsigned int lineSteps = 20; \
55
+    unsigned int lineCount = (lastLine - firstLine + lineSteps) / lineSteps; \
56
+    while (((lineCount * lineSteps) + firstLine) < inputLine) { \
57
+        lineSteps++; \
58
+        lineCount = (lastLine - firstLine + lineSteps) / lineSteps; \
59
+    }
85 60
 
86 61
 void Console::display() {
87
-    if (mVisible) {
88
-        // Calculate line drawing geometry
89
-        // Depends on window height, so recalculate every time
90
-        LINE_GEOMETRY(getWindow());
91
-
92
-        // Draw half-transparent *overlay*
93
-        glColor4f(0.0f, 0.0f, 0.0f, 0.75f);
94
-        glDisable(GL_TEXTURE_2D);
95
-        glRecti(0, 0, getWindow().getWidth(), getWindow().getHeight() / 2);
96
-        glEnable(GL_TEXTURE_2D);
97
-
98
-        unsigned long scrollIndicator;
99
-        if (mHistory.size() > lineCount) {
100
-            scrollIndicator = (mHistory.size() - lineCount - mLineOffset) * 100 / (mHistory.size() - lineCount);
101
-        } else {
102
-            scrollIndicator = 100;
103
-            mLineOffset = 0;
104
-        }
62
+    if (!mVisible)
63
+        return;
105 64
 
106
-        getFont().drawText(10, 10, 0.70f, BLUE,
107
-                "%s uptime %lus scroll %d%%", VERSION, systemTimerGet() / 1000, scrollIndicator);
65
+    // Calculate line drawing geometry
66
+    // Depends on window height, so recalculate every time
67
+    LINE_GEOMETRY(getWindow());
108 68
 
109
-        // Draw output log
110
-        long end = lineCount;
111
-        long drawOffset = 0;
112
-        long historyOffset = 0;
113
-        if (mHistory.size() < lineCount) {
114
-            end = mHistory.size();
115
-            drawOffset = lineCount - mHistory.size();
116
-        } else if (lineCount < mHistory.size()) {
117
-            historyOffset = mHistory.size() - lineCount;
118
-        }
119
-        for (int i = 0; i < end; i++) {
120
-            getFont().drawText(10, (unsigned int)((i + drawOffset) * lineSteps) + firstLine,
121
-                    0.75f, BLUE, "%s", mHistory[i + historyOffset - mLineOffset]);
122
-        }
69
+    // Draw half-transparent *overlay*
70
+    glColor4f(0.0f, 0.0f, 0.0f, 0.75f);
71
+    glDisable(GL_TEXTURE_2D);
72
+    glRecti(0, 0, getWindow().getWidth(), getWindow().getHeight() / 2);
73
+    glEnable(GL_TEXTURE_2D);
123 74
 
124
-        // Draw current input
125
-        if ((mInputBufferPointer > 0) && (mInputBuffer[0] != '\0')) {
126
-            getFont().drawText(10, inputLine, 0.75f, BLUE, "> %s", mInputBuffer);
127
-        } else {
128
-            getFont().drawText(10, inputLine, 0.75f, BLUE, ">");
129
-        }
75
+    unsigned long scrollIndicator;
76
+    if (mHistory.size() > lineCount) {
77
+        scrollIndicator = (mHistory.size() - lineCount - mLineOffset) * 100 / (mHistory.size() - lineCount);
78
+    } else {
79
+        scrollIndicator = 100;
80
+        mLineOffset = 0;
81
+    }
130 82
 
131
-        //! \todo display the current mPartialInput. The UTF-8 segfaults SDL-TTF, somehow?
83
+    getFont().drawText(10, 10, 0.70f, BLUE,
84
+            "%s uptime %lus scroll %d%%", VERSION, systemTimerGet() / 1000, scrollIndicator);
85
+
86
+    // Draw output log
87
+    long end = lineCount;
88
+    long drawOffset = 0;
89
+    long historyOffset = 0;
90
+    if (mHistory.size() < lineCount) {
91
+        end = mHistory.size();
92
+        drawOffset = lineCount - mHistory.size();
93
+    } else if (lineCount < mHistory.size()) {
94
+        historyOffset = mHistory.size() - lineCount;
95
+    }
96
+    for (int i = 0; i < end; i++) {
97
+        getFont().drawText(10, (unsigned int)((i + drawOffset) * lineSteps) + firstLine,
98
+                0.75f, BLUE, "%s", mHistory[i + historyOffset - mLineOffset].c_str());
132 99
     }
100
+
101
+    // Draw current input
102
+    getFont().drawText(10, inputLine, 0.75f, BLUE, "> %s", (mInputBuffer + mPartialInput).c_str());
133 103
 }
134 104
 
135 105
 void Console::handleKeyboard(KeyboardButton key, bool pressed) {
136 106
     if (pressed && (key == enterKey)) {
137 107
         // Execute entered command
138
-        if ((mInputBufferPointer > 0) && (mInputBuffer[0] != '\0')) {
139
-            print("> %s", mInputBuffer);
140
-            mCommandHistory.push_back(bufferString("%s", mInputBuffer));
108
+        if (mInputBuffer.length() > 0) {
109
+            print("> %s", mInputBuffer.c_str());
110
+            mCommandHistory.push_back(mInputBuffer.c_str());
141 111
             int error = getOpenRaider().command(mInputBuffer);
142 112
             if (error != 0) {
143 113
                 print("Error Code: %d", error);
@@ -147,21 +117,16 @@ void Console::handleKeyboard(KeyboardButton key, bool pressed) {
147 117
         }
148 118
 
149 119
         // Clear partial and input buffer
150
-        mInputBufferPointer = 0;
151
-        mInputBuffer[0] = '\0';
152
-        if (mPartialInput != NULL) {
153
-            delete [] mPartialInput;
154
-            mPartialInput = NULL;
155
-        }
156
-
120
+        mInputBuffer = "";
121
+        mPartialInput = "";
157 122
         mHistoryPointer = 0;
158 123
     }
159 124
 
160
-    //! \fixme only deleting the last byte is not valid for non-ASCII UTF-8 strings
161 125
     if (pressed && (key == backspaceKey)) {
162
-        if (mInputBufferPointer > 0) {
163
-            mInputBufferPointer--;
164
-            mInputBuffer[mInputBufferPointer] = '\0';
126
+        if ((mPartialInput.length() == 0)
127
+                && (mInputBuffer.length() > 0)) {
128
+            utf8::iterator<std::string::iterator> it(mInputBuffer.end(), mInputBuffer.begin(), mInputBuffer.end());
129
+            mInputBuffer.erase((--it).base(), mInputBuffer.end());
165 130
         }
166 131
     }
167 132
 
@@ -178,7 +143,7 @@ void Console::moveInHistory(bool up) {
178 143
         if (mHistoryPointer < mCommandHistory.size()) {
179 144
             mHistoryPointer++;
180 145
             if (mHistoryPointer == 1) {
181
-                mUnfinishedInput = bufferString("%s", mInputBuffer);
146
+                mUnfinishedInput = mInputBuffer;
182 147
             }
183 148
         } else {
184 149
             return;
@@ -191,17 +156,13 @@ void Console::moveInHistory(bool up) {
191 156
     }
192 157
 
193 158
     if ((mHistoryPointer > 0) && (mHistoryPointer <= mCommandHistory.size())) {
194
-        strcpy(mInputBuffer, mCommandHistory[mCommandHistory.size() - mHistoryPointer]);
195
-        mInputBufferPointer = strlen(mInputBuffer);
159
+        mInputBuffer = mCommandHistory[mCommandHistory.size() - mHistoryPointer];
196 160
     } else {
197
-        if (mUnfinishedInput != NULL) {
198
-            strcpy(mInputBuffer, mUnfinishedInput);
199
-            mInputBufferPointer = strlen(mInputBuffer);
200
-            delete [] mUnfinishedInput;
201
-            mUnfinishedInput = NULL;
161
+        if (mUnfinishedInput.length() > 0) {
162
+            mInputBuffer = mUnfinishedInput;
163
+            mUnfinishedInput = "";
202 164
         } else {
203
-            mInputBuffer[0] = '\0';
204
-            mInputBufferPointer = 0;
165
+            mInputBuffer = "";
205 166
         }
206 167
     }
207 168
 }
@@ -213,28 +174,17 @@ void Console::handleText(char *text, bool notFinished) {
213 174
     if (!notFinished) {
214 175
         // Finished entering character
215 176
         // delete previous partial character, if present
216
-        if (mPartialInput != NULL) {
217
-            delete [] mPartialInput;
218
-        }
177
+        mPartialInput = "";
219 178
 
220 179
         //! \fixme Temporary hack filtering the console activation key
221 180
         if (text[0] == '`')
222 181
             return;
223 182
 
224 183
         // Append new input to buffer
225
-        size_t length = strlen(text);
226
-        if (length > 0) {
227
-            if (((INPUT_BUFFER_SIZE - mInputBufferPointer) < length)) {
228
-                print("Console input buffer overflowed! (> %d)", INPUT_BUFFER_SIZE);
229
-                return;
230
-            }
231
-            strcpy((mInputBuffer + mInputBufferPointer), text);
232
-            mInputBufferPointer += length;
233
-            mInputBuffer[mInputBufferPointer] = '\0';
234
-        }
184
+        mInputBuffer += text;
235 185
     } else {
236 186
         // Partial character received
237
-        mPartialInput = bufferString("%s", text);
187
+        mPartialInput = text;
238 188
     }
239 189
 }
240 190
 

+ 34
- 0
src/deps/utf8-cpp/utf8.h View File

@@ -0,0 +1,34 @@
1
+// Copyright 2006 Nemanja Trifunovic
2
+
3
+/*
4
+Permission is hereby granted, free of charge, to any person or organization
5
+obtaining a copy of the software and accompanying documentation covered by
6
+this license (the "Software") to use, reproduce, display, distribute,
7
+execute, and transmit the Software, and to prepare derivative works of the
8
+Software, and to permit third-parties to whom the Software is furnished to
9
+do so, all subject to the following:
10
+
11
+The copyright notices in the Software and this entire statement, including
12
+the above license grant, this restriction and the following disclaimer,
13
+must be included in all copies of the Software, in whole or in part, and
14
+all derivative works of the Software, unless such copies or derivative
15
+works are solely in the form of machine-executable object code generated by
16
+a source language processor.
17
+
18
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
21
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
22
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
23
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24
+DEALINGS IN THE SOFTWARE.
25
+*/
26
+
27
+
28
+#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
29
+#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
30
+
31
+#include "utf8/checked.h"
32
+#include "utf8/unchecked.h"
33
+
34
+#endif // header guard

+ 323
- 0
src/deps/utf8-cpp/utf8/checked.h View File

@@ -0,0 +1,323 @@
1
+// Copyright 2006 Nemanja Trifunovic
2
+
3
+/*
4
+Permission is hereby granted, free of charge, to any person or organization
5
+obtaining a copy of the software and accompanying documentation covered by
6
+this license (the "Software") to use, reproduce, display, distribute,
7
+execute, and transmit the Software, and to prepare derivative works of the
8
+Software, and to permit third-parties to whom the Software is furnished to
9
+do so, all subject to the following:
10
+
11
+The copyright notices in the Software and this entire statement, including
12
+the above license grant, this restriction and the following disclaimer,
13
+must be included in all copies of the Software, in whole or in part, and
14
+all derivative works of the Software, unless such copies or derivative
15
+works are solely in the form of machine-executable object code generated by
16
+a source language processor.
17
+
18
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
21
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
22
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
23
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24
+DEALINGS IN THE SOFTWARE.
25
+*/
26
+
27
+
28
+#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
29
+#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
30
+
31
+#include "core.h"
32
+#include <stdexcept>
33
+
34
+namespace utf8
35
+{
36
+    // Exceptions that may be thrown from the library functions.
37
+    class invalid_code_point : public ::std::exception {
38
+        uint32_t cp;
39
+    public:
40
+        invalid_code_point(uint32_t _cp) : cp(_cp) {}
41
+        virtual const char* what() const noexcept { return "Invalid code point"; }
42
+        uint32_t code_point() const {return cp;}
43
+    };
44
+
45
+    class invalid_utf8 : public ::std::exception {
46
+        uint8_t u8;
47
+    public:
48
+        invalid_utf8 (uint8_t u) : u8(u) {}
49
+        virtual const char* what() const noexcept { return "Invalid UTF-8"; }
50
+        uint8_t utf8_octet() const {return u8;}
51
+    };
52
+
53
+    class invalid_utf16 : public ::std::exception {
54
+        uint16_t u16;
55
+    public:
56
+        invalid_utf16 (uint16_t u) : u16(u) {}
57
+        virtual const char* what() const noexcept { return "Invalid UTF-16"; }
58
+        uint16_t utf16_word() const {return u16;}
59
+    };
60
+
61
+    class not_enough_room : public ::std::exception {
62
+    public:
63
+        virtual const char* what() const noexcept { return "Not enough space"; }
64
+    };
65
+
66
+    /// The library API - functions intended to be called by the users
67
+
68
+    template <typename octet_iterator>
69
+    octet_iterator append(uint32_t cp, octet_iterator result)
70
+    {
71
+        if (!utf8::internal::is_code_point_valid(cp))
72
+            throw invalid_code_point(cp);
73
+
74
+        if (cp < 0x80)                        // one octet
75
+            *(result++) = static_cast<uint8_t>(cp);
76
+        else if (cp < 0x800) {                // two octets
77
+            *(result++) = static_cast<uint8_t>((cp >> 6)            | 0xc0);
78
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
79
+        }
80
+        else if (cp < 0x10000) {              // three octets
81
+            *(result++) = static_cast<uint8_t>((cp >> 12)           | 0xe0);
82
+            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
83
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
84
+        }
85
+        else {                                // four octets
86
+            *(result++) = static_cast<uint8_t>((cp >> 18)           | 0xf0);
87
+            *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)  | 0x80);
88
+            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
89
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
90
+        }
91
+        return result;
92
+    }
93
+
94
+    template <typename octet_iterator, typename output_iterator>
95
+    output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
96
+    {
97
+        while (start != end) {
98
+            octet_iterator sequence_start = start;
99
+            internal::utf_error err_code = utf8::internal::validate_next(start, end);
100
+            switch (err_code) {
101
+                case internal::UTF8_OK :
102
+                    for (octet_iterator it = sequence_start; it != start; ++it)
103
+                        *out++ = *it;
104
+                    break;
105
+                case internal::NOT_ENOUGH_ROOM:
106
+                    throw not_enough_room();
107
+                case internal::INVALID_LEAD:
108
+                    out = utf8::append (replacement, out);
109
+                    ++start;
110
+                    break;
111
+                case internal::INCOMPLETE_SEQUENCE:
112
+                case internal::OVERLONG_SEQUENCE:
113
+                case internal::INVALID_CODE_POINT:
114
+                    out = utf8::append (replacement, out);
115
+                    ++start;
116
+                    // just one replacement mark for the sequence
117
+                    while (start != end && utf8::internal::is_trail(*start))
118
+                        ++start;
119
+                    break;
120
+            }
121
+        }
122
+        return out;
123
+    }
124
+
125
+    template <typename octet_iterator, typename output_iterator>
126
+    inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
127
+    {
128
+        static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
129
+        return utf8::replace_invalid(start, end, out, replacement_marker);
130
+    }
131
+
132
+    template <typename octet_iterator>
133
+    uint32_t next(octet_iterator& it, octet_iterator end)
134
+    {
135
+        uint32_t cp = 0;
136
+        internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
137
+        switch (err_code) {
138
+            case internal::UTF8_OK :
139
+                break;
140
+            case internal::NOT_ENOUGH_ROOM :
141
+                throw not_enough_room();
142
+            case internal::INVALID_LEAD :
143
+            case internal::INCOMPLETE_SEQUENCE :
144
+            case internal::OVERLONG_SEQUENCE :
145
+                throw invalid_utf8(*it);
146
+            case internal::INVALID_CODE_POINT :
147
+                throw invalid_code_point(cp);
148
+        }
149
+        return cp;
150
+    }
151
+
152
+    template <typename octet_iterator>
153
+    uint32_t peek_next(octet_iterator it, octet_iterator end)
154
+    {
155
+        return utf8::next(it, end);
156
+    }
157
+
158
+    template <typename octet_iterator>
159
+    uint32_t prior(octet_iterator& it, octet_iterator start)
160
+    {
161
+        // can't do much if it == start
162
+        if (it == start)
163
+            throw not_enough_room();
164
+
165
+        octet_iterator end = it;
166
+        // Go back until we hit either a lead octet or start
167
+        while (utf8::internal::is_trail(*(--it)))
168
+            if (it == start)
169
+                throw invalid_utf8(*it); // error - no lead byte in the sequence
170
+        return utf8::peek_next(it, end);
171
+    }
172
+
173
+    /// Deprecated in versions that include "prior"
174
+    template <typename octet_iterator>
175
+    uint32_t previous(octet_iterator& it, octet_iterator pass_start)
176
+    {
177
+        octet_iterator end = it;
178
+        while (utf8::internal::is_trail(*(--it)))
179
+            if (it == pass_start)
180
+                throw invalid_utf8(*it); // error - no lead byte in the sequence
181
+        octet_iterator temp = it;
182
+        return utf8::next(temp, end);
183
+    }
184
+
185
+    template <typename octet_iterator, typename distance_type>
186
+    void advance (octet_iterator& it, distance_type n, octet_iterator end)
187
+    {
188
+        for (distance_type i = 0; i < n; ++i)
189
+            utf8::next(it, end);
190
+    }
191
+
192
+    template <typename octet_iterator>
193
+    typename std::iterator_traits<octet_iterator>::difference_type
194
+    distance (octet_iterator first, octet_iterator last)
195
+    {
196
+        typename std::iterator_traits<octet_iterator>::difference_type dist;
197
+        for (dist = 0; first < last; ++dist)
198
+            utf8::next(first, last);
199
+        return dist;
200
+    }
201
+
202
+    template <typename u16bit_iterator, typename octet_iterator>
203
+    octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
204
+    {
205
+        while (start != end) {
206
+            uint32_t cp = utf8::internal::mask16(*start++);
207
+            // Take care of surrogate pairs first
208
+            if (utf8::internal::is_lead_surrogate(cp)) {
209
+                if (start != end) {
210
+                    uint32_t trail_surrogate = utf8::internal::mask16(*start++);
211
+                    if (utf8::internal::is_trail_surrogate(trail_surrogate))
212
+                        cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
213
+                    else
214
+                        throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
215
+                }
216
+                else
217
+                    throw invalid_utf16(static_cast<uint16_t>(cp));
218
+
219
+            }
220
+            // Lone trail surrogate
221
+            else if (utf8::internal::is_trail_surrogate(cp))
222
+                throw invalid_utf16(static_cast<uint16_t>(cp));
223
+
224
+            result = utf8::append(cp, result);
225
+        }
226
+        return result;
227
+    }
228
+
229
+    template <typename u16bit_iterator, typename octet_iterator>
230
+    u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
231
+    {
232
+        while (start != end) {
233
+            uint32_t cp = utf8::next(start, end);
234
+            if (cp > 0xffff) { //make a surrogate pair
235
+                *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
236
+                *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
237
+            }
238
+            else
239
+                *result++ = static_cast<uint16_t>(cp);
240
+        }
241
+        return result;
242
+    }
243
+
244
+    template <typename octet_iterator, typename u32bit_iterator>
245
+    octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
246
+    {
247
+        while (start != end)
248
+            result = utf8::append(*(start++), result);
249
+
250
+        return result;
251
+    }
252
+
253
+    template <typename octet_iterator, typename u32bit_iterator>
254
+    u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
255
+    {
256
+        while (start != end)
257
+            (*result++) = utf8::next(start, end);
258
+
259
+        return result;
260
+    }
261
+
262
+    // The iterator class
263
+    template <typename octet_iterator>
264
+    class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
265
+      octet_iterator it;
266
+      octet_iterator range_start;
267
+      octet_iterator range_end;
268
+      public:
269
+      iterator () {}
270
+      explicit iterator (const octet_iterator& octet_it,
271
+                         const octet_iterator& _range_start,
272
+                         const octet_iterator& _range_end) :
273
+               it(octet_it), range_start(_range_start), range_end(_range_end)
274
+      {
275
+          if (it < range_start || it > range_end)
276
+              throw std::out_of_range("Invalid utf-8 iterator position");
277
+      }
278
+      // the default "big three" are OK
279
+      octet_iterator base () const { return it; }
280
+      uint32_t operator * () const
281
+      {
282
+          octet_iterator temp = it;
283
+          return utf8::next(temp, range_end);
284
+      }
285
+      bool operator == (const iterator& rhs) const
286
+      {
287
+          if (range_start != rhs.range_start || range_end != rhs.range_end)
288
+              throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
289
+          return (it == rhs.it);
290
+      }
291
+      bool operator != (const iterator& rhs) const
292
+      {
293
+          return !(operator == (rhs));
294
+      }
295
+      iterator& operator ++ ()
296
+      {
297
+          utf8::next(it, range_end);
298
+          return *this;
299
+      }
300
+      iterator operator ++ (int)
301
+      {
302
+          iterator temp = *this;
303
+          utf8::next(it, range_end);
304
+          return temp;
305
+      }
306
+      iterator& operator -- ()
307
+      {
308
+          utf8::prior(it, range_start);
309
+          return *this;
310
+      }
311
+      iterator operator -- (int)
312
+      {
313
+          iterator temp = *this;
314
+          utf8::prior(it, range_start);
315
+          return temp;
316
+      }
317
+    }; // class iterator
318
+
319
+} // namespace utf8
320
+
321
+#endif //header guard
322
+
323
+

+ 329
- 0
src/deps/utf8-cpp/utf8/core.h View File

@@ -0,0 +1,329 @@
1
+// Copyright 2006 Nemanja Trifunovic
2
+
3
+/*
4
+Permission is hereby granted, free of charge, to any person or organization
5
+obtaining a copy of the software and accompanying documentation covered by
6
+this license (the "Software") to use, reproduce, display, distribute,
7
+execute, and transmit the Software, and to prepare derivative works of the
8
+Software, and to permit third-parties to whom the Software is furnished to
9
+do so, all subject to the following:
10
+
11
+The copyright notices in the Software and this entire statement, including
12
+the above license grant, this restriction and the following disclaimer,
13
+must be included in all copies of the Software, in whole or in part, and
14
+all derivative works of the Software, unless such copies or derivative
15
+works are solely in the form of machine-executable object code generated by
16
+a source language processor.
17
+
18
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
21
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
22
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
23
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24
+DEALINGS IN THE SOFTWARE.
25
+*/
26
+
27
+
28
+#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
29
+#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
30
+
31
+#include <iterator>
32
+
33
namespace utf8
{
    // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers.
    // You may need to change them to match your system.
    // These typedefs have the same names as ones from cstdint, or boost/cstdint,
    // but they live inside namespace utf8.
    typedef unsigned char   uint8_t;
    typedef unsigned short  uint16_t;
    typedef unsigned int    uint32_t;

// Helper code - not intended to be directly called by the library users. May be changed at any time
namespace internal
{
    // Unicode constants
    // Leading (high) surrogates: 0xd800 - 0xdbff
    // Trailing (low) surrogates: 0xdc00 - 0xdfff
    const uint16_t LEAD_SURROGATE_MIN  = 0xd800u;
    const uint16_t LEAD_SURROGATE_MAX  = 0xdbffu;
    const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
    const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
    const uint16_t LEAD_OFFSET         = LEAD_SURROGATE_MIN - (0x10000 >> 10);
    const uint32_t SURROGATE_OFFSET    = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;

    // Maximum valid value for a Unicode code point
    const uint32_t CODE_POINT_MAX      = 0x0010ffffu;

    /// Returns the low 8 bits of oc as an unsigned octet.
    /// Used so that octets read through a (possibly signed) char compare as 0..255.
    template<typename octet_type>
    inline uint8_t mask8(octet_type oc)
    {
        return static_cast<uint8_t>(0xff & oc);
    }

    /// Returns the low 16 bits of oc as an unsigned 16-bit value.
    template<typename u16_type>
    inline uint16_t mask16(u16_type oc)
    {
        return static_cast<uint16_t>(0xffff & oc);
    }

    /// True if oc has the bit pattern 10xxxxxx, i.e. it is a UTF-8 trail (continuation) octet.
    template<typename octet_type>
    inline bool is_trail(octet_type oc)
    {
        return ((utf8::internal::mask8(oc) >> 6) == 0x2);
    }

    /// True if cp lies in the leading (high) surrogate range.
    template <typename u16>
    inline bool is_lead_surrogate(u16 cp)
    {
        return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
    }

    /// True if cp lies in the trailing (low) surrogate range.
    template <typename u16>
    inline bool is_trail_surrogate(u16 cp)
    {
        return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
    }

    /// True if cp is any surrogate (leading or trailing).
    template <typename u16>
    inline bool is_surrogate(u16 cp)
    {
        return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
    }

    /// True if cp does not exceed CODE_POINT_MAX and is not a surrogate.
    template <typename u32>
    inline bool is_code_point_valid(u32 cp)
    {
        return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
    }

    /// Returns the length (1-4) of the sequence announced by the lead octet
    /// *lead_it, or 0 if that octet does not match any lead pattern.
    /// Dereferences lead_it, so the caller must make sure it is dereferenceable.
    template <typename octet_iterator>
    inline typename std::iterator_traits<octet_iterator>::difference_type
    sequence_length(octet_iterator lead_it)
    {
        uint8_t lead = utf8::internal::mask8(*lead_it);
        if (lead < 0x80)
            return 1;
        else if ((lead >> 5) == 0x6)
            return 2;
        else if ((lead >> 4) == 0xe)
            return 3;
        else if ((lead >> 3) == 0x1e)
            return 4;
        else
            return 0;
    }

    /// True if cp was decoded from a sequence of `length` octets although it
    /// belongs to a range with a different (shorter) canonical length:
    /// below 0x80 -> 1 octet, below 0x800 -> 2 octets, below 0x10000 -> 3 octets.
    template <typename octet_difference_type>
    inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
    {
        if (cp < 0x80) {
            if (length != 1)
                return true;
        }
        else if (cp < 0x800) {
            if (length != 2)
                return true;
        }
        else if (cp < 0x10000) {
            if (length != 3)
                return true;
        }

        return false;
    }

    enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};

    /// Helper for get_sequence_x.
    /// Advances it by one octet; reports NOT_ENOUGH_ROOM if that reaches end and
    /// INCOMPLETE_SEQUENCE if the octet now pointed to is not a trail octet.
    template <typename octet_iterator>
    utf_error increase_safely(octet_iterator& it, octet_iterator end)
    {
        if (++it == end)
            return NOT_ENOUGH_ROOM;

        if (!utf8::internal::is_trail(*it))
            return INCOMPLETE_SEQUENCE;

        return UTF8_OK;
    }

    #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}

    /// get_sequence_x functions decode utf-8 sequences of the length x.
    /// On success `it` is left on the LAST octet of the sequence (not past it)
    /// and code_point holds the decoded value; no range/overlong checks are done here.
    template <typename octet_iterator>
    utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        if (it == end)
            return NOT_ENOUGH_ROOM;

        code_point = utf8::internal::mask8(*it);

        return UTF8_OK;
    }

    template <typename octet_iterator>
    utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        if (it == end)
            return NOT_ENOUGH_ROOM;

        code_point = utf8::internal::mask8(*it);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        // Keep the 5 payload bits of the lead octet, then add the 6 bits of the trail octet.
        code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);

        return UTF8_OK;
    }

    template <typename octet_iterator>
    utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        if (it == end)
            return NOT_ENOUGH_ROOM;

        code_point = utf8::internal::mask8(*it);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        // 4 payload bits from the lead octet + 6 bits from the first trail octet.
        code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        code_point += (*it) & 0x3f;

        return UTF8_OK;
    }

    template <typename octet_iterator>
    utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        if (it == end)
           return NOT_ENOUGH_ROOM;

        code_point = utf8::internal::mask8(*it);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        // 3 payload bits from the lead octet + 6 bits from the first trail octet.
        code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;

        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

        code_point += (*it) & 0x3f;

        return UTF8_OK;
    }

    #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR

    /// Decodes and validates the sequence starting at it.
    ///
    /// On success returns UTF8_OK, stores the decoded value in code_point and
    /// advances it past the sequence. On any failure the corresponding
    /// utf_error is returned, it is restored to its original position and
    /// code_point is left untouched.
    ///
    /// An empty range (it == end) yields NOT_ENOUGH_ROOM without dereferencing it.
    template <typename octet_iterator>
    utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
    {
        // Nothing to decode: must be checked before sequence_length(), which
        // dereferences the iterator and would otherwise read past the range.
        if (it == end)
            return NOT_ENOUGH_ROOM;

        // Save the original value of it so we can go back in case of failure
        // Of course, it does not make much sense with e.g. stream iterators
        octet_iterator original_it = it;

        uint32_t cp = 0;
        // Determine the sequence length based on the lead octet
        typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
        const octet_difference_type length = utf8::internal::sequence_length(it);

        // Get trail octets and calculate the code point
        utf_error err = UTF8_OK;
        switch (length) {
            case 0:
                return INVALID_LEAD;
            case 1:
                err = utf8::internal::get_sequence_1(it, end, cp);
                break;
            case 2:
                err = utf8::internal::get_sequence_2(it, end, cp);
                break;
            case 3:
                err = utf8::internal::get_sequence_3(it, end, cp);
                break;
            case 4:
                err = utf8::internal::get_sequence_4(it, end, cp);
                break;
        }

        if (err == UTF8_OK) {
            // Decoding succeeded. Now, security checks...
            if (utf8::internal::is_code_point_valid(cp)) {
                if (!utf8::internal::is_overlong_sequence(cp, length)){
                    // Passed! Return here.
                    code_point = cp;
                    // get_sequence_x leaves it on the last octet; step past it.
                    ++it;
                    return UTF8_OK;
                }
                else
                    err = OVERLONG_SEQUENCE;
            }
            else
                err = INVALID_CODE_POINT;
        }

        // Failure branch - restore the original value of the iterator
        it = original_it;
        return err;
    }

    /// Overload of validate_next for callers that do not need the decoded code point.
    template <typename octet_iterator>
    inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
        uint32_t ignored;
        return utf8::internal::validate_next(it, end, ignored);
    }

} // namespace internal

    /// The library API - functions intended to be called by the users

    // Byte order mark
    const uint8_t bom[] = {0xef, 0xbb, 0xbf};

    /// Returns an iterator to the start of the first sequence in [start, end)
    /// that fails validation, or end if every sequence validates.
    template <typename octet_iterator>
    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
    {
        octet_iterator result = start;
        while (result != end) {
            utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end);
            if (err_code != internal::UTF8_OK)
                return result;
        }
        return result;
    }

    /// True if [start, end) contains only sequences that pass validation.
    template <typename octet_iterator>
    inline bool is_valid(octet_iterator start, octet_iterator end)
    {
        return (utf8::find_invalid(start, end) == end);
    }

    /// True if [it, end) begins with the three octets of the byte order mark.
    /// Never reads at or beyond end.
    template <typename octet_iterator>
    inline bool starts_with_bom (octet_iterator it, octet_iterator end)
    {
        return (
            ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) &&
            ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
            ((it != end) && (utf8::internal::mask8(*it))   == bom[2])
           );
    }

    //Deprecated in release 2.3
    /// Like starts_with_bom but without an end iterator: reads up to three
    /// octets starting at it with no bounds check. Prefer starts_with_bom.
    template <typename octet_iterator>
    inline bool is_bom (octet_iterator it)
    {
        return (
            (utf8::internal::mask8(*it++)) == bom[0] &&
            (utf8::internal::mask8(*it++)) == bom[1] &&
            (utf8::internal::mask8(*it))   == bom[2]
           );
    }
} // namespace utf8
326
+
327
+#endif // header guard
328
+
329
+

+ 228
- 0
src/deps/utf8-cpp/utf8/unchecked.h View File

@@ -0,0 +1,228 @@
1
+// Copyright 2006 Nemanja Trifunovic
2
+
3
+/*
4
+Permission is hereby granted, free of charge, to any person or organization
5
+obtaining a copy of the software and accompanying documentation covered by
6
+this license (the "Software") to use, reproduce, display, distribute,
7
+execute, and transmit the Software, and to prepare derivative works of the
8
+Software, and to permit third-parties to whom the Software is furnished to
9
+do so, all subject to the following:
10
+
11
+The copyright notices in the Software and this entire statement, including
12
+the above license grant, this restriction and the following disclaimer,
13
+must be included in all copies of the Software, in whole or in part, and
14
+all derivative works of the Software, unless such copies or derivative
15
+works are solely in the form of machine-executable object code generated by
16
+a source language processor.
17
+
18
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
21
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
22
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
23
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24
+DEALINGS IN THE SOFTWARE.
25
+*/
26
+
27
+
28
+#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
29
+#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
30
+
31
#include "core.h"

#include <cstddef>
32
+
33
+namespace utf8
34
+{
35
+    namespace unchecked 
36
+    {
37
+        template <typename octet_iterator>
38
+        octet_iterator append(uint32_t cp, octet_iterator result)
39
+        {
40
+            if (cp < 0x80)                        // one octet
41
+                *(result++) = static_cast<uint8_t>(cp);  
42
+            else if (cp < 0x800) {                // two octets
43
+                *(result++) = static_cast<uint8_t>((cp >> 6)          | 0xc0);
44
+                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
45
+            }
46
+            else if (cp < 0x10000) {              // three octets
47
+                *(result++) = static_cast<uint8_t>((cp >> 12)         | 0xe0);
48
+                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
49
+                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
50
+            }
51
+            else {                                // four octets
52
+                *(result++) = static_cast<uint8_t>((cp >> 18)         | 0xf0);
53
+                *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80);
54
+                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
55
+                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
56
+            }
57
+            return result;
58
+        }
59
+
60
+        template <typename octet_iterator>
61
+        uint32_t next(octet_iterator& it)
62
+        {
63
+            uint32_t cp = utf8::internal::mask8(*it);
64
+            typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);
65
+            switch (length) {
66
+                case 1:
67
+                    break;
68
+                case 2:
69
+                    it++;
70
+                    cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
71
+                    break;
72
+                case 3:
73
+                    ++it; 
74
+                    cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
75
+                    ++it;
76
+                    cp += (*it) & 0x3f;
77
+                    break;
78
+                case 4:
79
+                    ++it;
80
+                    cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);                
81
+                    ++it;
82
+                    cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
83
+                    ++it;
84
+                    cp += (*it) & 0x3f; 
85
+                    break;
86
+            }
87
+            ++it;
88
+            return cp;        
89
+        }
90
+
91
+        template <typename octet_iterator>
92
+        uint32_t peek_next(octet_iterator it)
93
+        {
94
+            return utf8::unchecked::next(it);    
95
+        }
96
+
97
+        template <typename octet_iterator>
98
+        uint32_t prior(octet_iterator& it)
99
+        {
100
+            while (utf8::internal::is_trail(*(--it))) ;
101
+            octet_iterator temp = it;
102
+            return utf8::unchecked::next(temp);
103
+        }
104
+
105
+        // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
106
+        template <typename octet_iterator>
107
+        inline uint32_t previous(octet_iterator& it)
108
+        {
109
+            return utf8::unchecked::prior(it);
110
+        }
111
+
112
+        template <typename octet_iterator, typename distance_type>
113
+        void advance (octet_iterator& it, distance_type n)
114
+        {
115
+            for (distance_type i = 0; i < n; ++i)
116
+                utf8::unchecked::next(it);
117
+        }
118
+
119
+        template <typename octet_iterator>
120
+        typename std::iterator_traits<octet_iterator>::difference_type
121
+        distance (octet_iterator first, octet_iterator last)
122
+        {
123
+            typename std::iterator_traits<octet_iterator>::difference_type dist;
124
+            for (dist = 0; first < last; ++dist) 
125
+                utf8::unchecked::next(first);
126
+            return dist;
127
+        }
128
+
129
+        template <typename u16bit_iterator, typename octet_iterator>
130
+        octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
131
+        {       
132
+            while (start != end) {
133
+                uint32_t cp = utf8::internal::mask16(*start++);
134
+            // Take care of surrogate pairs first
135
+                if (utf8::internal::is_lead_surrogate(cp)) {
136
+                    uint32_t trail_surrogate = utf8::internal::mask16(*start++);
137
+                    cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
138
+                }
139
+                result = utf8::unchecked::append(cp, result);
140
+            }
141
+            return result;         
142
+        }
143
+
144
+        template <typename u16bit_iterator, typename octet_iterator>
145
+        u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
146
+        {
147
+            while (start < end) {
148
+                uint32_t cp = utf8::unchecked::next(start);
149
+                if (cp > 0xffff) { //make a surrogate pair
150
+                    *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
151
+                    *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
152
+                }
153
+                else
154
+                    *result++ = static_cast<uint16_t>(cp);
155
+            }
156
+            return result;
157
+        }
158
+
159
+        template <typename octet_iterator, typename u32bit_iterator>
160
+        octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
161
+        {
162
+            while (start != end)
163
+                result = utf8::unchecked::append(*(start++), result);
164
+
165
+            return result;
166
+        }
167
+
168
+        template <typename octet_iterator, typename u32bit_iterator>
169
+        u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
170
+        {
171
+            while (start < end)
172
+                (*result++) = utf8::unchecked::next(start);
173
+
174
+            return result;
175
+        }
176
+
177
+        // The iterator class
178
+        template <typename octet_iterator>
179
+          class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> { 
180
+            octet_iterator it;
181
+            public:
182
+            iterator () {}
183
+            explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
184
+            // the default "big three" are OK
185
+            octet_iterator base () const { return it; }
186
+            uint32_t operator * () const
187
+            {
188
+                octet_iterator temp = it;
189
+                return utf8::unchecked::next(temp);
190
+            }
191
+            bool operator == (const iterator& rhs) const 
192
+            { 
193
+                return (it == rhs.it);
194
+            }
195
+            bool operator != (const iterator& rhs) const
196
+            {
197
+                return !(operator == (rhs));
198
+            }
199
+            iterator& operator ++ () 
200
+            {
201
+                ::std::advance(it, utf8::internal::sequence_length(it));
202
+                return *this;
203
+            }
204
+            iterator operator ++ (int)
205
+            {
206
+                iterator temp = *this;
207
+                ::std::advance(it, utf8::internal::sequence_length(it));
208
+                return temp;
209
+            }  
210
+            iterator& operator -- ()
211
+            {
212
+                utf8::unchecked::prior(it);
213
+                return *this;
214
+            }
215
+            iterator operator -- (int)
216
+            {
217
+                iterator temp = *this;
218
+                utf8::unchecked::prior(it);
219
+                return temp;
220
+            }
221
+          }; // class iterator
222
+
223
+    } // namespace utf8::unchecked
224
+} // namespace utf8 
225
+
226
+
227
+#endif // header guard
228
+

Loading…
Cancel
Save