|
/*
 * Copyright © 2011,2012,2014  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */
|
26 |
|
27 #ifndef HB_UTF_PRIVATE_HH |
|
28 #define HB_UTF_PRIVATE_HH |
|
29 |
|
30 #include "hb-private.hh" |
|
31 |
|
32 |
|
33 struct hb_utf8_t |
|
34 { |
|
35 typedef uint8_t codepoint_t; |
|
36 |
|
37 static inline const uint8_t * |
|
38 next (const uint8_t *text, |
|
39 const uint8_t *end, |
|
40 hb_codepoint_t *unicode, |
|
41 hb_codepoint_t replacement) |
|
42 { |
|
43 /* Written to only accept well-formed sequences. |
|
44 * Based on ideas from ICU's U8_NEXT. |
|
45 * Generates one "replacement" for each ill-formed byte. */ |
|
46 |
|
47 hb_codepoint_t c = *text++; |
|
48 |
|
49 if (c > 0x7Fu) |
|
50 { |
|
51 if (hb_in_range (c, 0xC2u, 0xDFu)) /* Two-byte */ |
|
52 { |
|
53 unsigned int t1; |
|
54 if (likely (text < end && |
|
55 (t1 = text[0] - 0x80u) <= 0x3Fu)) |
|
56 { |
|
57 c = ((c&0x1Fu)<<6) | t1; |
|
58 text++; |
|
59 } |
|
60 else |
|
61 goto error; |
|
62 } |
|
63 else if (hb_in_range (c, 0xE0u, 0xEFu)) /* Three-byte */ |
|
64 { |
|
65 unsigned int t1, t2; |
|
66 if (likely (1 < end - text && |
|
67 (t1 = text[0] - 0x80u) <= 0x3Fu && |
|
68 (t2 = text[1] - 0x80u) <= 0x3Fu)) |
|
69 { |
|
70 c = ((c&0xFu)<<12) | (t1<<6) | t2; |
|
71 if (unlikely (c < 0x0800u || hb_in_range (c, 0xD800u, 0xDFFFu))) |
|
72 goto error; |
|
73 text += 2; |
|
74 } |
|
75 else |
|
76 goto error; |
|
77 } |
|
78 else if (hb_in_range (c, 0xF0u, 0xF4u)) /* Four-byte */ |
|
79 { |
|
80 unsigned int t1, t2, t3; |
|
81 if (likely (2 < end - text && |
|
82 (t1 = text[0] - 0x80u) <= 0x3Fu && |
|
83 (t2 = text[1] - 0x80u) <= 0x3Fu && |
|
84 (t3 = text[2] - 0x80u) <= 0x3Fu)) |
|
85 { |
|
86 c = ((c&0x7u)<<18) | (t1<<12) | (t2<<6) | t3; |
|
87 if (unlikely (!hb_in_range (c, 0x10000u, 0x10FFFFu))) |
|
88 goto error; |
|
89 text += 3; |
|
90 } |
|
91 else |
|
92 goto error; |
|
93 } |
|
94 else |
|
95 goto error; |
|
96 } |
|
97 |
|
98 *unicode = c; |
|
99 return text; |
|
100 |
|
101 error: |
|
102 *unicode = replacement; |
|
103 return text; |
|
104 } |
|
105 |
|
106 static inline const uint8_t * |
|
107 prev (const uint8_t *text, |
|
108 const uint8_t *start, |
|
109 hb_codepoint_t *unicode, |
|
110 hb_codepoint_t replacement) |
|
111 { |
|
112 const uint8_t *end = text--; |
|
113 while (start < text && (*text & 0xc0) == 0x80 && end - text < 4) |
|
114 text--; |
|
115 |
|
116 if (likely (next (text, end, unicode, replacement) == end)) |
|
117 return text; |
|
118 |
|
119 *unicode = replacement; |
|
120 return end - 1; |
|
121 } |
|
122 |
|
123 static inline unsigned int |
|
124 strlen (const uint8_t *text) |
|
125 { |
|
126 return ::strlen ((const char *) text); |
|
127 } |
|
128 }; |
|
129 |
|
130 |
|
131 struct hb_utf16_t |
|
132 { |
|
133 typedef uint16_t codepoint_t; |
|
134 |
|
135 static inline const uint16_t * |
|
136 next (const uint16_t *text, |
|
137 const uint16_t *end, |
|
138 hb_codepoint_t *unicode, |
|
139 hb_codepoint_t replacement) |
|
140 { |
|
141 hb_codepoint_t c = *text++; |
|
142 |
|
143 if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu))) |
|
144 { |
|
145 *unicode = c; |
|
146 return text; |
|
147 } |
|
148 |
|
149 if (likely (hb_in_range (c, 0xD800u, 0xDBFFu))) |
|
150 { |
|
151 /* High-surrogate in c */ |
|
152 hb_codepoint_t l; |
|
153 if (text < end && ((l = *text), likely (hb_in_range (l, 0xDC00u, 0xDFFFu)))) |
|
154 { |
|
155 /* Low-surrogate in l */ |
|
156 *unicode = (c << 10) + l - ((0xD800u << 10) - 0x10000u + 0xDC00u); |
|
157 text++; |
|
158 return text; |
|
159 } |
|
160 } |
|
161 |
|
162 /* Lonely / out-of-order surrogate. */ |
|
163 *unicode = replacement; |
|
164 return text; |
|
165 } |
|
166 |
|
167 static inline const uint16_t * |
|
168 prev (const uint16_t *text, |
|
169 const uint16_t *start, |
|
170 hb_codepoint_t *unicode, |
|
171 hb_codepoint_t replacement) |
|
172 { |
|
173 const uint16_t *end = text--; |
|
174 hb_codepoint_t c = *text; |
|
175 |
|
176 if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu))) |
|
177 { |
|
178 *unicode = c; |
|
179 return text; |
|
180 } |
|
181 |
|
182 if (likely (start < text && hb_in_range (c, 0xDC00u, 0xDFFFu))) |
|
183 text--; |
|
184 |
|
185 if (likely (next (text, end, unicode, replacement) == end)) |
|
186 return text; |
|
187 |
|
188 *unicode = replacement; |
|
189 return end - 1; |
|
190 } |
|
191 |
|
192 |
|
193 static inline unsigned int |
|
194 strlen (const uint16_t *text) |
|
195 { |
|
196 unsigned int l = 0; |
|
197 while (*text++) l++; |
|
198 return l; |
|
199 } |
|
200 }; |
|
201 |
|
202 |
|
203 template <bool validate=true> |
|
204 struct hb_utf32_t |
|
205 { |
|
206 typedef uint32_t codepoint_t; |
|
207 |
|
208 static inline const uint32_t * |
|
209 next (const uint32_t *text, |
|
210 const uint32_t *end HB_UNUSED, |
|
211 hb_codepoint_t *unicode, |
|
212 hb_codepoint_t replacement) |
|
213 { |
|
214 hb_codepoint_t c = *text++; |
|
215 if (validate && unlikely (c > 0x10FFFFu || hb_in_range (c, 0xD800u, 0xDFFFu))) |
|
216 goto error; |
|
217 *unicode = c; |
|
218 return text; |
|
219 |
|
220 error: |
|
221 *unicode = replacement; |
|
222 return text; |
|
223 } |
|
224 |
|
225 static inline const uint32_t * |
|
226 prev (const uint32_t *text, |
|
227 const uint32_t *start HB_UNUSED, |
|
228 hb_codepoint_t *unicode, |
|
229 hb_codepoint_t replacement) |
|
230 { |
|
231 next (text - 1, text, unicode, replacement); |
|
232 return text - 1; |
|
233 } |
|
234 |
|
235 static inline unsigned int |
|
236 strlen (const uint32_t *text) |
|
237 { |
|
238 unsigned int l = 0; |
|
239 while (*text++) l++; |
|
240 return l; |
|
241 } |
|
242 }; |
|
243 |
|
244 |
|
245 struct hb_latin1_t |
|
246 { |
|
247 typedef uint8_t codepoint_t; |
|
248 |
|
249 static inline const uint8_t * |
|
250 next (const uint8_t *text, |
|
251 const uint8_t *end HB_UNUSED, |
|
252 hb_codepoint_t *unicode, |
|
253 hb_codepoint_t replacement HB_UNUSED) |
|
254 { |
|
255 *unicode = *text++; |
|
256 return text; |
|
257 } |
|
258 |
|
259 static inline const uint8_t * |
|
260 prev (const uint8_t *text, |
|
261 const uint8_t *start HB_UNUSED, |
|
262 hb_codepoint_t *unicode, |
|
263 hb_codepoint_t replacement) |
|
264 { |
|
265 *unicode = *--text; |
|
266 return text; |
|
267 } |
|
268 |
|
269 static inline unsigned int |
|
270 strlen (const uint8_t *text) |
|
271 { |
|
272 unsigned int l = 0; |
|
273 while (*text++) l++; |
|
274 return l; |
|
275 } |
|
276 }; |
|
277 |
|
278 #endif /* HB_UTF_PRIVATE_HH */ |