|
1 /* |
|
2 * Copyright © 2012 Google, Inc. |
|
3 * |
|
4 * This is part of HarfBuzz, a text shaping library. |
|
5 * |
|
6 * Permission is hereby granted, without written agreement and without |
|
7 * license or royalty fees, to use, copy, modify, and distribute this |
|
8 * software and its documentation for any purpose, provided that the |
|
9 * above copyright notice and the following two paragraphs appear in |
|
10 * all copies of this software. |
|
11 * |
|
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
|
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
|
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
|
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
|
16 * DAMAGE. |
|
17 * |
|
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
|
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
|
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
|
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
|
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
|
23 * |
|
24 * Google Author(s): Behdad Esfahbod |
|
25 */ |
|
26 |
|
27 #ifndef HB_OT_SHAPE_COMPLEX_INDIC_HH |
|
28 #define HB_OT_SHAPE_COMPLEX_INDIC_HH |
|
29 |
|
30 #include "hb.hh" |
|
31 |
|
32 #include "hb-ot-shape-complex.hh" |
|
33 |
|
34 |
|
/* Buffer variable allocations for the Indic shaper: each accessor below
 * expands to one of the complex-shaper per-glyph variables. */
#define indic_category()  complex_var_u8_0()  /* stores an indic_category_t */
#define indic_position()  complex_var_u8_1()  /* stores an indic_position_t */

/* Return type of hb_indic_get_categories(). */
#define INDIC_TABLE_ELEMENT_TYPE  uint16_t
|
41 |
|
/* Categories used in the OpenType spec:
 * https://docs.microsoft.com/en-us/typography/script-development/devanagari
 */
|
45 /* Note: This enum is duplicated in the -machine.rl source file. |
|
46 * Not sure how to avoid duplication. */ |
|
enum indic_category_t {
  OT_X            =  0,
  OT_C            =  1,
  OT_V            =  2,
  OT_N            =  3,
  OT_H            =  4,
  OT_ZWNJ         =  5,
  OT_ZWJ          =  6,
  OT_M            =  7,
  OT_SM           =  8,
  /* 9 would be OT_VD; it is left unassigned because OT_A is used instead. */
  OT_A            = 10,
  OT_PLACEHOLDER  = 11,
  OT_DOTTEDCIRCLE = 12,
  OT_RS           = 13, /* Register shifter, from the Khmer OT spec. */
  OT_Coeng        = 14, /* Virama in the Khmer style. */
  OT_Repha        = 15, /* Repha encoded atomically, logical or visual. */
  OT_Ra           = 16,
  OT_CM           = 17, /* Consonant-medial; not used by the Indic shaper. */
  OT_Symbol       = 18, /* Avagraha and the like, which take marks (SM, A, VD). */
  OT_CS           = 19
};
|
69 |
|
/* Note:
 *
 * Vowels and placeholders are deliberately lumped in with the consonants.
 * A vowel cannot occur inside a consonant syllable, so this is harmless, and
 * it lets the vowel-syllable function reuse the consonant-syllable logic
 * without any special casing. */
#define CONSONANT_FLAGS ( \
        FLAG (OT_C)           | \
        FLAG (OT_CS)          | \
        FLAG (OT_Ra)          | \
        FLAG (OT_V)           | \
        FLAG (OT_PLACEHOLDER) | \
        FLAG (OT_DOTTEDCIRCLE)  \
        )
/* Both joiner controls: ZWJ and ZWNJ. */
#define JOINER_FLAGS ( \
        FLAG (OT_ZWJ)  | \
        FLAG (OT_ZWNJ)   \
        )
|
77 |
|
78 |
|
/* Visual slots within a syllable, numbered in left-to-right order.
 * POS_START is the smallest value and POS_END the largest. */
enum indic_position_t {
  POS_START             =  0,

  POS_RA_TO_BECOME_REPH =  1,
  POS_PRE_M             =  2,
  POS_PRE_C             =  3,

  POS_BASE_C            =  4,
  POS_AFTER_MAIN        =  5,

  POS_ABOVE_C           =  6,

  POS_BEFORE_SUB        =  7,
  POS_BELOW_C           =  8,
  POS_AFTER_SUB         =  9,

  POS_BEFORE_POST       = 10,
  POS_POST_C            = 11,
  POS_AFTER_POST        = 12,

  POS_FINAL_C           = 13,
  POS_SMVD              = 14,

  POS_END               = 15
};
|
105 |
|
106 /* Categories used in IndicSyllabicCategory.txt from UCD. */ |
|
107 enum indic_syllabic_category_t { |
|
108 INDIC_SYLLABIC_CATEGORY_OTHER = OT_X, |
|
109 |
|
110 INDIC_SYLLABIC_CATEGORY_AVAGRAHA = OT_Symbol, |
|
111 INDIC_SYLLABIC_CATEGORY_BINDU = OT_SM, |
|
112 INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER = OT_PLACEHOLDER, /* Don't care. */ |
|
113 INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK = OT_A, |
|
114 INDIC_SYLLABIC_CATEGORY_CONSONANT = OT_C, |
|
115 INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD = OT_C, |
|
116 INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL = OT_CM, |
|
117 INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER = OT_C, |
|
118 INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER = OT_M, /* U+17CD only. */ |
|
119 INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_CM, |
|
120 INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_PLACEHOLDER, |
|
121 INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA = OT_Repha, |
|
122 INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED = OT_X, /* Don't care. */ |
|
123 INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_CM, |
|
124 INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA = OT_CM, |
|
125 INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER = OT_CS, |
|
126 INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK = OT_SM, /* https://github.com/harfbuzz/harfbuzz/issues/552 */ |
|
127 INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER = OT_Coeng, |
|
128 INDIC_SYLLABIC_CATEGORY_JOINER = OT_ZWJ, |
|
129 INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X, |
|
130 INDIC_SYLLABIC_CATEGORY_NON_JOINER = OT_ZWNJ, |
|
131 INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N, |
|
132 INDIC_SYLLABIC_CATEGORY_NUMBER = OT_PLACEHOLDER, |
|
133 INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER = OT_PLACEHOLDER, /* Don't care. */ |
|
134 INDIC_SYLLABIC_CATEGORY_PURE_KILLER = OT_M, /* Is like a vowel matra. */ |
|
135 INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS, |
|
136 INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER = OT_SM, |
|
137 INDIC_SYLLABIC_CATEGORY_TONE_LETTER = OT_X, |
|
138 INDIC_SYLLABIC_CATEGORY_TONE_MARK = OT_N, |
|
139 INDIC_SYLLABIC_CATEGORY_VIRAMA = OT_H, |
|
140 INDIC_SYLLABIC_CATEGORY_VISARGA = OT_SM, |
|
141 INDIC_SYLLABIC_CATEGORY_VOWEL = OT_V, |
|
142 INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT = OT_M, |
|
143 INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT = OT_V |
|
144 }; |
|
145 |
|
146 /* Categories used in IndicSMatraCategory.txt from UCD */ |
|
147 enum indic_matra_category_t { |
|
148 INDIC_MATRA_CATEGORY_NOT_APPLICABLE = POS_END, |
|
149 |
|
150 INDIC_MATRA_CATEGORY_LEFT = POS_PRE_C, |
|
151 INDIC_MATRA_CATEGORY_TOP = POS_ABOVE_C, |
|
152 INDIC_MATRA_CATEGORY_BOTTOM = POS_BELOW_C, |
|
153 INDIC_MATRA_CATEGORY_RIGHT = POS_POST_C, |
|
154 |
|
155 /* These should resolve to the position of the last part of the split sequence. */ |
|
156 INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, |
|
157 INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, |
|
158 INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM = INDIC_MATRA_CATEGORY_BOTTOM, |
|
159 INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, |
|
160 INDIC_MATRA_CATEGORY_TOP_AND_LEFT = INDIC_MATRA_CATEGORY_TOP, |
|
161 INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, |
|
162 INDIC_MATRA_CATEGORY_TOP_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, |
|
163 |
|
164 INDIC_MATRA_CATEGORY_OVERSTRUCK = POS_AFTER_MAIN, |
|
165 INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT = POS_PRE_M |
|
166 }; |
|
167 |
|
/* Packs a syllabic category S and a matra category M into a single value:
 * S occupies the low byte and a position occupies the bits above it.
 *
 * M is only kept when S compares equal to one of the syllabic categories
 * listed in the condition; otherwise the position field is
 * INDIC_MATRA_CATEGORY_NOT_APPLICABLE.  Several syllabic categories share
 * one numeric value (for example every category defined as OT_CM), so a
 * comparison against one of them matches all of its aliases too.
 *
 * Both arguments are fed through ASSERT_STATIC_EXPR_ZERO (defined
 * elsewhere) with the condition "below 255".  Every use of S and M is
 * parenthesized so that expression arguments expand correctly. */
#define INDIC_COMBINE_CATEGORIES(S,M) \
  ( \
    ASSERT_STATIC_EXPR_ZERO ((S) < 255 && (M) < 255) + \
    ( (S) | \
      ( \
        ( \
          (S) == INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL || \
          (S) == INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK || \
          (S) == INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER || \
          (S) == INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA || \
          (S) == INDIC_SYLLABIC_CATEGORY_VIRAMA || \
          (S) == INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT || \
          false \
          ? (M) : INDIC_MATRA_CATEGORY_NOT_APPLICABLE \
        ) << 8 \
      ) \
    ) \
  )
|
186 |
|
187 HB_INTERNAL INDIC_TABLE_ELEMENT_TYPE |
|
188 hb_indic_get_categories (hb_codepoint_t u); |
|
189 |
|
190 |
|
191 static inline bool |
|
192 is_one_of (const hb_glyph_info_t &info, unsigned int flags) |
|
193 { |
|
194 /* If it ligated, all bets are off. */ |
|
195 if (_hb_glyph_info_ligated (&info)) return false; |
|
196 return !!(FLAG_UNSAFE (info.indic_category()) & flags); |
|
197 } |
|
198 |
|
199 static inline bool |
|
200 is_joiner (const hb_glyph_info_t &info) |
|
201 { |
|
202 return is_one_of (info, JOINER_FLAGS); |
|
203 } |
|
204 |
|
205 static inline bool |
|
206 is_consonant (const hb_glyph_info_t &info) |
|
207 { |
|
208 return is_one_of (info, CONSONANT_FLAGS); |
|
209 } |
|
210 |
|
211 static inline bool |
|
212 is_halant (const hb_glyph_info_t &info) |
|
213 { |
|
214 return is_one_of (info, FLAG (OT_H)); |
|
215 } |
|
216 |
|
/* True when u lies in the 128-codepoint half block that begins at Base:
 * the low seven bits of u are cleared and the remainder compared to Base. */
#define IN_HALF_BLOCK(u, Base)  ((Base) == ((u) & ~0x7Fu))

/* One test per script, keyed on the first codepoint of its half block. */
#define IS_DEVA(u)  (IN_HALF_BLOCK ((u), 0x0900u))
#define IS_BENG(u)  (IN_HALF_BLOCK ((u), 0x0980u))
#define IS_GURU(u)  (IN_HALF_BLOCK ((u), 0x0A00u))
#define IS_GUJR(u)  (IN_HALF_BLOCK ((u), 0x0A80u))
#define IS_ORYA(u)  (IN_HALF_BLOCK ((u), 0x0B00u))
#define IS_TAML(u)  (IN_HALF_BLOCK ((u), 0x0B80u))
#define IS_TELU(u)  (IN_HALF_BLOCK ((u), 0x0C00u))
#define IS_KNDA(u)  (IN_HALF_BLOCK ((u), 0x0C80u))
#define IS_MLYM(u)  (IN_HALF_BLOCK ((u), 0x0D00u))
#define IS_SINH(u)  (IN_HALF_BLOCK ((u), 0x0D80u))
|
229 |
|
230 |
|
/* Final position of a matra, chosen from the half block that its codepoint
 * u belongs to.  There is one macro per side on which the matra attaches.
 * Left matras always get POS_PRE_M; for the other sides, codepoints outside
 * the listed scripts get POS_AFTER_SUB.  The argument is parenthesized at
 * every use so that expression arguments expand correctly. */
#define MATRA_POS_LEFT(u)       POS_PRE_M
#define MATRA_POS_RIGHT(u)      ( \
                                  IS_DEVA(u) ? POS_AFTER_SUB  : \
                                  IS_BENG(u) ? POS_AFTER_POST : \
                                  IS_GURU(u) ? POS_AFTER_POST : \
                                  IS_GUJR(u) ? POS_AFTER_POST : \
                                  IS_ORYA(u) ? POS_AFTER_POST : \
                                  IS_TAML(u) ? POS_AFTER_POST : \
                                  IS_TELU(u) ? ((u) <= 0x0C42u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \
                                  IS_KNDA(u) ? ((u) < 0x0CC3u || (u) > 0x0CD6u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \
                                  IS_MLYM(u) ? POS_AFTER_POST : \
                                  IS_SINH(u) ? POS_AFTER_SUB  : \
                                  /*default*/  POS_AFTER_SUB    \
                                )
#define MATRA_POS_TOP(u)        ( /* BENG and MLYM don't have top matras. */ \
                                  IS_DEVA(u) ? POS_AFTER_SUB  : \
                                  IS_GURU(u) ? POS_AFTER_POST : /* Deviate from spec */ \
                                  IS_GUJR(u) ? POS_AFTER_SUB  : \
                                  IS_ORYA(u) ? POS_AFTER_MAIN : \
                                  IS_TAML(u) ? POS_AFTER_SUB  : \
                                  IS_TELU(u) ? POS_BEFORE_SUB : \
                                  IS_KNDA(u) ? POS_BEFORE_SUB : \
                                  IS_SINH(u) ? POS_AFTER_SUB  : \
                                  /*default*/  POS_AFTER_SUB    \
                                )
#define MATRA_POS_BOTTOM(u)     ( \
                                  IS_DEVA(u) ? POS_AFTER_SUB  : \
                                  IS_BENG(u) ? POS_AFTER_SUB  : \
                                  IS_GURU(u) ? POS_AFTER_POST : \
                                  IS_GUJR(u) ? POS_AFTER_POST : \
                                  IS_ORYA(u) ? POS_AFTER_SUB  : \
                                  IS_TAML(u) ? POS_AFTER_POST : \
                                  IS_TELU(u) ? POS_BEFORE_SUB : \
                                  IS_KNDA(u) ? POS_BEFORE_SUB : \
                                  IS_MLYM(u) ? POS_AFTER_POST : \
                                  IS_SINH(u) ? POS_AFTER_SUB  : \
                                  /*default*/  POS_AFTER_SUB    \
                                )
|
269 |
|
270 static inline indic_position_t |
|
271 matra_position_indic (hb_codepoint_t u, indic_position_t side) |
|
272 { |
|
273 switch ((int) side) |
|
274 { |
|
275 case POS_PRE_C: return MATRA_POS_LEFT (u); |
|
276 case POS_POST_C: return MATRA_POS_RIGHT (u); |
|
277 case POS_ABOVE_C: return MATRA_POS_TOP (u); |
|
278 case POS_BELOW_C: return MATRA_POS_BOTTOM (u); |
|
279 }; |
|
280 return side; |
|
281 } |
|
282 |
|
283 /* XXX |
|
284 * This is a hack for now. We should move this data into the main Indic table. |
|
285 * Or completely remove it and just check in the tables. |
|
286 */ |
|
287 static const hb_codepoint_t ra_chars[] = { |
|
288 0x0930u, /* Devanagari */ |
|
289 0x09B0u, /* Bengali */ |
|
290 0x09F0u, /* Bengali */ |
|
291 0x0A30u, /* Gurmukhi */ /* No Reph */ |
|
292 0x0AB0u, /* Gujarati */ |
|
293 0x0B30u, /* Oriya */ |
|
294 0x0BB0u, /* Tamil */ /* No Reph */ |
|
295 0x0C30u, /* Telugu */ /* Reph formed only with ZWJ */ |
|
296 0x0CB0u, /* Kannada */ |
|
297 0x0D30u, /* Malayalam */ /* No Reph, Logical Repha */ |
|
298 |
|
299 0x0DBBu, /* Sinhala */ /* Reph formed only with ZWJ */ |
|
300 |
|
301 0x179Au, /* Khmer */ |
|
302 }; |
|
303 |
|
304 static inline bool |
|
305 is_ra (hb_codepoint_t u) |
|
306 { |
|
307 for (unsigned int i = 0; i < ARRAY_LENGTH (ra_chars); i++) |
|
308 if (u == ra_chars[i]) |
|
309 return true; |
|
310 return false; |
|
311 } |
|
312 |
|
313 static inline void |
|
314 set_indic_properties (hb_glyph_info_t &info) |
|
315 { |
|
316 hb_codepoint_t u = info.codepoint; |
|
317 unsigned int type = hb_indic_get_categories (u); |
|
318 indic_category_t cat = (indic_category_t) (type & 0x7Fu); |
|
319 indic_position_t pos = (indic_position_t) (type >> 8); |
|
320 |
|
321 |
|
322 /* |
|
323 * Re-assign category |
|
324 */ |
|
325 |
|
326 /* The following act more like the Bindus. */ |
|
327 if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0953u, 0x0954u))) |
|
328 cat = OT_SM; |
|
329 /* The following act like consonants. */ |
|
330 else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0x0A72u, 0x0A73u, |
|
331 0x1CF5u, 0x1CF6u))) |
|
332 cat = OT_C; |
|
333 /* TODO: The following should only be allowed after a Visarga. |
|
334 * For now, just treat them like regular tone marks. */ |
|
335 else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1CE2u, 0x1CE8u))) |
|
336 cat = OT_A; |
|
337 /* TODO: The following should only be allowed after some of |
|
338 * the nasalization marks, maybe only for U+1CE9..U+1CF1. |
|
339 * For now, just treat them like tone marks. */ |
|
340 else if (unlikely (u == 0x1CEDu)) |
|
341 cat = OT_A; |
|
342 /* The following take marks in standalone clusters, similar to Avagraha. */ |
|
343 else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0xA8F2u, 0xA8F7u, |
|
344 0x1CE9u, 0x1CECu, |
|
345 0x1CEEu, 0x1CF1u))) |
|
346 { |
|
347 cat = OT_Symbol; |
|
348 static_assert (((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol), ""); |
|
349 } |
|
350 else if (unlikely (u == 0x0A51u)) |
|
351 { |
|
352 /* https://github.com/harfbuzz/harfbuzz/issues/524 */ |
|
353 cat = OT_M; |
|
354 pos = POS_BELOW_C; |
|
355 } |
|
356 |
|
357 /* According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil, |
|
358 * so the Indic shaper needs to know their categories. */ |
|
359 else if (unlikely (u == 0x11301u || u == 0x11303u)) cat = OT_SM; |
|
360 else if (unlikely (u == 0x1133cu)) cat = OT_N; |
|
361 |
|
362 else if (unlikely (u == 0x0AFBu)) cat = OT_N; /* https://github.com/harfbuzz/harfbuzz/issues/552 */ |
|
363 |
|
364 else if (unlikely (u == 0x0980u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/issues/538 */ |
|
365 else if (unlikely (u == 0x0C80u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/623 */ |
|
366 else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) |
|
367 cat = OT_PLACEHOLDER; |
|
368 else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; |
|
369 |
|
370 |
|
371 /* |
|
372 * Re-assign position. |
|
373 */ |
|
374 |
|
375 if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS)) |
|
376 { |
|
377 pos = POS_BASE_C; |
|
378 if (is_ra (u)) |
|
379 cat = OT_Ra; |
|
380 } |
|
381 else if (cat == OT_M) |
|
382 { |
|
383 pos = matra_position_indic (u, pos); |
|
384 } |
|
385 else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) /* | FLAG (OT_VD) */ | FLAG (OT_A) | FLAG (OT_Symbol)))) |
|
386 { |
|
387 pos = POS_SMVD; |
|
388 } |
|
389 |
|
390 if (unlikely (u == 0x0B01u)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */ |
|
391 |
|
392 |
|
393 |
|
394 info.indic_category() = cat; |
|
395 info.indic_position() = pos; |
|
396 } |
|
397 |
|
398 |
|
399 #endif /* HB_OT_SHAPE_COMPLEX_INDIC_HH */ |