equal
deleted
inserted
replaced
21 |
21 |
22 typedef struct { |
22 typedef struct { |
23 unsigned char category; |
23 unsigned char category; |
24 unsigned char combining; |
24 unsigned char combining; |
25 unsigned char bidi_class; |
25 unsigned char bidi_class; |
26 unsigned char mirrored; |
|
27 unsigned char east_asian_width; |
26 unsigned char east_asian_width; |
28 unsigned char script; |
27 unsigned char script; |
29 unsigned char linebreak_class; |
28 unsigned char linebreak_class; |
30 } UCDRecord; |
29 } UCDRecord; |
31 |
30 |
41 typedef struct { |
40 typedef struct { |
42 unsigned int start; |
41 unsigned int start; |
43 short count, index; |
42 short count, index; |
44 } Reindex; |
43 } Reindex; |
45 |
44 |
46 #include "unicodedata_db.h" |
45 #include "ucdn_db.h" |
47 |
46 |
48 /* constants required for Hangul (de)composition */ |
47 /* constants required for Hangul (de)composition */ |
49 #define SBASE 0xAC00 |
48 #define SBASE 0xAC00 |
50 #define LBASE 0x1100 |
49 #define LBASE 0x1100 |
51 #define VBASE 0x1161 |
50 #define VBASE 0x1161 |
89 } |
88 } |
90 |
89 |
91 return &decomp_data[index]; |
90 return &decomp_data[index]; |
92 } |
91 } |
93 |
92 |
94 static int get_comp_index(uint32_t code, const Reindex *idx) |
93 static int compare_reindex(const void *a, const void *b) |
95 { |
94 { |
96 int i; |
95 Reindex *ra = (Reindex *)a; |
97 |
96 Reindex *rb = (Reindex *)b; |
98 for (i = 0; idx[i].start; i++) { |
97 |
99 const Reindex *cur = &idx[i]; |
98 if (ra->start < rb->start) |
100 if (code < cur->start) |
99 return -1; |
101 return -1; |
100 else if (ra->start > (rb->start + rb->count)) |
102 if (code <= cur->start + cur->count) { |
101 return 1; |
103 return cur->index + (code - cur->start); |
102 else |
104 } |
103 return 0; |
105 } |
104 } |
106 |
105 |
107 return -1; |
106 static int get_comp_index(uint32_t code, const Reindex *idx, size_t len) |
|
107 { |
|
108 Reindex *res; |
|
109 Reindex r = {0, 0, 0}; |
|
110 r.start = code; |
|
111 res = (Reindex *) bsearch(&r, idx, len, sizeof(Reindex), compare_reindex); |
|
112 |
|
113 if (res != NULL) |
|
114 return res->index + (code - res->start); |
|
115 else |
|
116 return -1; |
108 } |
117 } |
109 |
118 |
110 static int compare_mp(const void *a, const void *b) |
119 static int compare_mp(const void *a, const void *b) |
111 { |
120 { |
112 MirrorPair *mpa = (MirrorPair *)a; |
121 MirrorPair *mpa = (MirrorPair *)a; |
125 { |
134 { |
126 BracketPair bp = {0,0,2}; |
135 BracketPair bp = {0,0,2}; |
127 BracketPair *res; |
136 BracketPair *res; |
128 |
137 |
129 bp.from = code; |
138 bp.from = code; |
130 res = bsearch(&bp, bracket_pairs, BIDI_BRACKET_LEN, sizeof(BracketPair), |
139 res = (BracketPair *) bsearch(&bp, bracket_pairs, BIDI_BRACKET_LEN, |
131 compare_bp); |
140 sizeof(BracketPair), compare_bp); |
132 return res; |
141 return res; |
133 } |
142 } |
134 |
143 |
135 static int hangul_pair_decompose(uint32_t code, uint32_t *a, uint32_t *b) |
144 static int hangul_pair_decompose(uint32_t code, uint32_t *a, uint32_t *b) |
136 { |
145 { |
152 } |
161 } |
153 } |
162 } |
154 |
163 |
155 static int hangul_pair_compose(uint32_t *code, uint32_t a, uint32_t b) |
164 static int hangul_pair_compose(uint32_t *code, uint32_t a, uint32_t b) |
156 { |
165 { |
157 if (b < VBASE || b >= (TBASE + TCOUNT)) |
166 if (a >= SBASE && a < (SBASE + SCOUNT) && b >= TBASE && b < (TBASE + TCOUNT)) { |
158 return 0; |
|
159 |
|
160 if ((a < LBASE || a >= (LBASE + LCOUNT)) |
|
161 && (a < SBASE || a >= (SBASE + SCOUNT))) |
|
162 return 0; |
|
163 |
|
164 if (a >= SBASE) { |
|
165 /* LV,T */ |
167 /* LV,T */ |
166 *code = a + (b - TBASE); |
168 *code = a + (b - TBASE); |
167 return 3; |
169 return 3; |
168 } else { |
170 } else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT)) { |
169 /* L,V */ |
171 /* L,V */ |
170 int li = a - LBASE; |
172 int li = a - LBASE; |
171 int vi = b - VBASE; |
173 int vi = b - VBASE; |
172 *code = SBASE + li * NCOUNT + vi * TCOUNT; |
174 *code = SBASE + li * NCOUNT + vi * TCOUNT; |
173 return 2; |
175 return 2; |
|
176 } else { |
|
177 return 0; |
174 } |
178 } |
175 } |
179 } |
176 |
180 |
177 static uint32_t decode_utf16(const unsigned short **code_ptr) |
181 static uint32_t decode_utf16(const unsigned short **code_ptr) |
178 { |
182 { |
179 const unsigned short *code = *code_ptr; |
183 const unsigned short *code = *code_ptr; |
180 |
184 |
181 if ((code[0] & 0xd800) != 0xd800) { |
185 if (code[0] < 0xd800 || code[0] > 0xdc00) { |
182 *code_ptr += 1; |
186 *code_ptr += 1; |
183 return (uint32_t)code[0]; |
187 return (uint32_t)code[0]; |
184 } else { |
188 } else { |
185 *code_ptr += 2; |
189 *code_ptr += 2; |
186 return 0x10000 + ((uint32_t)code[1] - 0xdc00) + |
190 return 0x10000 + ((uint32_t)code[1] - 0xdc00) + |
213 return get_ucd_record(code)->bidi_class; |
217 return get_ucd_record(code)->bidi_class; |
214 } |
218 } |
215 |
219 |
216 int ucdn_get_mirrored(uint32_t code) |
220 int ucdn_get_mirrored(uint32_t code) |
217 { |
221 { |
218 return get_ucd_record(code)->mirrored; |
222 return ucdn_mirror(code) != code; |
219 } |
223 } |
220 |
224 |
221 int ucdn_get_script(uint32_t code) |
225 int ucdn_get_script(uint32_t code) |
222 { |
226 { |
223 return get_ucd_record(code)->script; |
227 return get_ucd_record(code)->script; |
262 uint32_t ucdn_mirror(uint32_t code) |
266 uint32_t ucdn_mirror(uint32_t code) |
263 { |
267 { |
264 MirrorPair mp = {0}; |
268 MirrorPair mp = {0}; |
265 MirrorPair *res; |
269 MirrorPair *res; |
266 |
270 |
267 if (get_ucd_record(code)->mirrored == 0) |
|
268 return code; |
|
269 |
|
270 mp.from = code; |
271 mp.from = code; |
271 res = bsearch(&mp, mirror_pairs, BIDI_MIRROR_LEN, sizeof(MirrorPair), |
272 res = (MirrorPair *) bsearch(&mp, mirror_pairs, BIDI_MIRROR_LEN, |
272 compare_mp); |
273 sizeof(MirrorPair), compare_mp); |
273 |
274 |
274 if (res == NULL) |
275 if (res == NULL) |
275 return code; |
276 return code; |
276 else |
277 else |
277 return res->to; |
278 return res->to; |
324 int l, r, index, indexi, offset; |
325 int l, r, index, indexi, offset; |
325 |
326 |
326 if (hangul_pair_compose(code, a, b)) |
327 if (hangul_pair_compose(code, a, b)) |
327 return 1; |
328 return 1; |
328 |
329 |
329 l = get_comp_index(a, nfc_first); |
330 l = get_comp_index(a, nfc_first, sizeof(nfc_first) / sizeof(Reindex)); |
330 r = get_comp_index(b, nfc_last); |
331 r = get_comp_index(b, nfc_last, sizeof(nfc_last) / sizeof(Reindex)); |
331 |
332 |
332 if (l < 0 || r < 0) |
333 if (l < 0 || r < 0) |
333 return 0; |
334 return 0; |
334 |
335 |
335 indexi = l * TOTAL_LAST + r; |
336 indexi = l * TOTAL_LAST + r; |