author | erikj |
Tue, 12 Sep 2017 19:03:39 +0200 | |
changeset 47216 | 71c04702a3d5 |
parent 35667 | jdk/src/java.desktop/share/classes/sun/font/ScriptRun.java@ed476aba94de |
child 52248 | 2e330da7cbf4 |
permissions | -rw-r--r-- |
2 | 1 |
/* |
2 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
3 |
* |
|
4 |
* This code is free software; you can redistribute it and/or modify it |
|
5 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 6 |
* published by the Free Software Foundation. Oracle designates this |
2 | 7 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 8 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 9 |
* |
10 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
11 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
12 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
13 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
14 |
* accompanied this code). |
|
15 |
* |
|
16 |
* You should have received a copy of the GNU General Public License version |
|
17 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
18 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
19 |
* |
|
5506 | 20 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
21 |
* or visit www.oracle.com if you need additional information or have any |
|
22 |
* questions. |
|
2 | 23 |
* |
24 |
*/ |
|
25 |
||
26 |
/* |
|
27 |
******************************************************************************* |
|
28 |
* |
|
29 |
* Copyright (C) 1999-2003, International Business Machines |
|
30 |
* Corporation and others. All Rights Reserved. |
|
31 |
* |
|
32 |
******************************************************************************* |
|
33 |
*/ |
|
34 |
||
35 |
package sun.font; |
|
36 |
||
37 |
/** |
|
35667 | 38 |
* {@code ScriptRun} is used to find runs of characters in |
39 |
* the same script, as defined in the {@code Script} class. |
|
2 | 40 |
* It implements a simple iterator over an array of characters. |
35667 | 41 |
* The iterator will assign {@code COMMON} and {@code INHERITED} |
21278 | 42 |
* characters to the same script as the preceding characters. If the |
2 | 43 |
* COMMON and INHERITED characters are first, they will be assigned to |
44 |
* the same script as the following characters. |
|
45 |
* |
|
46 |
* The iterator will try to match paired punctuation. If it sees an |
|
47 |
* opening punctuation character, it will remember the script that |
|
48 |
* was assigned to that character, and assign the same script to the |
|
49 |
* matching closing punctuation. |
|
50 |
* |
|
51 |
* No attempt is made to combine related scripts into a single run. In |
|
52 |
* particular, Hiragana, Katakana, and Han characters will appear in seperate |
|
53 |
* runs. |
|
54 |
||
55 |
* Here is an example of how to iterate over script runs: |
|
56 |
* <pre> |
|
57 |
* void printScriptRuns(char[] text) |
|
58 |
* { |
|
59 |
* ScriptRun scriptRun = new ScriptRun(text, 0, text.length); |
|
60 |
* |
|
61 |
* while (scriptRun.next()) { |
|
62 |
* int start = scriptRun.getScriptStart(); |
|
63 |
* int limit = scriptRun.getScriptLimit(); |
|
64 |
* int script = scriptRun.getScriptCode(); |
|
65 |
* |
|
66 |
* System.out.println("Script \"" + Script.getName(script) + "\" from " + |
|
67 |
* start + " to " + limit + "."); |
|
68 |
* } |
|
69 |
* } |
|
70 |
* </pre> |
|
71 |
* |
|
72 |
*/ |
|
73 |
public final class ScriptRun |
|
74 |
{ |
|
75 |
private char[] text; // fixed once set by constructor |
|
76 |
private int textStart; |
|
77 |
private int textLimit; |
|
78 |
||
79 |
private int scriptStart; // change during iteration |
|
80 |
private int scriptLimit; |
|
81 |
private int scriptCode; |
|
82 |
||
83 |
private int stack[]; // stack used to handle paired punctuation if encountered |
|
84 |
private int parenSP; |
|
85 |
||
86 |
public ScriptRun() { |
|
87 |
// must call init later or we die. |
|
88 |
} |
|
89 |
||
90 |
/** |
|
35667 | 91 |
* Construct a {@code ScriptRun} object which iterates over a subrange |
2 | 92 |
* of the given characetrs. |
93 |
* |
|
94 |
* @param chars the array of characters over which to iterate. |
|
95 |
* @param start the index of the first character over which to iterate |
|
96 |
* @param count the number of characters over which to iterate |
|
97 |
*/ |
|
98 |
public ScriptRun(char[] chars, int start, int count) |
|
99 |
{ |
|
100 |
init(chars, start, count); |
|
101 |
} |
|
102 |
||
103 |
public void init(char[] chars, int start, int count) |
|
104 |
{ |
|
105 |
if (chars == null || start < 0 || count < 0 || count > chars.length - start) { |
|
106 |
throw new IllegalArgumentException(); |
|
107 |
} |
|
108 |
||
109 |
text = chars; |
|
110 |
textStart = start; |
|
111 |
textLimit = start + count; |
|
112 |
||
113 |
scriptStart = textStart; |
|
114 |
scriptLimit = textStart; |
|
115 |
scriptCode = Script.INVALID_CODE; |
|
116 |
parenSP = 0; |
|
117 |
} |
|
118 |
||
119 |
/** |
|
120 |
* Get the starting index of the current script run. |
|
121 |
* |
|
122 |
* @return the index of the first character in the current script run. |
|
123 |
*/ |
|
26004
7507a1b93f67
6521783: Unnecessary final modifier for a method in a final class
serb
parents:
21278
diff
changeset
|
124 |
public int getScriptStart() { |
2 | 125 |
return scriptStart; |
126 |
} |
|
127 |
||
128 |
/** |
|
129 |
* Get the index of the first character after the current script run. |
|
130 |
* |
|
131 |
* @return the index of the first character after the current script run. |
|
132 |
*/ |
|
26004
7507a1b93f67
6521783: Unnecessary final modifier for a method in a final class
serb
parents:
21278
diff
changeset
|
133 |
public int getScriptLimit() { |
2 | 134 |
return scriptLimit; |
135 |
} |
|
136 |
||
137 |
/** |
|
138 |
* Get the script code for the script of the current script run. |
|
139 |
* |
|
140 |
* @return the script code for the script of the current script run. |
|
30948
0a0972d3b58d
6587235: Incorrect javadoc: "no parameter" in 2d source code
serb
parents:
26037
diff
changeset
|
141 |
* @see Script |
2 | 142 |
*/ |
26004
7507a1b93f67
6521783: Unnecessary final modifier for a method in a final class
serb
parents:
21278
diff
changeset
|
143 |
public int getScriptCode() { |
2 | 144 |
return scriptCode; |
145 |
} |
|
146 |
||
147 |
/** |
|
35667 | 148 |
* Find the next script run. Returns {@code false} if there |
149 |
* isn't another run, returns {@code true} if there is. |
|
2 | 150 |
* |
35667 | 151 |
* @return {@code false} if there isn't another run, {@code true} if there is. |
2 | 152 |
*/ |
26004
7507a1b93f67
6521783: Unnecessary final modifier for a method in a final class
serb
parents:
21278
diff
changeset
|
153 |
public boolean next() { |
2 | 154 |
int startSP = parenSP; // used to find the first new open character |
155 |
||
156 |
// if we've fallen off the end of the text, we're done |
|
157 |
if (scriptLimit >= textLimit) { |
|
158 |
return false; |
|
159 |
} |
|
160 |
||
161 |
scriptCode = Script.COMMON; |
|
162 |
scriptStart = scriptLimit; |
|
163 |
||
164 |
int ch; |
|
165 |
||
166 |
while ((ch = nextCodePoint()) != DONE) { |
|
167 |
int sc = ScriptRunData.getScript(ch); |
|
168 |
int pairIndex = sc == Script.COMMON ? getPairIndex(ch) : -1; |
|
169 |
||
170 |
// Paired character handling: |
|
171 |
// |
|
172 |
// if it's an open character, push it onto the stack. |
|
173 |
// if it's a close character, find the matching open on the |
|
174 |
// stack, and use that script code. Any non-matching open |
|
175 |
// characters above it on the stack will be popped. |
|
176 |
if (pairIndex >= 0) { |
|
177 |
if ((pairIndex & 1) == 0) { |
|
178 |
if (stack == null) { |
|
179 |
stack = new int[32]; |
|
180 |
} else if (parenSP == stack.length) { |
|
181 |
int[] newstack = new int[stack.length + 32]; |
|
182 |
System.arraycopy(stack, 0, newstack, 0, stack.length); |
|
183 |
stack = newstack; |
|
184 |
} |
|
185 |
||
186 |
stack[parenSP++] = pairIndex; |
|
187 |
stack[parenSP++] = scriptCode; |
|
188 |
} else if (parenSP > 0) { |
|
189 |
int pi = pairIndex & ~1; |
|
190 |
||
191 |
while ((parenSP -= 2) >= 0 && stack[parenSP] != pi); |
|
192 |
||
193 |
if (parenSP >= 0) { |
|
194 |
sc = stack[parenSP+1]; |
|
195 |
} else { |
|
196 |
parenSP = 0; |
|
197 |
} |
|
198 |
if (parenSP < startSP) { |
|
199 |
startSP = parenSP; |
|
200 |
} |
|
201 |
} |
|
202 |
} |
|
203 |
||
204 |
if (sameScript(scriptCode, sc)) { |
|
205 |
if (scriptCode <= Script.INHERITED && sc > Script.INHERITED) { |
|
206 |
scriptCode = sc; |
|
207 |
||
208 |
// now that we have a final script code, fix any open |
|
209 |
// characters we pushed before we knew the script code. |
|
210 |
while (startSP < parenSP) { |
|
211 |
stack[startSP+1] = scriptCode; |
|
212 |
startSP += 2; |
|
213 |
} |
|
214 |
} |
|
215 |
||
216 |
// if this character is a close paired character, |
|
217 |
// pop it from the stack |
|
218 |
if (pairIndex > 0 && (pairIndex & 1) != 0 && parenSP > 0) { |
|
219 |
parenSP -= 2; |
|
220 |
} |
|
221 |
} else { |
|
222 |
// We've just seen the first character of |
|
223 |
// the next run. Back over it so we'll see |
|
224 |
// it again the next time. |
|
225 |
pushback(ch); |
|
226 |
||
227 |
// we're outta here |
|
228 |
break; |
|
229 |
} |
|
230 |
} |
|
231 |
||
232 |
return true; |
|
233 |
} |
|
234 |
||
235 |
static final int SURROGATE_START = 0x10000; |
|
236 |
static final int LEAD_START = 0xd800; |
|
237 |
static final int LEAD_LIMIT = 0xdc00; |
|
238 |
static final int TAIL_START = 0xdc00; |
|
239 |
static final int TAIL_LIMIT = 0xe000; |
|
240 |
static final int LEAD_SURROGATE_SHIFT = 10; |
|
241 |
static final int SURROGATE_OFFSET = SURROGATE_START - (LEAD_START << LEAD_SURROGATE_SHIFT) - TAIL_START; |
|
242 |
||
243 |
static final int DONE = -1; |
|
244 |
||
26004
7507a1b93f67
6521783: Unnecessary final modifier for a method in a final class
serb
parents:
21278
diff
changeset
|
245 |
private int nextCodePoint() { |
2 | 246 |
if (scriptLimit >= textLimit) { |
247 |
return DONE; |
|
248 |
} |
|
249 |
int ch = text[scriptLimit++]; |
|
250 |
if (ch >= LEAD_START && ch < LEAD_LIMIT && scriptLimit < textLimit) { |
|
251 |
int nch = text[scriptLimit]; |
|
252 |
if (nch >= TAIL_START && nch < TAIL_LIMIT) { |
|
253 |
++scriptLimit; |
|
254 |
ch = (ch << LEAD_SURROGATE_SHIFT) + nch + SURROGATE_OFFSET; |
|
255 |
} |
|
256 |
} |
|
257 |
return ch; |
|
258 |
} |
|
259 |
||
26004
7507a1b93f67
6521783: Unnecessary final modifier for a method in a final class
serb
parents:
21278
diff
changeset
|
260 |
private void pushback(int ch) { |
2 | 261 |
if (ch >= 0) { |
262 |
if (ch >= 0x10000) { |
|
263 |
scriptLimit -= 2; |
|
264 |
} else { |
|
265 |
scriptLimit -= 1; |
|
266 |
} |
|
267 |
} |
|
268 |
} |
|
269 |
||
270 |
/** |
|
271 |
* Compare two script codes to see if they are in the same script. If one script is |
|
272 |
* a strong script, and the other is INHERITED or COMMON, it will compare equal. |
|
273 |
* |
|
274 |
* @param scriptOne one of the script codes. |
|
275 |
* @param scriptTwo the other script code. |
|
35667 | 276 |
* @return {@code true} if the two scripts are the same. |
30948
0a0972d3b58d
6587235: Incorrect javadoc: "no parameter" in 2d source code
serb
parents:
26037
diff
changeset
|
277 |
* @see Script |
2 | 278 |
*/ |
279 |
private static boolean sameScript(int scriptOne, int scriptTwo) { |
|
280 |
return scriptOne == scriptTwo || scriptOne <= Script.INHERITED || scriptTwo <= Script.INHERITED; |
|
281 |
} |
|
282 |
||
283 |
/** |
|
284 |
* Find the highest bit that's set in a word. Uses a binary search through |
|
285 |
* the bits. |
|
286 |
* |
|
287 |
* @param n the word in which to find the highest bit that's set. |
|
288 |
* @return the bit number (counting from the low order bit) of the highest bit. |
|
289 |
*/ |
|
26004
7507a1b93f67
6521783: Unnecessary final modifier for a method in a final class
serb
parents:
21278
diff
changeset
|
290 |
private static byte highBit(int n) |
2 | 291 |
{ |
292 |
if (n <= 0) { |
|
293 |
return -32; |
|
294 |
} |
|
295 |
||
296 |
byte bit = 0; |
|
297 |
||
298 |
if (n >= 1 << 16) { |
|
299 |
n >>= 16; |
|
300 |
bit += 16; |
|
301 |
} |
|
302 |
||
303 |
if (n >= 1 << 8) { |
|
304 |
n >>= 8; |
|
305 |
bit += 8; |
|
306 |
} |
|
307 |
||
308 |
if (n >= 1 << 4) { |
|
309 |
n >>= 4; |
|
310 |
bit += 4; |
|
311 |
} |
|
312 |
||
313 |
if (n >= 1 << 2) { |
|
314 |
n >>= 2; |
|
315 |
bit += 2; |
|
316 |
} |
|
317 |
||
318 |
if (n >= 1 << 1) { |
|
319 |
n >>= 1; |
|
320 |
bit += 1; |
|
321 |
} |
|
322 |
||
323 |
return bit; |
|
324 |
} |
|
325 |
||
326 |
/** |
|
327 |
* Search the pairedChars array for the given character. |
|
328 |
* |
|
329 |
* @param ch the character for which to search. |
|
330 |
* @return the index of the character in the table, or -1 if it's not there. |
|
331 |
*/ |
|
332 |
private static int getPairIndex(int ch) |
|
333 |
{ |
|
334 |
int probe = pairedCharPower; |
|
335 |
int index = 0; |
|
336 |
||
337 |
if (ch >= pairedChars[pairedCharExtra]) { |
|
338 |
index = pairedCharExtra; |
|
339 |
} |
|
340 |
||
341 |
while (probe > (1 << 0)) { |
|
342 |
probe >>= 1; |
|
343 |
||
344 |
if (ch >= pairedChars[index + probe]) { |
|
345 |
index += probe; |
|
346 |
} |
|
347 |
} |
|
348 |
||
349 |
if (pairedChars[index] != ch) { |
|
350 |
index = -1; |
|
351 |
} |
|
352 |
||
353 |
return index; |
|
354 |
} |
|
355 |
||
356 |
// all common |
|
357 |
private static int pairedChars[] = { |
|
358 |
0x0028, 0x0029, // ascii paired punctuation // common |
|
359 |
0x003c, 0x003e, // common |
|
360 |
0x005b, 0x005d, // common |
|
361 |
0x007b, 0x007d, // common |
|
362 |
0x00ab, 0x00bb, // guillemets // common |
|
363 |
0x2018, 0x2019, // general punctuation // common |
|
364 |
0x201c, 0x201d, // common |
|
365 |
0x2039, 0x203a, // common |
|
366 |
0x3008, 0x3009, // chinese paired punctuation // common |
|
367 |
0x300a, 0x300b, |
|
368 |
0x300c, 0x300d, |
|
369 |
0x300e, 0x300f, |
|
370 |
0x3010, 0x3011, |
|
371 |
0x3014, 0x3015, |
|
372 |
0x3016, 0x3017, |
|
373 |
0x3018, 0x3019, |
|
374 |
0x301a, 0x301b |
|
375 |
}; |
|
376 |
||
377 |
private static final int pairedCharPower = 1 << highBit(pairedChars.length); |
|
378 |
private static final int pairedCharExtra = pairedChars.length - pairedCharPower; |
|
379 |
||
380 |
} |