author | prappo |
Tue, 13 Nov 2018 12:24:34 +0000 | |
changeset 52499 | 768b1c612100 |
parent 47216 | 71c04702a3d5 |
permissions | -rw-r--r-- |
2 | 1 |
/* |
19069 | 2 |
* Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
package java.net; |
|
26 |
||
27 |
import java.io.InputStream; |
|
28 |
import java.io.IOException; |
|
29 |
import java.security.AccessController; |
|
30 |
import java.security.PrivilegedAction; |
|
31 |
||
32 |
import sun.net.idn.StringPrep; |
|
33 |
import sun.net.idn.Punycode; |
|
34 |
import sun.text.normalizer.UCharacterIterator; |
|
35 |
||
36 |
/** |
|
37 |
* Provides methods to convert internationalized domain names (IDNs) between |
|
38 |
* a normal Unicode representation and an ASCII Compatible Encoding (ACE) representation. |
|
39 |
* Internationalized domain names can use characters from the entire range of |
|
40 |
* Unicode, while traditional domain names are restricted to ASCII characters. |
|
41 |
* ACE is an encoding of Unicode strings that uses only ASCII characters and |
|
42 |
* can be used with software (such as the Domain Name System) that only |
|
43 |
* understands traditional domain names. |
|
44 |
* |
|
45 |
* <p>Internationalized domain names are defined in <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>. |
|
46 |
* RFC 3490 defines two operations: ToASCII and ToUnicode. These 2 operations employ |
|
47 |
* <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a> algorithm, which is a |
|
48 |
* profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a>, and |
|
49 |
* <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a> algorithm to convert |
|
50 |
* domain name string back and forth. |
|
51 |
* |
|
52 |
* <p>The behavior of aforementioned conversion process can be adjusted by various flags: |
|
53 |
* <ul> |
|
54 |
* <li>If the ALLOW_UNASSIGNED flag is used, the domain name string to be converted |
|
55 |
* can contain code points that are unassigned in Unicode 3.2, which is the |
|
56 |
* Unicode version on which IDN conversion is based. If the flag is not used, |
|
57 |
* the presence of such unassigned code points is treated as an error. |
|
58 |
* <li>If the USE_STD3_ASCII_RULES flag is used, ASCII strings are checked against <a href="http://www.ietf.org/rfc/rfc1122.txt">RFC 1122</a> and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC 1123</a>. |
|
59 |
* It is an error if they don't meet the requirements. |
|
60 |
* </ul> |
|
61 |
* These flags can be logically OR'ed together. |
|
62 |
* |
|
63 |
* <p>The security consideration is important with respect to internationalization |
|
64 |
* domain name support. For example, English domain names may be <i>homographed</i> |
|
65 |
* - maliciously misspelled by substitution of non-Latin letters. |
|
66 |
* <a href="http://www.unicode.org/reports/tr36/">Unicode Technical Report #36</a> |
|
67 |
* discusses security issues of IDN support as well as possible solutions. |
|
68 |
* Applications are responsible for taking adequate security measures when using |
|
69 |
* international domain names. |
|
70 |
* |
|
71 |
* @author Edward Wang |
|
72 |
* @since 1.6 |
|
73 |
* |
|
74 |
*/ |
|
75 |
public final class IDN { |
|
76 |
/** |
|
77 |
* Flag to allow processing of unassigned code points |
|
78 |
*/ |
|
79 |
public static final int ALLOW_UNASSIGNED = 0x01; |
|
80 |
||
81 |
/** |
|
82 |
* Flag to turn on the check against STD-3 ASCII rules |
|
83 |
*/ |
|
84 |
public static final int USE_STD3_ASCII_RULES = 0x02; |
|
85 |
||
86 |
||
87 |
/** |
|
88 |
* Translates a string from Unicode to ASCII Compatible Encoding (ACE), |
|
89 |
* as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>. |
|
90 |
* |
|
91 |
* <p>ToASCII operation can fail. ToASCII fails if any step of it fails. |
|
92 |
* If ToASCII operation fails, an IllegalArgumentException will be thrown. |
|
93 |
* In this case, the input string should not be used in an internationalized domain name. |
|
94 |
* |
|
95 |
* <p> A label is an individual part of a domain name. The original ToASCII operation, |
|
96 |
* as defined in RFC 3490, only operates on a single label. This method can handle |
|
97 |
* both label and entire domain name, by assuming that labels in a domain name are |
|
98 |
* always separated by dots. The following characters are recognized as dots: |
|
99 |
* \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop), |
|
100 |
* and \uFF61 (halfwidth ideographic full stop). if dots are |
|
101 |
* used as label separators, this method also changes all of them to \u002E (full stop) |
|
102 |
* in output translated string. |
|
103 |
* |
|
104 |
* @param input the string to be processed |
|
105 |
* @param flag process flag; can be 0 or any logical OR of possible flags |
|
106 |
* |
|
19069 | 107 |
* @return the translated {@code String} |
2 | 108 |
* |
109 |
* @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification |
|
110 |
*/ |
|
111 |
public static String toASCII(String input, int flag) |
|
112 |
{ |
|
113 |
int p = 0, q = 0; |
|
24969
afa6934dd8e8
8041679: Replace uses of StringBuffer with StringBuilder within core library classes
psandoz
parents:
19790
diff
changeset
|
114 |
StringBuilder out = new StringBuilder(); |
2 | 115 |
|
19440
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
116 |
if (isRootLabel(input)) { |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
117 |
return "."; |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
118 |
} |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
119 |
|
2 | 120 |
while (p < input.length()) { |
121 |
q = searchDots(input, p); |
|
122 |
out.append(toASCIIInternal(input.substring(p, q), flag)); |
|
19440
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
123 |
if (q != (input.length())) { |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
124 |
// has more labels, or keep the trailing dot as at present |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
125 |
out.append('.'); |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
126 |
} |
2 | 127 |
p = q + 1; |
128 |
} |
|
129 |
||
130 |
return out.toString(); |
|
131 |
} |
|
132 |
||
133 |
||
134 |
/** |
|
135 |
* Translates a string from Unicode to ASCII Compatible Encoding (ACE), |
|
136 |
* as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>. |
|
137 |
* |
|
138 |
* <p> This convenience method works as if by invoking the |
|
139 |
* two-argument counterpart as follows: |
|
19069 | 140 |
* <blockquote> |
2 | 141 |
* {@link #toASCII(String, int) toASCII}(input, 0); |
19069 | 142 |
* </blockquote> |
2 | 143 |
* |
144 |
* @param input the string to be processed |
|
145 |
* |
|
19069 | 146 |
* @return the translated {@code String} |
2 | 147 |
* |
148 |
* @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification |
|
149 |
*/ |
|
150 |
public static String toASCII(String input) { |
|
151 |
return toASCII(input, 0); |
|
152 |
} |
|
153 |
||
154 |
||
155 |
/** |
|
156 |
* Translates a string from ASCII Compatible Encoding (ACE) to Unicode, |
|
157 |
* as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>. |
|
158 |
* |
|
159 |
* <p>ToUnicode never fails. In case of any error, the input string is returned unmodified. |
|
160 |
* |
|
161 |
* <p> A label is an individual part of a domain name. The original ToUnicode operation, |
|
162 |
* as defined in RFC 3490, only operates on a single label. This method can handle |
|
163 |
* both label and entire domain name, by assuming that labels in a domain name are |
|
164 |
* always separated by dots. The following characters are recognized as dots: |
|
165 |
* \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop), |
|
166 |
* and \uFF61 (halfwidth ideographic full stop). |
|
167 |
* |
|
168 |
* @param input the string to be processed |
|
169 |
* @param flag process flag; can be 0 or any logical OR of possible flags |
|
170 |
* |
|
19069 | 171 |
* @return the translated {@code String} |
2 | 172 |
*/ |
173 |
public static String toUnicode(String input, int flag) { |
|
174 |
int p = 0, q = 0; |
|
24969
afa6934dd8e8
8041679: Replace uses of StringBuffer with StringBuilder within core library classes
psandoz
parents:
19790
diff
changeset
|
175 |
StringBuilder out = new StringBuilder(); |
2 | 176 |
|
19440
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
177 |
if (isRootLabel(input)) { |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
178 |
return "."; |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
179 |
} |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
180 |
|
2 | 181 |
while (p < input.length()) { |
182 |
q = searchDots(input, p); |
|
183 |
out.append(toUnicodeInternal(input.substring(p, q), flag)); |
|
19440
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
184 |
if (q != (input.length())) { |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
185 |
// has more labels, or keep the trailing dot as at present |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
186 |
out.append('.'); |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
187 |
} |
2 | 188 |
p = q + 1; |
189 |
} |
|
190 |
||
191 |
return out.toString(); |
|
192 |
} |
|
193 |
||
194 |
||
195 |
/** |
|
196 |
* Translates a string from ASCII Compatible Encoding (ACE) to Unicode, |
|
197 |
* as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>. |
|
198 |
* |
|
199 |
* <p> This convenience method works as if by invoking the |
|
200 |
* two-argument counterpart as follows: |
|
19069 | 201 |
* <blockquote> |
2 | 202 |
* {@link #toUnicode(String, int) toUnicode}(input, 0); |
19069 | 203 |
* </blockquote> |
2 | 204 |
* |
205 |
* @param input the string to be processed |
|
206 |
* |
|
19069 | 207 |
* @return the translated {@code String} |
2 | 208 |
*/ |
209 |
public static String toUnicode(String input) { |
|
210 |
return toUnicode(input, 0); |
|
211 |
} |
|
212 |
||
213 |
||
214 |
/* ---------------- Private members -------------- */ |
|
215 |
||
216 |
// ACE Prefix is "xn--" |
|
217 |
private static final String ACE_PREFIX = "xn--"; |
|
218 |
private static final int ACE_PREFIX_LENGTH = ACE_PREFIX.length(); |
|
219 |
||
220 |
private static final int MAX_LABEL_LENGTH = 63; |
|
221 |
||
222 |
// single instance of nameprep |
|
223 |
private static StringPrep namePrep = null; |
|
224 |
||
225 |
static { |
|
226 |
InputStream stream = null; |
|
227 |
||
228 |
try { |
|
229 |
final String IDN_PROFILE = "uidna.spp"; |
|
230 |
if (System.getSecurityManager() != null) { |
|
29986
97167d851fc4
8078467: Update core libraries to use diamond with anonymous classes
darcy
parents:
25859
diff
changeset
|
231 |
stream = AccessController.doPrivileged(new PrivilegedAction<>() { |
2 | 232 |
public InputStream run() { |
233 |
return StringPrep.class.getResourceAsStream(IDN_PROFILE); |
|
234 |
} |
|
235 |
}); |
|
236 |
} else { |
|
237 |
stream = StringPrep.class.getResourceAsStream(IDN_PROFILE); |
|
238 |
} |
|
239 |
||
240 |
namePrep = new StringPrep(stream); |
|
241 |
stream.close(); |
|
242 |
} catch (IOException e) { |
|
243 |
// should never reach here |
|
244 |
assert false; |
|
245 |
} |
|
246 |
} |
|
247 |
||
248 |
||
249 |
/* ---------------- Private operations -------------- */ |
|
250 |
||
251 |
||
252 |
// |
|
253 |
// to suppress the default zero-argument constructor |
|
254 |
// |
|
255 |
private IDN() {} |
|
256 |
||
257 |
// |
|
258 |
// toASCII operation; should only apply to a single label |
|
259 |
// |
|
260 |
private static String toASCIIInternal(String label, int flag) |
|
261 |
{ |
|
262 |
// step 1 |
|
263 |
// Check if the string contains code points outside the ASCII range 0..0x7c. |
|
264 |
boolean isASCII = isAllASCII(label); |
|
265 |
StringBuffer dest; |
|
266 |
||
267 |
// step 2 |
|
268 |
// perform the nameprep operation; flag ALLOW_UNASSIGNED is used here |
|
269 |
if (!isASCII) { |
|
270 |
UCharacterIterator iter = UCharacterIterator.getInstance(label); |
|
271 |
try { |
|
272 |
dest = namePrep.prepare(iter, flag); |
|
273 |
} catch (java.text.ParseException e) { |
|
274 |
throw new IllegalArgumentException(e); |
|
275 |
} |
|
276 |
} else { |
|
277 |
dest = new StringBuffer(label); |
|
278 |
} |
|
279 |
||
19440
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
280 |
// step 8, move forward to check the smallest number of the code points |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
281 |
// the length must be inside 1..63 |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
282 |
if (dest.length() == 0) { |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
283 |
throw new IllegalArgumentException( |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
284 |
"Empty label is not a legal name"); |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
285 |
} |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
286 |
|
2 | 287 |
// step 3 |
288 |
// Verify the absence of non-LDH ASCII code points |
|
289 |
// 0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f |
|
290 |
// Verify the absence of leading and trailing hyphen |
|
291 |
boolean useSTD3ASCIIRules = ((flag & USE_STD3_ASCII_RULES) != 0); |
|
292 |
if (useSTD3ASCIIRules) { |
|
293 |
for (int i = 0; i < dest.length(); i++) { |
|
294 |
int c = dest.charAt(i); |
|
19790
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
295 |
if (isNonLDHAsciiCodePoint(c)) { |
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
296 |
throw new IllegalArgumentException( |
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
297 |
"Contains non-LDH ASCII characters"); |
2 | 298 |
} |
299 |
} |
|
300 |
||
19790
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
301 |
if (dest.charAt(0) == '-' || |
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
302 |
dest.charAt(dest.length() - 1) == '-') { |
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
303 |
|
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
304 |
throw new IllegalArgumentException( |
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
305 |
"Has leading or trailing hyphen"); |
2 | 306 |
} |
307 |
} |
|
308 |
||
309 |
if (!isASCII) { |
|
310 |
// step 4 |
|
311 |
// If all code points are inside 0..0x7f, skip to step 8 |
|
312 |
if (!isAllASCII(dest.toString())) { |
|
313 |
// step 5 |
|
314 |
// verify the sequence does not begin with ACE prefix |
|
315 |
if(!startsWithACEPrefix(dest)){ |
|
316 |
||
317 |
// step 6 |
|
318 |
// encode the sequence with punycode |
|
319 |
try { |
|
320 |
dest = Punycode.encode(dest, null); |
|
321 |
} catch (java.text.ParseException e) { |
|
322 |
throw new IllegalArgumentException(e); |
|
323 |
} |
|
324 |
||
325 |
dest = toASCIILower(dest); |
|
326 |
||
327 |
// step 7 |
|
328 |
// prepend the ACE prefix |
|
329 |
dest.insert(0, ACE_PREFIX); |
|
330 |
} else { |
|
331 |
throw new IllegalArgumentException("The input starts with the ACE Prefix"); |
|
332 |
} |
|
333 |
||
334 |
} |
|
335 |
} |
|
336 |
||
337 |
// step 8 |
|
338 |
// the length must be inside 1..63 |
|
19440
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
339 |
if (dest.length() > MAX_LABEL_LENGTH) { |
2 | 340 |
throw new IllegalArgumentException("The label in the input is too long"); |
341 |
} |
|
342 |
||
343 |
return dest.toString(); |
|
344 |
} |
|
345 |
||
346 |
// |
|
347 |
// toUnicode operation; should only apply to a single label |
|
348 |
// |
|
349 |
private static String toUnicodeInternal(String label, int flag) { |
|
350 |
boolean[] caseFlags = null; |
|
351 |
StringBuffer dest; |
|
352 |
||
353 |
// step 1 |
|
354 |
// find out if all the codepoints in input are ASCII |
|
355 |
boolean isASCII = isAllASCII(label); |
|
356 |
||
357 |
if(!isASCII){ |
|
358 |
// step 2 |
|
359 |
// perform the nameprep operation; flag ALLOW_UNASSIGNED is used here |
|
360 |
try { |
|
361 |
UCharacterIterator iter = UCharacterIterator.getInstance(label); |
|
362 |
dest = namePrep.prepare(iter, flag); |
|
363 |
} catch (Exception e) { |
|
364 |
// toUnicode never fails; if any step fails, return the input string |
|
365 |
return label; |
|
366 |
} |
|
367 |
} else { |
|
368 |
dest = new StringBuffer(label); |
|
369 |
} |
|
370 |
||
371 |
// step 3 |
|
372 |
// verify ACE Prefix |
|
373 |
if(startsWithACEPrefix(dest)) { |
|
374 |
||
375 |
// step 4 |
|
376 |
// Remove the ACE Prefix |
|
377 |
String temp = dest.substring(ACE_PREFIX_LENGTH, dest.length()); |
|
378 |
||
379 |
try { |
|
380 |
// step 5 |
|
381 |
// Decode using punycode |
|
382 |
StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), null); |
|
383 |
||
384 |
// step 6 |
|
385 |
// Apply toASCII |
|
386 |
String toASCIIOut = toASCII(decodeOut.toString(), flag); |
|
387 |
||
388 |
// step 7 |
|
389 |
// verify |
|
390 |
if (toASCIIOut.equalsIgnoreCase(dest.toString())) { |
|
391 |
// step 8 |
|
392 |
// return output of step 5 |
|
393 |
return decodeOut.toString(); |
|
394 |
} |
|
395 |
} catch (Exception ignored) { |
|
396 |
// no-op |
|
397 |
} |
|
398 |
} |
|
399 |
||
400 |
// just return the input |
|
401 |
return label; |
|
402 |
} |
|
403 |
||
404 |
||
405 |
// |
|
406 |
// LDH stands for "letter/digit/hyphen", with characters restricted to the |
|
407 |
// 26-letter Latin alphabet <A-Z a-z>, the digits <0-9>, and the hyphen |
|
19790
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
408 |
// <->. |
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
409 |
// Non LDH refers to characters in the ASCII range, but which are not |
52499
768b1c612100
8213490: Networking area typos and inconsistencies cleanup
prappo
parents:
47216
diff
changeset
|
410 |
// letters, digits or the hyphen. |
19790
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
411 |
// |
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
412 |
// non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7F |
2 | 413 |
// |
19790
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
414 |
private static boolean isNonLDHAsciiCodePoint(int ch){ |
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
415 |
return (0x0000 <= ch && ch <= 0x002C) || |
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
416 |
(0x002E <= ch && ch <= 0x002F) || |
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
417 |
(0x003A <= ch && ch <= 0x0040) || |
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
418 |
(0x005B <= ch && ch <= 0x0060) || |
d97d46e9bddf
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents:
19440
diff
changeset
|
419 |
(0x007B <= ch && ch <= 0x007F); |
2 | 420 |
} |
421 |
||
422 |
// |
|
423 |
// search dots in a string and return the index of that character; |
|
424 |
// or if there is no dots, return the length of input string |
|
425 |
// dots might be: \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop), |
|
426 |
// and \uFF61 (halfwidth ideographic full stop). |
|
427 |
// |
|
428 |
private static int searchDots(String s, int start) { |
|
429 |
int i; |
|
430 |
for (i = start; i < s.length(); i++) { |
|
19440
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
431 |
if (isLabelSeparator(s.charAt(i))) { |
2 | 432 |
break; |
433 |
} |
|
434 |
} |
|
435 |
||
436 |
return i; |
|
437 |
} |
|
438 |
||
19440
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
439 |
// |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
440 |
// to check if a string is a root label, ".". |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
441 |
// |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
442 |
private static boolean isRootLabel(String s) { |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
443 |
return (s.length() == 1 && isLabelSeparator(s.charAt(0))); |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
444 |
} |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
445 |
|
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
446 |
// |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
447 |
// to check if a character is a label separator, i.e. a dot character. |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
448 |
// |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
449 |
private static boolean isLabelSeparator(char c) { |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
450 |
return (c == '.' || c == '\u3002' || c == '\uFF0E' || c == '\uFF61'); |
c4414bc88602
8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents:
19069
diff
changeset
|
451 |
} |
2 | 452 |
|
453 |
// |
|
454 |
// to check if a string only contains US-ASCII code point |
|
455 |
// |
|
456 |
private static boolean isAllASCII(String input) { |
|
457 |
boolean isASCII = true; |
|
458 |
for (int i = 0; i < input.length(); i++) { |
|
459 |
int c = input.charAt(i); |
|
460 |
if (c > 0x7F) { |
|
461 |
isASCII = false; |
|
462 |
break; |
|
463 |
} |
|
464 |
} |
|
465 |
return isASCII; |
|
466 |
} |
|
467 |
||
468 |
// |
|
469 |
// to check if a string starts with ACE-prefix |
|
470 |
// |
|
471 |
private static boolean startsWithACEPrefix(StringBuffer input){ |
|
472 |
boolean startsWithPrefix = true; |
|
473 |
||
474 |
if(input.length() < ACE_PREFIX_LENGTH){ |
|
475 |
return false; |
|
476 |
} |
|
477 |
for(int i = 0; i < ACE_PREFIX_LENGTH; i++){ |
|
478 |
if(toASCIILower(input.charAt(i)) != ACE_PREFIX.charAt(i)){ |
|
479 |
startsWithPrefix = false; |
|
480 |
} |
|
481 |
} |
|
482 |
return startsWithPrefix; |
|
483 |
} |
|
484 |
||
485 |
private static char toASCIILower(char ch){ |
|
486 |
if('A' <= ch && ch <= 'Z'){ |
|
487 |
return (char)(ch + 'a' - 'A'); |
|
488 |
} |
|
489 |
return ch; |
|
490 |
} |
|
491 |
||
492 |
private static StringBuffer toASCIILower(StringBuffer input){ |
|
493 |
StringBuffer dest = new StringBuffer(); |
|
494 |
for(int i = 0; i < input.length();i++){ |
|
495 |
dest.append(toASCIILower(input.charAt(i))); |
|
496 |
} |
|
497 |
return dest; |
|
498 |
} |
|
499 |
} |