jdk-sandbox: jdk/src/share/classes/java/net/IDN.java@1d9cb0d080e3 (annotated)

2 90ce3da70b43 Initial load duke parents: diff changeset	1	/*
19069 1d9cb0d080e3 8021833: javadoc cleanup in java.net juh parents: 5506 diff changeset	2	* Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
2 90ce3da70b43 Initial load duke parents: diff changeset	3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load duke parents: diff changeset	4	*
90ce3da70b43 Initial load duke parents: diff changeset	5	* This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load duke parents: diff changeset	6	* under the terms of the GNU General Public License version 2 only, as
5506 202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2 diff changeset	7	* published by the Free Software Foundation. Oracle designates this
2 90ce3da70b43 Initial load duke parents: diff changeset	8	* particular file as subject to the "Classpath" exception as provided
5506 202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2 diff changeset	9	* by Oracle in the LICENSE file that accompanied this code.
2 90ce3da70b43 Initial load duke parents: diff changeset	10	*
90ce3da70b43 Initial load duke parents: diff changeset	11	* This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load duke parents: diff changeset	12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load duke parents: diff changeset	13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
90ce3da70b43 Initial load duke parents: diff changeset	14	* version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load duke parents: diff changeset	15	* accompanied this code).
90ce3da70b43 Initial load duke parents: diff changeset	16	*
90ce3da70b43 Initial load duke parents: diff changeset	17	* You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load duke parents: diff changeset	18	* 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load duke parents: diff changeset	19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load duke parents: diff changeset	20	*
5506 202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2 diff changeset	21	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2 diff changeset	22	* or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices ohair parents: 2 diff changeset	23	* questions.
2 90ce3da70b43 Initial load duke parents: diff changeset	24	*/
90ce3da70b43 Initial load duke parents: diff changeset	25	package java.net;
90ce3da70b43 Initial load duke parents: diff changeset	26
90ce3da70b43 Initial load duke parents: diff changeset	27	import java.io.InputStream;
90ce3da70b43 Initial load duke parents: diff changeset	28	import java.io.IOException;
90ce3da70b43 Initial load duke parents: diff changeset	29	import java.security.AccessController;
90ce3da70b43 Initial load duke parents: diff changeset	30	import java.security.PrivilegedAction;
90ce3da70b43 Initial load duke parents: diff changeset	31
90ce3da70b43 Initial load duke parents: diff changeset	32	import sun.net.idn.StringPrep;
90ce3da70b43 Initial load duke parents: diff changeset	33	import sun.net.idn.Punycode;
90ce3da70b43 Initial load duke parents: diff changeset	34	import sun.text.normalizer.UCharacterIterator;
90ce3da70b43 Initial load duke parents: diff changeset	35
90ce3da70b43 Initial load duke parents: diff changeset	36	/**
90ce3da70b43 Initial load duke parents: diff changeset	37	* Provides methods to convert internationalized domain names (IDNs) between
90ce3da70b43 Initial load duke parents: diff changeset	38	* a normal Unicode representation and an ASCII Compatible Encoding (ACE) representation.
90ce3da70b43 Initial load duke parents: diff changeset	39	* Internationalized domain names can use characters from the entire range of
90ce3da70b43 Initial load duke parents: diff changeset	40	* Unicode, while traditional domain names are restricted to ASCII characters.
90ce3da70b43 Initial load duke parents: diff changeset	41	* ACE is an encoding of Unicode strings that uses only ASCII characters and
90ce3da70b43 Initial load duke parents: diff changeset	42	* can be used with software (such as the Domain Name System) that only
90ce3da70b43 Initial load duke parents: diff changeset	43	* understands traditional domain names.
90ce3da70b43 Initial load duke parents: diff changeset	44	*
90ce3da70b43 Initial load duke parents: diff changeset	45	* <p>Internationalized domain names are defined in <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
90ce3da70b43 Initial load duke parents: diff changeset	46	* RFC 3490 defines two operations: ToASCII and ToUnicode. These 2 operations employ
90ce3da70b43 Initial load duke parents: diff changeset	47	* <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a> algorithm, which is a
90ce3da70b43 Initial load duke parents: diff changeset	48	* profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a>, and
90ce3da70b43 Initial load duke parents: diff changeset	49	* <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a> algorithm to convert
90ce3da70b43 Initial load duke parents: diff changeset	50	* domain name string back and forth.
90ce3da70b43 Initial load duke parents: diff changeset	51	*
90ce3da70b43 Initial load duke parents: diff changeset	52	* <p>The behavior of aforementioned conversion process can be adjusted by various flags:
90ce3da70b43 Initial load duke parents: diff changeset	53	* <ul>
90ce3da70b43 Initial load duke parents: diff changeset	54	* <li>If the ALLOW_UNASSIGNED flag is used, the domain name string to be converted
90ce3da70b43 Initial load duke parents: diff changeset	55	* can contain code points that are unassigned in Unicode 3.2, which is the
90ce3da70b43 Initial load duke parents: diff changeset	56	* Unicode version on which IDN conversion is based. If the flag is not used,
90ce3da70b43 Initial load duke parents: diff changeset	57	* the presence of such unassigned code points is treated as an error.
90ce3da70b43 Initial load duke parents: diff changeset	58	* <li>If the USE_STD3_ASCII_RULES flag is used, ASCII strings are checked against <a href="http://www.ietf.org/rfc/rfc1122.txt">RFC 1122</a> and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC 1123</a>.
90ce3da70b43 Initial load duke parents: diff changeset	59	* It is an error if they don't meet the requirements.
90ce3da70b43 Initial load duke parents: diff changeset	60	* </ul>
90ce3da70b43 Initial load duke parents: diff changeset	61	* These flags can be logically OR'ed together.
90ce3da70b43 Initial load duke parents: diff changeset	62	*
90ce3da70b43 Initial load duke parents: diff changeset	63	* <p>Security considerations are important with respect to internationalized
90ce3da70b43 Initial load duke parents: diff changeset	64	* domain name support. For example, English domain names may be <i>homographed</i>
90ce3da70b43 Initial load duke parents: diff changeset	65	* - maliciously misspelled by substitution of non-Latin letters.
90ce3da70b43 Initial load duke parents: diff changeset	66	* <a href="http://www.unicode.org/reports/tr36/">Unicode Technical Report #36</a>
90ce3da70b43 Initial load duke parents: diff changeset	67	* discusses security issues of IDN support as well as possible solutions.
90ce3da70b43 Initial load duke parents: diff changeset	68	* Applications are responsible for taking adequate security measures when using
90ce3da70b43 Initial load duke parents: diff changeset	69	* international domain names.
90ce3da70b43 Initial load duke parents: diff changeset	70	*
90ce3da70b43 Initial load duke parents: diff changeset	71	* @author Edward Wang
90ce3da70b43 Initial load duke parents: diff changeset	72	* @since 1.6
90ce3da70b43 Initial load duke parents: diff changeset	73	*
90ce3da70b43 Initial load duke parents: diff changeset	74	*/
90ce3da70b43 Initial load duke parents: diff changeset	75	public final class IDN {
90ce3da70b43 Initial load duke parents: diff changeset	76	/**
90ce3da70b43 Initial load duke parents: diff changeset	77	* Flag to allow processing of unassigned code points
90ce3da70b43 Initial load duke parents: diff changeset	78	*/
90ce3da70b43 Initial load duke parents: diff changeset	79	public static final int ALLOW_UNASSIGNED = 0x01;
90ce3da70b43 Initial load duke parents: diff changeset	80
90ce3da70b43 Initial load duke parents: diff changeset	81	/**
90ce3da70b43 Initial load duke parents: diff changeset	82	* Flag to turn on the check against STD-3 ASCII rules
90ce3da70b43 Initial load duke parents: diff changeset	83	*/
90ce3da70b43 Initial load duke parents: diff changeset	84	public static final int USE_STD3_ASCII_RULES = 0x02;
90ce3da70b43 Initial load duke parents: diff changeset	85
90ce3da70b43 Initial load duke parents: diff changeset	86
90ce3da70b43 Initial load duke parents: diff changeset	87	/**
90ce3da70b43 Initial load duke parents: diff changeset	88	* Translates a string from Unicode to ASCII Compatible Encoding (ACE),
90ce3da70b43 Initial load duke parents: diff changeset	89	* as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
90ce3da70b43 Initial load duke parents: diff changeset	90	*
90ce3da70b43 Initial load duke parents: diff changeset	91	* <p>ToASCII operation can fail. ToASCII fails if any step of it fails.
90ce3da70b43 Initial load duke parents: diff changeset	92	* If ToASCII operation fails, an IllegalArgumentException will be thrown.
90ce3da70b43 Initial load duke parents: diff changeset	93	* In this case, the input string should not be used in an internationalized domain name.
90ce3da70b43 Initial load duke parents: diff changeset	94	*
90ce3da70b43 Initial load duke parents: diff changeset	95	* <p> A label is an individual part of a domain name. The original ToASCII operation,
90ce3da70b43 Initial load duke parents: diff changeset	96	* as defined in RFC 3490, only operates on a single label. This method can handle
90ce3da70b43 Initial load duke parents: diff changeset	97	* both label and entire domain name, by assuming that labels in a domain name are
90ce3da70b43 Initial load duke parents: diff changeset	98	* always separated by dots. The following characters are recognized as dots:
90ce3da70b43 Initial load duke parents: diff changeset	99	* \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop),
90ce3da70b43 Initial load duke parents: diff changeset	100	* and \uFF61 (halfwidth ideographic full stop). If dots are
90ce3da70b43 Initial load duke parents: diff changeset	101	* used as label separators, this method also changes all of them to \u002E (full stop)
90ce3da70b43 Initial load duke parents: diff changeset	102	* in output translated string.
90ce3da70b43 Initial load duke parents: diff changeset	103	*
90ce3da70b43 Initial load duke parents: diff changeset	104	* @param input the string to be processed
90ce3da70b43 Initial load duke parents: diff changeset	105	* @param flag process flag; can be 0 or any logical OR of possible flags
90ce3da70b43 Initial load duke parents: diff changeset	106	*
19069 1d9cb0d080e3 8021833: javadoc cleanup in java.net juh parents: 5506 diff changeset	107	* @return the translated {@code String}
2 90ce3da70b43 Initial load duke parents: diff changeset	108	*
90ce3da70b43 Initial load duke parents: diff changeset	109	* @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification
90ce3da70b43 Initial load duke parents: diff changeset	110	*/
90ce3da70b43 Initial load duke parents: diff changeset	111	public static String toASCII(String input, int flag)
90ce3da70b43 Initial load duke parents: diff changeset	112	{
90ce3da70b43 Initial load duke parents: diff changeset	113	int p = 0, q = 0;
90ce3da70b43 Initial load duke parents: diff changeset	114	StringBuffer out = new StringBuffer();
90ce3da70b43 Initial load duke parents: diff changeset	115
90ce3da70b43 Initial load duke parents: diff changeset	116	while (p < input.length()) {
90ce3da70b43 Initial load duke parents: diff changeset	117	q = searchDots(input, p);
90ce3da70b43 Initial load duke parents: diff changeset	118	out.append(toASCIIInternal(input.substring(p, q), flag));
90ce3da70b43 Initial load duke parents: diff changeset	119	p = q + 1;
90ce3da70b43 Initial load duke parents: diff changeset	120	if (p < input.length()) out.append('.');
90ce3da70b43 Initial load duke parents: diff changeset	121	}
90ce3da70b43 Initial load duke parents: diff changeset	122
90ce3da70b43 Initial load duke parents: diff changeset	123	return out.toString();
90ce3da70b43 Initial load duke parents: diff changeset	124	}
90ce3da70b43 Initial load duke parents: diff changeset	125
90ce3da70b43 Initial load duke parents: diff changeset	126
90ce3da70b43 Initial load duke parents: diff changeset	127	/**
90ce3da70b43 Initial load duke parents: diff changeset	128	* Translates a string from Unicode to ASCII Compatible Encoding (ACE),
90ce3da70b43 Initial load duke parents: diff changeset	129	* as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
90ce3da70b43 Initial load duke parents: diff changeset	130	*
90ce3da70b43 Initial load duke parents: diff changeset	131	* <p> This convenience method works as if by invoking the
90ce3da70b43 Initial load duke parents: diff changeset	132	* two-argument counterpart as follows:
19069 1d9cb0d080e3 8021833: javadoc cleanup in java.net juh parents: 5506 diff changeset	133	* <blockquote>
2 90ce3da70b43 Initial load duke parents: diff changeset	134	* {@link #toASCII(String, int) toASCII}(input, 0);
19069 1d9cb0d080e3 8021833: javadoc cleanup in java.net juh parents: 5506 diff changeset	135	* </blockquote>
2 90ce3da70b43 Initial load duke parents: diff changeset	136	*
90ce3da70b43 Initial load duke parents: diff changeset	137	* @param input the string to be processed
90ce3da70b43 Initial load duke parents: diff changeset	138	*
19069 1d9cb0d080e3 8021833: javadoc cleanup in java.net juh parents: 5506 diff changeset	139	* @return the translated {@code String}
2 90ce3da70b43 Initial load duke parents: diff changeset	140	*
90ce3da70b43 Initial load duke parents: diff changeset	141	* @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification
90ce3da70b43 Initial load duke parents: diff changeset	142	*/
90ce3da70b43 Initial load duke parents: diff changeset	143	public static String toASCII(String input) {
90ce3da70b43 Initial load duke parents: diff changeset	144	return toASCII(input, 0);
90ce3da70b43 Initial load duke parents: diff changeset	145	}
90ce3da70b43 Initial load duke parents: diff changeset	146
90ce3da70b43 Initial load duke parents: diff changeset	147
90ce3da70b43 Initial load duke parents: diff changeset	148	/**
90ce3da70b43 Initial load duke parents: diff changeset	149	* Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
90ce3da70b43 Initial load duke parents: diff changeset	150	* as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
90ce3da70b43 Initial load duke parents: diff changeset	151	*
90ce3da70b43 Initial load duke parents: diff changeset	152	* <p>ToUnicode never fails. In case of any error, the input string is returned unmodified.
90ce3da70b43 Initial load duke parents: diff changeset	153	*
90ce3da70b43 Initial load duke parents: diff changeset	154	* <p> A label is an individual part of a domain name. The original ToUnicode operation,
90ce3da70b43 Initial load duke parents: diff changeset	155	* as defined in RFC 3490, only operates on a single label. This method can handle
90ce3da70b43 Initial load duke parents: diff changeset	156	* both label and entire domain name, by assuming that labels in a domain name are
90ce3da70b43 Initial load duke parents: diff changeset	157	* always separated by dots. The following characters are recognized as dots:
90ce3da70b43 Initial load duke parents: diff changeset	158	* \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop),
90ce3da70b43 Initial load duke parents: diff changeset	159	* and \uFF61 (halfwidth ideographic full stop).
90ce3da70b43 Initial load duke parents: diff changeset	160	*
90ce3da70b43 Initial load duke parents: diff changeset	161	* @param input the string to be processed
90ce3da70b43 Initial load duke parents: diff changeset	162	* @param flag process flag; can be 0 or any logical OR of possible flags
90ce3da70b43 Initial load duke parents: diff changeset	163	*
19069 1d9cb0d080e3 8021833: javadoc cleanup in java.net juh parents: 5506 diff changeset	164	* @return the translated {@code String}
2 90ce3da70b43 Initial load duke parents: diff changeset	165	*/
90ce3da70b43 Initial load duke parents: diff changeset	166	public static String toUnicode(String input, int flag) {
90ce3da70b43 Initial load duke parents: diff changeset	167	int p = 0, q = 0;
90ce3da70b43 Initial load duke parents: diff changeset	168	StringBuffer out = new StringBuffer();
90ce3da70b43 Initial load duke parents: diff changeset	169
90ce3da70b43 Initial load duke parents: diff changeset	170	while (p < input.length()) {
90ce3da70b43 Initial load duke parents: diff changeset	171	q = searchDots(input, p);
90ce3da70b43 Initial load duke parents: diff changeset	172	out.append(toUnicodeInternal(input.substring(p, q), flag));
90ce3da70b43 Initial load duke parents: diff changeset	173	p = q + 1;
90ce3da70b43 Initial load duke parents: diff changeset	174	if (p < input.length()) out.append('.');
90ce3da70b43 Initial load duke parents: diff changeset	175	}
90ce3da70b43 Initial load duke parents: diff changeset	176
90ce3da70b43 Initial load duke parents: diff changeset	177	return out.toString();
90ce3da70b43 Initial load duke parents: diff changeset	178	}
90ce3da70b43 Initial load duke parents: diff changeset	179
90ce3da70b43 Initial load duke parents: diff changeset	180
90ce3da70b43 Initial load duke parents: diff changeset	181	/**
90ce3da70b43 Initial load duke parents: diff changeset	182	* Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
90ce3da70b43 Initial load duke parents: diff changeset	183	* as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
90ce3da70b43 Initial load duke parents: diff changeset	184	*
90ce3da70b43 Initial load duke parents: diff changeset	185	* <p> This convenience method works as if by invoking the
90ce3da70b43 Initial load duke parents: diff changeset	186	* two-argument counterpart as follows:
19069 1d9cb0d080e3 8021833: javadoc cleanup in java.net juh parents: 5506 diff changeset	187	* <blockquote>
2 90ce3da70b43 Initial load duke parents: diff changeset	188	* {@link #toUnicode(String, int) toUnicode}(input, 0);
19069 1d9cb0d080e3 8021833: javadoc cleanup in java.net juh parents: 5506 diff changeset	189	* </blockquote>
2 90ce3da70b43 Initial load duke parents: diff changeset	190	*
90ce3da70b43 Initial load duke parents: diff changeset	191	* @param input the string to be processed
90ce3da70b43 Initial load duke parents: diff changeset	192	*
19069 1d9cb0d080e3 8021833: javadoc cleanup in java.net juh parents: 5506 diff changeset	193	* @return the translated {@code String}
2 90ce3da70b43 Initial load duke parents: diff changeset	194	*/
90ce3da70b43 Initial load duke parents: diff changeset	195	public static String toUnicode(String input) {
90ce3da70b43 Initial load duke parents: diff changeset	196	return toUnicode(input, 0);
90ce3da70b43 Initial load duke parents: diff changeset	197	}
90ce3da70b43 Initial load duke parents: diff changeset	198
90ce3da70b43 Initial load duke parents: diff changeset	199
90ce3da70b43 Initial load duke parents: diff changeset	200	/* ---------------- Private members -------------- */
90ce3da70b43 Initial load duke parents: diff changeset	201
// ACE Prefix is "xn--"; it marks a label as ASCII Compatible Encoding
private static final String ACE_PREFIX = "xn--";
private static final int ACE_PREFIX_LENGTH = ACE_PREFIX.length();

// longest label accepted after conversion
private static final int MAX_LABEL_LENGTH = 63;

// single instance of nameprep, created once by the static initializer below
private static StringPrep namePrep = null;

static {
    // Name of the profile resource, resolved relative to StringPrep.class.
    final String IDN_PROFILE = "uidna.spp";
    InputStream stream = null;

    try {
        if (System.getSecurityManager() != null) {
            // With a security manager installed, open the resource inside
            // a privileged action.
            stream = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
                public InputStream run() {
                    return StringPrep.class.getResourceAsStream(IDN_PROFILE);
                }
            });
        } else {
            stream = StringPrep.class.getResourceAsStream(IDN_PROFILE);
        }

        namePrep = new StringPrep(stream);
    } catch (IOException e) {
        // should never reach here
        assert false;
    } finally {
        // Close on every path, so the stream is not leaked when building
        // the StringPrep instance fails.
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException e) {
                // should never reach here
                assert false;
            }
        }
    }
}
90ce3da70b43 Initial load duke parents: diff changeset	233
90ce3da70b43 Initial load duke parents: diff changeset	234
90ce3da70b43 Initial load duke parents: diff changeset	235	/* ---------------- Private operations -------------- */
90ce3da70b43 Initial load duke parents: diff changeset	236
90ce3da70b43 Initial load duke parents: diff changeset	237
90ce3da70b43 Initial load duke parents: diff changeset	238	//
90ce3da70b43 Initial load duke parents: diff changeset	239	// to suppress the default zero-argument constructor
90ce3da70b43 Initial load duke parents: diff changeset	240	//
private IDN() {
    // Intentionally empty: declared private so that no instances can be
    // created from outside this class.
}
90ce3da70b43 Initial load duke parents: diff changeset	242
90ce3da70b43 Initial load duke parents: diff changeset	243	//
90ce3da70b43 Initial load duke parents: diff changeset	244	// toASCII operation; should only apply to a single label
90ce3da70b43 Initial load duke parents: diff changeset	245	//
90ce3da70b43 Initial load duke parents: diff changeset	246	private static String toASCIIInternal(String label, int flag)
90ce3da70b43 Initial load duke parents: diff changeset	247	{
90ce3da70b43 Initial load duke parents: diff changeset	248	// step 1
90ce3da70b43 Initial load duke parents: diff changeset	249	// Check if the string contains code points outside the ASCII range 0..0x7c.
90ce3da70b43 Initial load duke parents: diff changeset	250	boolean isASCII = isAllASCII(label);
90ce3da70b43 Initial load duke parents: diff changeset	251	StringBuffer dest;
90ce3da70b43 Initial load duke parents: diff changeset	252
90ce3da70b43 Initial load duke parents: diff changeset	253	// step 2
90ce3da70b43 Initial load duke parents: diff changeset	254	// perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
90ce3da70b43 Initial load duke parents: diff changeset	255	if (!isASCII) {
90ce3da70b43 Initial load duke parents: diff changeset	256	UCharacterIterator iter = UCharacterIterator.getInstance(label);
90ce3da70b43 Initial load duke parents: diff changeset	257	try {
90ce3da70b43 Initial load duke parents: diff changeset	258	dest = namePrep.prepare(iter, flag);
90ce3da70b43 Initial load duke parents: diff changeset	259	} catch (java.text.ParseException e) {
90ce3da70b43 Initial load duke parents: diff changeset	260	throw new IllegalArgumentException(e);
90ce3da70b43 Initial load duke parents: diff changeset	261	}
90ce3da70b43 Initial load duke parents: diff changeset	262	} else {
90ce3da70b43 Initial load duke parents: diff changeset	263	dest = new StringBuffer(label);
90ce3da70b43 Initial load duke parents: diff changeset	264	}
90ce3da70b43 Initial load duke parents: diff changeset	265
90ce3da70b43 Initial load duke parents: diff changeset	266	// step 3
90ce3da70b43 Initial load duke parents: diff changeset	267	// Verify the absence of non-LDH ASCII code points
90ce3da70b43 Initial load duke parents: diff changeset	268	// 0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
90ce3da70b43 Initial load duke parents: diff changeset	269	// Verify the absence of leading and trailing hyphen
90ce3da70b43 Initial load duke parents: diff changeset	270	boolean useSTD3ASCIIRules = ((flag & USE_STD3_ASCII_RULES) != 0);
90ce3da70b43 Initial load duke parents: diff changeset	271	if (useSTD3ASCIIRules) {
90ce3da70b43 Initial load duke parents: diff changeset	272	for (int i = 0; i < dest.length(); i++) {
90ce3da70b43 Initial load duke parents: diff changeset	273	int c = dest.charAt(i);
90ce3da70b43 Initial load duke parents: diff changeset	274	if (!isLDHChar(c)) {
90ce3da70b43 Initial load duke parents: diff changeset	275	throw new IllegalArgumentException("Contains non-LDH characters");
90ce3da70b43 Initial load duke parents: diff changeset	276	}
90ce3da70b43 Initial load duke parents: diff changeset	277	}
90ce3da70b43 Initial load duke parents: diff changeset	278
90ce3da70b43 Initial load duke parents: diff changeset	279	if (dest.charAt(0) == '-' \|\| dest.charAt(dest.length() - 1) == '-') {
90ce3da70b43 Initial load duke parents: diff changeset	280	throw new IllegalArgumentException("Has leading or trailing hyphen");
90ce3da70b43 Initial load duke parents: diff changeset	281	}
90ce3da70b43 Initial load duke parents: diff changeset	282	}
90ce3da70b43 Initial load duke parents: diff changeset	283
90ce3da70b43 Initial load duke parents: diff changeset	284	if (!isASCII) {
90ce3da70b43 Initial load duke parents: diff changeset	285	// step 4
90ce3da70b43 Initial load duke parents: diff changeset	286	// If all code points are inside 0..0x7f, skip to step 8
90ce3da70b43 Initial load duke parents: diff changeset	287	if (!isAllASCII(dest.toString())) {
90ce3da70b43 Initial load duke parents: diff changeset	288	// step 5
90ce3da70b43 Initial load duke parents: diff changeset	289	// verify the sequence does not begin with ACE prefix
90ce3da70b43 Initial load duke parents: diff changeset	290	if(!startsWithACEPrefix(dest)){
90ce3da70b43 Initial load duke parents: diff changeset	291
90ce3da70b43 Initial load duke parents: diff changeset	292	// step 6
90ce3da70b43 Initial load duke parents: diff changeset	293	// encode the sequence with punycode
90ce3da70b43 Initial load duke parents: diff changeset	294	try {
90ce3da70b43 Initial load duke parents: diff changeset	295	dest = Punycode.encode(dest, null);
90ce3da70b43 Initial load duke parents: diff changeset	296	} catch (java.text.ParseException e) {
90ce3da70b43 Initial load duke parents: diff changeset	297	throw new IllegalArgumentException(e);
90ce3da70b43 Initial load duke parents: diff changeset	298	}
90ce3da70b43 Initial load duke parents: diff changeset	299
90ce3da70b43 Initial load duke parents: diff changeset	300	dest = toASCIILower(dest);
90ce3da70b43 Initial load duke parents: diff changeset	301
90ce3da70b43 Initial load duke parents: diff changeset	302	// step 7
90ce3da70b43 Initial load duke parents: diff changeset	303	// prepend the ACE prefix
90ce3da70b43 Initial load duke parents: diff changeset	304	dest.insert(0, ACE_PREFIX);
90ce3da70b43 Initial load duke parents: diff changeset	305	} else {
90ce3da70b43 Initial load duke parents: diff changeset	306	throw new IllegalArgumentException("The input starts with the ACE Prefix");
90ce3da70b43 Initial load duke parents: diff changeset	307	}
90ce3da70b43 Initial load duke parents: diff changeset	308
90ce3da70b43 Initial load duke parents: diff changeset	309	}
90ce3da70b43 Initial load duke parents: diff changeset	310	}
90ce3da70b43 Initial load duke parents: diff changeset	311
90ce3da70b43 Initial load duke parents: diff changeset	312	// step 8
90ce3da70b43 Initial load duke parents: diff changeset	313	// the length must be inside 1..63
90ce3da70b43 Initial load duke parents: diff changeset	314	if(dest.length() > MAX_LABEL_LENGTH){
90ce3da70b43 Initial load duke parents: diff changeset	315	throw new IllegalArgumentException("The label in the input is too long");
90ce3da70b43 Initial load duke parents: diff changeset	316	}
90ce3da70b43 Initial load duke parents: diff changeset	317
90ce3da70b43 Initial load duke parents: diff changeset	318	return dest.toString();
90ce3da70b43 Initial load duke parents: diff changeset	319	}
90ce3da70b43 Initial load duke parents: diff changeset	320
90ce3da70b43 Initial load duke parents: diff changeset	321	//
90ce3da70b43 Initial load duke parents: diff changeset	322	// toUnicode operation; should only apply to a single label
90ce3da70b43 Initial load duke parents: diff changeset	323	//
90ce3da70b43 Initial load duke parents: diff changeset	324	private static String toUnicodeInternal(String label, int flag) {
90ce3da70b43 Initial load duke parents: diff changeset	325	boolean[] caseFlags = null;
90ce3da70b43 Initial load duke parents: diff changeset	326	StringBuffer dest;
90ce3da70b43 Initial load duke parents: diff changeset	327
90ce3da70b43 Initial load duke parents: diff changeset	328	// step 1
90ce3da70b43 Initial load duke parents: diff changeset	329	// find out if all the codepoints in input are ASCII
90ce3da70b43 Initial load duke parents: diff changeset	330	boolean isASCII = isAllASCII(label);
90ce3da70b43 Initial load duke parents: diff changeset	331
90ce3da70b43 Initial load duke parents: diff changeset	332	if(!isASCII){
90ce3da70b43 Initial load duke parents: diff changeset	333	// step 2
90ce3da70b43 Initial load duke parents: diff changeset	334	// perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
90ce3da70b43 Initial load duke parents: diff changeset	335	try {
90ce3da70b43 Initial load duke parents: diff changeset	336	UCharacterIterator iter = UCharacterIterator.getInstance(label);
90ce3da70b43 Initial load duke parents: diff changeset	337	dest = namePrep.prepare(iter, flag);
90ce3da70b43 Initial load duke parents: diff changeset	338	} catch (Exception e) {
90ce3da70b43 Initial load duke parents: diff changeset	339	// toUnicode never fails; if any step fails, return the input string
90ce3da70b43 Initial load duke parents: diff changeset	340	return label;
90ce3da70b43 Initial load duke parents: diff changeset	341	}
90ce3da70b43 Initial load duke parents: diff changeset	342	} else {
90ce3da70b43 Initial load duke parents: diff changeset	343	dest = new StringBuffer(label);
90ce3da70b43 Initial load duke parents: diff changeset	344	}
90ce3da70b43 Initial load duke parents: diff changeset	345
90ce3da70b43 Initial load duke parents: diff changeset	346	// step 3
90ce3da70b43 Initial load duke parents: diff changeset	347	// verify ACE Prefix
90ce3da70b43 Initial load duke parents: diff changeset	348	if(startsWithACEPrefix(dest)) {
90ce3da70b43 Initial load duke parents: diff changeset	349
90ce3da70b43 Initial load duke parents: diff changeset	350	// step 4
90ce3da70b43 Initial load duke parents: diff changeset	351	// Remove the ACE Prefix
90ce3da70b43 Initial load duke parents: diff changeset	352	String temp = dest.substring(ACE_PREFIX_LENGTH, dest.length());
90ce3da70b43 Initial load duke parents: diff changeset	353
90ce3da70b43 Initial load duke parents: diff changeset	354	try {
90ce3da70b43 Initial load duke parents: diff changeset	355	// step 5
90ce3da70b43 Initial load duke parents: diff changeset	356	// Decode using punycode
90ce3da70b43 Initial load duke parents: diff changeset	357	StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), null);
90ce3da70b43 Initial load duke parents: diff changeset	358
90ce3da70b43 Initial load duke parents: diff changeset	359	// step 6
90ce3da70b43 Initial load duke parents: diff changeset	360	// Apply toASCII
90ce3da70b43 Initial load duke parents: diff changeset	361	String toASCIIOut = toASCII(decodeOut.toString(), flag);
90ce3da70b43 Initial load duke parents: diff changeset	362
90ce3da70b43 Initial load duke parents: diff changeset	363	// step 7
90ce3da70b43 Initial load duke parents: diff changeset	364	// verify
90ce3da70b43 Initial load duke parents: diff changeset	365	if (toASCIIOut.equalsIgnoreCase(dest.toString())) {
90ce3da70b43 Initial load duke parents: diff changeset	366	// step 8
90ce3da70b43 Initial load duke parents: diff changeset	367	// return output of step 5
90ce3da70b43 Initial load duke parents: diff changeset	368	return decodeOut.toString();
90ce3da70b43 Initial load duke parents: diff changeset	369	}
90ce3da70b43 Initial load duke parents: diff changeset	370	} catch (Exception ignored) {
90ce3da70b43 Initial load duke parents: diff changeset	371	// no-op
90ce3da70b43 Initial load duke parents: diff changeset	372	}
90ce3da70b43 Initial load duke parents: diff changeset	373	}
90ce3da70b43 Initial load duke parents: diff changeset	374
90ce3da70b43 Initial load duke parents: diff changeset	375	// just return the input
90ce3da70b43 Initial load duke parents: diff changeset	376	return label;
90ce3da70b43 Initial load duke parents: diff changeset	377	}
90ce3da70b43 Initial load duke parents: diff changeset	378
90ce3da70b43 Initial load duke parents: diff changeset	379
90ce3da70b43 Initial load duke parents: diff changeset	380	//
90ce3da70b43 Initial load duke parents: diff changeset	381	// LDH stands for "letter/digit/hyphen", with characters restricted to the
90ce3da70b43 Initial load duke parents: diff changeset	382	// 26-letter Latin alphabet <A-Z a-z>, the digits <0-9>, and the hyphen
90ce3da70b43 Initial load duke parents: diff changeset	383	// <->
90ce3da70b43 Initial load duke parents: diff changeset	384	// non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x56..0x60, 0x7B..0x7F
90ce3da70b43 Initial load duke parents: diff changeset	385	//
90ce3da70b43 Initial load duke parents: diff changeset	386	private static boolean isLDHChar(int ch){
90ce3da70b43 Initial load duke parents: diff changeset	387	// high runner case
90ce3da70b43 Initial load duke parents: diff changeset	388	if(ch > 0x007A){
90ce3da70b43 Initial load duke parents: diff changeset	389	return false;
90ce3da70b43 Initial load duke parents: diff changeset	390	}
90ce3da70b43 Initial load duke parents: diff changeset	391	//['-' '0'..'9' 'A'..'Z' 'a'..'z']
90ce3da70b43 Initial load duke parents: diff changeset	392	if((ch == 0x002D) \|\|
90ce3da70b43 Initial load duke parents: diff changeset	393	(0x0030 <= ch && ch <= 0x0039) \|\|
90ce3da70b43 Initial load duke parents: diff changeset	394	(0x0041 <= ch && ch <= 0x005A) \|\|
90ce3da70b43 Initial load duke parents: diff changeset	395	(0x0061 <= ch && ch <= 0x007A)
90ce3da70b43 Initial load duke parents: diff changeset	396	){
90ce3da70b43 Initial load duke parents: diff changeset	397	return true;
90ce3da70b43 Initial load duke parents: diff changeset	398	}
90ce3da70b43 Initial load duke parents: diff changeset	399	return false;
90ce3da70b43 Initial load duke parents: diff changeset	400	}
90ce3da70b43 Initial load duke parents: diff changeset	401
90ce3da70b43 Initial load duke parents: diff changeset	402
90ce3da70b43 Initial load duke parents: diff changeset	403	//
90ce3da70b43 Initial load duke parents: diff changeset	404	// search dots in a string and return the index of that character;
90ce3da70b43 Initial load duke parents: diff changeset	405	// or if there is no dots, return the length of input string
90ce3da70b43 Initial load duke parents: diff changeset	406	// dots might be: \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop),
90ce3da70b43 Initial load duke parents: diff changeset	407	// and \uFF61 (halfwidth ideographic full stop).
90ce3da70b43 Initial load duke parents: diff changeset	408	//
90ce3da70b43 Initial load duke parents: diff changeset	409	private static int searchDots(String s, int start) {
90ce3da70b43 Initial load duke parents: diff changeset	410	int i;
90ce3da70b43 Initial load duke parents: diff changeset	411	for (i = start; i < s.length(); i++) {
90ce3da70b43 Initial load duke parents: diff changeset	412	char c = s.charAt(i);
90ce3da70b43 Initial load duke parents: diff changeset	413	if (c == '.' \|\| c == '\u3002' \|\| c == '\uFF0E' \|\| c == '\uFF61') {
90ce3da70b43 Initial load duke parents: diff changeset	414	break;
90ce3da70b43 Initial load duke parents: diff changeset	415	}
90ce3da70b43 Initial load duke parents: diff changeset	416	}
90ce3da70b43 Initial load duke parents: diff changeset	417
90ce3da70b43 Initial load duke parents: diff changeset	418	return i;
90ce3da70b43 Initial load duke parents: diff changeset	419	}
90ce3da70b43 Initial load duke parents: diff changeset	420
90ce3da70b43 Initial load duke parents: diff changeset	421
90ce3da70b43 Initial load duke parents: diff changeset	422	//
90ce3da70b43 Initial load duke parents: diff changeset	423	// to check if a string only contains US-ASCII code point
90ce3da70b43 Initial load duke parents: diff changeset	424	//
90ce3da70b43 Initial load duke parents: diff changeset	425	private static boolean isAllASCII(String input) {
90ce3da70b43 Initial load duke parents: diff changeset	426	boolean isASCII = true;
90ce3da70b43 Initial load duke parents: diff changeset	427	for (int i = 0; i < input.length(); i++) {
90ce3da70b43 Initial load duke parents: diff changeset	428	int c = input.charAt(i);
90ce3da70b43 Initial load duke parents: diff changeset	429	if (c > 0x7F) {
90ce3da70b43 Initial load duke parents: diff changeset	430	isASCII = false;
90ce3da70b43 Initial load duke parents: diff changeset	431	break;
90ce3da70b43 Initial load duke parents: diff changeset	432	}
90ce3da70b43 Initial load duke parents: diff changeset	433	}
90ce3da70b43 Initial load duke parents: diff changeset	434	return isASCII;
90ce3da70b43 Initial load duke parents: diff changeset	435	}
90ce3da70b43 Initial load duke parents: diff changeset	436
90ce3da70b43 Initial load duke parents: diff changeset	437	//
90ce3da70b43 Initial load duke parents: diff changeset	438	// to check if a string starts with ACE-prefix
90ce3da70b43 Initial load duke parents: diff changeset	439	//
90ce3da70b43 Initial load duke parents: diff changeset	440	private static boolean startsWithACEPrefix(StringBuffer input){
90ce3da70b43 Initial load duke parents: diff changeset	441	boolean startsWithPrefix = true;
90ce3da70b43 Initial load duke parents: diff changeset	442
90ce3da70b43 Initial load duke parents: diff changeset	443	if(input.length() < ACE_PREFIX_LENGTH){
90ce3da70b43 Initial load duke parents: diff changeset	444	return false;
90ce3da70b43 Initial load duke parents: diff changeset	445	}
90ce3da70b43 Initial load duke parents: diff changeset	446	for(int i = 0; i < ACE_PREFIX_LENGTH; i++){
90ce3da70b43 Initial load duke parents: diff changeset	447	if(toASCIILower(input.charAt(i)) != ACE_PREFIX.charAt(i)){
90ce3da70b43 Initial load duke parents: diff changeset	448	startsWithPrefix = false;
90ce3da70b43 Initial load duke parents: diff changeset	449	}
90ce3da70b43 Initial load duke parents: diff changeset	450	}
90ce3da70b43 Initial load duke parents: diff changeset	451	return startsWithPrefix;
90ce3da70b43 Initial load duke parents: diff changeset	452	}
90ce3da70b43 Initial load duke parents: diff changeset	453
90ce3da70b43 Initial load duke parents: diff changeset	454	private static char toASCIILower(char ch){
90ce3da70b43 Initial load duke parents: diff changeset	455	if('A' <= ch && ch <= 'Z'){
90ce3da70b43 Initial load duke parents: diff changeset	456	return (char)(ch + 'a' - 'A');
90ce3da70b43 Initial load duke parents: diff changeset	457	}
90ce3da70b43 Initial load duke parents: diff changeset	458	return ch;
90ce3da70b43 Initial load duke parents: diff changeset	459	}
90ce3da70b43 Initial load duke parents: diff changeset	460
90ce3da70b43 Initial load duke parents: diff changeset	461	private static StringBuffer toASCIILower(StringBuffer input){
90ce3da70b43 Initial load duke parents: diff changeset	462	StringBuffer dest = new StringBuffer();
90ce3da70b43 Initial load duke parents: diff changeset	463	for(int i = 0; i < input.length();i++){
90ce3da70b43 Initial load duke parents: diff changeset	464	dest.append(toASCIILower(input.charAt(i)));
90ce3da70b43 Initial load duke parents: diff changeset	465	}
90ce3da70b43 Initial load duke parents: diff changeset	466	return dest;
90ce3da70b43 Initial load duke parents: diff changeset	467	}
90ce3da70b43 Initial load duke parents: diff changeset	468	}

author	juh
	Tue, 30 Jul 2013 11:04:19 -0700
changeset 19069	1d9cb0d080e3
parent 5506	202f599c92aa
child 19440	c4414bc88602
permissions	-rw-r--r--