jdk-sandbox: jdk/src/share/classes/sun/net/idn/Punycode.java@90ce3da70b43 (annotated)

2 90ce3da70b43 Initial load duke parents: diff changeset	1	/*
90ce3da70b43 Initial load duke parents: diff changeset	2	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load duke parents: diff changeset	3	*
90ce3da70b43 Initial load duke parents: diff changeset	4	* This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load duke parents: diff changeset	5	* under the terms of the GNU General Public License version 2 only, as
90ce3da70b43 Initial load duke parents: diff changeset	6	* published by the Free Software Foundation. Sun designates this
90ce3da70b43 Initial load duke parents: diff changeset	7	* particular file as subject to the "Classpath" exception as provided
90ce3da70b43 Initial load duke parents: diff changeset	8	* by Sun in the LICENSE file that accompanied this code.
90ce3da70b43 Initial load duke parents: diff changeset	9	*
90ce3da70b43 Initial load duke parents: diff changeset	10	* This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load duke parents: diff changeset	11	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load duke parents: diff changeset	12	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
90ce3da70b43 Initial load duke parents: diff changeset	13	* version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load duke parents: diff changeset	14	* accompanied this code).
90ce3da70b43 Initial load duke parents: diff changeset	15	*
90ce3da70b43 Initial load duke parents: diff changeset	16	* You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load duke parents: diff changeset	17	* 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load duke parents: diff changeset	18	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load duke parents: diff changeset	19	*
90ce3da70b43 Initial load duke parents: diff changeset	20	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
90ce3da70b43 Initial load duke parents: diff changeset	21	* CA 95054 USA or visit www.sun.com if you need additional information or
90ce3da70b43 Initial load duke parents: diff changeset	22	* have any questions.
90ce3da70b43 Initial load duke parents: diff changeset	23	*/
90ce3da70b43 Initial load duke parents: diff changeset	24	/*
90ce3da70b43 Initial load duke parents: diff changeset	25	*******************************************************************************
90ce3da70b43 Initial load duke parents: diff changeset	26	* Copyright (C) 2003-2004, International Business Machines Corporation and *
90ce3da70b43 Initial load duke parents: diff changeset	27	* others. All Rights Reserved. *
90ce3da70b43 Initial load duke parents: diff changeset	28	*******************************************************************************
90ce3da70b43 Initial load duke parents: diff changeset	29	*/
90ce3da70b43 Initial load duke parents: diff changeset	30	//
90ce3da70b43 Initial load duke parents: diff changeset	31	// CHANGELOG
90ce3da70b43 Initial load duke parents: diff changeset	32	// 2005-05-19 Edward Wang
90ce3da70b43 Initial load duke parents: diff changeset	33	// - copy this file from icu4jsrc_3_2/src/com/ibm/icu/text/Punycode.java
90ce3da70b43 Initial load duke parents: diff changeset	34	// - move from package com.ibm.icu.text to package sun.net.idn
90ce3da70b43 Initial load duke parents: diff changeset	35	// - use ParseException instead of StringPrepParseException
90ce3da70b43 Initial load duke parents: diff changeset	36	// 2007-08-14 Martin Buchholz
90ce3da70b43 Initial load duke parents: diff changeset	37	// - remove redundant casts
90ce3da70b43 Initial load duke parents: diff changeset	38	//
90ce3da70b43 Initial load duke parents: diff changeset	39	package sun.net.idn;
90ce3da70b43 Initial load duke parents: diff changeset	40
90ce3da70b43 Initial load duke parents: diff changeset	41	import java.text.ParseException;
90ce3da70b43 Initial load duke parents: diff changeset	42	import sun.text.normalizer.UCharacter;
90ce3da70b43 Initial load duke parents: diff changeset	43	import sun.text.normalizer.UTF16;
90ce3da70b43 Initial load duke parents: diff changeset	44
90ce3da70b43 Initial load duke parents: diff changeset	45	/**
90ce3da70b43 Initial load duke parents: diff changeset	46	* Ported code from ICU punycode.c
90ce3da70b43 Initial load duke parents: diff changeset	47	* @author ram
90ce3da70b43 Initial load duke parents: diff changeset	48	*/
90ce3da70b43 Initial load duke parents: diff changeset	49
/* NOTE: intended as a package-private helper, although the class is declared public */
90ce3da70b43 Initial load duke parents: diff changeset	51	public final class Punycode {
90ce3da70b43 Initial load duke parents: diff changeset	52
/*
 * Punycode parameters for Bootstring.
 * BASE is the digit radix; TMIN and TMAX bound the per-digit threshold t;
 * SKEW and DAMP are consumed by adaptBias(); INITIAL_BIAS and INITIAL_N
 * seed the bias and the code point counter n at the start of encode()
 * and decode(). Code points below INITIAL_N are the "basic" ones.
 */
private static final int BASE = 36;
private static final int TMIN = 1;
private static final int TMAX = 26;
private static final int SKEW = 38;
private static final int DAMP = 700;
private static final int INITIAL_BIAS = 72;
private static final int INITIAL_N = 0x80;

/* "Basic" Unicode/ASCII code points */
/* '-' : separates the basic code points from the encoded deltas */
private static final int HYPHEN = 0x2d;
private static final int DELIMITER = HYPHEN;

/* '0' and '9' */
private static final int ZERO = 0x30;
private static final int NINE = 0x39;

/* 'a' and 'z' */
private static final int SMALL_A = 0x61;
private static final int SMALL_Z = 0x7a;

/* 'A' and 'Z' */
private static final int CAPITAL_A = 0x41;
private static final int CAPITAL_Z = 0x5a;

// Size of the fixed-size working arrays allocated in encode() and decode();
// encode() throws IndexOutOfBoundsException when the input holds more code
// points than this.
// TODO: eliminate the 256 limitation
private static final int MAX_CP_COUNT = 256;

// Sign-bit masks for int and long.
// NOTE(review): not referenced in the part of the file reviewed here - confirm they are still used.
private static final int UINT_MAGIC = 0x80000000;
private static final long ULONG_MAGIC = 0x8000000000000000L;
90ce3da70b43 Initial load duke parents: diff changeset	80
90ce3da70b43 Initial load duke parents: diff changeset	81	private static int adaptBias(int delta, int length, boolean firstTime){
90ce3da70b43 Initial load duke parents: diff changeset	82	if(firstTime){
90ce3da70b43 Initial load duke parents: diff changeset	83	delta /=DAMP;
90ce3da70b43 Initial load duke parents: diff changeset	84	}else{
90ce3da70b43 Initial load duke parents: diff changeset	85	delta /= 2;
90ce3da70b43 Initial load duke parents: diff changeset	86	}
90ce3da70b43 Initial load duke parents: diff changeset	87	delta += delta/length;
90ce3da70b43 Initial load duke parents: diff changeset	88
90ce3da70b43 Initial load duke parents: diff changeset	89	int count=0;
90ce3da70b43 Initial load duke parents: diff changeset	90	for(; delta>((BASE-TMIN)*TMAX)/2; count+=BASE) {
90ce3da70b43 Initial load duke parents: diff changeset	91	delta/=(BASE-TMIN);
90ce3da70b43 Initial load duke parents: diff changeset	92	}
90ce3da70b43 Initial load duke parents: diff changeset	93
90ce3da70b43 Initial load duke parents: diff changeset	94	return count+(((BASE-TMIN+1)*delta)/(delta+SKEW));
90ce3da70b43 Initial load duke parents: diff changeset	95	}
90ce3da70b43 Initial load duke parents: diff changeset	96
/**
 * basicToDigit[b] is the numeric value of the basic code point b
 * (for use in representing integers) in the range 0 to BASE-1,
 * or -1 if b does not represent a value.
 *
 * The table has 256 entries: ASCII letters map to 0..25 regardless of
 * case, ASCII digits map to 26..35, and every other entry is -1.
 */
static final int[] basicToDigit = new int[256];
static {
    /* Start with "not a digit" everywhere. */
    for (int i = 0; i < basicToDigit.length; i++) {
        basicToDigit[i] = -1;
    }
    /* 'A'..'Z' and 'a'..'z' both stand for 0..25. */
    for (int d = 0; d < 26; d++) {
        basicToDigit['A' + d] = d;
        basicToDigit['a' + d] = d;
    }
    /* '0'..'9' stand for 26..35. */
    for (int d = 0; d < 10; d++) {
        basicToDigit['0' + d] = 26 + d;
    }
}
90ce3da70b43 Initial load duke parents: diff changeset	127
90ce3da70b43 Initial load duke parents: diff changeset	128	private static char asciiCaseMap(char b, boolean uppercase) {
90ce3da70b43 Initial load duke parents: diff changeset	129	if(uppercase) {
90ce3da70b43 Initial load duke parents: diff changeset	130	if(SMALL_A<=b && b<=SMALL_Z) {
90ce3da70b43 Initial load duke parents: diff changeset	131	b-=(SMALL_A-CAPITAL_A);
90ce3da70b43 Initial load duke parents: diff changeset	132	}
90ce3da70b43 Initial load duke parents: diff changeset	133	} else {
90ce3da70b43 Initial load duke parents: diff changeset	134	if(CAPITAL_A<=b && b<=CAPITAL_Z) {
90ce3da70b43 Initial load duke parents: diff changeset	135	b+=(SMALL_A-CAPITAL_A);
90ce3da70b43 Initial load duke parents: diff changeset	136	}
90ce3da70b43 Initial load duke parents: diff changeset	137	}
90ce3da70b43 Initial load duke parents: diff changeset	138	return b;
90ce3da70b43 Initial load duke parents: diff changeset	139	}
90ce3da70b43 Initial load duke parents: diff changeset	140
90ce3da70b43 Initial load duke parents: diff changeset	141	/**
90ce3da70b43 Initial load duke parents: diff changeset	142	* digitToBasic() returns the basic code point whose value
90ce3da70b43 Initial load duke parents: diff changeset	143	* (when used for representing integers) is d, which must be in the
90ce3da70b43 Initial load duke parents: diff changeset	144	* range 0 to BASE-1. The lowercase form is used unless the uppercase flag is
90ce3da70b43 Initial load duke parents: diff changeset	145	* nonzero, in which case the uppercase form is used.
90ce3da70b43 Initial load duke parents: diff changeset	146	*/
90ce3da70b43 Initial load duke parents: diff changeset	147	private static char digitToBasic(int digit, boolean uppercase) {
90ce3da70b43 Initial load duke parents: diff changeset	148	/* 0..25 map to ASCII a..z or A..Z */
90ce3da70b43 Initial load duke parents: diff changeset	149	/* 26..35 map to ASCII 0..9 */
90ce3da70b43 Initial load duke parents: diff changeset	150	if(digit<26) {
90ce3da70b43 Initial load duke parents: diff changeset	151	if(uppercase) {
90ce3da70b43 Initial load duke parents: diff changeset	152	return (char)(CAPITAL_A+digit);
90ce3da70b43 Initial load duke parents: diff changeset	153	} else {
90ce3da70b43 Initial load duke parents: diff changeset	154	return (char)(SMALL_A+digit);
90ce3da70b43 Initial load duke parents: diff changeset	155	}
90ce3da70b43 Initial load duke parents: diff changeset	156	} else {
90ce3da70b43 Initial load duke parents: diff changeset	157	return (char)((ZERO-26)+digit);
90ce3da70b43 Initial load duke parents: diff changeset	158	}
90ce3da70b43 Initial load duke parents: diff changeset	159	}
90ce3da70b43 Initial load duke parents: diff changeset	160	/**
90ce3da70b43 Initial load duke parents: diff changeset	161	* Converts Unicode to Punycode.
90ce3da70b43 Initial load duke parents: diff changeset	162	* The input string must not contain single, unpaired surrogates.
90ce3da70b43 Initial load duke parents: diff changeset	163	* The output will be represented as an array of ASCII code points.
90ce3da70b43 Initial load duke parents: diff changeset	164	*
90ce3da70b43 Initial load duke parents: diff changeset	165	* @param src
90ce3da70b43 Initial load duke parents: diff changeset	166	* @param caseFlags
90ce3da70b43 Initial load duke parents: diff changeset	167	* @return
90ce3da70b43 Initial load duke parents: diff changeset	168	* @throws ParseException
90ce3da70b43 Initial load duke parents: diff changeset	169	*/
90ce3da70b43 Initial load duke parents: diff changeset	170	public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws ParseException{
90ce3da70b43 Initial load duke parents: diff changeset	171
90ce3da70b43 Initial load duke parents: diff changeset	172	int[] cpBuffer = new int[MAX_CP_COUNT];
90ce3da70b43 Initial load duke parents: diff changeset	173	int n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
90ce3da70b43 Initial load duke parents: diff changeset	174	char c, c2;
90ce3da70b43 Initial load duke parents: diff changeset	175	int srcLength = src.length();
90ce3da70b43 Initial load duke parents: diff changeset	176	int destCapacity = MAX_CP_COUNT;
90ce3da70b43 Initial load duke parents: diff changeset	177	char[] dest = new char[destCapacity];
90ce3da70b43 Initial load duke parents: diff changeset	178	StringBuffer result = new StringBuffer();
90ce3da70b43 Initial load duke parents: diff changeset	179	/*
90ce3da70b43 Initial load duke parents: diff changeset	180	* Handle the basic code points and
90ce3da70b43 Initial load duke parents: diff changeset	181	* convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
90ce3da70b43 Initial load duke parents: diff changeset	182	*/
90ce3da70b43 Initial load duke parents: diff changeset	183	srcCPCount=destLength=0;
90ce3da70b43 Initial load duke parents: diff changeset	184
90ce3da70b43 Initial load duke parents: diff changeset	185	for(j=0; j<srcLength; ++j) {
90ce3da70b43 Initial load duke parents: diff changeset	186	if(srcCPCount==MAX_CP_COUNT) {
90ce3da70b43 Initial load duke parents: diff changeset	187	/* too many input code points */
90ce3da70b43 Initial load duke parents: diff changeset	188	throw new IndexOutOfBoundsException();
90ce3da70b43 Initial load duke parents: diff changeset	189	}
90ce3da70b43 Initial load duke parents: diff changeset	190	c=src.charAt(j);
90ce3da70b43 Initial load duke parents: diff changeset	191	if(isBasic(c)) {
90ce3da70b43 Initial load duke parents: diff changeset	192	if(destLength<destCapacity) {
90ce3da70b43 Initial load duke parents: diff changeset	193	cpBuffer[srcCPCount++]=0;
90ce3da70b43 Initial load duke parents: diff changeset	194	dest[destLength]=
90ce3da70b43 Initial load duke parents: diff changeset	195	caseFlags!=null ?
90ce3da70b43 Initial load duke parents: diff changeset	196	asciiCaseMap(c, caseFlags[j]) :
90ce3da70b43 Initial load duke parents: diff changeset	197	c;
90ce3da70b43 Initial load duke parents: diff changeset	198	}
90ce3da70b43 Initial load duke parents: diff changeset	199	++destLength;
90ce3da70b43 Initial load duke parents: diff changeset	200	} else {
90ce3da70b43 Initial load duke parents: diff changeset	201	n=((caseFlags!=null && caseFlags[j])? 1 : 0)<<31L;
90ce3da70b43 Initial load duke parents: diff changeset	202	if(!UTF16.isSurrogate(c)) {
90ce3da70b43 Initial load duke parents: diff changeset	203	n\|=c;
90ce3da70b43 Initial load duke parents: diff changeset	204	} else if(UTF16.isLeadSurrogate(c) && (j+1)<srcLength && UTF16.isTrailSurrogate(c2=src.charAt(j+1))) {
90ce3da70b43 Initial load duke parents: diff changeset	205	++j;
90ce3da70b43 Initial load duke parents: diff changeset	206
90ce3da70b43 Initial load duke parents: diff changeset	207	n\|=UCharacter.getCodePoint(c, c2);
90ce3da70b43 Initial load duke parents: diff changeset	208	} else {
90ce3da70b43 Initial load duke parents: diff changeset	209	/* error: unmatched surrogate */
90ce3da70b43 Initial load duke parents: diff changeset	210	throw new ParseException("Illegal char found", -1);
90ce3da70b43 Initial load duke parents: diff changeset	211	}
90ce3da70b43 Initial load duke parents: diff changeset	212	cpBuffer[srcCPCount++]=n;
90ce3da70b43 Initial load duke parents: diff changeset	213	}
90ce3da70b43 Initial load duke parents: diff changeset	214	}
90ce3da70b43 Initial load duke parents: diff changeset	215
90ce3da70b43 Initial load duke parents: diff changeset	216	/* Finish the basic string - if it is not empty - with a delimiter. */
90ce3da70b43 Initial load duke parents: diff changeset	217	basicLength=destLength;
90ce3da70b43 Initial load duke parents: diff changeset	218	if(basicLength>0) {
90ce3da70b43 Initial load duke parents: diff changeset	219	if(destLength<destCapacity) {
90ce3da70b43 Initial load duke parents: diff changeset	220	dest[destLength]=DELIMITER;
90ce3da70b43 Initial load duke parents: diff changeset	221	}
90ce3da70b43 Initial load duke parents: diff changeset	222	++destLength;
90ce3da70b43 Initial load duke parents: diff changeset	223	}
90ce3da70b43 Initial load duke parents: diff changeset	224
90ce3da70b43 Initial load duke parents: diff changeset	225	/*
90ce3da70b43 Initial load duke parents: diff changeset	226	* handledCPCount is the number of code points that have been handled
90ce3da70b43 Initial load duke parents: diff changeset	227	* basicLength is the number of basic code points
90ce3da70b43 Initial load duke parents: diff changeset	228	* destLength is the number of chars that have been output
90ce3da70b43 Initial load duke parents: diff changeset	229	*/
90ce3da70b43 Initial load duke parents: diff changeset	230
90ce3da70b43 Initial load duke parents: diff changeset	231	/* Initialize the state: */
90ce3da70b43 Initial load duke parents: diff changeset	232	n=INITIAL_N;
90ce3da70b43 Initial load duke parents: diff changeset	233	delta=0;
90ce3da70b43 Initial load duke parents: diff changeset	234	bias=INITIAL_BIAS;
90ce3da70b43 Initial load duke parents: diff changeset	235
90ce3da70b43 Initial load duke parents: diff changeset	236	/* Main encoding loop: */
90ce3da70b43 Initial load duke parents: diff changeset	237	for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) {
90ce3da70b43 Initial load duke parents: diff changeset	238	/*
90ce3da70b43 Initial load duke parents: diff changeset	239	* All non-basic code points < n have been handled already.
90ce3da70b43 Initial load duke parents: diff changeset	240	* Find the next larger one:
90ce3da70b43 Initial load duke parents: diff changeset	241	*/
90ce3da70b43 Initial load duke parents: diff changeset	242	for(m=0x7fffffff, j=0; j<srcCPCount; ++j) {
90ce3da70b43 Initial load duke parents: diff changeset	243	q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
90ce3da70b43 Initial load duke parents: diff changeset	244	if(n<=q && q<m) {
90ce3da70b43 Initial load duke parents: diff changeset	245	m=q;
90ce3da70b43 Initial load duke parents: diff changeset	246	}
90ce3da70b43 Initial load duke parents: diff changeset	247	}
90ce3da70b43 Initial load duke parents: diff changeset	248
90ce3da70b43 Initial load duke parents: diff changeset	249	/*
90ce3da70b43 Initial load duke parents: diff changeset	250	* Increase delta enough to advance the decoder's
90ce3da70b43 Initial load duke parents: diff changeset	251	* <n,i> state to <m,0>, but guard against overflow:
90ce3da70b43 Initial load duke parents: diff changeset	252	*/
90ce3da70b43 Initial load duke parents: diff changeset	253	if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
90ce3da70b43 Initial load duke parents: diff changeset	254	throw new RuntimeException("Internal program error");
90ce3da70b43 Initial load duke parents: diff changeset	255	}
90ce3da70b43 Initial load duke parents: diff changeset	256	delta+=(m-n)*(handledCPCount+1);
90ce3da70b43 Initial load duke parents: diff changeset	257	n=m;
90ce3da70b43 Initial load duke parents: diff changeset	258
90ce3da70b43 Initial load duke parents: diff changeset	259	/* Encode a sequence of same code points n */
90ce3da70b43 Initial load duke parents: diff changeset	260	for(j=0; j<srcCPCount; ++j) {
90ce3da70b43 Initial load duke parents: diff changeset	261	q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
90ce3da70b43 Initial load duke parents: diff changeset	262	if(q<n) {
90ce3da70b43 Initial load duke parents: diff changeset	263	++delta;
90ce3da70b43 Initial load duke parents: diff changeset	264	} else if(q==n) {
90ce3da70b43 Initial load duke parents: diff changeset	265	/* Represent delta as a generalized variable-length integer: */
90ce3da70b43 Initial load duke parents: diff changeset	266	for(q=delta, k=BASE; /* no condition */; k+=BASE) {
90ce3da70b43 Initial load duke parents: diff changeset	267
90ce3da70b43 Initial load duke parents: diff changeset	268	/** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt
90ce3da70b43 Initial load duke parents: diff changeset	269
90ce3da70b43 Initial load duke parents: diff changeset	270	t=k-bias;
90ce3da70b43 Initial load duke parents: diff changeset	271	if(t<TMIN) {
90ce3da70b43 Initial load duke parents: diff changeset	272	t=TMIN;
90ce3da70b43 Initial load duke parents: diff changeset	273	} else if(t>TMAX) {
90ce3da70b43 Initial load duke parents: diff changeset	274	t=TMAX;
90ce3da70b43 Initial load duke parents: diff changeset	275	}
90ce3da70b43 Initial load duke parents: diff changeset	276	*/
90ce3da70b43 Initial load duke parents: diff changeset	277
90ce3da70b43 Initial load duke parents: diff changeset	278	t=k-bias;
90ce3da70b43 Initial load duke parents: diff changeset	279	if(t<TMIN) {
90ce3da70b43 Initial load duke parents: diff changeset	280	t=TMIN;
90ce3da70b43 Initial load duke parents: diff changeset	281	} else if(k>=(bias+TMAX)) {
90ce3da70b43 Initial load duke parents: diff changeset	282	t=TMAX;
90ce3da70b43 Initial load duke parents: diff changeset	283	}
90ce3da70b43 Initial load duke parents: diff changeset	284
90ce3da70b43 Initial load duke parents: diff changeset	285	if(q<t) {
90ce3da70b43 Initial load duke parents: diff changeset	286	break;
90ce3da70b43 Initial load duke parents: diff changeset	287	}
90ce3da70b43 Initial load duke parents: diff changeset	288
90ce3da70b43 Initial load duke parents: diff changeset	289	if(destLength<destCapacity) {
90ce3da70b43 Initial load duke parents: diff changeset	290	dest[destLength++]=digitToBasic(t+(q-t)%(BASE-t), false);
90ce3da70b43 Initial load duke parents: diff changeset	291	}
90ce3da70b43 Initial load duke parents: diff changeset	292	q=(q-t)/(BASE-t);
90ce3da70b43 Initial load duke parents: diff changeset	293	}
90ce3da70b43 Initial load duke parents: diff changeset	294
90ce3da70b43 Initial load duke parents: diff changeset	295	if(destLength<destCapacity) {
90ce3da70b43 Initial load duke parents: diff changeset	296	dest[destLength++]=digitToBasic(q, (cpBuffer[j]<0));
90ce3da70b43 Initial load duke parents: diff changeset	297	}
90ce3da70b43 Initial load duke parents: diff changeset	298	bias=adaptBias(delta, handledCPCount+1,(handledCPCount==basicLength));
90ce3da70b43 Initial load duke parents: diff changeset	299	delta=0;
90ce3da70b43 Initial load duke parents: diff changeset	300	++handledCPCount;
90ce3da70b43 Initial load duke parents: diff changeset	301	}
90ce3da70b43 Initial load duke parents: diff changeset	302	}
90ce3da70b43 Initial load duke parents: diff changeset	303
90ce3da70b43 Initial load duke parents: diff changeset	304	++delta;
90ce3da70b43 Initial load duke parents: diff changeset	305	++n;
90ce3da70b43 Initial load duke parents: diff changeset	306	}
90ce3da70b43 Initial load duke parents: diff changeset	307
90ce3da70b43 Initial load duke parents: diff changeset	308	return result.append(dest, 0, destLength);
90ce3da70b43 Initial load duke parents: diff changeset	309	}
90ce3da70b43 Initial load duke parents: diff changeset	310
90ce3da70b43 Initial load duke parents: diff changeset	311	private static boolean isBasic(int ch){
90ce3da70b43 Initial load duke parents: diff changeset	312	return (ch < INITIAL_N);
90ce3da70b43 Initial load duke parents: diff changeset	313	}
90ce3da70b43 Initial load duke parents: diff changeset	314
90ce3da70b43 Initial load duke parents: diff changeset	315	private static boolean isBasicUpperCase(int ch){
90ce3da70b43 Initial load duke parents: diff changeset	316	return( CAPITAL_A <= ch && ch <= CAPITAL_Z);
90ce3da70b43 Initial load duke parents: diff changeset	317	}
90ce3da70b43 Initial load duke parents: diff changeset	318
90ce3da70b43 Initial load duke parents: diff changeset	319	private static boolean isSurrogate(int ch){
90ce3da70b43 Initial load duke parents: diff changeset	320	return (((ch)&0xfffff800)==0xd800);
90ce3da70b43 Initial load duke parents: diff changeset	321	}
90ce3da70b43 Initial load duke parents: diff changeset	322	/**
90ce3da70b43 Initial load duke parents: diff changeset	323	* Converts Punycode to Unicode.
90ce3da70b43 Initial load duke parents: diff changeset	324	* The Unicode string will be at most as long as the Punycode string.
90ce3da70b43 Initial load duke parents: diff changeset	325	*
90ce3da70b43 Initial load duke parents: diff changeset	326	* @param src
90ce3da70b43 Initial load duke parents: diff changeset	327	* @param caseFlags
90ce3da70b43 Initial load duke parents: diff changeset	328	* @return
90ce3da70b43 Initial load duke parents: diff changeset	329	* @throws ParseException
90ce3da70b43 Initial load duke parents: diff changeset	330	*/
90ce3da70b43 Initial load duke parents: diff changeset	331	public static StringBuffer decode(StringBuffer src, boolean[] caseFlags)
90ce3da70b43 Initial load duke parents: diff changeset	332	throws ParseException{
90ce3da70b43 Initial load duke parents: diff changeset	333	int srcLength = src.length();
90ce3da70b43 Initial load duke parents: diff changeset	334	StringBuffer result = new StringBuffer();
90ce3da70b43 Initial load duke parents: diff changeset	335	int n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
90ce3da70b43 Initial load duke parents: diff changeset	336	destCPCount, firstSupplementaryIndex, cpLength;
90ce3da70b43 Initial load duke parents: diff changeset	337	char b;
90ce3da70b43 Initial load duke parents: diff changeset	338	int destCapacity = MAX_CP_COUNT;
90ce3da70b43 Initial load duke parents: diff changeset	339	char[] dest = new char[destCapacity];
90ce3da70b43 Initial load duke parents: diff changeset	340
90ce3da70b43 Initial load duke parents: diff changeset	341	/*
90ce3da70b43 Initial load duke parents: diff changeset	342	* Handle the basic code points:
90ce3da70b43 Initial load duke parents: diff changeset	343	* Let basicLength be the number of input code points
90ce3da70b43 Initial load duke parents: diff changeset	344	* before the last delimiter, or 0 if there is none,
90ce3da70b43 Initial load duke parents: diff changeset	345	* then copy the first basicLength code points to the output.
90ce3da70b43 Initial load duke parents: diff changeset	346	*
90ce3da70b43 Initial load duke parents: diff changeset	347	* The two following loops iterate backward.
90ce3da70b43 Initial load duke parents: diff changeset	348	*/
90ce3da70b43 Initial load duke parents: diff changeset	349	for(j=srcLength; j>0;) {
90ce3da70b43 Initial load duke parents: diff changeset	350	if(src.charAt(--j)==DELIMITER) {
90ce3da70b43 Initial load duke parents: diff changeset	351	break;
90ce3da70b43 Initial load duke parents: diff changeset	352	}
90ce3da70b43 Initial load duke parents: diff changeset	353	}
90ce3da70b43 Initial load duke parents: diff changeset	354	destLength=basicLength=destCPCount=j;
90ce3da70b43 Initial load duke parents: diff changeset	355
90ce3da70b43 Initial load duke parents: diff changeset	356	while(j>0) {
90ce3da70b43 Initial load duke parents: diff changeset	357	b=src.charAt(--j);
90ce3da70b43 Initial load duke parents: diff changeset	358	if(!isBasic(b)) {
90ce3da70b43 Initial load duke parents: diff changeset	359	throw new ParseException("Illegal char found", -1);
90ce3da70b43 Initial load duke parents: diff changeset	360	}
90ce3da70b43 Initial load duke parents: diff changeset	361
90ce3da70b43 Initial load duke parents: diff changeset	362	if(j<destCapacity) {
90ce3da70b43 Initial load duke parents: diff changeset	363	dest[j]= b;
90ce3da70b43 Initial load duke parents: diff changeset	364
90ce3da70b43 Initial load duke parents: diff changeset	365	if(caseFlags!=null) {
90ce3da70b43 Initial load duke parents: diff changeset	366	caseFlags[j]=isBasicUpperCase(b);
90ce3da70b43 Initial load duke parents: diff changeset	367	}
90ce3da70b43 Initial load duke parents: diff changeset	368	}
90ce3da70b43 Initial load duke parents: diff changeset	369	}
90ce3da70b43 Initial load duke parents: diff changeset	370
90ce3da70b43 Initial load duke parents: diff changeset	371	/* Initialize the state: */
90ce3da70b43 Initial load duke parents: diff changeset	372	n=INITIAL_N;
90ce3da70b43 Initial load duke parents: diff changeset	373	i=0;
90ce3da70b43 Initial load duke parents: diff changeset	374	bias=INITIAL_BIAS;
90ce3da70b43 Initial load duke parents: diff changeset	375	firstSupplementaryIndex=1000000000;
90ce3da70b43 Initial load duke parents: diff changeset	376
90ce3da70b43 Initial load duke parents: diff changeset	377	/*
90ce3da70b43 Initial load duke parents: diff changeset	378	* Main decoding loop:
90ce3da70b43 Initial load duke parents: diff changeset	379	* Start just after the last delimiter if any
90ce3da70b43 Initial load duke parents: diff changeset	380	* basic code points were copied; start at the beginning otherwise.
90ce3da70b43 Initial load duke parents: diff changeset	381	*/
90ce3da70b43 Initial load duke parents: diff changeset	382	for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
90ce3da70b43 Initial load duke parents: diff changeset	383	/*
90ce3da70b43 Initial load duke parents: diff changeset	384	* in is the index of the next character to be consumed, and
90ce3da70b43 Initial load duke parents: diff changeset	385	* destCPCount is the number of code points in the output array.
90ce3da70b43 Initial load duke parents: diff changeset	386	*
90ce3da70b43 Initial load duke parents: diff changeset	387	* Decode a generalized variable-length integer into delta,
90ce3da70b43 Initial load duke parents: diff changeset	388	* which gets added to i. The overflow checking is easier
90ce3da70b43 Initial load duke parents: diff changeset	389	* if we increase i as we go, then subtract off its starting
90ce3da70b43 Initial load duke parents: diff changeset	390	* value at the end to obtain delta.
90ce3da70b43 Initial load duke parents: diff changeset	391	*/
90ce3da70b43 Initial load duke parents: diff changeset	392	for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
90ce3da70b43 Initial load duke parents: diff changeset	393	if(in>=srcLength) {
90ce3da70b43 Initial load duke parents: diff changeset	394	throw new ParseException("Illegal char found", -1);
90ce3da70b43 Initial load duke parents: diff changeset	395	}
90ce3da70b43 Initial load duke parents: diff changeset	396
90ce3da70b43 Initial load duke parents: diff changeset	397	digit=basicToDigit[(byte)src.charAt(in++)];
90ce3da70b43 Initial load duke parents: diff changeset	398	if(digit<0) {
90ce3da70b43 Initial load duke parents: diff changeset	399	throw new ParseException("Invalid char found", -1);
90ce3da70b43 Initial load duke parents: diff changeset	400	}
90ce3da70b43 Initial load duke parents: diff changeset	401	if(digit>(0x7fffffff-i)/w) {
90ce3da70b43 Initial load duke parents: diff changeset	402	/* integer overflow */
90ce3da70b43 Initial load duke parents: diff changeset	403	throw new ParseException("Illegal char found", -1);
90ce3da70b43 Initial load duke parents: diff changeset	404	}
90ce3da70b43 Initial load duke parents: diff changeset	405
90ce3da70b43 Initial load duke parents: diff changeset	406	i+=digit*w;
90ce3da70b43 Initial load duke parents: diff changeset	407	t=k-bias;
90ce3da70b43 Initial load duke parents: diff changeset	408	if(t<TMIN) {
90ce3da70b43 Initial load duke parents: diff changeset	409	t=TMIN;
90ce3da70b43 Initial load duke parents: diff changeset	410	} else if(k>=(bias+TMAX)) {
90ce3da70b43 Initial load duke parents: diff changeset	411	t=TMAX;
90ce3da70b43 Initial load duke parents: diff changeset	412	}
90ce3da70b43 Initial load duke parents: diff changeset	413	if(digit<t) {
90ce3da70b43 Initial load duke parents: diff changeset	414	break;
90ce3da70b43 Initial load duke parents: diff changeset	415	}
90ce3da70b43 Initial load duke parents: diff changeset	416
90ce3da70b43 Initial load duke parents: diff changeset	417	if(w>0x7fffffff/(BASE-t)) {
90ce3da70b43 Initial load duke parents: diff changeset	418	/* integer overflow */
90ce3da70b43 Initial load duke parents: diff changeset	419	throw new ParseException("Illegal char found", -1);
90ce3da70b43 Initial load duke parents: diff changeset	420	}
90ce3da70b43 Initial load duke parents: diff changeset	421	w*=BASE-t;
90ce3da70b43 Initial load duke parents: diff changeset	422	}
90ce3da70b43 Initial load duke parents: diff changeset	423
90ce3da70b43 Initial load duke parents: diff changeset	424	/*
90ce3da70b43 Initial load duke parents: diff changeset	425	* Modification from sample code:
90ce3da70b43 Initial load duke parents: diff changeset	426	* Increments destCPCount here,
90ce3da70b43 Initial load duke parents: diff changeset	427	* where needed instead of in for() loop tail.
90ce3da70b43 Initial load duke parents: diff changeset	428	*/
90ce3da70b43 Initial load duke parents: diff changeset	429	++destCPCount;
90ce3da70b43 Initial load duke parents: diff changeset	430	bias=adaptBias(i-oldi, destCPCount, (oldi==0));
90ce3da70b43 Initial load duke parents: diff changeset	431
90ce3da70b43 Initial load duke parents: diff changeset	432	/*
90ce3da70b43 Initial load duke parents: diff changeset	433	* i was supposed to wrap around from (incremented) destCPCount to 0,
90ce3da70b43 Initial load duke parents: diff changeset	434	* incrementing n each time, so we'll fix that now:
90ce3da70b43 Initial load duke parents: diff changeset	435	*/
90ce3da70b43 Initial load duke parents: diff changeset	436	if(i/destCPCount>(0x7fffffff-n)) {
90ce3da70b43 Initial load duke parents: diff changeset	437	/* integer overflow */
90ce3da70b43 Initial load duke parents: diff changeset	438	throw new ParseException("Illegal char found", -1);
90ce3da70b43 Initial load duke parents: diff changeset	439	}
90ce3da70b43 Initial load duke parents: diff changeset	440
90ce3da70b43 Initial load duke parents: diff changeset	441	n+=i/destCPCount;
90ce3da70b43 Initial load duke parents: diff changeset	442	i%=destCPCount;
90ce3da70b43 Initial load duke parents: diff changeset	443	/* not needed for Punycode: */
90ce3da70b43 Initial load duke parents: diff changeset	444	/* if (decode_digit(n) <= BASE) return punycode_invalid_input; */
90ce3da70b43 Initial load duke parents: diff changeset	445
90ce3da70b43 Initial load duke parents: diff changeset	446	if(n>0x10ffff \|\| isSurrogate(n)) {
90ce3da70b43 Initial load duke parents: diff changeset	447	/* Unicode code point overflow */
90ce3da70b43 Initial load duke parents: diff changeset	448	throw new ParseException("Illegal char found", -1);
90ce3da70b43 Initial load duke parents: diff changeset	449	}
90ce3da70b43 Initial load duke parents: diff changeset	450
90ce3da70b43 Initial load duke parents: diff changeset	451	/* Insert n at position i of the output: */
90ce3da70b43 Initial load duke parents: diff changeset	452	cpLength=UTF16.getCharCount(n);
90ce3da70b43 Initial load duke parents: diff changeset	453	if((destLength+cpLength)<destCapacity) {
90ce3da70b43 Initial load duke parents: diff changeset	454	int codeUnitIndex;
90ce3da70b43 Initial load duke parents: diff changeset	455
90ce3da70b43 Initial load duke parents: diff changeset	456	/*
90ce3da70b43 Initial load duke parents: diff changeset	457	* Handle indexes when supplementary code points are present.
90ce3da70b43 Initial load duke parents: diff changeset	458	*
90ce3da70b43 Initial load duke parents: diff changeset	459	* In almost all cases, there will be only BMP code points before i
90ce3da70b43 Initial load duke parents: diff changeset	460	* and even in the entire string.
90ce3da70b43 Initial load duke parents: diff changeset	461	* This is handled with the same efficiency as with UTF-32.
90ce3da70b43 Initial load duke parents: diff changeset	462	*
90ce3da70b43 Initial load duke parents: diff changeset	463	* Only the rare cases with supplementary code points are handled
90ce3da70b43 Initial load duke parents: diff changeset	464	* more slowly - but not too bad since this is an insertion anyway.
90ce3da70b43 Initial load duke parents: diff changeset	465	*/
90ce3da70b43 Initial load duke parents: diff changeset	466	if(i<=firstSupplementaryIndex) {
90ce3da70b43 Initial load duke parents: diff changeset	467	codeUnitIndex=i;
90ce3da70b43 Initial load duke parents: diff changeset	468	if(cpLength>1) {
90ce3da70b43 Initial load duke parents: diff changeset	469	firstSupplementaryIndex=codeUnitIndex;
90ce3da70b43 Initial load duke parents: diff changeset	470	} else {
90ce3da70b43 Initial load duke parents: diff changeset	471	++firstSupplementaryIndex;
90ce3da70b43 Initial load duke parents: diff changeset	472	}
90ce3da70b43 Initial load duke parents: diff changeset	473	} else {
90ce3da70b43 Initial load duke parents: diff changeset	474	codeUnitIndex=firstSupplementaryIndex;
90ce3da70b43 Initial load duke parents: diff changeset	475	codeUnitIndex=UTF16.moveCodePointOffset(dest, 0, destLength, codeUnitIndex, i-codeUnitIndex);
90ce3da70b43 Initial load duke parents: diff changeset	476	}
90ce3da70b43 Initial load duke parents: diff changeset	477
90ce3da70b43 Initial load duke parents: diff changeset	478	/* use the UChar index codeUnitIndex instead of the code point index i */
90ce3da70b43 Initial load duke parents: diff changeset	479	if(codeUnitIndex<destLength) {
90ce3da70b43 Initial load duke parents: diff changeset	480	System.arraycopy(dest, codeUnitIndex,
90ce3da70b43 Initial load duke parents: diff changeset	481	dest, codeUnitIndex+cpLength,
90ce3da70b43 Initial load duke parents: diff changeset	482	(destLength-codeUnitIndex));
90ce3da70b43 Initial load duke parents: diff changeset	483	if(caseFlags!=null) {
90ce3da70b43 Initial load duke parents: diff changeset	484	System.arraycopy(caseFlags, codeUnitIndex,
90ce3da70b43 Initial load duke parents: diff changeset	485	caseFlags, codeUnitIndex+cpLength,
90ce3da70b43 Initial load duke parents: diff changeset	486	destLength-codeUnitIndex);
90ce3da70b43 Initial load duke parents: diff changeset	487	}
90ce3da70b43 Initial load duke parents: diff changeset	488	}
90ce3da70b43 Initial load duke parents: diff changeset	489	if(cpLength==1) {
90ce3da70b43 Initial load duke parents: diff changeset	490	/* BMP, insert one code unit */
90ce3da70b43 Initial load duke parents: diff changeset	491	dest[codeUnitIndex]=(char)n;
90ce3da70b43 Initial load duke parents: diff changeset	492	} else {
90ce3da70b43 Initial load duke parents: diff changeset	493	/* supplementary character, insert two code units */
90ce3da70b43 Initial load duke parents: diff changeset	494	dest[codeUnitIndex]=UTF16.getLeadSurrogate(n);
90ce3da70b43 Initial load duke parents: diff changeset	495	dest[codeUnitIndex+1]=UTF16.getTrailSurrogate(n);
90ce3da70b43 Initial load duke parents: diff changeset	496	}
90ce3da70b43 Initial load duke parents: diff changeset	497	if(caseFlags!=null) {
90ce3da70b43 Initial load duke parents: diff changeset	498	/* Case of last character determines uppercase flag: */
90ce3da70b43 Initial load duke parents: diff changeset	499	caseFlags[codeUnitIndex]=isBasicUpperCase(src.charAt(in-1));
90ce3da70b43 Initial load duke parents: diff changeset	500	if(cpLength==2) {
90ce3da70b43 Initial load duke parents: diff changeset	501	caseFlags[codeUnitIndex+1]=false;
90ce3da70b43 Initial load duke parents: diff changeset	502	}
90ce3da70b43 Initial load duke parents: diff changeset	503	}
90ce3da70b43 Initial load duke parents: diff changeset	504	}
90ce3da70b43 Initial load duke parents: diff changeset	505	destLength+=cpLength;
90ce3da70b43 Initial load duke parents: diff changeset	506	++i;
90ce3da70b43 Initial load duke parents: diff changeset	507	}
90ce3da70b43 Initial load duke parents: diff changeset	508	result.append(dest, 0, destLength);
90ce3da70b43 Initial load duke parents: diff changeset	509	return result;
90ce3da70b43 Initial load duke parents: diff changeset	510	}
90ce3da70b43 Initial load duke parents: diff changeset	511	}

author	duke
	Sat, 01 Dec 2007 00:00:00 +0000
changeset 2	90ce3da70b43
child 5506	202f599c92aa
permissions	-rw-r--r--