--- a/jdk/src/share/classes/sun/text/resources/CollationData_th.java Mon Oct 31 20:14:12 2011 -0700
+++ b/jdk/src/share/classes/sun/text/resources/CollationData_th.java Mon Oct 31 21:38:12 2011 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -103,18 +103,13 @@
//
// Normal vowels
//
+ + "< \u0E4D " // NIKHAHIT
+ "< \u0E30 " // SARA A
+ "< \u0E31 " // MAI HAN-AKAT
+ "< \u0E32 " // SARA AA
- // Normalizer will decompose this character to \u0e4d\u0e32. This is
- // a Bad Thing, because we want the separate characters to sort
- // differently than this individual one. Since there's no public way to
- // set the decomposition to be used when creating a collator, there's
- // no way around this right now.
- // It's best to go ahead and leave the character in, because it occurs
- // this way a lot more often than it occurs as separate characters.
- + "< \u0E33 " // SARA AM
+ // Normalizer will decompose this character to \u0e4d\u0e32.
+ + "< \u0E33 = \u0E4D\u0E32 " // SARA AM
+ "< \u0E34 " // SARA I
@@ -133,62 +128,58 @@
+ "< \u0E43 " // SARA AI MAIMUAN
+ "< \u0E44 " // SARA AI MAIMALAI
- //
- // Digits
- //
- + "< \u0E50 " // DIGIT ZERO
- + "< \u0E51 " // DIGIT ONE
- + "< \u0E52 " // DIGIT TWO
- + "< \u0E53 " // DIGIT THREE
- + "< \u0E54 " // DIGIT FOUR
- + "< \u0E55 " // DIGIT FIVE
- + "< \u0E56 " // DIGIT SIX
- + "< \u0E57 " // DIGIT SEVEN
- + "< \u0E58 " // DIGIT EIGHT
- + "< \u0E59 " // DIGIT NINE
+
+ // According to CLDR, PHINTHU sorts after 0E44.
+ + "< \u0E3A " // PHINTHU
+
+
- // Sorta tonal marks, but maybe not really
- + "< \u0E4D " // NIKHAHIT
+ // Rare symbol: listed after all characters first; the reset rule below then places it next to SARA AA.
+ + "< \u0E45 " // LAKKHANGYAO
+ + "& \u0E32 , \u0E45 " // According to CLDR, 0E45 is after 0E32 in tertiary level
+
+
+
- //
- // Thai symbols are supposed to sort "after white space".
- // I'm treating this as making them sort just after the normal Latin-1
- // symbols, which are in turn after the white space.
- //
- + "&'\u007d'" // right-brace
- + "< \u0E2F " // PAIYANNOI (ellipsis, abbreviation)
- + "< \u0E46 " // MAIYAMOK
- + "< \u0E4F " // FONGMAN
- + "< \u0E5A " // ANGKHANKHU
- + "< \u0E5B " // KHOMUT
- + "< \u0E3F " // CURRENCY SYMBOL BAHT
+ // Below are Thai punctuation marks and tonal (accent) marks. According to CLDR 1.9 and
+ // ISO/IEC 14651, Annex C, C.2.1 Thai ordering principles, 0E2F to 0E5B are punctuation marks that need to be ignored
+ // in the first three levels. 0E4E to 0E4B are tonal marks to be compared in secondary level.
+ // In the real implementation, punctuation marks are set at the tertiary level, as there is no fourth level in Java.
+ // Set all these special marks after \u0301, the acute accent.
+ + "& \u0301 " // acute accent
- // These symbols are supposed to be "after all characters"
- + "< \u0E4E " // YAMAKKAN
+ //punctuation marks
+ + ", \u0E2F " // PAIYANNOI (ellipsis, abbreviation)
+ + ", \u0E46 " // MAIYAMOK
+ + ", \u0E4F " // FONGMAN
+ + ", \u0E5A " // ANGKHANKHU
+ + ", \u0E5B " // KHOMUT
- // This rare symbol also comes after all characters. But when it is
- // used in combination with RU and LU, the combination is treated as
- // a separate letter, ala "CH" sorting after "C" in traditional Spanish.
- + "< \u0E45 " // LAKKHANGYAO
- + "& \u0E24 < \u0E24\u0E45 "
- + "& \u0E26 < \u0E26\u0E45 "
-
- // Tonal marks are primary ignorables but are treated as secondary
- // differences
- + "& \u0301 " // acute accent
+ //tonal marks
+ + "; \u0E4E " // YAMAKKAN
+ + "; \u0E4C " // THANTHAKHAT
+ "; \u0E47 " // MAITAIKHU
+ "; \u0E48 " // MAI EK
+ "; \u0E49 " // MAI THO
+ "; \u0E4A " // MAI TRI
+ "; \u0E4B " // MAI CHATTAWA
- + "; \u0E4C " // THANTHAKHAT
+
+ //
+ // Digits are equal to their corresponding Arabic digits in the first level
+ //
+ + "& 0 = \u0E50 " // DIGIT ZERO
+ + "& 1 = \u0E51 " // DIGIT ONE
+ + "& 2 = \u0E52 " // DIGIT TWO
+ + "& 3 = \u0E53 " // DIGIT THREE
+ + "& 4 = \u0E54 " // DIGIT FOUR
+ + "& 5 = \u0E55 " // DIGIT FIVE
+ + "& 6 = \u0E56 " // DIGIT SIX
+ + "& 7 = \u0E57 " // DIGIT SEVEN
+ + "& 8 = \u0E58 " // DIGIT EIGHT
+ + "& 9 = \u0E59 " // DIGIT NINE
- // These are supposed to be ignored, so I'm treating them as controls
- + "& \u0001 "
- + "= \u0E3A " // PHINTHU
- + "= '.' " // period
- }
+ }
};
}
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/text/resources/Collator/Bug6755060.java Mon Oct 31 21:38:12 2011 -0700
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 6755060
+ * @summary updating collation tables for thai to make it consistent with CLDR 1.9
+ */
+
+import java.text.*;
+import java.util.*;
+
+public class Bug6755060 {
+
+    /**
+     * Sorts Thai strings that are already in CLDR 1.9 order with the th_TH
+     * collator and fails if sorting changes their order.
+     *
+     * @param args unused
+     * @throws RuntimeException if the sorted order differs from the expected order
+     */
+    public static void main(String[] args) {
+        Locale loc = new Locale("th", "TH"); // Thai
+
+        // Request the Thai collator explicitly instead of swapping the
+        // process-wide default locale, which could disturb other tests.
+        Collator col = Collator.getInstance(loc);
+
+        /*
+         * "data" holds the strings provided by the submitter of the CR. They are
+         * already in the order required by the Thai collation in CLDR 1.9, so
+         * with the fix sorting leaves "data" equal to "sortedData". If the fix
+         * is lost (regression), sorting changes the order and the test fails.
+         *
+         * The expected result given in the CR follows CLDR 1.4 and is not used.
+         */
+        String[] data = {
+            "\u0e01", "\u0e01\u0e2f", "\u0e01\u0e46", "\u0e01\u0e4f", "\u0e01\u0e5a",
+            "\u0e01\u0e5b", "\u0e01\u0e4e", "\u0e01\u0e4c", "\u0e01\u0e48", "\u0e01\u0e01",
+            "\u0e01\u0e4b\u0e01", "\u0e01\u0e4d", "\u0e01\u0e30", "\u0e01\u0e31\u0e01", "\u0e01\u0e32",
+            "\u0e01\u0e33", "\u0e01\u0e34", "\u0e01\u0e35", "\u0e01\u0e36", "\u0e01\u0e37",
+            "\u0e01\u0e38", "\u0e01\u0e39", "\u0e40\u0e01", "\u0e40\u0e01\u0e48", "\u0e40\u0e01\u0e49",
+            "\u0e40\u0e01\u0e4b", "\u0e41\u0e01", "\u0e42\u0e01", "\u0e43\u0e01", "\u0e44\u0e01",
+            "\u0e01\u0e3a", "\u0e24\u0e32", "\u0e24\u0e45", "\u0e40\u0e25", "\u0e44\u0e26"
+        };
+
+        // The input is already in the expected order, so the expectation is a copy.
+        String[] sortedData = data.clone();
+
+        Arrays.sort(data, col);
+
+        System.out.println("Using " + loc.getDisplayName(loc));
+        int errors = 0;
+        for (int i = 0; i < data.length; i++) {
+            System.out.println(data[i] + " : " + sortedData[i]);
+            if (!sortedData[i].equals(data[i])) {
+                errors++;
+            }
+        }
+
+        if (errors > 0) {
+            throw new RuntimeException(
+                "Error is found in collation testing in Thai\n"
+                + "expected order is: " + join(sortedData) + "\n"
+                + "actual order is: " + join(data) + "\n");
+        }
+    }
+
+    /** Joins the given strings with commas and no surrounding spaces. */
+    private static String join(String[] items) {
+        StringBuilder joined = new StringBuilder();
+        for (int i = 0; i < items.length; i++) {
+            if (i > 0) {
+                joined.append(",");
+            }
+            joined.append(items[i]);
+        }
+        return joined.toString();
+    }
+}