author | martin |
Tue, 15 Sep 2015 21:56:04 -0700 | |
changeset 32649 | 2ee9017c7597 |
parent 25859 | 3317bb8137f4 |
permissions | -rw-r--r-- |
2 | 1 |
/*
 * Copyright (c) 1996, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25 |
||
26 |
/*
 * (C) Copyright Taligent, Inc. 1996,1997 - All Rights Reserved
 * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
 *
 * The original version of this source code and documentation is copyrighted
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
 * materials are provided under terms of a License Agreement between Taligent
 * and Sun. This technology is protected by multiple US and International
 * patents. This notice and attribution to Taligent may not be removed.
 * Taligent is a registered trademark of Taligent, Inc.
 *
 */
|
38 |
||
13583 | 39 |
package sun.util.locale.provider; |
2 | 40 |
/**
 * CollationRules contains the default en_US collation rules as a base
 * for building other collation tables.
 * <p>Note that decompositions are done before these rules are used,
 * so they do not have to contain accented characters, such as A-grave.
 * @see java.text.RuleBasedCollator
 * @see LocaleElements
 * @author Helena Shih, Mark Davis
 */
final class CollationRules {
    /**
     * The base rule string, written in the rule syntax documented by
     * {@link java.text.RuleBasedCollator}:
     * <ul>
     * <li>{@code =} introduces a character that sorts identically to the
     *     preceding one,</li>
     * <li>{@code ;} introduces a secondary (accent) difference,</li>
     * <li>{@code ,} introduces a tertiary (case) difference,</li>
     * <li>{@code <} introduces a primary difference,</li>
     * <li>{@code &} resets the insertion point to an already-defined
     *     sequence, which is how expansions such as the ligatures at the
     *     end of the table are expressed.</li>
     * </ul>
     * Characters that have a meaning in the rule syntax, as well as
     * whitespace, are wrapped in single quotes. Everything listed before
     * the first {@code <} is ignorable.
     *
     * <p>Maintenance note: the Java compiler translates Unicode escapes
     * before it tokenizes the source, so the double quote and the backslash
     * are deliberately written with ordinary backslash escapes rather than
     * Unicode escapes, and no line-terminator code point is written as a
     * Unicode escape inside these literals.
     */
    static final String DEFAULTRULES =
        "" // no FRENCH accent order by default, add in French Delta
        // IGNORABLES (up to first < character)
        // COMPLETELY IGNORE format characters
        + "='\u200B'=\u200C=\u200D=\u200E=\u200F"
        // Control Characters
        + "=\u0000 =\u0001 =\u0002 =\u0003 =\u0004" //null, .. eot
        + "=\u0005 =\u0006 =\u0007 =\u0008 ='\u0009'" //enq, ...
        + "='\u000b' =\u000e" //vt,, so
        + "=\u000f ='\u0010' =\u0011 =\u0012 =\u0013" //si, dle, dc1, dc2, dc3
        + "=\u0014 =\u0015 =\u0016 =\u0017 =\u0018" //dc4, nak, syn, etb, can
        + "=\u0019 =\u001a =\u001b =\u001c =\u001d" //em, sub, esc, fs, gs
        + "=\u001e =\u001f =\u007f" //rs, us, del
        //....then the C1 Latin 1 reserved control codes
        + "=\u0080 =\u0081 =\u0082 =\u0083 =\u0084 =\u0085"
        + "=\u0086 =\u0087 =\u0088 =\u0089 =\u008a =\u008b"
        + "=\u008c =\u008d =\u008e =\u008f =\u0090 =\u0091"
        + "=\u0092 =\u0093 =\u0094 =\u0095 =\u0096 =\u0097"
        + "=\u0098 =\u0099 =\u009a =\u009b =\u009c =\u009d"
        + "=\u009e =\u009f"
        // IGNORE except for secondary, tertiary difference
        // Spaces
        + ";'\u0020';'\u00A0'" // spaces
        + ";'\u2000';'\u2001';'\u2002';'\u2003';'\u2004'" // spaces
        + ";'\u2005';'\u2006';'\u2007';'\u2008';'\u2009'" // spaces
        + ";'\u200A';'\u3000';'\uFEFF'" // spaces
        + ";'\r' ;'\t' ;'\n';'\f';'\u000b'" // whitespace

        // Non-spacing accents

        + ";\u0301" // non-spacing acute accent
        + ";\u0300" // non-spacing grave accent
        + ";\u0306" // non-spacing breve accent
        + ";\u0302" // non-spacing circumflex accent
        + ";\u030c" // non-spacing caron/hacek accent
        + ";\u030a" // non-spacing ring above accent
        + ";\u030d" // non-spacing vertical line above
        + ";\u0308" // non-spacing diaeresis accent
        + ";\u030b" // non-spacing double acute accent
        + ";\u0303" // non-spacing tilde accent
        + ";\u0307" // non-spacing dot above/overdot accent
        + ";\u0304" // non-spacing macron accent
        + ";\u0337" // non-spacing short slash overlay (overstruck diacritic)
        + ";\u0327" // non-spacing cedilla accent
        + ";\u0328" // non-spacing ogonek accent
        + ";\u0323" // non-spacing dot-below/underdot accent
        + ";\u0332" // non-spacing underscore/underline accent
        // with the rest of the general diacritical marks in binary order
        + ";\u0305" // non-spacing overscore/overline
        + ";\u0309" // non-spacing hook above
        + ";\u030e" // non-spacing double vertical line above
        + ";\u030f" // non-spacing double grave
        + ";\u0310" // non-spacing chandrabindu
        + ";\u0311" // non-spacing inverted breve
        + ";\u0312" // non-spacing turned comma above/cedilla above
        + ";\u0313" // non-spacing comma above
        + ";\u0314" // non-spacing reversed comma above
        + ";\u0315" // non-spacing comma above right
        + ";\u0316" // non-spacing grave below
        + ";\u0317" // non-spacing acute below
        + ";\u0318" // non-spacing left tack below
        + ";\u0319" // non-spacing tack below
        + ";\u031a" // non-spacing left angle above
        + ";\u031b" // non-spacing horn
        + ";\u031c" // non-spacing left half ring below
        + ";\u031d" // non-spacing up tack below
        + ";\u031e" // non-spacing down tack below
        + ";\u031f" // non-spacing plus sign below
        + ";\u0320" // non-spacing minus sign below
        + ";\u0321" // non-spacing palatalized hook below
        + ";\u0322" // non-spacing retroflex hook below
        + ";\u0324" // non-spacing double dot below
        + ";\u0325" // non-spacing ring below
        + ";\u0326" // non-spacing comma below
        + ";\u0329" // non-spacing vertical line below
        + ";\u032a" // non-spacing bridge below
        + ";\u032b" // non-spacing inverted double arch below
        + ";\u032c" // non-spacing hacek below
        + ";\u032d" // non-spacing circumflex below
        + ";\u032e" // non-spacing breve below
        + ";\u032f" // non-spacing inverted breve below
        + ";\u0330" // non-spacing tilde below
        + ";\u0331" // non-spacing macron below
        + ";\u0333" // non-spacing double underscore
        + ";\u0334" // non-spacing tilde overlay
        + ";\u0335" // non-spacing short bar overlay
        + ";\u0336" // non-spacing long bar overlay
        + ";\u0338" // non-spacing long slash overlay
        + ";\u0339" // non-spacing right half ring below
        + ";\u033a" // non-spacing inverted bridge below
        + ";\u033b" // non-spacing square below
        + ";\u033c" // non-spacing seagull below
        + ";\u033d" // non-spacing x above
        + ";\u033e" // non-spacing vertical tilde
        + ";\u033f" // non-spacing double overscore
        //+ ";\u0340" // non-spacing grave tone mark == \u0300
        //+ ";\u0341" // non-spacing acute tone mark == \u0301
        + ";\u0342;"
        //+ "\u0343;" // == \u0313
        + "\u0344;\u0345;\u0360;\u0361" // newer
        + ";\u0483;\u0484;\u0485;\u0486" // Cyrillic accents

        + ";\u20D0;\u20D1;\u20D2" // symbol accents
        + ";\u20D3;\u20D4;\u20D5" // symbol accents
        + ";\u20D6;\u20D7;\u20D8" // symbol accents
        + ";\u20D9;\u20DA;\u20DB" // symbol accents
        + ";\u20DC;\u20DD;\u20DE" // symbol accents
        + ";\u20DF;\u20E0;\u20E1" // symbol accents

        + ",'\u002D';\u00AD" // dashes
        + ";\u2010;\u2011;\u2012" // dashes
        + ";\u2013;\u2014;\u2015" // dashes
        + ";\u2212" // dashes

        // other punctuation

        + "<'\u005f'" // underline/underscore (spacing)
        + "<\u00af" // overline or macron (spacing)
        + "<'\u002c'" // comma (spacing)
        + "<'\u003b'" // semicolon
        + "<'\u003a'" // colon
        + "<'\u0021'" // exclamation point
        + "<\u00a1" // inverted exclamation point
        + "<'\u003f'" // question mark
        + "<\u00bf" // inverted question mark
        + "<'\u002f'" // slash
        + "<'\u002e'" // period/full stop
        + "<\u00b4" // acute accent (spacing)
        + "<'\u0060'" // grave accent (spacing)
        + "<'\u005e'" // circumflex accent (spacing)
        + "<\u00a8" // diaresis/umlaut accent (spacing)
        + "<'\u007e'" // tilde accent (spacing)
        + "<\u00b7" // middle dot (spacing)
        + "<\u00b8" // cedilla accent (spacing)
        + "<'\u0027'" // apostrophe
        + "<'\"'" // quotation marks
        + "<\u00ab" // left angle quotes
        + "<\u00bb" // right angle quotes
        + "<'\u0028'" // left parenthesis
        + "<'\u0029'" // right parenthesis
        + "<'\u005b'" // left bracket
        + "<'\u005d'" // right bracket
        + "<'\u007b'" // left brace
        + "<'\u007d'" // right brace
        + "<\u00a7" // section symbol
        + "<\u00b6" // paragraph symbol
        + "<\u00a9" // copyright symbol
        + "<\u00ae" // registered trademark symbol
        + "<'\u0040'" // at sign
        + "<\u00a4" // international currency symbol
        + "<\u0e3f" // baht sign
        + "<\u00a2" // cent sign
        + "<\u20a1" // colon sign
        + "<\u20a2" // cruzeiro sign
        + "<'\u0024'" // dollar sign
        + "<\u20ab" // dong sign
        + "<\u20ac" // euro sign
        + "<\u20a3" // franc sign
        + "<\u20a4" // lira sign
        + "<\u20a5" // mill sign
        + "<\u20a6" // naira sign
        + "<\u20a7" // peseta sign
        + "<\u00a3" // pound-sterling sign
        + "<\u20a8" // rupee sign
        + "<\u20aa" // new shekel sign
        + "<\u20a9" // won sign
        + "<\u00a5" // yen sign
        + "<'\u002a'" // asterisk
        + "<'\\'" // backslash
        + "<'\u0026'" // ampersand
        + "<'\u0023'" // number sign
        + "<'\u0025'" // percent sign
        + "<'\u002b'" // plus sign
        + "<\u00b1" // plus-or-minus sign
        + "<\u00f7" // divide sign
        + "<\u00d7" // multiply sign
        + "<'\u003c'" // less-than sign
        + "<'\u003d'" // equal sign
        + "<'\u003e'" // greater-than sign
        + "<\u00ac" // end of line symbol/logical NOT symbol
        + "<'\u007c'" // vertical line/logical OR symbol
        + "<\u00a6" // broken vertical line
        + "<\u00b0" // degree symbol
        + "<\u00b5" // micro symbol

        // NUMERICS

        + "<0<1<2<3<4<5<6<7<8<9"
        + "<\u00bc<\u00bd<\u00be" // 1/4,1/2,3/4 fractions

        // NON-IGNORABLES
        + "<a,A"
        + "<b,B"
        + "<c,C"
        + "<d,D"
        + "<\u00F0,\u00D0" // eth
        + "<e,E"
        + "<f,F"
        + "<g,G"
        + "<h,H"
        + "<i,I"
        + "<j,J"
        + "<k,K"
        + "<l,L"
        + "<m,M"
        + "<n,N"
        + "<o,O"
        + "<p,P"
        + "<q,Q"
        + "<r,R"
        + "<s, S & SS,\u00DF" // s-zet
        + "<t,T"
        + "& TH, \u00DE &TH, \u00FE " // thorn
        + "<u,U"
        + "<v,V"
        + "<w,W"
        + "<x,X"
        + "<y,Y"
        + "<z,Z"
        + "&AE,\u00C6" // ae & AE ligature
        + "&AE,\u00E6"
        + "&OE,\u0152" // oe & OE ligature
        + "&OE,\u0153";

    // No instantiation: this class only carries the DEFAULTRULES constant.
    private CollationRules() {
    }
}