author | ohair |
Wed, 06 Apr 2011 22:06:11 -0700 | |
changeset 9035 | 1255eb81cc2f |
parent 8149 | 768769e3cddd |
child 9232 | 9e29d6359705 |
permissions | -rw-r--r-- |
6501 | 1 |
/* |
9035
1255eb81cc2f
7033660: Update copyright year to 2011 on any files changed in 2011
ohair
parents:
8149
diff
changeset
|
2 |
* Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved. |
6501 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
7 |
* published by the Free Software Foundation. Oracle designates this |
|
8 |
* particular file as subject to the "Classpath" exception as provided |
|
9 |
* by Oracle in the LICENSE file that accompanied this code. |
|
10 |
* |
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
24 |
*/ |
|
25 |
||
26 |
/* |
|
27 |
******************************************************************************* |
|
28 |
* Copyright (C) 2010, International Business Machines Corporation and * |
|
29 |
* others. All Rights Reserved. * |
|
30 |
******************************************************************************* |
|
31 |
*/ |
|
32 |
package sun.util.locale; |
|
33 |
||
34 |
import java.util.ArrayList; |
|
35 |
import java.util.Collections; |
|
36 |
import java.util.HashMap; |
|
37 |
import java.util.List; |
|
38 |
import java.util.Map; |
|
39 |
import java.util.Set; |
|
40 |
||
41 |
public class LanguageTag { |
|
42 |
// |
|
43 |
// static fields |
|
44 |
// |
|
45 |
public static final String SEP = "-"; |
|
46 |
public static final String PRIVATEUSE = "x"; |
|
47 |
public static String UNDETERMINED = "und"; |
|
48 |
public static final String PRIVUSE_VARIANT_PREFIX = "lvariant"; |
|
49 |
||
50 |
// |
|
51 |
// Language subtag fields |
|
52 |
// |
|
53 |
private String _language = ""; // language subtag |
|
54 |
private String _script = ""; // script subtag |
|
55 |
private String _region = ""; // region subtag |
|
56 |
private String _privateuse = ""; // privateuse |
|
57 |
||
58 |
private List<String> _extlangs = Collections.emptyList(); // extlang subtags |
|
59 |
private List<String> _variants = Collections.emptyList(); // variant subtags |
|
60 |
private List<String> _extensions = Collections.emptyList(); // extensions |
|
61 |
||
62 |
// Map contains grandfathered tags and its preferred mappings from |
|
63 |
// http://www.ietf.org/rfc/rfc5646.txt |
|
64 |
private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> GRANDFATHERED =
    new HashMap<AsciiUtil.CaseInsensitiveKey, String[]>();

static {
    // grandfathered = irregular           ; non-redundant tags registered
    //               / regular             ; during the RFC 3066 era
    //
    // irregular     = "en-GB-oed"         ; irregular tags do not match
    //               / "i-ami"             ; the 'langtag' production and
    //               / "i-bnn"             ; would not otherwise be
    //               / "i-default"         ; considered 'well-formed'
    //               / "i-enochian"        ; These tags are all valid,
    //               / "i-hak"             ; but most are deprecated
    //               / "i-klingon"         ; in favor of more modern
    //               / "i-lux"             ; subtags or subtag
    //               / "i-mingo"           ; combination
    //               / "i-navajo"
    //               / "i-pwn"
    //               / "i-tao"
    //               / "i-tay"
    //               / "i-tsu"
    //               / "sgn-BE-FR"
    //               / "sgn-BE-NL"
    //               / "sgn-CH-DE"
    //
    // regular       = "art-lojban"        ; these tags match the 'langtag'
    //               / "cel-gaulish"       ; production, but their subtags
    //               / "no-bok"            ; are not extended language
    //               / "no-nyn"            ; or variant subtags: their meaning
    //               / "zh-guoyu"          ; is defined by their registration
    //               / "zh-hakka"          ; and all of these are deprecated
    //               / "zh-min"            ; in favor of a more modern
    //               / "zh-min-nan"        ; subtag or sequence of subtags
    //               / "zh-xiang"

    // Each row is {grandfathered tag, preferred replacement}. Rows marked
    // "fallback" keep the original tag text inside a private use section.
    final String[][] mappings = {
        {"art-lojban",  "jbo"},
        {"cel-gaulish", "xtg-x-cel-gaulish"},   // fallback
        {"en-GB-oed",   "en-GB-x-oed"},         // fallback
        {"i-ami",       "ami"},
        {"i-bnn",       "bnn"},
        {"i-default",   "en-x-i-default"},      // fallback
        {"i-enochian",  "und-x-i-enochian"},    // fallback
        {"i-hak",       "hak"},
        {"i-klingon",   "tlh"},
        {"i-lux",       "lb"},
        {"i-mingo",     "see-x-i-mingo"},       // fallback
        {"i-navajo",    "nv"},
        {"i-pwn",       "pwn"},
        {"i-tao",       "tao"},
        {"i-tay",       "tay"},
        {"i-tsu",       "tsu"},
        {"no-bok",      "nb"},
        {"no-nyn",      "nn"},
        {"sgn-BE-FR",   "sfb"},
        {"sgn-BE-NL",   "vgt"},
        {"sgn-CH-DE",   "sgg"},
        {"zh-guoyu",    "cmn"},
        {"zh-hakka",    "hak"},
        {"zh-min",      "nan-x-zh-min"},        // fallback
        {"zh-min-nan",  "nan"},
        {"zh-xiang",    "hsn"},
    };

    // The whole row is stored as the value; parse() reads index 1.
    for (String[] mapping : mappings) {
        AsciiUtil.CaseInsensitiveKey key = new AsciiUtil.CaseInsensitiveKey(mapping[0]);
        GRANDFATHERED.put(key, mapping);
    }
}
|
132 |
||
133 |
// Private: instances are created only by the static factories
// parse(String, ParseStatus) and parseLocale(BaseLocale, LocaleExtensions).
private LanguageTag() {
}
|
135 |
||
136 |
/* |
|
137 |
* BNF in RFC5646 |
|
138 |
* |
|
139 |
* Language-Tag = langtag ; normal language tags |
|
140 |
* / privateuse ; private use tag |
|
141 |
* / grandfathered ; grandfathered tags |
|
142 |
* |
|
143 |
* |
|
144 |
* langtag = language |
|
145 |
* ["-" script] |
|
146 |
* ["-" region] |
|
147 |
* *("-" variant) |
|
148 |
* *("-" extension) |
|
149 |
* ["-" privateuse] |
|
150 |
* |
|
151 |
* language = 2*3ALPHA ; shortest ISO 639 code |
|
152 |
* ["-" extlang] ; sometimes followed by |
|
153 |
* ; extended language subtags |
|
154 |
* / 4ALPHA ; or reserved for future use |
|
155 |
* / 5*8ALPHA ; or registered language subtag |
|
156 |
* |
|
157 |
* extlang = 3ALPHA ; selected ISO 639 codes |
|
158 |
* *2("-" 3ALPHA) ; permanently reserved |
|
159 |
* |
|
160 |
* script = 4ALPHA ; ISO 15924 code |
|
161 |
* |
|
162 |
* region = 2ALPHA ; ISO 3166-1 code |
|
163 |
* / 3DIGIT ; UN M.49 code |
|
164 |
* |
|
165 |
* variant = 5*8alphanum ; registered variants |
|
166 |
* / (DIGIT 3alphanum) |
|
167 |
* |
|
168 |
* extension = singleton 1*("-" (2*8alphanum)) |
|
169 |
* |
|
170 |
* ; Single alphanumerics |
|
171 |
* ; "x" reserved for private use |
|
172 |
* singleton = DIGIT ; 0 - 9 |
|
173 |
* / %x41-57 ; A - W |
|
174 |
* / %x59-5A ; Y - Z |
|
175 |
* / %x61-77 ; a - w |
|
176 |
* / %x79-7A ; y - z |
|
177 |
* |
|
178 |
* privateuse = "x" 1*("-" (1*8alphanum)) |
|
179 |
* |
|
180 |
*/ |
|
181 |
public static LanguageTag parse(String languageTag, ParseStatus sts) { |
|
182 |
if (sts == null) { |
|
183 |
sts = new ParseStatus(); |
|
184 |
} else { |
|
185 |
sts.reset(); |
|
186 |
} |
|
187 |
||
188 |
StringTokenIterator itr; |
|
189 |
||
190 |
// Check if the tag is grandfathered |
|
191 |
String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag)); |
|
192 |
if (gfmap != null) { |
|
193 |
// use preferred mapping |
|
194 |
itr = new StringTokenIterator(gfmap[1], SEP); |
|
195 |
} else { |
|
196 |
itr = new StringTokenIterator(languageTag, SEP); |
|
197 |
} |
|
198 |
||
199 |
LanguageTag tag = new LanguageTag(); |
|
200 |
||
201 |
// langtag must start with either language or privateuse |
|
202 |
if (tag.parseLanguage(itr, sts)) { |
|
203 |
tag.parseExtlangs(itr, sts); |
|
204 |
tag.parseScript(itr, sts); |
|
205 |
tag.parseRegion(itr, sts); |
|
206 |
tag.parseVariants(itr, sts); |
|
207 |
tag.parseExtensions(itr, sts); |
|
208 |
} |
|
209 |
tag.parsePrivateuse(itr, sts); |
|
210 |
||
211 |
if (!itr.isDone() && !sts.isError()) { |
|
212 |
String s = itr.current(); |
|
213 |
sts._errorIndex = itr.currentStart(); |
|
214 |
if (s.length() == 0) { |
|
215 |
sts._errorMsg = "Empty subtag"; |
|
216 |
} else { |
|
217 |
sts._errorMsg = "Invalid subtag: " + s; |
|
218 |
} |
|
219 |
} |
|
220 |
||
221 |
return tag; |
|
222 |
} |
|
223 |
||
224 |
// |
|
225 |
// Language subtag parsers |
|
226 |
// |
|
227 |
||
228 |
private boolean parseLanguage(StringTokenIterator itr, ParseStatus sts) { |
|
229 |
if (itr.isDone() || sts.isError()) { |
|
230 |
return false; |
|
231 |
} |
|
232 |
||
233 |
boolean found = false; |
|
234 |
||
235 |
String s = itr.current(); |
|
236 |
if (isLanguage(s)) { |
|
237 |
found = true; |
|
238 |
_language = s; |
|
239 |
sts._parseLength = itr.currentEnd(); |
|
240 |
itr.next(); |
|
241 |
} |
|
242 |
||
243 |
return found; |
|
244 |
} |
|
245 |
||
246 |
private boolean parseExtlangs(StringTokenIterator itr, ParseStatus sts) { |
|
247 |
if (itr.isDone() || sts.isError()) { |
|
248 |
return false; |
|
249 |
} |
|
250 |
||
251 |
boolean found = false; |
|
252 |
||
253 |
while (!itr.isDone()) { |
|
254 |
String s = itr.current(); |
|
255 |
if (!isExtlang(s)) { |
|
256 |
break; |
|
257 |
} |
|
258 |
found = true; |
|
259 |
if (_extlangs.isEmpty()) { |
|
260 |
_extlangs = new ArrayList<String>(3); |
|
261 |
} |
|
262 |
_extlangs.add(s); |
|
263 |
sts._parseLength = itr.currentEnd(); |
|
264 |
itr.next(); |
|
265 |
||
266 |
if (_extlangs.size() == 3) { |
|
267 |
// Maximum 3 extlangs |
|
268 |
break; |
|
269 |
} |
|
270 |
} |
|
271 |
||
272 |
return found; |
|
273 |
} |
|
274 |
||
275 |
private boolean parseScript(StringTokenIterator itr, ParseStatus sts) { |
|
276 |
if (itr.isDone() || sts.isError()) { |
|
277 |
return false; |
|
278 |
} |
|
279 |
||
280 |
boolean found = false; |
|
281 |
||
282 |
String s = itr.current(); |
|
283 |
if (isScript(s)) { |
|
284 |
found = true; |
|
285 |
_script = s; |
|
286 |
sts._parseLength = itr.currentEnd(); |
|
287 |
itr.next(); |
|
288 |
} |
|
289 |
||
290 |
return found; |
|
291 |
} |
|
292 |
||
293 |
private boolean parseRegion(StringTokenIterator itr, ParseStatus sts) { |
|
294 |
if (itr.isDone() || sts.isError()) { |
|
295 |
return false; |
|
296 |
} |
|
297 |
||
298 |
boolean found = false; |
|
299 |
||
300 |
String s = itr.current(); |
|
301 |
if (isRegion(s)) { |
|
302 |
found = true; |
|
303 |
_region = s; |
|
304 |
sts._parseLength = itr.currentEnd(); |
|
305 |
itr.next(); |
|
306 |
} |
|
307 |
||
308 |
return found; |
|
309 |
} |
|
310 |
||
311 |
private boolean parseVariants(StringTokenIterator itr, ParseStatus sts) { |
|
312 |
if (itr.isDone() || sts.isError()) { |
|
313 |
return false; |
|
314 |
} |
|
315 |
||
316 |
boolean found = false; |
|
317 |
||
318 |
while (!itr.isDone()) { |
|
319 |
String s = itr.current(); |
|
320 |
if (!isVariant(s)) { |
|
321 |
break; |
|
322 |
} |
|
323 |
found = true; |
|
324 |
if (_variants.isEmpty()) { |
|
325 |
_variants = new ArrayList<String>(3); |
|
326 |
} |
|
327 |
_variants.add(s); |
|
328 |
sts._parseLength = itr.currentEnd(); |
|
329 |
itr.next(); |
|
330 |
} |
|
331 |
||
332 |
return found; |
|
333 |
} |
|
334 |
||
335 |
private boolean parseExtensions(StringTokenIterator itr, ParseStatus sts) { |
|
336 |
if (itr.isDone() || sts.isError()) { |
|
337 |
return false; |
|
338 |
} |
|
339 |
||
340 |
boolean found = false; |
|
341 |
||
342 |
while (!itr.isDone()) { |
|
343 |
String s = itr.current(); |
|
344 |
if (isExtensionSingleton(s)) { |
|
345 |
int start = itr.currentStart(); |
|
346 |
String singleton = s; |
|
347 |
StringBuilder sb = new StringBuilder(singleton); |
|
348 |
||
349 |
itr.next(); |
|
350 |
while (!itr.isDone()) { |
|
351 |
s = itr.current(); |
|
352 |
if (isExtensionSubtag(s)) { |
|
353 |
sb.append(SEP).append(s); |
|
354 |
sts._parseLength = itr.currentEnd(); |
|
355 |
} else { |
|
356 |
break; |
|
357 |
} |
|
358 |
itr.next(); |
|
359 |
} |
|
360 |
||
361 |
if (sts._parseLength <= start) { |
|
362 |
sts._errorIndex = start; |
|
363 |
sts._errorMsg = "Incomplete extension '" + singleton + "'"; |
|
364 |
break; |
|
365 |
} |
|
366 |
||
367 |
if (_extensions.size() == 0) { |
|
368 |
_extensions = new ArrayList<String>(4); |
|
369 |
} |
|
370 |
_extensions.add(sb.toString()); |
|
371 |
found = true; |
|
372 |
} else { |
|
373 |
break; |
|
374 |
} |
|
375 |
} |
|
376 |
return found; |
|
377 |
} |
|
378 |
||
379 |
private boolean parsePrivateuse(StringTokenIterator itr, ParseStatus sts) { |
|
380 |
if (itr.isDone() || sts.isError()) { |
|
381 |
return false; |
|
382 |
} |
|
383 |
||
384 |
boolean found = false; |
|
385 |
||
386 |
String s = itr.current(); |
|
387 |
if (isPrivateusePrefix(s)) { |
|
388 |
int start = itr.currentStart(); |
|
389 |
StringBuilder sb = new StringBuilder(s); |
|
390 |
||
391 |
itr.next(); |
|
392 |
while (!itr.isDone()) { |
|
393 |
s = itr.current(); |
|
394 |
if (!isPrivateuseSubtag(s)) { |
|
395 |
break; |
|
396 |
} |
|
397 |
sb.append(SEP).append(s); |
|
398 |
sts._parseLength = itr.currentEnd(); |
|
399 |
||
400 |
itr.next(); |
|
401 |
} |
|
402 |
||
403 |
if (sts._parseLength <= start) { |
|
404 |
// need at least 1 private subtag |
|
405 |
sts._errorIndex = start; |
|
406 |
sts._errorMsg = "Incomplete privateuse"; |
|
407 |
} else { |
|
408 |
_privateuse = sb.toString(); |
|
409 |
found = true; |
|
410 |
} |
|
411 |
} |
|
412 |
||
413 |
return found; |
|
414 |
} |
|
415 |
||
416 |
public static LanguageTag parseLocale(BaseLocale baseLocale, LocaleExtensions localeExtensions) { |
|
417 |
LanguageTag tag = new LanguageTag(); |
|
418 |
||
419 |
String language = baseLocale.getLanguage(); |
|
420 |
String script = baseLocale.getScript(); |
|
421 |
String region = baseLocale.getRegion(); |
|
422 |
String variant = baseLocale.getVariant(); |
|
423 |
||
8149
768769e3cddd
7015500: Locale.toLanguageTag() uses "und" as lang subtag for private use only Locale
naoto
parents:
6501
diff
changeset
|
424 |
boolean hasSubtag = false; |
768769e3cddd
7015500: Locale.toLanguageTag() uses "und" as lang subtag for private use only Locale
naoto
parents:
6501
diff
changeset
|
425 |
|
6501 | 426 |
String privuseVar = null; // store ill-formed variant subtags |
427 |
||
8149
768769e3cddd
7015500: Locale.toLanguageTag() uses "und" as lang subtag for private use only Locale
naoto
parents:
6501
diff
changeset
|
428 |
if (language.length() > 0 && isLanguage(language)) { |
6501 | 429 |
// Convert a deprecated language code used by Java to |
430 |
// a new code |
|
431 |
if (language.equals("iw")) { |
|
432 |
language = "he"; |
|
433 |
} else if (language.equals("ji")) { |
|
434 |
language = "yi"; |
|
435 |
} else if (language.equals("in")) { |
|
436 |
language = "id"; |
|
437 |
} |
|
438 |
tag._language = language; |
|
439 |
} |
|
440 |
||
441 |
if (script.length() > 0 && isScript(script)) { |
|
442 |
tag._script = canonicalizeScript(script); |
|
8149
768769e3cddd
7015500: Locale.toLanguageTag() uses "und" as lang subtag for private use only Locale
naoto
parents:
6501
diff
changeset
|
443 |
hasSubtag = true; |
6501 | 444 |
} |
445 |
||
446 |
if (region.length() > 0 && isRegion(region)) { |
|
447 |
tag._region = canonicalizeRegion(region); |
|
8149
768769e3cddd
7015500: Locale.toLanguageTag() uses "und" as lang subtag for private use only Locale
naoto
parents:
6501
diff
changeset
|
448 |
hasSubtag = true; |
6501 | 449 |
} |
450 |
||
451 |
// Special handling for no_NO_NY - use nn_NO for language tag |
|
452 |
if (tag._language.equals("no") && tag._region.equals("NO") && variant.equals("NY")) { |
|
453 |
tag._language = "nn"; |
|
454 |
variant = ""; |
|
455 |
} |
|
456 |
||
457 |
if (variant.length() > 0) { |
|
458 |
List<String> variants = null; |
|
459 |
StringTokenIterator varitr = new StringTokenIterator(variant, BaseLocale.SEP); |
|
460 |
while (!varitr.isDone()) { |
|
461 |
String var = varitr.current(); |
|
462 |
if (!isVariant(var)) { |
|
463 |
break; |
|
464 |
} |
|
465 |
if (variants == null) { |
|
466 |
variants = new ArrayList<String>(); |
|
467 |
} |
|
468 |
variants.add(var); // Do not canonicalize! |
|
469 |
varitr.next(); |
|
470 |
} |
|
471 |
if (variants != null) { |
|
472 |
tag._variants = variants; |
|
8149
768769e3cddd
7015500: Locale.toLanguageTag() uses "und" as lang subtag for private use only Locale
naoto
parents:
6501
diff
changeset
|
473 |
hasSubtag = true; |
6501 | 474 |
} |
475 |
if (!varitr.isDone()) { |
|
476 |
// ill-formed variant subtags |
|
477 |
StringBuilder buf = new StringBuilder(); |
|
478 |
while (!varitr.isDone()) { |
|
479 |
String prvv = varitr.current(); |
|
480 |
if (!isPrivateuseSubtag(prvv)) { |
|
481 |
// cannot use private use subtag - truncated |
|
482 |
break; |
|
483 |
} |
|
484 |
if (buf.length() > 0) { |
|
485 |
buf.append(SEP); |
|
486 |
} |
|
487 |
buf.append(prvv); |
|
488 |
varitr.next(); |
|
489 |
} |
|
490 |
if (buf.length() > 0) { |
|
491 |
privuseVar = buf.toString(); |
|
492 |
} |
|
493 |
} |
|
494 |
} |
|
495 |
||
496 |
List<String> extensions = null; |
|
497 |
String privateuse = null; |
|
498 |
||
499 |
Set<Character> locextKeys = localeExtensions.getKeys(); |
|
500 |
for (Character locextKey : locextKeys) { |
|
501 |
Extension ext = localeExtensions.getExtension(locextKey); |
|
502 |
if (isPrivateusePrefixChar(locextKey.charValue())) { |
|
503 |
privateuse = ext.getValue(); |
|
504 |
} else { |
|
505 |
if (extensions == null) { |
|
506 |
extensions = new ArrayList<String>(); |
|
507 |
} |
|
508 |
extensions.add(locextKey.toString() + SEP + ext.getValue()); |
|
509 |
} |
|
510 |
} |
|
511 |
||
512 |
if (extensions != null) { |
|
513 |
tag._extensions = extensions; |
|
8149
768769e3cddd
7015500: Locale.toLanguageTag() uses "und" as lang subtag for private use only Locale
naoto
parents:
6501
diff
changeset
|
514 |
hasSubtag = true; |
6501 | 515 |
} |
516 |
||
517 |
// append ill-formed variant subtags to private use |
|
518 |
if (privuseVar != null) { |
|
519 |
if (privateuse == null) { |
|
520 |
privateuse = PRIVUSE_VARIANT_PREFIX + SEP + privuseVar; |
|
521 |
} else { |
|
522 |
privateuse = privateuse + SEP + PRIVUSE_VARIANT_PREFIX + SEP + privuseVar.replace(BaseLocale.SEP, SEP); |
|
523 |
} |
|
524 |
} |
|
525 |
||
526 |
if (privateuse != null) { |
|
527 |
tag._privateuse = privateuse; |
|
8149
768769e3cddd
7015500: Locale.toLanguageTag() uses "und" as lang subtag for private use only Locale
naoto
parents:
6501
diff
changeset
|
528 |
} |
768769e3cddd
7015500: Locale.toLanguageTag() uses "und" as lang subtag for private use only Locale
naoto
parents:
6501
diff
changeset
|
529 |
|
768769e3cddd
7015500: Locale.toLanguageTag() uses "und" as lang subtag for private use only Locale
naoto
parents:
6501
diff
changeset
|
530 |
if (tag._language.length() == 0 && (hasSubtag || privateuse == null)) { |
768769e3cddd
7015500: Locale.toLanguageTag() uses "und" as lang subtag for private use only Locale
naoto
parents:
6501
diff
changeset
|
531 |
// use lang "und" when 1) no language is available AND |
768769e3cddd
7015500: Locale.toLanguageTag() uses "und" as lang subtag for private use only Locale
naoto
parents:
6501
diff
changeset
|
532 |
// 2) any of other subtags other than private use are available or |
768769e3cddd
7015500: Locale.toLanguageTag() uses "und" as lang subtag for private use only Locale
naoto
parents:
6501
diff
changeset
|
533 |
// no private use tag is available |
6501 | 534 |
tag._language = UNDETERMINED; |
535 |
} |
|
536 |
||
537 |
return tag; |
|
538 |
} |
|
539 |
||
540 |
// |
|
541 |
// Getter methods for language subtag fields |
|
542 |
// |
|
543 |
||
544 |
public String getLanguage() { |
|
545 |
return _language; |
|
546 |
} |
|
547 |
||
548 |
public List<String> getExtlangs() { |
|
549 |
return Collections.unmodifiableList(_extlangs); |
|
550 |
} |
|
551 |
||
552 |
public String getScript() { |
|
553 |
return _script; |
|
554 |
} |
|
555 |
||
556 |
public String getRegion() { |
|
557 |
return _region; |
|
558 |
} |
|
559 |
||
560 |
public List<String> getVariants() { |
|
561 |
return Collections.unmodifiableList(_variants); |
|
562 |
} |
|
563 |
||
564 |
public List<String> getExtensions() { |
|
565 |
return Collections.unmodifiableList(_extensions); |
|
566 |
} |
|
567 |
||
568 |
public String getPrivateuse() { |
|
569 |
return _privateuse; |
|
570 |
} |
|
571 |
||
572 |
// |
|
573 |
// Language subtag syntax checking methods |
|
574 |
// |
|
575 |
||
576 |
public static boolean isLanguage(String s) { |
|
577 |
// language = 2*3ALPHA ; shortest ISO 639 code |
|
578 |
// ["-" extlang] ; sometimes followed by |
|
579 |
// ; extended language subtags |
|
580 |
// / 4ALPHA ; or reserved for future use |
|
581 |
// / 5*8ALPHA ; or registered language subtag |
|
582 |
return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaString(s); |
|
583 |
} |
|
584 |
||
585 |
public static boolean isExtlang(String s) { |
|
586 |
// extlang = 3ALPHA ; selected ISO 639 codes |
|
587 |
// *2("-" 3ALPHA) ; permanently reserved |
|
588 |
return (s.length() == 3) && AsciiUtil.isAlphaString(s); |
|
589 |
} |
|
590 |
||
591 |
public static boolean isScript(String s) { |
|
592 |
// script = 4ALPHA ; ISO 15924 code |
|
593 |
return (s.length() == 4) && AsciiUtil.isAlphaString(s); |
|
594 |
} |
|
595 |
||
596 |
public static boolean isRegion(String s) { |
|
597 |
// region = 2ALPHA ; ISO 3166-1 code |
|
598 |
// / 3DIGIT ; UN M.49 code |
|
599 |
return ((s.length() == 2) && AsciiUtil.isAlphaString(s)) |
|
600 |
|| ((s.length() == 3) && AsciiUtil.isNumericString(s)); |
|
601 |
} |
|
602 |
||
603 |
public static boolean isVariant(String s) { |
|
604 |
// variant = 5*8alphanum ; registered variants |
|
605 |
// / (DIGIT 3alphanum) |
|
606 |
int len = s.length(); |
|
607 |
if (len >= 5 && len <= 8) { |
|
608 |
return AsciiUtil.isAlphaNumericString(s); |
|
609 |
} |
|
610 |
if (len == 4) { |
|
611 |
return AsciiUtil.isNumeric(s.charAt(0)) |
|
612 |
&& AsciiUtil.isAlphaNumeric(s.charAt(1)) |
|
613 |
&& AsciiUtil.isAlphaNumeric(s.charAt(2)) |
|
614 |
&& AsciiUtil.isAlphaNumeric(s.charAt(3)); |
|
615 |
} |
|
616 |
return false; |
|
617 |
} |
|
618 |
||
619 |
public static boolean isExtensionSingleton(String s) { |
|
620 |
// singleton = DIGIT ; 0 - 9 |
|
621 |
// / %x41-57 ; A - W |
|
622 |
// / %x59-5A ; Y - Z |
|
623 |
// / %x61-77 ; a - w |
|
624 |
// / %x79-7A ; y - z |
|
625 |
||
626 |
return (s.length() == 1) |
|
627 |
&& AsciiUtil.isAlphaString(s) |
|
628 |
&& !AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s); |
|
629 |
} |
|
630 |
||
631 |
public static boolean isExtensionSingletonChar(char c) { |
|
632 |
return isExtensionSingleton(String.valueOf(c)); |
|
633 |
} |
|
634 |
||
635 |
public static boolean isExtensionSubtag(String s) { |
|
636 |
// extension = singleton 1*("-" (2*8alphanum)) |
|
637 |
return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); |
|
638 |
} |
|
639 |
||
640 |
public static boolean isPrivateusePrefix(String s) { |
|
641 |
// privateuse = "x" 1*("-" (1*8alphanum)) |
|
642 |
return (s.length() == 1) |
|
643 |
&& AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s); |
|
644 |
} |
|
645 |
||
646 |
public static boolean isPrivateusePrefixChar(char c) { |
|
647 |
return (AsciiUtil.caseIgnoreMatch(PRIVATEUSE, String.valueOf(c))); |
|
648 |
} |
|
649 |
||
650 |
public static boolean isPrivateuseSubtag(String s) { |
|
651 |
// privateuse = "x" 1*("-" (1*8alphanum)) |
|
652 |
return (s.length() >= 1) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); |
|
653 |
} |
|
654 |
||
655 |
// |
|
656 |
// Language subtag canonicalization methods |
|
657 |
// |
|
658 |
||
659 |
public static String canonicalizeLanguage(String s) { |
|
660 |
return AsciiUtil.toLowerString(s); |
|
661 |
} |
|
662 |
||
663 |
public static String canonicalizeExtlang(String s) { |
|
664 |
return AsciiUtil.toLowerString(s); |
|
665 |
} |
|
666 |
||
667 |
public static String canonicalizeScript(String s) { |
|
668 |
return AsciiUtil.toTitleString(s); |
|
669 |
} |
|
670 |
||
671 |
public static String canonicalizeRegion(String s) { |
|
672 |
return AsciiUtil.toUpperString(s); |
|
673 |
} |
|
674 |
||
675 |
public static String canonicalizeVariant(String s) { |
|
676 |
return AsciiUtil.toLowerString(s); |
|
677 |
} |
|
678 |
||
679 |
public static String canonicalizeExtension(String s) { |
|
680 |
return AsciiUtil.toLowerString(s); |
|
681 |
} |
|
682 |
||
683 |
public static String canonicalizeExtensionSingleton(String s) { |
|
684 |
return AsciiUtil.toLowerString(s); |
|
685 |
} |
|
686 |
||
687 |
public static String canonicalizeExtensionSubtag(String s) { |
|
688 |
return AsciiUtil.toLowerString(s); |
|
689 |
} |
|
690 |
||
691 |
public static String canonicalizePrivateuse(String s) { |
|
692 |
return AsciiUtil.toLowerString(s); |
|
693 |
} |
|
694 |
||
695 |
public static String canonicalizePrivateuseSubtag(String s) { |
|
696 |
return AsciiUtil.toLowerString(s); |
|
697 |
} |
|
698 |
||
699 |
public String toString() { |
|
700 |
StringBuilder sb = new StringBuilder(); |
|
701 |
||
702 |
if (_language.length() > 0) { |
|
703 |
sb.append(_language); |
|
704 |
||
705 |
for (String extlang : _extlangs) { |
|
706 |
sb.append(SEP).append(extlang); |
|
707 |
} |
|
708 |
||
709 |
if (_script.length() > 0) { |
|
710 |
sb.append(SEP).append(_script); |
|
711 |
} |
|
712 |
||
713 |
if (_region.length() > 0) { |
|
714 |
sb.append(SEP).append(_region); |
|
715 |
} |
|
716 |
||
717 |
for (String variant : _extlangs) { |
|
718 |
sb.append(SEP).append(variant); |
|
719 |
} |
|
720 |
||
721 |
for (String extension : _extensions) { |
|
722 |
sb.append(SEP).append(extension); |
|
723 |
} |
|
724 |
} |
|
725 |
if (_privateuse.length() > 0) { |
|
726 |
if (sb.length() > 0) { |
|
727 |
sb.append(SEP); |
|
728 |
} |
|
729 |
sb.append(_privateuse); |
|
730 |
} |
|
731 |
||
732 |
return sb.toString(); |
|
733 |
} |
|
734 |
} |