jdk/src/share/classes/sun/util/locale/InternalLocaleBuilder.java
author okutsu
Thu, 14 Apr 2011 15:59:47 +0900
changeset 9224 75c0420badef
parent 6501 684810d882b3
permissions -rw-r--r--
7028818: (lc) Lazily initialize locale extension 7029740: (lc) New Locale class implementation doesn't follow the Java coding conventions 7032820: (lc) sun.util.locale.InternalLocaleBuilder.CaseInsensitiveChar.equals problems 7033503: (lc) Restore optimization code for Locale class initialization 7033504: (lc) incompatible behavior change for ja_JP_JP and th_TH_TH locales Reviewed-by: naoto

/*
 * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 *******************************************************************************
 * Copyright (C) 2009-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package sun.util.locale;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public final class InternalLocaleBuilder {

    // Key used to look up the private use value in the extensions map
    // (see getBaseLocale); built from LanguageTag.PRIVATEUSE.
    private static final CaseInsensitiveChar PRIVATEUSE_KEY
        = new CaseInsensitiveChar(LanguageTag.PRIVATEUSE);

    // Base locale fields. The empty string means "not set"; the setters and
    // clear() never store null here.
    private String language = "";
    private String script = "";
    private String region = "";
    private String variant = "";

    // Extension state. Each collection is allocated on first use, so any of
    // them may still be null.
    // extensions: extension and private use values keyed by their singleton
    //             character; the Unicode locale extension is not kept here but
    //             in the two collections below.
    // uattributes: Unicode locale attributes.
    // ukeywords: Unicode locale keyword key -> type.
    private Map<CaseInsensitiveChar, String> extensions;
    private Set<CaseInsensitiveString> uattributes;
    private Map<CaseInsensitiveString, String> ukeywords;


    /**
     * Creates an empty builder: all base locale fields are the empty string
     * and no extension collections are allocated yet.
     */
    public InternalLocaleBuilder() {
    }

    /**
     * Sets the language field.
     *
     * @param language the new language; a value that
     *        {@code LocaleUtils.isEmpty} reports as empty resets the field
     *        to the empty string
     * @return this builder
     * @throws LocaleSyntaxException if {@code language} is not empty and is
     *         rejected by {@code LanguageTag.isLanguage}; the field is left
     *         unchanged in that case
     */
    public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(language)) {
            this.language = "";
            return this;
        }
        if (!LanguageTag.isLanguage(language)) {
            throw new LocaleSyntaxException("Ill-formed language: " + language, 0);
        }
        this.language = language;
        return this;
    }

    /**
     * Sets the script field.
     *
     * @param script the new script; a value that {@code LocaleUtils.isEmpty}
     *        reports as empty resets the field to the empty string
     * @return this builder
     * @throws LocaleSyntaxException if {@code script} is not empty and is
     *         rejected by {@code LanguageTag.isScript}; the field is left
     *         unchanged in that case
     */
    public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(script)) {
            this.script = "";
            return this;
        }
        if (!LanguageTag.isScript(script)) {
            throw new LocaleSyntaxException("Ill-formed script: " + script, 0);
        }
        this.script = script;
        return this;
    }

    /**
     * Sets the region field.
     *
     * @param region the new region; a value that {@code LocaleUtils.isEmpty}
     *        reports as empty resets the field to the empty string
     * @return this builder
     * @throws LocaleSyntaxException if {@code region} is not empty and is
     *         rejected by {@code LanguageTag.isRegion}; the field is left
     *         unchanged in that case
     */
    public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(region)) {
            this.region = "";
            return this;
        }
        if (!LanguageTag.isRegion(region)) {
            throw new LocaleSyntaxException("Ill-formed region: " + region, 0);
        }
        this.region = region;
        return this;
    }

    /**
     * Sets the variant field.
     *
     * <p>Occurrences of {@code LanguageTag.SEP} in the input are replaced by
     * {@code BaseLocale.SEP} before validation, and the normalized form is
     * what gets stored.
     *
     * @param variant the new variant; a value that {@code LocaleUtils.isEmpty}
     *        reports as empty resets the field to the empty string
     * @return this builder
     * @throws LocaleSyntaxException if any variant subtag is ill-formed; the
     *         error index is the start of the offending subtag and the field
     *         is left unchanged
     */
    public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(variant)) {
            this.variant = "";
            return this;
        }

        // Validate and store the separator-normalized form, but report the
        // caller's original text in the error message.
        String normalized = variant.replaceAll(LanguageTag.SEP, BaseLocale.SEP);
        int badIndex = checkVariants(normalized, BaseLocale.SEP);
        if (badIndex != -1) {
            throw new LocaleSyntaxException("Ill-formed variant: " + variant, badIndex);
        }
        this.variant = normalized;
        return this;
    }

    /**
     * Adds a Unicode locale attribute. Attributes are held in a
     * case-insensitive set, so adding one that differs only in case from an
     * existing attribute has no effect.
     *
     * @param attribute the attribute to add
     * @return this builder
     * @throws LocaleSyntaxException if {@code attribute} is rejected by
     *         {@code UnicodeLocaleExtension.isAttribute}
     */
    public InternalLocaleBuilder addUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
        if (!UnicodeLocaleExtension.isAttribute(attribute)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
        }

        // The attribute set is allocated on first use.
        Set<CaseInsensitiveString> attrs = uattributes;
        if (attrs == null) {
            attrs = new HashSet<>(4);
            uattributes = attrs;
        }
        attrs.add(new CaseInsensitiveString(attribute));
        return this;
    }

    /**
     * Removes a Unicode locale attribute, matching case-insensitively.
     * Removing an attribute that is not present is a no-op.
     *
     * @param attribute the attribute to remove
     * @return this builder
     * @throws LocaleSyntaxException if {@code attribute} is {@code null} or is
     *         rejected by {@code UnicodeLocaleExtension.isAttribute}
     */
    public InternalLocaleBuilder removeUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
        boolean wellFormed = attribute != null && UnicodeLocaleExtension.isAttribute(attribute);
        if (!wellFormed) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
        }
        if (uattributes == null) {
            // Nothing has ever been added, so there is nothing to remove.
            return this;
        }
        uattributes.remove(new CaseInsensitiveString(attribute));
        return this;
    }

    /**
     * Sets, replaces or removes a Unicode locale keyword.
     *
     * <p>Keys are matched case-insensitively. A {@code null} type removes the
     * keyword; an empty type is stored as is without validation. For any other
     * type, occurrences of {@code BaseLocale.SEP} are replaced by
     * {@code LanguageTag.SEP}, every resulting subtag is validated, and the
     * normalized form is stored.
     *
     * @param key the keyword key
     * @param type the keyword type, or {@code null} to remove the keyword
     * @return this builder
     * @throws LocaleSyntaxException if {@code key} is rejected by
     *         {@code UnicodeLocaleExtension.isKey}, or if a subtag of
     *         {@code type} is rejected by
     *         {@code UnicodeLocaleExtension.isTypeSubtag}; the builder is left
     *         unchanged in either case
     */
    public InternalLocaleBuilder setUnicodeLocaleKeyword(String key, String type) throws LocaleSyntaxException {
        if (!UnicodeLocaleExtension.isKey(key)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale keyword key: " + key);
        }

        CaseInsensitiveString cikey = new CaseInsensitiveString(key);
        if (type == null) {
            if (ukeywords != null) {
                // a null type is used to remove the key
                ukeywords.remove(cikey);
            }
        } else {
            String tp = type;
            if (type.length() != 0) {
                // normalize separator to "-"
                tp = type.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
                // validate
                StringTokenIterator itr = new StringTokenIterator(tp, LanguageTag.SEP);
                while (!itr.isDone()) {
                    String s = itr.current();
                    if (!UnicodeLocaleExtension.isTypeSubtag(s)) {
                        throw new LocaleSyntaxException("Ill-formed Unicode locale keyword type: "
                                                        + type,
                                                        itr.currentStart());
                    }
                    itr.next();
                }
            }
            if (ukeywords == null) {
                ukeywords = new HashMap<>(4);
            }
            // Store the normalized form that was just validated, not the raw
            // input, so that the stored type never contains a separator the
            // validation did not see. This matches setExtension, which also
            // stores its normalized value.
            ukeywords.put(cikey, tp);
        }
        return this;
    }

    /**
     * Sets or removes the extension (or private use value) identified by the
     * given singleton character.
     *
     * <p>A value that {@code LocaleUtils.isEmpty} reports as empty removes the
     * extension; for the Unicode locale extension singleton this clears all
     * Unicode locale attributes and keywords. Otherwise occurrences of
     * {@code BaseLocale.SEP} in the value are replaced by
     * {@code LanguageTag.SEP}, every subtag is validated, and the normalized
     * value is stored. A value for the Unicode locale extension singleton is
     * handed to {@code setUnicodeLocaleExtension} instead of being stored in
     * the extensions map.
     *
     * @param singleton the extension singleton or the private use prefix char
     * @param value the extension value, or an empty value to remove it
     * @return this builder
     * @throws LocaleSyntaxException if {@code singleton} is neither a private
     *         use prefix nor an extension singleton, or if a subtag of
     *         {@code value} is ill-formed
     */
    public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException {
        // validate key
        boolean isBcpPrivateuse = LanguageTag.isPrivateusePrefixChar(singleton);
        if (!(isBcpPrivateuse || LanguageTag.isExtensionSingletonChar(singleton))) {
            throw new LocaleSyntaxException("Ill-formed extension key: " + singleton);
        }

        CaseInsensitiveChar key = new CaseInsensitiveChar(singleton);

        if (LocaleUtils.isEmpty(value)) {
            // Removal request.
            if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                // clear entire Unicode locale extension
                if (uattributes != null) {
                    uattributes.clear();
                }
                if (ukeywords != null) {
                    ukeywords.clear();
                }
            } else if (extensions != null) {
                extensions.remove(key);
            }
            return this;
        }

        // validate value, one subtag at a time
        String val = value.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
        for (StringTokenIterator itr = new StringTokenIterator(val, LanguageTag.SEP);
             !itr.isDone(); itr.next()) {
            String subtag = itr.current();
            boolean validSubtag = isBcpPrivateuse
                ? LanguageTag.isPrivateuseSubtag(subtag)
                : LanguageTag.isExtensionSubtag(subtag);
            if (!validSubtag) {
                throw new LocaleSyntaxException("Ill-formed extension value: " + subtag,
                                                itr.currentStart());
            }
        }

        if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
            setUnicodeLocaleExtension(val);
            return this;
        }
        if (extensions == null) {
            extensions = new HashMap<>(4);
        }
        extensions.put(key, val);
        return this;
    }

    /**
     * Replaces all extensions and the private use value with those parsed
     * from a single string holding extension and private use subtags, e.g.
     * {@code "a-abc-def-x-xyz"}.
     *
     * <p>Occurrences of {@code BaseLocale.SEP} are first replaced by
     * {@code LanguageTag.SEP}. The string is then read as zero or more
     * extensions (a singleton followed by at least one extension subtag),
     * optionally followed by a private use sequence (the private use prefix
     * followed by at least one private use subtag). The builder's state is
     * only modified after the whole string has been parsed successfully.
     *
     * @param subtags the extension subtags; a value that
     *        {@code LocaleUtils.isEmpty} reports as empty clears all
     *        extensions
     * @return this builder
     * @throws LocaleSyntaxException if a singleton or the private use prefix
     *         is not followed by any subtag, or if unparsable text remains
     */
    public InternalLocaleBuilder setExtensions(String subtags) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(subtags)) {
            clearExtensions();
            return this;
        }
        subtags = subtags.replaceAll(BaseLocale.SEP, LanguageTag.SEP);
        StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);

        List<String> extensions = null;
        String privateuse = null;

        // End offset of the last subtag accepted so far; compared with the
        // start of each singleton to detect one that has no subtags.
        int parsed = 0;
        int start;

        // Make a list of extension subtags
        while (!itr.isDone()) {
            String s = itr.current();
            if (LanguageTag.isExtensionSingleton(s)) {
                start = itr.currentStart();
                String singleton = s;
                StringBuilder sb = new StringBuilder(singleton);

                itr.next();
                while (!itr.isDone()) {
                    s = itr.current();
                    if (LanguageTag.isExtensionSubtag(s)) {
                        sb.append(LanguageTag.SEP).append(s);
                        parsed = itr.currentEnd();
                    } else {
                        break;
                    }
                    itr.next();
                }

                // "<=" rather than "<": a bare singleton at offset 0 leaves
                // both parsed and start at 0. Accepting it would add a
                // one-character entry to the list, and the private overload
                // would then fail in substring(2).
                if (parsed <= start) {
                    throw new LocaleSyntaxException("Incomplete extension '" + singleton + "'",
                                                    start);
                }

                if (extensions == null) {
                    extensions = new ArrayList<>(4);
                }
                extensions.add(sb.toString());
            } else {
                break;
            }
        }
        if (!itr.isDone()) {
            String s = itr.current();
            if (LanguageTag.isPrivateusePrefix(s)) {
                start = itr.currentStart();
                StringBuilder sb = new StringBuilder(s);

                itr.next();
                while (!itr.isDone()) {
                    s = itr.current();
                    if (!LanguageTag.isPrivateuseSubtag(s)) {
                        break;
                    }
                    sb.append(LanguageTag.SEP).append(s);
                    parsed = itr.currentEnd();

                    itr.next();
                }
                if (parsed <= start) {
                    throw new LocaleSyntaxException("Incomplete privateuse:"
                                                    + subtags.substring(start),
                                                    start);
                } else {
                    privateuse = sb.toString();
                }
            }
        }

        // Anything left over is neither an extension nor private use.
        if (!itr.isDone()) {
            throw new LocaleSyntaxException("Ill-formed extension subtags:"
                                            + subtags.substring(itr.currentStart()),
                                            itr.currentStart());
        }

        return setExtensions(extensions, privateuse);
    }

    /*
     * Replaces the current extensions with the given list of BCP47 extensions
     * and the given private use subtags.
     *
     * Every list element carries its singleton, e.g. "a-abc-def", and is
     * expected to be validated and well-formed already. The list may contain
     * the same singleton more than once; only the first occurrence is used.
     * The private use string carries its prefix, e.g. "x-abc-def". Values are
     * stored without the leading singleton and separator.
     */
    private InternalLocaleBuilder setExtensions(List<String> bcpExtensions, String privateuse) {
        clearExtensions();

        if (!LocaleUtils.isEmpty(bcpExtensions)) {
            Set<CaseInsensitiveChar> seen = new HashSet<>(bcpExtensions.size());
            for (String ext : bcpExtensions) {
                CaseInsensitiveChar singleton = new CaseInsensitiveChar(ext);
                if (!seen.add(singleton)) {
                    // duplicate singleton - the first occurrence wins
                    continue;
                }
                String value = ext.substring(2);
                if (UnicodeLocaleExtension.isSingletonChar(singleton.value())) {
                    setUnicodeLocaleExtension(value);
                } else {
                    if (extensions == null) {
                        extensions = new HashMap<>(4);
                    }
                    extensions.put(singleton, value);
                }
            }
        }

        boolean hasPrivateuse = privateuse != null && privateuse.length() > 0;
        if (hasPrivateuse) {
            if (extensions == null) {
                extensions = new HashMap<>(1);
            }
            extensions.put(new CaseInsensitiveChar(privateuse), privateuse.substring(2));
        }

        return this;
    }

    /**
     * Resets this builder and repopulates it from the given language tag.
     *
     * <p>The language is taken from the first extlang when there is one;
     * otherwise from the tag's language unless that equals
     * {@code LanguageTag.UNDETERMINED}, in which case it stays empty. The
     * tag's variants are joined with {@code BaseLocale.SEP}.
     *
     * @param langtag the language tag to copy from
     * @return this builder
     */
    public InternalLocaleBuilder setLanguageTag(LanguageTag langtag) {
        clear();

        List<String> extlangs = langtag.getExtlangs();
        if (extlangs.isEmpty()) {
            String lang = langtag.getLanguage();
            if (!lang.equals(LanguageTag.UNDETERMINED)) {
                language = lang;
            }
        } else {
            language = extlangs.get(0);
        }
        script = langtag.getScript();
        region = langtag.getRegion();

        List<String> bcpVariants = langtag.getVariants();
        if (!bcpVariants.isEmpty()) {
            StringBuilder joined = new StringBuilder(bcpVariants.get(0));
            for (String v : bcpVariants.subList(1, bcpVariants.size())) {
                joined.append(BaseLocale.SEP).append(v);
            }
            variant = joined.toString();
        }

        setExtensions(langtag.getExtensions(), langtag.getPrivateuse());

        return this;
    }

    /**
     * Resets this builder from a base locale and its extensions.
     *
     * <p>Three legacy locales are special-cased: for ja_JP_JP and th_TH_TH
     * the variant is dropped, and no_NO_NY becomes language "nn" with no
     * variant. The resulting base fields are validated before any builder
     * state is touched, so a syntax error leaves the builder unchanged.
     *
     * @param base the base locale to copy language, script, region and
     *        variant from
     * @param localeExtensions the extensions to copy, or {@code null} for none
     * @return this builder
     * @throws LocaleSyntaxException if a base locale field is ill-formed
     */
    public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions localeExtensions) throws LocaleSyntaxException {
        String language = base.getLanguage();
        String script = base.getScript();
        String region = base.getRegion();
        String variant = base.getVariant();

        // Special backward compatibility support
        // NOTE(review): the two asserts below dereference localeExtensions
        // without the null check that is applied further down.
        if (language.equals("ja") && region.equals("JP") && variant.equals("JP")) {
            // Exception 1 - ja_JP_JP
            // When locale ja_JP_JP is created, ca-japanese is always there.
            // The builder ignores the variant "JP"
            assert("japanese".equals(localeExtensions.getUnicodeLocaleType("ca")));
            variant = "";
        } else if (language.equals("th") && region.equals("TH") && variant.equals("TH")) {
            // Exception 2 - th_TH_TH
            // When locale th_TH_TH is created, nu-thai is always there.
            // The builder ignores the variant "TH"
            assert("thai".equals(localeExtensions.getUnicodeLocaleType("nu")));
            variant = "";
        } else if (language.equals("no") && region.equals("NO") && variant.equals("NY")) {
            // Exception 3 - no_NO_NY
            // no_NO_NY is a valid locale and used by Java 6 or older versions.
            // The builder ignores the variant "NY" and changes the language
            // to "nn".
            language = "nn";
            variant = "";
        }

        // Validate base locale fields before updating internal state.
        // LocaleExtensions always store validated/canonicalized values,
        // so no checks are necessary.
        if (!language.isEmpty() && !LanguageTag.isLanguage(language)) {
            throw new LocaleSyntaxException("Ill-formed language: " + language);
        }
        if (!script.isEmpty() && !LanguageTag.isScript(script)) {
            throw new LocaleSyntaxException("Ill-formed script: " + script);
        }
        if (!region.isEmpty() && !LanguageTag.isRegion(region)) {
            throw new LocaleSyntaxException("Ill-formed region: " + region);
        }
        if (!variant.isEmpty()) {
            int badIndex = checkVariants(variant, BaseLocale.SEP);
            if (badIndex != -1) {
                throw new LocaleSyntaxException("Ill-formed variant: " + variant, badIndex);
            }
        }

        // The input locale is validated at this point.
        // Now, updating builder's internal fields.
        this.language = language;
        this.script = script;
        this.region = region;
        this.variant = variant;
        clearExtensions();

        if (localeExtensions == null) {
            return this;
        }
        Set<Character> extKeys = localeExtensions.getKeys();
        if (extKeys == null) {
            return this;
        }

        // map localeExtensions back to builder's internal format
        for (Character extKey : extKeys) {
            Extension ext = localeExtensions.getExtension(extKey);
            if (!(ext instanceof UnicodeLocaleExtension)) {
                if (extensions == null) {
                    extensions = new HashMap<>(4);
                }
                extensions.put(new CaseInsensitiveChar(extKey), ext.getValue());
                continue;
            }

            // The Unicode locale extension is split into attributes and keywords.
            UnicodeLocaleExtension uext = (UnicodeLocaleExtension) ext;
            for (String attr : uext.getUnicodeLocaleAttributes()) {
                if (uattributes == null) {
                    uattributes = new HashSet<>(4);
                }
                uattributes.add(new CaseInsensitiveString(attr));
            }
            for (String kwKey : uext.getUnicodeLocaleKeys()) {
                if (ukeywords == null) {
                    ukeywords = new HashMap<>(4);
                }
                ukeywords.put(new CaseInsensitiveString(kwKey), uext.getUnicodeLocaleType(kwKey));
            }
        }
        return this;
    }

    /**
     * Resets the builder: all base locale fields become the empty string and
     * all extensions are cleared.
     *
     * @return this builder
     */
    public InternalLocaleBuilder clear() {
        clearExtensions();
        language = script = region = variant = "";
        return this;
    }

    /**
     * Empties the extension map and the Unicode locale attribute and keyword
     * collections. Collections that were never allocated are left as
     * {@code null}; allocated ones are emptied and kept for reuse.
     *
     * @return this builder
     */
    public InternalLocaleBuilder clearExtensions() {
        if (ukeywords != null) {
            ukeywords.clear();
        }
        if (uattributes != null) {
            uattributes.clear();
        }
        if (extensions != null) {
            extensions.clear();
        }
        return this;
    }

    /**
     * Builds the base locale from the current language, script, region and
     * variant.
     *
     * <p>If the private use value contains a subtag matching
     * {@code LanguageTag.PRIVUSE_VARIANT_PREFIX} (ignoring case) that is
     * followed by further subtags, those trailing subtags are treated as a
     * Java variant: their separators are converted to {@code BaseLocale.SEP}
     * and the result is appended to the variant field, with a separator in
     * between when the field is not empty. The builder's own fields are not
     * modified.
     *
     * @return the base locale obtained from {@code BaseLocale.getInstance}
     */
    public BaseLocale getBaseLocale() {
        String effectiveVariant = this.variant;

        String privuse = (extensions == null) ? null : extensions.get(PRIVATEUSE_KEY);
        if (privuse != null) {
            // Find the start of the first subtag that follows the variant prefix.
            int privVarStart = -1;
            StringTokenIterator itr = new StringTokenIterator(privuse, LanguageTag.SEP);
            for (boolean sawPrefix = false; !itr.isDone(); itr.next()) {
                if (sawPrefix) {
                    privVarStart = itr.currentStart();
                    break;
                }
                sawPrefix = LocaleUtils.caseIgnoreMatch(itr.current(),
                                                        LanguageTag.PRIVUSE_VARIANT_PREFIX);
            }

            if (privVarStart != -1) {
                String tail = privuse.substring(privVarStart)
                                     .replaceAll(LanguageTag.SEP, BaseLocale.SEP);
                if (effectiveVariant.length() == 0) {
                    effectiveVariant = tail;
                } else {
                    effectiveVariant = effectiveVariant + BaseLocale.SEP + tail;
                }
            }
        }

        return BaseLocale.getInstance(this.language, this.script, this.region, effectiveVariant);
    }

    /**
     * Builds the locale extensions from the current extension state.
     *
     * @return a new {@code LocaleExtensions}, or {@code null} when no
     *         extension, attribute or keyword is set, or when the object
     *         that was built reports itself as empty
     */
    public LocaleExtensions getLocaleExtensions() {
        boolean nothingSet = LocaleUtils.isEmpty(extensions)
            && LocaleUtils.isEmpty(uattributes)
            && LocaleUtils.isEmpty(ukeywords);
        if (nothingSet) {
            return null;
        }

        LocaleExtensions built = new LocaleExtensions(extensions, uattributes, ukeywords);
        if (built.isEmpty()) {
            return null;
        }
        return built;
    }

    /*
     * Removes the special private use subtag sequence introduced by
     * LanguageTag.PRIVUSE_VARIANT_PREFIX ("lvariant") and returns what
     * precedes it. Only used by LocaleExtensions.
     *
     * The value is returned unchanged when the prefix does not occur or when
     * no subtag follows it (e.g. "abc-lvariant"). null is returned when the
     * prefix is the very first subtag, i.e. nothing precedes it.
     */
    static String removePrivateuseVariant(String privuseVal) {
        StringTokenIterator itr = new StringTokenIterator(privuseVal, LanguageTag.SEP);

        int prefixStart = -1;
        boolean hasVariantSubtags = false;
        for (; !itr.isDone(); itr.next()) {
            if (prefixStart != -1) {
                // A subtag follows the prefix, so there is a sequence to strip.
                hasVariantSubtags = true;
                break;
            }
            if (LocaleUtils.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
                prefixStart = itr.currentStart();
            }
        }
        if (!hasVariantSubtags) {
            return privuseVal;
        }

        assert(prefixStart == 0 || prefixStart > 1);
        if (prefixStart == 0) {
            return null;
        }
        // Also drop the separator that precedes the prefix.
        return privuseVal.substring(0, prefixStart - 1);
    }

    /*
     * Checks every variant subtag in the given string, split on the given
     * separator(s), with LanguageTag.isVariant.
     *
     * Returns the start index of the first subtag that fails the check,
     * or -1 when all subtags pass.
     */
    private int checkVariants(String variants, String sep) {
        for (StringTokenIterator itr = new StringTokenIterator(variants, sep);
             !itr.isDone(); itr.next()) {
            if (!LanguageTag.isVariant(itr.current())) {
                return itr.currentStart();
            }
        }
        return -1;
    }

    /*
     * Private method parsing Unicode Locale Extension subtags.
     *
     * Replaces the current Unicode locale attributes and keywords with those
     * found in the given subtags. Leading subtags accepted by
     * UnicodeLocaleExtension.isAttribute become attributes; the rest is read
     * as a sequence of keywords, each being a key followed by zero or more
     * type subtags. A key without type subtags is stored with an empty type.
     *
     * Duplicated attributes/keywords will be ignored (the first keyword wins).
     * The input must be valid extension subtags (excluding the singleton).
     */
    private void setUnicodeLocaleExtension(String subtags) {
        // wipe out existing attributes/keywords
        if (uattributes != null) {
            uattributes.clear();
        }
        if (ukeywords != null) {
            ukeywords.clear();
        }

        StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);

        // parse attributes: consume subtags until the first non-attribute
        while (!itr.isDone()) {
            if (!UnicodeLocaleExtension.isAttribute(itr.current())) {
                break;
            }
            if (uattributes == null) {
                uattributes = new HashSet<>(4);
            }
            uattributes.add(new CaseInsensitiveString(itr.current()));
            itr.next();
        }

        // parse keywords
        // key: the keyword currently being collected, or null when there is
        //      none (before the first key, or while skipping a duplicate).
        // typeStart/typeEnd: offsets in subtags spanning the type subtags seen
        //      so far for key; -1 when no type subtag has been seen yet.
        CaseInsensitiveString key = null;
        String type;
        int typeStart = -1;
        int typeEnd = -1;
        while (!itr.isDone()) {
            if (key != null) {
                if (UnicodeLocaleExtension.isKey(itr.current())) {
                    // next keyword - emit previous one
                    assert(typeStart == -1 || typeEnd != -1);
                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
                    if (ukeywords == null) {
                        ukeywords = new HashMap<>(4);
                    }
                    ukeywords.put(key, type);

                    // reset keyword info; a key that is already stored is a
                    // duplicate, so its type subtags will be skipped
                    CaseInsensitiveString tmpKey = new CaseInsensitiveString(itr.current());
                    key = ukeywords.containsKey(tmpKey) ? null : tmpKey;
                    typeStart = typeEnd = -1;
                } else {
                    // type subtag of the current key: extend the type span
                    if (typeStart == -1) {
                        typeStart = itr.currentStart();
                    }
                    typeEnd = itr.currentEnd();
                }
            } else if (UnicodeLocaleExtension.isKey(itr.current())) {
                // 1. first keyword or
                // 2. next keyword, but previous one was duplicate
                key = new CaseInsensitiveString(itr.current());
                if (ukeywords != null && ukeywords.containsKey(key)) {
                    // duplicate
                    key = null;
                }
            }
            // (a non-key subtag seen while key is null is skipped)

            // On the last subtag, flush the pending keyword here and leave the
            // loop, since no further key will arrive to trigger the emit above.
            if (!itr.hasNext()) {
                if (key != null) {
                    // last keyword
                    assert(typeStart == -1 || typeEnd != -1);
                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
                    if (ukeywords == null) {
                        ukeywords = new HashMap<>(4);
                    }
                    ukeywords.put(key, type);
                }
                break;
            }

            itr.next();
        }
    }

    /**
     * A string wrapper whose {@code equals} and {@code hashCode} are based on
     * the result of {@code LocaleUtils.toLowerString}, while {@link #value()}
     * still returns the string exactly as it was given.
     */
    static final class CaseInsensitiveString {
        // The string as supplied by the caller.
        private final String str;
        // Lower-cased form used for comparison and hashing.
        private final String lowerStr;

        CaseInsensitiveString(String s) {
            str = s;
            lowerStr = LocaleUtils.toLowerString(s);
        }

        /** Returns the original string. */
        public String value() {
            return str;
        }

        @Override
        public int hashCode() {
            return lowerStr.hashCode();
        }

        @Override
        public boolean equals(Object obj) {
            if (obj == this) {
                return true;
            }
            if (obj instanceof CaseInsensitiveString) {
                CaseInsensitiveString other = (CaseInsensitiveString) obj;
                return lowerStr.equals(other.lowerStr);
            }
            return false;
        }
    }

    /**
     * A char wrapper whose {@code equals} and {@code hashCode} are based on
     * the result of {@code LocaleUtils.toLower}, while {@link #value()} still
     * returns the char exactly as it was given.
     */
    static final class CaseInsensitiveChar {
        // The char as supplied by the caller.
        private final char ch;
        // Lower-cased form used for comparison and hashing.
        private final char lowerCh;

        /**
         * Constructs a CaseInsensitiveChar from the first char of the
         * given string.
         */
        private CaseInsensitiveChar(String s) {
            this(s.charAt(0));
        }

        CaseInsensitiveChar(char c) {
            ch = c;
            lowerCh = LocaleUtils.toLower(c);
        }

        /** Returns the original char. */
        public char value() {
            return ch;
        }

        @Override
        public int hashCode() {
            return lowerCh;
        }

        @Override
        public boolean equals(Object obj) {
            if (obj == this) {
                return true;
            }
            if (obj instanceof CaseInsensitiveChar) {
                CaseInsensitiveChar other = (CaseInsensitiveChar) obj;
                return lowerCh == other.lowerCh;
            }
            return false;
        }
    }
}