8179071: Month value is inconsistent between CLDR and Java in some locales
author rgoel
Mon, 30 Apr 2018 11:59:42 +0530
changeset 49918 8b9c78f0a712
parent 49917 1871c5d07caf
child 49919 96d4658eb7f2
8179071: Month value is inconsistent between CLDR and Java in some locales
Summary: handled Language aliases from CLDR SupplementalMetaData
Reviewed-by: naoto
make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java
make/jdk/src/classes/build/tools/cldrconverter/ResourceBundleGenerator.java
make/jdk/src/classes/build/tools/cldrconverter/SupplementalMetadataParseHandler.java
src/java.base/share/classes/sun/util/cldr/CLDRLocaleProviderAdapter.java
src/java.base/share/classes/sun/util/locale/provider/LocaleDataMetaInfo.java
test/jdk/java/util/Locale/Bug8179071.java
test/jdk/sun/text/resources/LocaleData.cldr
test/jdk/sun/text/resources/LocaleDataTest.java
test/jdk/tools/jlink/plugins/IncludeLocalesPluginTest.java
--- a/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java	Fri Apr 27 15:55:29 2018 -0700
+++ b/make/jdk/src/classes/build/tools/cldrconverter/CLDRConverter.java	Mon Apr 30 11:59:42 2018 +0530
@@ -90,8 +90,8 @@
     static final String[] EMPTY_ZONE = {"", "", "", "", "", ""};
 
     private static SupplementDataParseHandler handlerSuppl;
-    private static SupplementalMetadataParseHandler handlerSupplMeta;
     private static LikelySubtagsParseHandler handlerLikelySubtags;
+    static SupplementalMetadataParseHandler handlerSupplMeta;
     static NumberingSystemsParseHandler handlerNumbering;
     static MetaZonesParseHandler handlerMetaZones;
     static TimeZoneParseHandler handlerTimeZone;
@@ -428,7 +428,7 @@
         parseLDMLFile(new File(LIKELYSUBTAGS_SOURCE_FILE), handlerLikelySubtags);
 
         // Parse supplementalMetadata
-        // Currently only interested in deprecated time zone ids.
+        // Currently interested in deprecated time zone ids and language aliases.
         handlerSupplMeta = new SupplementalMetadataParseHandler();
         parseLDMLFile(new File(SPPL_META_SOURCE_FILE), handlerSupplMeta);
     }
--- a/make/jdk/src/classes/build/tools/cldrconverter/ResourceBundleGenerator.java	Fri Apr 27 15:55:29 2018 -0700
+++ b/make/jdk/src/classes/build/tools/cldrconverter/ResourceBundleGenerator.java	Mon Apr 30 11:59:42 2018 +0530
@@ -270,7 +270,8 @@
             out.printf("public class %s implements LocaleDataMetaInfo {\n", className);
             out.printf("    private static final Map<String, String> resourceNameToLocales = new HashMap<>();\n" +
                        (CLDRConverter.isBaseModule ?
-                       "    private static final Map<Locale, String[]> parentLocalesMap = new HashMap<>();\n\n" :
+                       "    private static final Map<Locale, String[]> parentLocalesMap = new HashMap<>();\n" +
+                       "    private static final Map<String, String> languageAliasMap = new HashMap<>();\n\n" :
                        "\n") +
                        "    static {\n");
 
@@ -301,10 +302,16 @@
                 } else {
                     if ("AvailableLocales".equals(key)) {
                         out.printf("        resourceNameToLocales.put(\"%s\",\n", key);
-                        out.printf("              \"%s\");\n", toLocaleList(metaInfo.get(key), false));
+                        out.printf("              \"%s\");\n", toLocaleList(applyLanguageAliases(metaInfo.get(key)), false));
                     }
                 }
             }
+            // for languageAliasMap
+            if (CLDRConverter.isBaseModule) {
+                CLDRConverter.handlerSupplMeta.getLanguageAliasData().forEach((key, value) -> {
+                    out.printf("                languageAliasMap.put(\"%s\", \"%s\");\n", key, value);
+                });
+            }
 
             out.printf("    }\n\n");
 
@@ -340,6 +347,10 @@
 
             if (CLDRConverter.isBaseModule) {
                 out.printf("    @Override\n" +
+                           "    public Map<String, String> getLanguageAliasMap() {\n" +
+                           "        return languageAliasMap;\n" +
+                           "    }\n\n");
+                out.printf("    @Override\n" +
                            "    public Map<String, String> tzCanonicalIDs() {\n" +
                            "        return TZCanonicalIDMapHolder.tzCanonicalIDMap;\n" +
                            "    }\n\n");
@@ -377,4 +388,13 @@
         }
         return sb.toString();
     }
+
+    private static SortedSet<String> applyLanguageAliases(SortedSet<String> tags) {
+        CLDRConverter.handlerSupplMeta.getLanguageAliasData().forEach((key, value) -> {
+            if (tags.remove(key)) {
+                tags.add(value);
+            }
+        });
+        return tags;
+    }
 }
--- a/make/jdk/src/classes/build/tools/cldrconverter/SupplementalMetadataParseHandler.java	Fri Apr 27 15:55:29 2018 -0700
+++ b/make/jdk/src/classes/build/tools/cldrconverter/SupplementalMetadataParseHandler.java	Mon Apr 30 11:59:42 2018 +0530
@@ -27,6 +27,8 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.stream.Stream;
 import org.xml.sax.Attributes;
 import org.xml.sax.InputSource;
@@ -38,6 +40,12 @@
  */
 
 class SupplementalMetadataParseHandler extends AbstractLDMLHandler<Object> {
+    private final Map<String, String> languageAliasMap;
+
+    SupplementalMetadataParseHandler() {
+        languageAliasMap = new HashMap<>();
+    }
+
     @Override
     public InputSource resolveEntity(String publicID, String systemID) throws IOException, SAXException {
         // avoid HTTP traffic to unicode.org
@@ -57,6 +65,17 @@
             }
             pushIgnoredContainer(qName);
             break;
+        case "languageAlias":
+            String aliasReason = attributes.getValue("reason");
+            if ("deprecated".equals(aliasReason) || "legacy".equals(aliasReason)) {
+                String tag = attributes.getValue("type");
+                if (!checkLegacyLocales(tag)) {
+                   languageAliasMap.put(tag.replaceAll("_", "-"),
+                   attributes.getValue("replacement").replaceAll("_", "-"));
+                }
+            }
+            pushIgnoredContainer(qName);
+            break;
         default:
             // treat anything else as a container
             pushContainer(qName, attributes);
@@ -69,4 +88,13 @@
                 .map(k -> String.format("        \"%s\", \"%s\",", k, get(k)))
                 .sorted();
     }
+    Map<String, String> getLanguageAliasData() {
+        return languageAliasMap;
+    }
+
+    // skip language aliases for JDK legacy locales for ISO compatibility
+    private boolean checkLegacyLocales(String tag) {
+        return (tag.startsWith("no") || tag.startsWith("in")
+                || tag.startsWith("iw") || tag.startsWith("ji"));
+    }
 }
--- a/src/java.base/share/classes/sun/util/cldr/CLDRLocaleProviderAdapter.java	Fri Apr 27 15:55:29 2018 -0700
+++ b/src/java.base/share/classes/sun/util/cldr/CLDRLocaleProviderAdapter.java	Mon Apr 30 11:59:42 2018 +0530
@@ -64,8 +64,14 @@
 
     // parent locales map
     private static volatile Map<Locale, Locale> parentLocalesMap;
+    // language aliases map
+    private static volatile Map<String,String> langAliasesMap;
+    // cache to hold locale to locale mapping for language aliases.
+    private static final Map<Locale, Locale> langAliasesCache;
     static {
         parentLocalesMap = new ConcurrentHashMap<>();
+        langAliasesMap = new ConcurrentHashMap<>();
+        langAliasesCache = new ConcurrentHashMap<>();
         // Assuming these locales do NOT have irregular parent locales.
         parentLocalesMap.put(Locale.ROOT, Locale.ROOT);
         parentLocalesMap.put(Locale.ENGLISH, Locale.ENGLISH);
@@ -160,6 +166,22 @@
         return locs;
     }
 
+    private Locale applyAliases(Locale loc) {
+        if (langAliasesMap.isEmpty()) {
+            langAliasesMap = baseMetaInfo.getLanguageAliasMap();
+        }
+        Locale locale = langAliasesCache.get(loc);
+        if (locale == null) {
+            String locTag = loc.toLanguageTag();
+            Locale aliasLocale = langAliasesMap.containsKey(locTag)
+                    ? Locale.forLanguageTag(langAliasesMap.get(locTag)) : loc;
+            langAliasesCache.putIfAbsent(loc, aliasLocale);
+            return aliasLocale;
+        } else {
+            return locale;
+        }
+    }
+
     @Override
     protected Set<String> createLanguageTagSet(String category) {
         // Assume all categories support the same set as AvailableLocales
@@ -194,7 +216,7 @@
     // Implementation of ResourceBundleBasedAdapter
     @Override
     public List<Locale> getCandidateLocales(String baseName, Locale locale) {
-        List<Locale> candidates = super.getCandidateLocales(baseName, locale);
+        List<Locale> candidates = super.getCandidateLocales(baseName, applyAliases(locale));
         return applyParentLocales(baseName, candidates);
     }
 
--- a/src/java.base/share/classes/sun/util/locale/provider/LocaleDataMetaInfo.java	Fri Apr 27 15:55:29 2018 -0700
+++ b/src/java.base/share/classes/sun/util/locale/provider/LocaleDataMetaInfo.java	Mon Apr 30 11:59:42 2018 +0530
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -58,4 +58,13 @@
     default public Map<String, String>  tzCanonicalIDs() {
         return null;
     }
+
+    /**
+     * Returns a map of language aliases, which maps each source language tag
+     * to the language tag that should replace it.
+     * @return map from source language tag to replacement language tag; this default returns {@code null}.
+     */
+   default public Map<String, String> getLanguageAliasMap(){
+       return null;
+   }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/util/Locale/Bug8179071.java	Mon Apr 30 11:59:42 2018 +0530
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+ /*
+ * @test
+ * @bug 8179071
+ * @summary Test that language aliases of CLDR supplemental metadata are handled correctly.
+ * @modules jdk.localedata
+ * @run main/othervm -Djava.locale.providers=CLDR Bug8179071
+ */
+
+/**
+ * This fix is dependent on a particular version of CLDR data.
+ */
+
+import java.time.Month;
+import java.time.format.TextStyle;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+
+public class Bug8179071 {
+
+    // Deprecated and Legacy tags.
+    private static final Set<String> LegacyAliases = Set.of("pa-PK", "ug-Arab-CN", "kk-Cyrl-KZ",
+            "bs-BA", "ks-Arab-IN", "mn-Cyrl-MN", "ha-Latn-NE",
+            "shi-MA", "ha-Latn-NG", "ms-Latn-BN","ms-Latn-SG",
+            "ky-Cyrl-KG", "az-AZ", "zh-guoyu", "zh-min-nan", "i-klingon", "i-tsu",
+            "sr-XK", "sgn-CH-DE", "mo", "i-tay", "scc", "uz-UZ", "uz-AF", "sr-RS",
+            "i-hak", "sgn-BE-FR", "i-lux", "vai-LR", "tl", "zh-hakka", "i-ami", "aa-SAAHO", "ha-Latn-GH",
+            "zh-xiang", "i-pwn", "sgn-BE-NL", "jw", "sh", "tzm-Latn-MA", "i-bnn");
+    // expected month format data for locales after language aliases replacement.
+    private static Map<String, String> shortJanuaryNames = Map.of( "pa-PK", "\u062c\u0646\u0648\u0631\u06cc",
+                                                          "uz-AF" , "\u062c\u0646\u0648",
+                                                          "sr-ME", "jan",
+                                                          "scc", "\u0458\u0430\u043d",
+                                                          "sh", "jan",
+                                                          "ha-Latn-NE", "Jan",
+                                                          "i-lux", "Jan.");
+
+
+    private static void test(String tag, String expected) {
+        Locale target = Locale.forLanguageTag(tag);
+        Month day = Month.JANUARY;
+        TextStyle style = TextStyle.SHORT;
+        String actual = day.getDisplayName(style, target);
+        if (!actual.equals(expected)) {
+            throw new RuntimeException("failed for locale  " + tag + " actual output " + actual +"  does not match with  " + expected);
+        }
+    }
+
+    /**
+     * getAvailableLocales() should not contain any deprecated or Legacy language tags
+     */
+    private static void checkInvalidTags() {
+        Set<String> invalidTags = new HashSet<>();
+        Arrays.asList(Locale.getAvailableLocales()).stream()
+                .map(loc -> loc.toLanguageTag())
+                .forEach( tag -> {if(LegacyAliases.contains(tag)) {invalidTags.add(tag);}});
+        if (!invalidTags.isEmpty()) {
+          throw new RuntimeException("failed: Deprecated and Legacy tags found  " + invalidTags  + " in AvailableLocales ");
+        }
+    }
+
+    public static void main(String[] args) {
+        shortJanuaryNames.forEach((key, value) -> test(key, value));
+        checkInvalidTags();
+    }
+}
--- a/test/jdk/sun/text/resources/LocaleData.cldr	Fri Apr 27 15:55:29 2018 -0700
+++ b/test/jdk/sun/text/resources/LocaleData.cldr	Mon Apr 30 11:59:42 2018 +0530
@@ -5419,10 +5419,10 @@
 FormatData/sr_BA/MonthNames/6=\u0458\u0443\u043b
 FormatData/sr_BA/DayNames/3=\u0441\u0440\u0435\u0434\u0430
 FormatData/sr_BA/DayAbbreviations/3=\u0441\u0440\u0435
-FormatData/sr_BA/TimePatterns/0=HH.mm.ss zzzz
-FormatData/sr_BA/TimePatterns/1=HH.mm.ss z
-FormatData/sr_BA/TimePatterns/2=HH.mm.ss
-FormatData/sr_BA/TimePatterns/3=HH.mm
+FormatData/sr_BA/TimePatterns/0=HH:mm:ss zzzz
+FormatData/sr_BA/TimePatterns/1=HH:mm:ss z
+FormatData/sr_BA/TimePatterns/2=HH:mm:ss
+FormatData/sr_BA/TimePatterns/3=HH:mm
 FormatData/sr_BA/DatePatterns/0=EEEE, dd. MMMM y.
 FormatData/sr_BA/DatePatterns/1=dd. MMMM y.
 FormatData/sr_BA/DatePatterns/2=dd.MM.y.
--- a/test/jdk/sun/text/resources/LocaleDataTest.java	Fri Apr 27 15:55:29 2018 -0700
+++ b/test/jdk/sun/text/resources/LocaleDataTest.java	Mon Apr 30 11:59:42 2018 +0530
@@ -38,7 +38,7 @@
  *      7114053 7074882 7040556 8008577 8013836 8021121 6192407 6931564 8027695
  *      8017142 8037343 8055222 8042126 8074791 8075173 8080774 8129361 8134916
  *      8145136 8145952 8164784 8037111 8081643 7037368 8178872 8185841 8190918
- *      8187946 8195478 8181157
+ *      8187946 8195478 8181157 8179071
  * @summary Verify locale data
  * @modules java.base/sun.util.resources
  * @modules jdk.localedata
--- a/test/jdk/tools/jlink/plugins/IncludeLocalesPluginTest.java	Fri Apr 27 15:55:29 2018 -0700
+++ b/test/jdk/tools/jlink/plugins/IncludeLocalesPluginTest.java	Mon Apr 30 11:59:42 2018 +0530
@@ -40,7 +40,7 @@
 
 /*
  * @test
- * @bug 8152143 8152704 8155649 8165804 8185841 8176841 8190918
+ * @bug 8152143 8152704 8155649 8165804 8185841 8176841 8190918 8179071
  * @summary IncludeLocalesPlugin tests
  * @author Naoto Sato
  * @requires (vm.compMode != "Xcomp" & os.maxMemory >= 2g)
@@ -256,7 +256,7 @@
                 "(root)", "as_IN", "as", "bn_IN", "bn", "bo_IN", "bo", "brx_IN", "brx",
                 "en", "en_001", "en_IN", "en_US", "en_US_POSIX", "gu_IN", "gu", "hi_IN",
                 "hi", "kn_IN", "kn", "kok_IN", "kok", "ks_IN", "ks", "ml_IN", "ml",
-                "mr_IN", "mr", "ne_IN", "ne", "or_IN", "or", "pa_IN", "pa", "pa_IN_#Guru",
+                "mr_IN", "mr", "ne_IN", "ne", "or_IN", "or", "pa", "pa_IN_#Guru",
                 "pa__#Guru", "ta_IN", "ta", "te_IN", "te", "ur_IN", "ur"),
             "",
         },