8159214: jlink --include-locales problems
author naoto
Mon, 18 Jul 2016 11:22:53 -0700
changeset 39734 7701942a1e7b
parent 39733 32059758b7bc
child 39743 10d92aa550aa
8159214: jlink --include-locales problems
Reviewed-by: jlaskey, okutsu
jdk/src/java.base/share/classes/module-info.java
jdk/src/jdk.jlink/share/classes/jdk/tools/jlink/internal/plugins/IncludeLocalesPlugin.java
jdk/test/tools/jlink/plugins/IncludeLocalesPluginTest.java
--- a/jdk/src/java.base/share/classes/module-info.java	Mon Jul 18 14:39:21 2016 +0300
+++ b/jdk/src/java.base/share/classes/module-info.java	Mon Jul 18 11:22:53 2016 -0700
@@ -282,15 +282,18 @@
         jdk.security.auth;
     exports sun.text.resources to
         jdk.localedata;
-    exports sun.util.resources to
-        jdk.localedata;
+    exports sun.util.cldr to
+        jdk.jlink;
     exports sun.util.locale.provider to
         java.desktop,
+        jdk.jlink,
         jdk.localedata;
     exports sun.util.logging to
         java.desktop,
         java.logging,
         java.prefs;
+    exports sun.util.resources to
+        jdk.localedata;
 
     // JDK-internal service types
     uses jdk.internal.logger.DefaultLoggerFinder;
--- a/jdk/src/jdk.jlink/share/classes/jdk/tools/jlink/internal/plugins/IncludeLocalesPlugin.java	Mon Jul 18 14:39:21 2016 +0300
+++ b/jdk/src/jdk.jlink/share/classes/jdk/tools/jlink/internal/plugins/IncludeLocalesPlugin.java	Mon Jul 18 11:22:53 2016 -0700
@@ -24,6 +24,7 @@
  */
 package jdk.tools.jlink.internal.plugins;
 
+import java.util.AbstractMap;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.IllformedLocaleException;
@@ -31,6 +32,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import static java.util.ResourceBundle.Control;
 import java.util.Set;
 import java.util.function.Predicate;
 import java.util.regex.Pattern;
@@ -45,6 +47,10 @@
 import jdk.tools.jlink.plugin.PluginException;
 import jdk.tools.jlink.plugin.ModulePool;
 import jdk.tools.jlink.plugin.Plugin;
+import sun.util.cldr.CLDRBaseLocaleDataMetaInfo;
+import sun.util.locale.provider.LocaleProviderAdapter;
+import sun.util.locale.provider.LocaleProviderAdapter.Type;
+import sun.util.locale.provider.ResourceBundleBasedAdapter;
 
 /**
  * Plugin to explicitly specify the locale data included in jdk.localedata
@@ -95,6 +101,42 @@
     private List<Locale> available;
     private List<String> filtered;
 
+    private static final ResourceBundleBasedAdapter CLDR_ADAPTER =
+        (ResourceBundleBasedAdapter)LocaleProviderAdapter.forType(Type.CLDR);
+    private static final Map<Locale, String[]> CLDR_PARENT_LOCALES =
+        new CLDRBaseLocaleDataMetaInfo().parentLocales();
+
+    // Equivalence map handed to Locale.LanguageRange.parse(): tag -> tags treated as equivalent
+    private static final Map<String, List<String>> EQUIV_MAP =
+        Stream.concat(
+            // COMPAT equivalence: script-based Chinese tags -> themselves plus region-based tags
+            Map.of(
+                "zh-Hans", List.of("zh-Hans", "zh-CN", "zh-SG"),
+                "zh-Hant", List.of("zh-Hant", "zh-HK", "zh-MO", "zh-TW"))
+                .entrySet()
+                .stream(),
+            // NOTE: keys of both halves must be disjoint; toMap() below has no merge function
+            // CLDR parent locales: each parent -> itself plus its children and grandchildren
+            CLDR_PARENT_LOCALES.entrySet().stream()
+                .map(entry -> {
+                    String parent = entry.getKey().toLanguageTag();
+                    List<String> children = new ArrayList<>();
+                    children.add(parent);
+                    // For every non-empty child: its own (non-empty) children first, then the child
+                    Arrays.stream(entry.getValue())
+                        .filter(child -> !child.isEmpty())
+                        .flatMap(child ->
+                            Stream.concat(
+                                Arrays.stream(CLDR_PARENT_LOCALES.getOrDefault(
+                                    Locale.forLanguageTag(child), new String[0]))
+                                        .filter(grandchild -> !grandchild.isEmpty()),
+                                List.of(child).stream()))
+                        .distinct()
+                        .forEach(children::add);
+                    return new AbstractMap.SimpleEntry<String, List<String>>(parent, children);
+                })
+        ).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+
     // Special COMPAT provider locales
     private static final String jaJPJPTag = "ja-JP-JP";
     private static final String noNONYTag = "no-NO-NY";
@@ -152,16 +194,14 @@
     @Override
     public void configure(Map<String, String> config) {
         userParam = config.get(NAME);
-        priorityList = Arrays.stream(userParam.split(","))
-            .map(s -> {
-                try {
-                    return new Locale.LanguageRange(s);
-                } catch (IllegalArgumentException iae) {
-                    throw new IllegalArgumentException(String.format(
-                        PluginsResourceBundle.getMessage(NAME + ".invalidtag"), s));
-                }
-            })
-            .collect(Collectors.toList());
+        // Parse the ranges, customizing them with EQUIV_MAP; keep the cause when rejecting a tag.
+        try {
+            priorityList = Locale.LanguageRange.parse(userParam, EQUIV_MAP);
+        } catch (IllegalArgumentException iae) {
+            throw new IllegalArgumentException(String.format(
+                PluginsResourceBundle.getMessage(NAME + ".invalidtag"),
+                    iae.getMessage().replaceFirst("^range=", "")), iae);
+        }
     }
 
     @Override
@@ -193,6 +233,7 @@
             // jdk.localedata is not added.
             throw new PluginException(PluginsResourceBundle.getMessage(NAME + ".localedatanotfound"));
         }
+
         filtered = filterLocales(available);
 
         if (filtered.isEmpty()) {
@@ -205,56 +246,26 @@
                 filtered.stream().flatMap(s -> includeLocaleFilePatterns(s).stream()))
             .map(s -> "regex:" + s)
             .collect(Collectors.toList());
+
         predicate = ResourceFilter.includeFilter(value);
     }
 
     private List<String> includeLocaleFilePatterns(String tag) {
-        List<String> files = new ArrayList<>();
-        String pTag = tag.replaceAll("-", "_");
-        int lastDelimiter = tag.length();
-        String isoSpecial = pTag.matches("^(he|yi|id).*") ?
-                            pTag.replaceFirst("he", "iw")
-                                .replaceFirst("yi", "ji")
-                                .replaceFirst("id", "in") : "";
-
-        // Add tag patterns including parents
-        while (true) {
-            pTag = pTag.substring(0, lastDelimiter);
-            files.addAll(includeLocaleFiles(pTag));
-
-            if (!isoSpecial.isEmpty()) {
-                isoSpecial = isoSpecial.substring(0, lastDelimiter);
-                files.addAll(includeLocaleFiles(isoSpecial));
-            }
-
-            lastDelimiter = pTag.lastIndexOf('_');
-            if (lastDelimiter == -1) {
-                break;
-            }
+        // Ignore extension variations
+        if (tag.matches(".+-[a-z]-.+")) {
+            return List.of();
         }
 
-        final String lang = pTag;
-
-        // Add possible special locales of the COMPAT provider
-        Set.of(jaJPJPTag, noNONYTag, thTHTHTag).stream()
-            .filter(stag -> lang.equals(stag.substring(0,2)))
-            .map(t -> includeLocaleFiles(t.replaceAll("-", "_")))
-            .forEach(files::addAll);
-
-        // Add possible UN.M49 files (unconditional for now) for each language
-        files.addAll(includeLocaleFiles(lang + "_[0-9]{3}"));
-        if (!isoSpecial.isEmpty()) {
-            files.addAll(includeLocaleFiles(isoSpecial + "_[0-9]{3}"));
-        }
+        List<String> files = new ArrayList<>(includeLocaleFiles(tag.replaceAll("-", "_")));
 
         // Add Thai BreakIterator related data files
-        if (lang.equals("th")) {
+        if (tag.equals("th")) {
             files.add(".+sun/text/resources/thai_dict");
             files.add(".+sun/text/resources/[^_]+BreakIteratorData_th");
         }
 
         // Add Taiwan resource bundles for Hong Kong
-        if (tag.startsWith("zh-HK")) {
+        if (tag.equals("zh-HK")) {
             files.addAll(includeLocaleFiles("zh_TW"));
         }
 
@@ -306,6 +317,11 @@
         byte[] filteredBytes = filterLocales(locales).stream()
             .collect(Collectors.joining(" "))
             .getBytes();
+
+        if (filteredBytes.length > b.length) {
+            throw new InternalError("Size of filtered locales is bigger than the original one");
+        }
+
         System.arraycopy(filteredBytes, 0, b, 0, filteredBytes.length);
         Arrays.fill(b, filteredBytes.length, b.length, (byte)' ');
         return true;
@@ -314,6 +330,9 @@
     private List<String> filterLocales(List<Locale> locales) {
         List<String> ret =
             Locale.filter(priorityList, locales, Locale.FilteringMode.EXTENDED_FILTERING).stream()
+                .flatMap(loc -> Stream.concat(Control.getNoFallbackControl(Control.FORMAT_DEFAULT)
+                                     .getCandidateLocales("", loc).stream(),
+                                CLDR_ADAPTER.getCandidateLocales("", loc).stream()))
                 .map(loc ->
                     // Locale.filter() does not preserve the case, which is
                     // significant for "variant" equality. Retrieve the original
@@ -321,15 +340,12 @@
                     locales.stream()
                         .filter(l -> l.toString().equalsIgnoreCase(loc.toString()))
                         .findAny()
-                        .orElse(Locale.ROOT)
-                        .toLanguageTag())
+                        .orElse(Locale.ROOT))
+                .filter(loc -> !loc.equals(Locale.ROOT))
+                .flatMap(IncludeLocalesPlugin::localeToTags)
+                .distinct()
                 .collect(Collectors.toList());
 
-        // no-NO-NY.toLanguageTag() returns "nn-NO", so specially handle it here
-        if (ret.contains("no-NO")) {
-            ret.add(noNONYTag);
-        }
-
         return ret;
     }
 
@@ -338,6 +354,7 @@
         // ISO3166 compatibility
         tag = tag.replaceFirst("^iw", "he").replaceFirst("^ji", "yi").replaceFirst("^in", "id");
 
+        // Special COMPAT provider locales
         switch (tag) {
             case jaJPJPTag:
                 return jaJPJP;
@@ -351,4 +368,42 @@
                 return LOCALE_BUILDER.build();
         }
     }
+
+    /**
+     * Returns the tag of {@code loc} plus its legacy ISO 639 or COMPAT alias tag, if any.
+     */
+    private static Stream<String> localeToTags(Locale loc) {
+        String tag = loc.toLanguageTag();
+        Stream<String> ret = null;
+        switch (loc.getLanguage()) {
+            // Obsolete ISO 639 language codes (language codes, not ISO 3166 regions)
+            case "iw":
+                ret = List.of(tag, tag.replaceFirst("^he", "iw")).stream();
+                break;
+            case "in":
+                ret = List.of(tag, tag.replaceFirst("^id", "in")).stream();
+                break;
+            case "ji":
+                ret = List.of(tag, tag.replaceFirst("^yi", "ji")).stream();
+                break;
+            // Special COMPAT provider locales; countries are compared by value, not identity
+            case "ja":
+                if ("JP".equals(loc.getCountry())) {
+                    ret = List.of(tag, jaJPJPTag).stream();
+                }
+                break;
+            case "no":
+            case "nn":
+                if ("NO".equals(loc.getCountry())) {
+                    ret = List.of(tag, noNONYTag).stream();
+                }
+                break;
+            case "th":
+                if ("TH".equals(loc.getCountry())) {
+                    ret = List.of(tag, thTHTHTag).stream();
+                }
+                break;
+        }
+        return ret == null ? List.of(tag).stream() : ret;
+    }
 }
--- a/jdk/test/tools/jlink/plugins/IncludeLocalesPluginTest.java	Mon Jul 18 14:39:21 2016 +0300
+++ b/jdk/test/tools/jlink/plugins/IncludeLocalesPluginTest.java	Mon Jul 18 11:22:53 2016 -0700
@@ -90,6 +90,71 @@
             "",
         },
 
+        // Asterisk works exactly the same as above
+        {
+            "*",
+            "jdk.localedata",
+            List.of(
+                "/jdk.localedata/sun/text/resources/ext/FormatData_en_GB.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_ja.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_th.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_zh.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_en_001.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_ja.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_th.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_zh.class"),
+            List.of(),
+            Arrays.stream(Locale.getAvailableLocales())
+                  // "(root)" for Locale.ROOT rather than ""
+                  .map(loc -> loc.equals(Locale.ROOT) ? "(root)" : loc.toString())
+                  .collect(Collectors.toList()),
+            "",
+        },
+
+        // World English/Spanish in Latin America
+        {
+            "--include-locales=en-001,es-419",
+            "jdk.localedata",
+            List.of(
+                "/jdk.localedata/sun/text/resources/ext/FormatData_en_AU.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_es.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_es_AR.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_en_001.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_en_150.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_en_AT.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_es.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_es_419.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_es_AR.class"),
+            List.of(
+                "/jdk.localedata/sun/text/resources/LineBreakIteratorData_th",
+                "/jdk.localedata/sun/text/resources/thai_dict",
+                "/jdk.localedata/sun/text/resources/WordBreakIteratorData_th",
+                "/jdk.localedata/sun/text/resources/ext/BreakIteratorInfo_th.class",
+                "/jdk.localedata/sun/text/resources/ext/BreakIteratorRules_th.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_ja.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_th.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_ja.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_th.class"),
+            List.of(
+                "(root)", "en", "en_US", "en_US_POSIX", "en_001", "en_150", "en_AG", "en_AI",
+                "en_AT", "en_AU", "en_BB", "en_BE", "en_BM", "en_BS", "en_BW", "en_BZ",
+                "en_CA", "en_CC", "en_CH", "en_CK", "en_CM", "en_CX", "en_CY", "en_DE",
+                "en_DG", "en_DK", "en_DM", "en_ER", "en_FI", "en_FJ", "en_FK", "en_FM",
+                "en_GB", "en_GD", "en_GG", "en_GH", "en_GI", "en_GM", "en_GY", "en_HK",
+                "en_IE", "en_IL", "en_IM", "en_IN", "en_IO", "en_JE", "en_JM", "en_KE",
+                "en_KI", "en_KN", "en_KY", "en_LC", "en_LR", "en_LS", "en_MG", "en_MO",
+                "en_MS", "en_MT", "en_MU", "en_MW", "en_MY", "en_NA", "en_NF", "en_NG",
+                "en_NL", "en_NR", "en_NU", "en_NZ", "en_PG", "en_PH", "en_PK", "en_PN",
+                "en_PW", "en_RW", "en_SB", "en_SC", "en_SD", "en_SE", "en_SG", "en_SH",
+                "en_SI", "en_SL", "en_SS", "en_SX", "en_SZ", "en_TC", "en_TK", "en_TO",
+                "en_TT", "en_TV", "en_TZ", "en_UG", "en_VC", "en_VG", "en_VU", "en_WS",
+                "en_ZA", "en_ZM", "en_ZW", "es", "es_419", "es_AR", "es_BO", "es_BR",
+                "es_CL", "es_CO", "es_CR", "es_CU", "es_DO", "es_EC", "es_GT", "es_HN",
+                "es_MX", "es_NI", "es_PA", "es_PE", "es_PR", "es_PY", "es_SV", "es_US",
+                "es_UY", "es_VE"),
+            "",
+        },
+
         // All English and Japanese locales
         {
             "--include-locales=en,ja",
@@ -128,6 +193,35 @@
             "",
         },
 
+        // All locales in Austria
+        {
+            "--include-locales=*-AT",
+            "jdk.localedata",
+            List.of(
+                "/jdk.localedata/sun/text/resources/ext/FormatData_de.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_de_AT.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_de.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_de_AT.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_en_001.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_en_150.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_en_AT.class"),
+            List.of(
+                "/jdk.localedata/sun/text/resources/LineBreakIteratorData_th",
+                "/jdk.localedata/sun/text/resources/thai_dict",
+                "/jdk.localedata/sun/text/resources/WordBreakIteratorData_th",
+                "/jdk.localedata/sun/text/resources/ext/BreakIteratorInfo_th.class",
+                "/jdk.localedata/sun/text/resources/ext/BreakIteratorRules_th.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_en_GB.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_ja.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_th.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_ja.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_th.class"),
+            List.of(
+                "(root)", "en", "en_US", "en_US_POSIX", "en_001", "en_150", "en_AT",
+                "de", "de_AT"),
+            "",
+        },
+
         // All locales in India
         {
             "--include-locales=*-IN",
@@ -154,10 +248,11 @@
                 "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_th.class",
                 "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_zh.class"),
             List.of(
-                "(root)", "as_IN", "bn_IN", "bo_IN", "brx_IN", "en", /* "en_001", */
-                "en_IN", "en_US", "en_US_POSIX", "gu_IN", "hi_IN", "kn_IN", "kok_IN",
-                "ks_IN", "ml_IN", "mr_IN", "ne_IN", "or_IN", "pa_IN", "pa_IN_#Guru",
-                "ta_IN", "te_IN", "ur_IN"),
+                "(root)", "as_IN", "as", "bn_IN", "bn", "bo_IN", "bo", "brx_IN", "brx",
+                "en", "en_001", "en_IN", "en_US", "en_US_POSIX", "gu_IN", "gu", "hi_IN",
+                "hi", "kn_IN", "kn", "kok_IN", "kok", "ks_IN", "ks", "ml_IN", "ml",
+                "mr_IN", "mr", "ne_IN", "ne", "or_IN", "or", "pa_IN", "pa", "pa_IN_#Guru",
+                "pa__#Guru", "ta_IN", "ta", "te_IN", "te", "ur_IN", "ur"),
             "",
         },
 
@@ -203,12 +298,40 @@
                 "/jdk.localedata/sun/text/resources/ext/FormatData_en_GB.class",
                 "/jdk.localedata/sun/text/resources/ext/FormatData_ja.class",
                 "/jdk.localedata/sun/text/resources/ext/FormatData_th.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_zh_CN.class",
                 "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_en_001.class",
                 "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_ja.class",
                 "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_th.class"),
             List.of(
-                "(root)", "en", "en_US", "en_US_POSIX", "zh_HK", "zh_HK_#Hans",
-                "zh_HK_#Hant"),
+                "(root)", "en", "en_US", "en_US_POSIX", "zh", "zh__#Hans", "zh__#Hant",
+                "zh_HK", "zh_HK_#Hans", "zh_HK_#Hant"),
+            "",
+        },
+
+        // Simplified Chinese
+        {
+            "--include-locales=zh-Hans",
+            "jdk.localedata",
+            List.of(
+                "/jdk.localedata/sun/text/resources/ext/FormatData_zh.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_zh_CN.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_zh_SG.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_zh.class"),
+            List.of(
+                "/jdk.localedata/sun/text/resources/LineBreakIteratorData_th",
+                "/jdk.localedata/sun/text/resources/thai_dict",
+                "/jdk.localedata/sun/text/resources/WordBreakIteratorData_th",
+                "/jdk.localedata/sun/text/resources/ext/BreakIteratorInfo_th.class",
+                "/jdk.localedata/sun/text/resources/ext/BreakIteratorRules_th.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_en_GB.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_ja.class",
+                "/jdk.localedata/sun/text/resources/ext/FormatData_th.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_en_001.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_ja.class",
+                "/jdk.localedata/sun/text/resources/cldr/ext/FormatData_th.class"),
+            List.of(
+                "(root)", "en", "en_US", "en_US_POSIX", "zh", "zh__#Hans", "zh_CN",
+                "zh_CN_#Hans", "zh_HK_#Hans", "zh_MO_#Hans", "zh_SG", "zh_SG_#Hans"),
             "",
         },
 
@@ -290,7 +413,7 @@
             null,
             null,
             new PluginException(String.format(
-                PluginsResourceBundle.getMessage("include-locales.invalidtag"), "zh_HK"))
+                PluginsResourceBundle.getMessage("include-locales.invalidtag"), "zh_hk"))
                 .getMessage(),
         },