make/jdk/src/classes/build/tools/generatelsrequivmaps/EquivMapsGenerator.java
changeset 47216 71c04702a3d5
parent 46042 a614479300d8
child 48373 f9152f462cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/jdk/src/classes/build/tools/generatelsrequivmaps/EquivMapsGenerator.java	Tue Sep 12 19:03:39 2017 +0200
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package build.tools.generatelsrequivmaps;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.time.ZoneId;
+import java.time.ZonedDateTime;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * This tool reads the IANA Language Subtag Registry data file downloaded from
+ * http://www.iana.org/assignments/language-subtag-registry, which is specified
+ * in the command line and generates a .java source file as specified in
+ * command line. The generated .java source file contains equivalent language
+ * maps. These equivalent language maps are used by LocaleMatcher.java
+ * for the locale matching mechanism specified in RFC 4647 "Matching of Language
+ * Tags".
+ */
+public class EquivMapsGenerator {
+
+    public static void main(String[] args) throws Exception {
+        if (args.length != 2) {
+            System.err.println("Usage: java EquivMapsGenerator"
+                    + " language-subtag-registry.txt LocaleEquivalentMaps.java");
+            System.exit(1);
+        }
+        readLSRfile(args[0]);
+        generateEquivalentMap();
+        generateSourceCode(args[1]);
+    }
+
+    private static String LSRrevisionDate;
+    private static Map<String, StringBuilder> initialLanguageMap =
+        new TreeMap<>();
+    private static Map<String, StringBuilder> initialRegionVariantMap =
+        new TreeMap<>();
+
+    private static Map<String, String> sortedLanguageMap1 = new TreeMap<>();
+    private static Map<String, String[]> sortedLanguageMap2 = new TreeMap<>();
+    private static Map<String, String> sortedRegionVariantMap =
+        new TreeMap<>();
+
+    private static void readLSRfile(String filename) throws Exception {
+        String type = null;
+        String tag = null;
+        String preferred = null;
+
+        for (String line : Files.readAllLines(Paths.get(filename),
+                                              Charset.forName("UTF-8"))) {
+            line = line.toLowerCase(Locale.ROOT);
+            int index = line.indexOf(' ')+1;
+            if (line.startsWith("file-date:")) {
+                LSRrevisionDate = line.substring(index);
+            } else if (line.startsWith("type:")) {
+                type = line.substring(index);
+            } else if (line.startsWith("tag:") || line.startsWith("subtag:")) {
+                tag = line.substring(index);
+            } else if (line.startsWith("preferred-value:")
+                       && !type.equals("extlang")) {
+                preferred = line.substring(index);
+                processDeprecatedData(type, tag, preferred);
+            } else if (line.equals("%%")) {
+                type = null;
+                tag = null;
+            }
+        }
+    }
+
+    private static void processDeprecatedData(String type,
+                                              String tag,
+                                              String preferred) {
+        StringBuilder sb;
+        if (type.equals("region") || type.equals("variant")) {
+            if (!initialRegionVariantMap.containsKey(preferred)) {
+                sb = new StringBuilder("-");
+                sb.append(preferred);
+                sb.append(",-");
+                sb.append(tag);
+                initialRegionVariantMap.put("-"+preferred, sb);
+            } else {
+                throw new RuntimeException("New case, need implementation."
+                    + " A region/variant subtag \"" + preferred
+                    + "\" is registered for more than one subtags.");
+            }
+        } else { // language, grandfahered, and redundant
+            if (!initialLanguageMap.containsKey(preferred)) {
+                sb = new StringBuilder(preferred);
+                sb.append(',');
+                sb.append(tag);
+                initialLanguageMap.put(preferred, sb);
+            } else {
+                sb = initialLanguageMap.get(preferred);
+                sb.append(',');
+                sb.append(tag);
+                initialLanguageMap.put(preferred, sb);
+            }
+        }
+    }
+
+    private static void generateEquivalentMap() {
+        String[] subtags;
+        for (String preferred : initialLanguageMap.keySet()) {
+            subtags = initialLanguageMap.get(preferred).toString().split(",");
+
+            if (subtags.length == 2) {
+                sortedLanguageMap1.put(subtags[0], subtags[1]);
+                sortedLanguageMap1.put(subtags[1], subtags[0]);
+            } else if (subtags.length > 2) {
+                for (int i = 0; i < subtags.length; i++) {
+                    sortedLanguageMap2.put(subtags[i], createLangArray(i, subtags));
+                }
+            } else {
+                    throw new RuntimeException("New case, need implementation."
+                        + " A language subtag \"" + preferred
+                        + "\" is registered for more than two subtags. ");
+            }
+        }
+
+        for (String preferred : initialRegionVariantMap.keySet()) {
+            subtags =
+                initialRegionVariantMap.get(preferred).toString().split(",");
+
+            sortedRegionVariantMap.put(subtags[0], subtags[1]);
+            sortedRegionVariantMap.put(subtags[1], subtags[0]);
+        }
+
+    }
+
+    /* create the array of subtags excluding the subtag at index location */
+    private static String[] createLangArray(int index, String[] subtags) {
+        List<String> list = new ArrayList<>();
+        for (int i = 0; i < subtags.length; i++) {
+            if (i != index) {
+                list.add(subtags[i]);
+            }
+        }
+        return list.toArray(new String[list.size()]);
+    }
+
+    private static String generateValuesString(String[] values) {
+        String outputStr = "";
+        for (int i = 0; i < values.length; i++) {
+            if (i != values.length - 1) {
+                outputStr = outputStr + "\"" + values[i] + "\", ";
+            } else {
+                outputStr = outputStr + "\"" + values[i] + "\"";
+            }
+
+        }
+        return outputStr;
+    }
+
+    private static final String COPYRIGHT = "/*\n"
+        + " * Copyright (c) 2012, %d, Oracle and/or its affiliates. All rights reserved.\n"
+        + " * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.\n"
+        + " *\n"
+        + " * This code is free software; you can redistribute it and/or modify it\n"
+        + " * under the terms of the GNU General Public License version 2 only, as\n"
+        + " * published by the Free Software Foundation.  Oracle designates this\n"
+        + " * particular file as subject to the \"Classpath\" exception as provided\n"
+        + " * by Oracle in the LICENSE file that accompanied this code.\n"
+        + " *\n"
+        + " * This code is distributed in the hope that it will be useful, but WITHOUT\n"
+        + " * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n"
+        + " * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"
+        + " * version 2 for more details (a copy is included in the LICENSE file that\n"
+        + " * accompanied this code).\n"
+        + " *\n"
+        + " * You should have received a copy of the GNU General Public License version\n"
+        + " * 2 along with this work; if not, write to the Free Software Foundation,\n"
+        + " * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.\n"
+        + " *\n"
+        + " * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA\n"
+        + " * or visit www.oracle.com if you need additional information or have any\n"
+        + " * questions.\n"
+        + "*/\n\n";
+
+    private static final String headerText =
+        "package sun.util.locale;\n\n"
+        + "import java.util.HashMap;\n"
+        + "import java.util.Map;\n\n"
+        + "final class LocaleEquivalentMaps {\n\n"
+        + "    static final Map<String, String> singleEquivMap;\n"
+        + "    static final Map<String, String[]> multiEquivsMap;\n"
+        + "    static final Map<String, String> regionVariantEquivMap;\n\n"
+        + "    static {\n"
+        + "        singleEquivMap = new HashMap<>();\n"
+        + "        multiEquivsMap = new HashMap<>();\n"
+        + "        regionVariantEquivMap = new HashMap<>();\n\n"
+        + "        // This is an auto-generated file and should not be manually edited.\n";
+
+    private static final String footerText =
+        "    }\n\n"
+        + "}";
+
+    private static String getOpenJDKCopyright() {
+        int year = ZonedDateTime.now(ZoneId
+                .of("America/Los_Angeles")).getYear();
+        return String.format(Locale.US, COPYRIGHT, year);
+    }
+
+    /**
+     * The input lsr data file is in UTF-8, so theoretically for the characters
+     * beyond US-ASCII, the generated Java String literals need to be Unicode
+     * escaped (\\uXXXX) while writing to a file. But as of now, there is not
+     * the case since we don't use "description", "comment" or alike.
+     */
+    private static void generateSourceCode(String fileName) {
+
+        try (BufferedWriter writer = Files.newBufferedWriter(
+                Paths.get(fileName))) {
+            writer.write(getOpenJDKCopyright());
+            writer.write(headerText
+                + "        //   LSR Revision: " + LSRrevisionDate);
+            writer.newLine();
+
+            for (String key : sortedLanguageMap1.keySet()) {
+                String value = sortedLanguageMap1.get(key);
+                writer.write("        singleEquivMap.put(\""
+                    + key + "\", \"" + value + "\");");
+                writer.newLine();
+            }
+
+            writer.newLine();
+            for (String key : sortedLanguageMap2.keySet()) {
+                String[] values = sortedLanguageMap2.get(key);
+
+                if (values.length >= 2) {
+                    writer.write("        multiEquivsMap.put(\""
+                        + key + "\", new String[] {"
+                        + generateValuesString(values) + "});");
+                    writer.newLine();
+                }
+            }
+
+            writer.newLine();
+            for (String key : sortedRegionVariantMap.keySet()) {
+                String value = sortedRegionVariantMap.get(key);
+                writer.write("        regionVariantEquivMap.put(\""
+                    + key + "\", \"" + value + "\");");
+                writer.newLine();
+            }
+
+            writer.write(footerText);
+        } catch (IOException ex) {
+            ex.printStackTrace(System.err);
+            System.exit(1);
+        }
+
+    }
+
+}