8204938: Add a test case to automatically check the updated LSR data
authornishjain
Thu, 21 Jun 2018 14:09:43 +0530
changeset 50692 5b75d7485f2a
parent 50691 cb652b90be5e
child 50693 db0a17475826
8204938: Add a test case to automatically check the updated LSR data Reviewed-by: naoto, rriggs
test/jdk/java/util/Locale/LSRDataTest.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/util/Locale/LSRDataTest.java	Thu Jun 21 14:09:43 2018 +0530
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8204938
+ * @summary Checks the IANA language subtag registry data update
+ *          with Locale.LanguageRange parse method.
+ * @run main LSRDataTest
+ */
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Locale;
+import java.util.Locale.LanguageRange;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static java.util.Locale.LanguageRange.MAX_WEIGHT;
+import static java.util.Locale.LanguageRange.MIN_WEIGHT;
+
+public class LSRDataTest {
+
+    private static final char HYPHEN = '-';
+    private static final Map<String, String> singleLangEquivMap = new HashMap<>();
+    private static final Map<String, List<String>> multiLangEquivsMap = new HashMap<>();
+    private static final Map<String, String> regionVariantEquivMap = new HashMap<>();
+
+    // path to the lsr file from the make folder, this test relies on the
+    // relative path to the file in the make folder, considering
+    // test and make will always exist in the same jdk layout
+    private static final String LSR_FILE_PATH = System.getProperty("test.src", ".")
+                + "/../../../../../make/data/lsrdata/language-subtag-registry.txt";
+
+    public static void main(String[] args) throws IOException {
+
+        loadLSRData(Paths.get(LSR_FILE_PATH).toRealPath());
+
+        // checking the tags with weight
+        String ranges = "Accept-Language: aam, adp, aue, bcg, cqu, ema,"
+                + " en-gb-oed, gti, koj, kwq, kxe, lii, lmm, mtm, ngv,"
+                + " oyb, phr, pub, suj, taj;q=0.9, yug;q=0.5, gfx;q=0.4";
+        List<LanguageRange> expected = parse(ranges);
+        List<LanguageRange> actual = LanguageRange.parse(ranges);
+        checkEquality(actual, expected);
+
+        // checking all language ranges
+        ranges = generateLangRanges();
+        expected = parse(ranges);
+        actual = LanguageRange.parse(ranges);
+        checkEquality(actual, expected);
+
+        // checking all region/variant ranges
+        ranges = generateRegionRanges();
+        expected = parse(ranges);
+        actual = LanguageRange.parse(ranges);
+        checkEquality(actual, expected);
+
+    }
+
+    // generate range string containing all equiv language tags
+    private static String generateLangRanges() {
+        return Stream.concat(singleLangEquivMap.keySet().stream(), multiLangEquivsMap
+                .keySet().stream()).collect(Collectors.joining(","));
+    }
+
+    // generate range string containing all equiv region tags
+    private static String generateRegionRanges() {
+        return regionVariantEquivMap.keySet().stream()
+                .map(r -> "en".concat(r)).collect(Collectors.joining(", "));
+    }
+
+    // load LSR data from the file
+    private static void loadLSRData(Path path) throws IOException {
+        String type = null;
+        String tag = null;
+        String preferred;
+
+        for (String line : Files.readAllLines(path, Charset.forName("UTF-8"))) {
+            line = line.toLowerCase(Locale.ROOT);
+            int index = line.indexOf(' ') + 1;
+            if (line.startsWith("type:")) {
+                type = line.substring(index);
+            } else if (line.startsWith("tag:") || line.startsWith("subtag:")) {
+                tag = line.substring(index);
+            } else if (line.startsWith("preferred-value:") && !type.equals("extlang")) {
+                preferred = line.substring(index);
+                processDataAndGenerateMaps(type, tag, preferred);
+            } else if (line.equals("%%")) {
+                type = null;
+                tag = null;
+            }
+        }
+    }
+
+    private static void processDataAndGenerateMaps(String type,
+            String tag,
+            String preferred) {
+        StringBuilder sb;
+        if (type.equals("region") || type.equals("variant")) {
+            if (!regionVariantEquivMap.containsKey(preferred)) {
+                String tPref = HYPHEN + preferred;
+                String tTag = HYPHEN + tag;
+                regionVariantEquivMap.put(tPref, tTag);
+                regionVariantEquivMap.put(tTag, tPref);
+            } else {
+                throw new RuntimeException("New case, need implementation."
+                        + " A region/variant subtag \"" + preferred
+                        + "\" is registered for more than one subtags.");
+            }
+        } else { // language, grandfathered, and redundant
+            if (!singleLangEquivMap.containsKey(preferred)
+                    && !multiLangEquivsMap.containsKey(preferred)) {
+                // new entry add it into single equiv map
+                singleLangEquivMap.put(preferred, tag);
+                singleLangEquivMap.put(tag, preferred);
+            } else if (singleLangEquivMap.containsKey(preferred)
+                    && !multiLangEquivsMap.containsKey(preferred)) {
+                String value = singleLangEquivMap.get(preferred);
+                List<String> subtags = List.of(preferred, value, tag);
+                // remove from single eqiv map before adding to multi equiv
+                singleLangEquivMap.keySet().removeAll(subtags);
+                addEntriesToMultiEquivsMap(subtags);
+            } else if (multiLangEquivsMap.containsKey(preferred)
+                    && !singleLangEquivMap.containsKey(preferred)) {
+                List<String> subtags = multiLangEquivsMap.get(preferred);
+                // should use the order preferred, subtags, tag to keep the
+                // expected order same as the JDK API in multi equivalent maps
+                subtags.add(0, preferred);
+                subtags.add(tag);
+                addEntriesToMultiEquivsMap(subtags);
+            }
+        }
+    }
+
+    // Add entries into the multi equivalent map from the given subtags
+    private static void addEntriesToMultiEquivsMap(List<String> subtags) {
+        // for each subtag within the given subtags, add an entry in multi
+        // equivalent language map with subtag as the key and the value
+        // as the list of all subtags excluding the one which is getting
+        // traversed
+        subtags.forEach(subtag -> multiLangEquivsMap.put(subtag, subtags.stream()
+                .filter(t -> !t.equals(subtag))
+                .collect(Collectors.toList())));
+    }
+
+    private static List<LanguageRange> parse(String ranges) {
+        ranges = ranges.replace(" ", "").toLowerCase(Locale.ROOT);
+        if (ranges.startsWith("accept-language:")) {
+            ranges = ranges.substring(16);
+        }
+        String[] langRanges = ranges.split(",");
+        List<LanguageRange> priorityList = new ArrayList<>(langRanges.length);
+        int numOfRanges = 0;
+        for (String range : langRanges) {
+            int wIndex = range.indexOf(";q=");
+            String tag;
+            double weight = 0.0;
+            if (wIndex == -1) {
+                tag = range;
+                weight = MAX_WEIGHT;
+            } else {
+                tag = range.substring(0, wIndex);
+                try {
+                    weight = Double.parseDouble(range.substring(wIndex + 3));
+                } catch (RuntimeException ex) {
+                    throw new IllegalArgumentException("weight= " + weight + " for"
+                            + " language range \"" + tag + "\", should be"
+                            + " represented as a double");
+                }
+
+                if (weight < MIN_WEIGHT || weight > MAX_WEIGHT) {
+                    throw new IllegalArgumentException("weight=" + weight
+                            + " for language range \"" + tag
+                            + "\", must be between " + MIN_WEIGHT
+                            + " and " + MAX_WEIGHT + ".");
+                }
+            }
+
+            LanguageRange entry = new LanguageRange(tag, weight);
+            if (!priorityList.contains(entry)) {
+
+                int index = numOfRanges;
+                // find the index in the list to add the current range at the
+                // correct index sorted by the descending order of weight
+                for (int i = 0; i < priorityList.size(); i++) {
+                    if (priorityList.get(i).getWeight() < weight) {
+                        index = i;
+                        break;
+                    }
+                }
+                priorityList.add(index, entry);
+                numOfRanges++;
+
+                String equivalent = getEquivalentForRegionAndVariant(tag);
+                if (equivalent != null) {
+                    LanguageRange equivRange = new LanguageRange(equivalent, weight);
+                    if (!priorityList.contains(equivRange)) {
+                        priorityList.add(index + 1, equivRange);
+                        numOfRanges++;
+                    }
+                }
+
+                List<String> equivalents = getEquivalentsForLanguage(tag);
+                if (equivalents != null) {
+                    for (String equiv : equivalents) {
+                        LanguageRange equivRange = new LanguageRange(equiv, weight);
+                        if (!priorityList.contains(equivRange)) {
+                            priorityList.add(index + 1, equivRange);
+                            numOfRanges++;
+                        }
+
+                        equivalent = getEquivalentForRegionAndVariant(equiv);
+                        if (equivalent != null) {
+                            equivRange = new LanguageRange(equivalent, weight);
+                            if (!priorityList.contains(equivRange)) {
+                                priorityList.add(index + 1, equivRange);
+                                numOfRanges++;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        return priorityList;
+    }
+
+    /**
+     * A faster alternative approach to String.replaceFirst(), if the given
+     * string is a literal String, not a regex.
+     */
+    private static String replaceFirstSubStringMatch(String range,
+            String substr, String replacement) {
+        int pos = range.indexOf(substr);
+        if (pos == -1) {
+            return range;
+        } else {
+            return range.substring(0, pos) + replacement
+                    + range.substring(pos + substr.length());
+        }
+    }
+
+    private static List<String> getEquivalentsForLanguage(String range) {
+        String r = range;
+
+        while (r.length() > 0) {
+            if (singleLangEquivMap.containsKey(r)) {
+                String equiv = singleLangEquivMap.get(r);
+                // Return immediately for performance if the first matching
+                // subtag is found.
+                return List.of(replaceFirstSubStringMatch(range, r, equiv));
+            } else if (multiLangEquivsMap.containsKey(r)) {
+                List<String> equivs = multiLangEquivsMap.get(r);
+                List<String> result = new ArrayList(equivs.size());
+                for (int i = 0; i < equivs.size(); i++) {
+                    result.add(i, replaceFirstSubStringMatch(range,
+                            r, equivs.get(i)));
+                }
+                return result;
+            }
+
+            // Truncate the last subtag simply.
+            int index = r.lastIndexOf(HYPHEN);
+            if (index == -1) {
+                break;
+            }
+            r = r.substring(0, index);
+        }
+
+        return null;
+    }
+
+    private static String getEquivalentForRegionAndVariant(String range) {
+        int extensionKeyIndex = getExtentionKeyIndex(range);
+
+        for (String subtag : regionVariantEquivMap.keySet()) {
+            int index;
+            if ((index = range.indexOf(subtag)) != -1) {
+                // Check if the matching text is a valid region or variant.
+                if (extensionKeyIndex != Integer.MIN_VALUE
+                        && index > extensionKeyIndex) {
+                    continue;
+                }
+
+                int len = index + subtag.length();
+                if (range.length() == len || range.charAt(len) == HYPHEN) {
+                    return replaceFirstSubStringMatch(range, subtag,
+                            regionVariantEquivMap.get(subtag));
+                }
+            }
+        }
+
+        return null;
+    }
+
+    private static int getExtentionKeyIndex(String s) {
+        char[] c = s.toCharArray();
+        int index = Integer.MIN_VALUE;
+        for (int i = 1; i < c.length; i++) {
+            if (c[i] == HYPHEN) {
+                if (i - index == 2) {
+                    return index;
+                } else {
+                    index = i;
+                }
+            }
+        }
+        return Integer.MIN_VALUE;
+    }
+
+    private static void checkEquality(List<LanguageRange> expected,
+            List<LanguageRange> actual) {
+
+        int expectedSize = expected.size();
+        int actualSize = actual.size();
+
+        if (expectedSize != actualSize) {
+            throw new RuntimeException("[FAILED: Size of the priority list"
+                    + " does not match, Expected size=" + expectedSize + "]");
+        } else {
+            for (int i = 0; i < expectedSize; i++) {
+                LanguageRange lr1 = expected.get(i);
+                LanguageRange lr2 = actual.get(i);
+
+                if (!lr1.getRange().equals(lr2.getRange())
+                        || lr1.getWeight() != lr2.getWeight()) {
+                    throw new RuntimeException("[FAILED: Ranges at index "
+                            + i + " do not match Expected: range=" + lr1.getRange()
+                            + ", weight=" + lr1.getWeight() + ", Actual: range="
+                            + lr2.getRange() + ", weight=" + lr2.getWeight() + "]");
+                }
+            }
+        }
+    }
+}