8035133: Locale matching: Weight q=0 isn't handled correctly.
authornishjain
Wed, 03 Aug 2016 09:37:57 +0900
changeset 40114 0194b0ba95c6
parent 40113 7ba318a6b751
child 40115 d1847f68fb3f
8035133: Locale matching: Weight q=0 isn't handled correctly. Reviewed-by: okutsu, peytoia
jdk/src/java.base/share/classes/sun/util/locale/LocaleMatcher.java
jdk/test/java/util/Locale/Bug8035133.java
--- a/jdk/src/java.base/share/classes/sun/util/locale/LocaleMatcher.java	Tue Aug 02 10:58:16 2016 -0700
+++ b/jdk/src/java.base/share/classes/sun/util/locale/LocaleMatcher.java	Wed Aug 03 09:37:57 2016 +0900
@@ -28,16 +28,12 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Locale;
 import java.util.Locale.*;
 import static java.util.Locale.FilteringMode.*;
 import static java.util.Locale.LanguageRange.*;
 import java.util.Map;
-import java.util.Set;
 
 /**
  * Implementation for BCP47 Locale matching
@@ -111,10 +107,22 @@
 
     private static List<String> filterBasic(List<LanguageRange> priorityList,
                                             Collection<String> tags) {
+        int splitIndex = splitRanges(priorityList);
+        List<LanguageRange> nonZeroRanges;
+        List<LanguageRange> zeroRanges;
+        if (splitIndex != -1) {
+            nonZeroRanges = priorityList.subList(0, splitIndex);
+            zeroRanges = priorityList.subList(splitIndex, priorityList.size());
+        } else {
+            nonZeroRanges = priorityList;
+            zeroRanges = List.of();
+        }
+
         List<String> list = new ArrayList<>();
-        for (LanguageRange lr : priorityList) {
+        for (LanguageRange lr : nonZeroRanges) {
             String range = lr.getRange();
             if (range.equals("*")) {
+                tags = removeTagsMatchingBasicZeroRange(zeroRanges, tags);
                 return new ArrayList<String>(tags);
             } else {
                 for (String tag : tags) {
@@ -122,7 +130,8 @@
                     if (tag.startsWith(range)) {
                         int len = range.length();
                         if ((tag.length() == len || tag.charAt(len) == '-')
-                            && !list.contains(tag)) {
+                            && !list.contains(tag)
+                            && !shouldIgnoreFilterBasicMatch(zeroRanges, tag)) {
                             list.add(tag);
                         }
                     }
@@ -133,12 +142,76 @@
         return list;
     }
 
+    /**
+     * Removes the tag(s) which are falling in the basic exclusion range(s) i.e
+     * range(s) with q=0 and returns the updated collection. If the basic
+     * language ranges contains '*' as one of its non zero range then instead of
+     * returning all the tags, remove those which are matching the range with
+     * quality weight q=0.
+     */
+    private static Collection<String> removeTagsMatchingBasicZeroRange(
+            List<LanguageRange> zeroRange, Collection<String> tags) {
+        if (zeroRange.isEmpty()) {
+            return tags;
+        }
+
+        List<String> matchingTags = new ArrayList<>();
+        for (String tag : tags) {
+            tag = tag.toLowerCase(Locale.ROOT);
+            if (!shouldIgnoreFilterBasicMatch(zeroRange, tag)) {
+                matchingTags.add(tag);
+            }
+        }
+
+        return matchingTags;
+    }
+
+    /**
+     * The tag which is falling in the basic exclusion range(s) should not
+     * be considered as the matching tag. Ignores the tag matching with the
+     * non-zero ranges, if the tag also matches with one of the basic exclusion
+     * ranges i.e. range(s) having quality weight q=0
+     */
+    private static boolean shouldIgnoreFilterBasicMatch(
+            List<LanguageRange> zeroRange, String tag) {
+        if (zeroRange.isEmpty()) {
+            return false;
+        }
+
+        for (LanguageRange lr : zeroRange) {
+            String range = lr.getRange();
+            if (range.equals("*")) {
+                return true;
+            }
+            if (tag.startsWith(range)) {
+                int len = range.length();
+                if ((tag.length() == len || tag.charAt(len) == '-')) {
+                    return true;
+                }
+            }
+        }
+
+        return false;
+    }
+
     private static List<String> filterExtended(List<LanguageRange> priorityList,
                                                Collection<String> tags) {
+        int splitIndex = splitRanges(priorityList);
+        List<LanguageRange> nonZeroRanges;
+        List<LanguageRange> zeroRanges;
+        if (splitIndex != -1) {
+            nonZeroRanges = priorityList.subList(0, splitIndex);
+            zeroRanges = priorityList.subList(splitIndex, priorityList.size());
+        } else {
+            nonZeroRanges = priorityList;
+            zeroRanges = List.of();
+        }
+
         List<String> list = new ArrayList<>();
-        for (LanguageRange lr : priorityList) {
+        for (LanguageRange lr : nonZeroRanges) {
             String range = lr.getRange();
             if (range.equals("*")) {
+                tags = removeTagsMatchingExtendedZeroRange(zeroRanges, tags);
                 return new ArrayList<String>(tags);
             }
             String[] rangeSubtags = range.split("-");
@@ -150,33 +223,101 @@
                     continue;
                 }
 
-                int rangeIndex = 1;
-                int tagIndex = 1;
-
-                while (rangeIndex < rangeSubtags.length
-                       && tagIndex < tagSubtags.length) {
-                   if (rangeSubtags[rangeIndex].equals("*")) {
-                       rangeIndex++;
-                   } else if (rangeSubtags[rangeIndex].equals(tagSubtags[tagIndex])) {
-                       rangeIndex++;
-                       tagIndex++;
-                   } else if (tagSubtags[tagIndex].length() == 1
-                              && !tagSubtags[tagIndex].equals("*")) {
-                       break;
-                   } else {
-                       tagIndex++;
-                   }
-               }
-
-               if (rangeSubtags.length == rangeIndex && !list.contains(tag)) {
-                   list.add(tag);
-               }
+                int rangeIndex = matchFilterExtendedSubtags(rangeSubtags,
+                        tagSubtags);
+                if (rangeSubtags.length == rangeIndex && !list.contains(tag)
+                        && !shouldIgnoreFilterExtendedMatch(zeroRanges, tag)) {
+                    list.add(tag);
+                }
             }
         }
 
         return list;
     }
 
+    /**
+     * Removes the tag(s) which are falling in the extended exclusion range(s)
+     * i.e range(s) with q=0 and returns the updated collection. If the extended
+     * language ranges contains '*' as one of its non zero range then instead of
+     * returning all the tags, remove those which are matching the range with
+     * quality weight q=0.
+     */
+    private static Collection<String> removeTagsMatchingExtendedZeroRange(
+            List<LanguageRange> zeroRange, Collection<String> tags) {
+        if (zeroRange.isEmpty()) {
+            return tags;
+        }
+
+        List<String> matchingTags = new ArrayList<>();
+        for (String tag : tags) {
+            tag = tag.toLowerCase(Locale.ROOT);
+            if (!shouldIgnoreFilterExtendedMatch(zeroRange, tag)) {
+                matchingTags.add(tag);
+            }
+        }
+
+        return matchingTags;
+    }
+
+    /**
+     * The tag which is falling in the extended exclusion range(s) should
+     * not be considered as the matching tag. Ignores the tag matching with the
+     * non zero range(s), if the tag also matches with one of the extended
+     * exclusion range(s) i.e. range(s) having quality weight q=0
+     */
+    private static boolean shouldIgnoreFilterExtendedMatch(
+            List<LanguageRange> zeroRange, String tag) {
+        if (zeroRange.isEmpty()) {
+            return false;
+        }
+
+        String[] tagSubtags = tag.split("-");
+        for (LanguageRange lr : zeroRange) {
+            String range = lr.getRange();
+            if (range.equals("*")) {
+                return true;
+            }
+
+            String[] rangeSubtags = range.split("-");
+
+            if (!rangeSubtags[0].equals(tagSubtags[0])
+                    && !rangeSubtags[0].equals("*")) {
+                continue;
+            }
+
+            int rangeIndex = matchFilterExtendedSubtags(rangeSubtags,
+                    tagSubtags);
+            if (rangeSubtags.length == rangeIndex) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    private static int matchFilterExtendedSubtags(String[] rangeSubtags,
+            String[] tagSubtags) {
+        int rangeIndex = 1;
+        int tagIndex = 1;
+
+        while (rangeIndex < rangeSubtags.length
+                && tagIndex < tagSubtags.length) {
+            if (rangeSubtags[rangeIndex].equals("*")) {
+                rangeIndex++;
+            } else if (rangeSubtags[rangeIndex]
+                    .equals(tagSubtags[tagIndex])) {
+                rangeIndex++;
+                tagIndex++;
+            } else if (tagSubtags[tagIndex].length() == 1
+                    && !tagSubtags[tagIndex].equals("*")) {
+                break;
+            } else {
+                tagIndex++;
+            }
+        }
+        return rangeIndex;
+    }
+
     public static Locale lookup(List<LanguageRange> priorityList,
                                 Collection<Locale> locales) {
         if (priorityList.isEmpty() || locales.isEmpty()) {
@@ -205,7 +346,18 @@
             return null;
         }
 
-        for (LanguageRange lr : priorityList) {
+        int splitIndex = splitRanges(priorityList);
+        List<LanguageRange> nonZeroRanges;
+        List<LanguageRange> zeroRanges;
+        if (splitIndex != -1) {
+            nonZeroRanges = priorityList.subList(0, splitIndex);
+            zeroRanges = priorityList.subList(splitIndex, priorityList.size());
+        } else {
+            nonZeroRanges = priorityList;
+            zeroRanges = List.of();
+        }
+
+        for (LanguageRange lr : nonZeroRanges) {
             String range = lr.getRange();
 
             // Special language range ("*") is ignored in lookup.
@@ -217,31 +369,83 @@
             while (rangeForRegex.length() > 0) {
                 for (String tag : tags) {
                     tag = tag.toLowerCase(Locale.ROOT);
-                    if (tag.matches(rangeForRegex)) {
+                    if (tag.matches(rangeForRegex)
+                            && !shouldIgnoreLookupMatch(zeroRanges, tag)) {
                         return tag;
                     }
                 }
 
                 // Truncate from the end....
-                int index = rangeForRegex.lastIndexOf('-');
-                if (index >= 0) {
-                    rangeForRegex = rangeForRegex.substring(0, index);
-
-                    // if range ends with an extension key, truncate it.
-                    index = rangeForRegex.lastIndexOf('-');
-                    if (index >= 0 && index == rangeForRegex.length()-2) {
-                        rangeForRegex =
-                            rangeForRegex.substring(0, rangeForRegex.length()-2);
-                    }
-                } else {
-                    rangeForRegex = "";
-                }
+                rangeForRegex = truncateRange(rangeForRegex);
             }
         }
 
         return null;
     }
 
+    /**
+     * The tag which is falling in the exclusion range(s) should not be
+     * considered as the matching tag. Ignores the tag matching with the
+     * non zero range(s), if the tag also matches with one of the exclusion
+     * range(s) i.e. range(s) having quality weight q=0.
+     */
+    private static boolean shouldIgnoreLookupMatch(List<LanguageRange> zeroRange,
+            String tag) {
+        for (LanguageRange lr : zeroRange) {
+            String range = lr.getRange();
+
+            // Special language range ("*") is ignored in lookup.
+            if (range.equals("*")) {
+                continue;
+            }
+
+            String rangeForRegex = range.replaceAll("\\x2A", "\\\\p{Alnum}*");
+            while (rangeForRegex.length() > 0) {
+                if (tag.matches(rangeForRegex)) {
+                    return true;
+                }
+                // Truncate from the end....
+                rangeForRegex = truncateRange(rangeForRegex);
+            }
+        }
+
+        return false;
+    }
+
+    /* Truncate the range from end during the lookup match */
+    private static String truncateRange(String rangeForRegex) {
+        int index = rangeForRegex.lastIndexOf('-');
+        if (index >= 0) {
+            rangeForRegex = rangeForRegex.substring(0, index);
+
+            // if range ends with an extension key, truncate it.
+            index = rangeForRegex.lastIndexOf('-');
+            if (index >= 0 && index == rangeForRegex.length() - 2) {
+                rangeForRegex
+                        = rangeForRegex.substring(0, rangeForRegex.length() - 2);
+            }
+        } else {
+            rangeForRegex = "";
+        }
+
+        return rangeForRegex;
+    }
+
+    /* Returns the split index of the priority list, if it contains
+     * language range(s) with quality weight as 0 i.e. q=0, else -1
+     */
+    private static int splitRanges(List<LanguageRange> priorityList) {
+        int size = priorityList.size();
+        for (int index = 0; index < size; index++) {
+            LanguageRange range = priorityList.get(index);
+            if (range.getWeight() == 0) {
+                return index;
+            }
+        }
+
+        return -1; // no q=0 range exists
+    }
+
     public static List<LanguageRange> parse(String ranges) {
         ranges = ranges.replaceAll(" ", "").toLowerCase(Locale.ROOT);
         if (ranges.startsWith("accept-language:")) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/util/Locale/Bug8035133.java	Wed Aug 03 09:37:57 2016 +0900
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ * @test
+ * @bug 8035133
+ * @summary Checks that the tags matching the range with quality weight q=0
+ *          e.g. en;q=0 must be elimited and must not be the part of output
+ */
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+
+
+public class Bug8035133 {
+
+    private static boolean err = false;
+
+    public static void main(String[] args) {
+
+        // checking Locale.lookup with de-ch;q=0
+        checkLookup("en;q=0.1, *-ch;q=0.5, de-ch;q=0",
+                "de-ch, en, fr-ch", "fr-CH");
+
+        /* checking Locale.lookup with *;q=0 '*' should be ignored
+         * in lookup
+         */
+        checkLookup("en;q=0.1, *-ch;q=0.5, *;q=0",
+                "de-ch, en, fr-ch", "de-CH");
+
+        // checking Locale.filter with fr-ch;q=0 in BASIC_FILTERING
+        checkFilter("en;q=0.1, fr-ch;q=0.0, de-ch;q=0.5",
+                "de-ch, en, fr-ch", "de-CH, en");
+
+        // checking Locale.filter with *;q=0 in BASIC_FILTERING
+        checkFilter("de-ch;q=0.6, *;q=0", "de-ch, fr-ch", "");
+
+        // checking Locale.filter with *;q=0 in BASIC_FILTERING
+        checkFilter("de-ch;q=0.6, de;q=0", "de-ch", "");
+
+        // checking Locale.filter with *;q=0.6, en;q=0 in BASIC_FILTERING
+        checkFilter("*;q=0.6, en;q=0", "de-ch, hi-in, en", "de-CH, hi-IN");
+
+        // checking Locale.filter with de-ch;q=0 in EXTENDED_FILTERING
+        checkFilter("en;q=0.1, *-ch;q=0.5, de-ch;q=0",
+                "de-ch, en, fr-ch", "fr-CH, en");
+
+        /* checking Locale.filter with *-ch;q=0 in EXTENDED_FILTERING which
+         * must make filter to return "" empty or no match
+         */
+        checkFilter("de-ch;q=0.5, *-ch;q=0", "de-ch, fr-ch", "");
+
+        /* checking Locale.filter with *;q=0 in EXTENDED_FILTERING which
+         * must make filter to return "" empty or no match
+         */
+        checkFilter("*-ch;q=0.5, *;q=0", "de-ch, fr-ch", "");
+
+        /* checking Locale.filter with *;q=0.6, *-Latn;q=0 in
+         * EXTENDED_FILTERING
+         */
+        checkFilter("*;q=0.6, *-Latn;q=0", "de-ch, hi-in, en-Latn",
+                "de-CH, hi-IN");
+
+        if (err) {
+            throw new RuntimeException("[LocaleMatcher method(s) failed]");
+        }
+
+    }
+
+    private static void checkLookup(String ranges, String tags,
+            String expectedLocale) {
+
+        List<Locale.LanguageRange> priorityList = Locale.LanguageRange
+                .parse(ranges);
+        List<Locale> localeList = generateLocales(tags);
+        Locale loc = Locale.lookup(priorityList, localeList);
+        String actualLocale
+                = loc.toLanguageTag();
+
+        if (!actualLocale.equals(expectedLocale)) {
+            System.err.println("Locale.lookup failed with ranges: " + ranges
+                    + " Expected: " + expectedLocale
+                    + " Actual: " + actualLocale);
+            err = true;
+        }
+
+    }
+
+    private static void checkFilter(String ranges, String tags,
+            String expectedLocales) {
+
+        List<Locale.LanguageRange> priorityList = Locale.LanguageRange
+                .parse(ranges);
+        List<Locale> localeList = generateLocales(tags);
+        String actualLocales = getLocalesAsString(
+                Locale.filter(priorityList, localeList));
+
+        if (!actualLocales.equals(expectedLocales)) {
+            System.err.println("Locale.filter failed with ranges: " + ranges
+                    + " Expected: " + expectedLocales
+                    + " Actual: " + actualLocales);
+            err = true;
+        }
+
+    }
+
+    private static List<Locale> generateLocales(String tags) {
+        if (tags == null) {
+            return null;
+        }
+
+        List<Locale> localeList = new ArrayList<>();
+        if (tags.equals("")) {
+            return localeList;
+        }
+        String[] t = tags.split(", ");
+        for (String tag : t) {
+            localeList.add(Locale.forLanguageTag(tag));
+        }
+        return localeList;
+    }
+
+    private static String getLocalesAsString(List<Locale> locales) {
+        StringBuilder sb = new StringBuilder();
+
+        Iterator<Locale> itr = locales.iterator();
+        if (itr.hasNext()) {
+            sb.append(itr.next().toLanguageTag());
+        }
+        while (itr.hasNext()) {
+            sb.append(", ");
+            sb.append(itr.next().toLanguageTag());
+        }
+
+        return sb.toString().trim();
+    }
+
+}