8035133: Locale matching: Weight q=0 isn't handled correctly.
Reviewed-by: okutsu, peytoia
--- a/jdk/src/java.base/share/classes/sun/util/locale/LocaleMatcher.java Tue Aug 02 10:58:16 2016 -0700
+++ b/jdk/src/java.base/share/classes/sun/util/locale/LocaleMatcher.java Wed Aug 03 09:37:57 2016 +0900
@@ -28,16 +28,12 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Locale.*;
import static java.util.Locale.FilteringMode.*;
import static java.util.Locale.LanguageRange.*;
import java.util.Map;
-import java.util.Set;
/**
* Implementation for BCP47 Locale matching
@@ -111,10 +107,22 @@
private static List<String> filterBasic(List<LanguageRange> priorityList,
Collection<String> tags) {
+ int splitIndex = splitRanges(priorityList);
+ List<LanguageRange> nonZeroRanges;
+ List<LanguageRange> zeroRanges;
+ if (splitIndex != -1) {
+ nonZeroRanges = priorityList.subList(0, splitIndex);
+ zeroRanges = priorityList.subList(splitIndex, priorityList.size());
+ } else {
+ nonZeroRanges = priorityList;
+ zeroRanges = List.of();
+ }
+
List<String> list = new ArrayList<>();
- for (LanguageRange lr : priorityList) {
+ for (LanguageRange lr : nonZeroRanges) {
String range = lr.getRange();
if (range.equals("*")) {
+ tags = removeTagsMatchingBasicZeroRange(zeroRanges, tags);
return new ArrayList<String>(tags);
} else {
for (String tag : tags) {
@@ -122,7 +130,8 @@
if (tag.startsWith(range)) {
int len = range.length();
if ((tag.length() == len || tag.charAt(len) == '-')
- && !list.contains(tag)) {
+ && !list.contains(tag)
+ && !shouldIgnoreFilterBasicMatch(zeroRanges, tag)) {
list.add(tag);
}
}
@@ -133,12 +142,76 @@
return list;
}
+ /**
+ * Removes the tag(s) which are falling in the basic exclusion range(s) i.e
+ * range(s) with q=0 and returns the updated collection. If the basic
+ * language ranges contains '*' as one of its non zero range then instead of
+ * returning all the tags, remove those which are matching the range with
+ * quality weight q=0.
+ */
+ private static Collection<String> removeTagsMatchingBasicZeroRange(
+ List<LanguageRange> zeroRange, Collection<String> tags) {
+ if (zeroRange.isEmpty()) {
+ return tags;
+ }
+
+ List<String> matchingTags = new ArrayList<>();
+ for (String tag : tags) {
+ tag = tag.toLowerCase(Locale.ROOT);
+ if (!shouldIgnoreFilterBasicMatch(zeroRange, tag)) {
+ matchingTags.add(tag);
+ }
+ }
+
+ return matchingTags;
+ }
+
+ /**
+ * The tag which is falling in the basic exclusion range(s) should not
+ * be considered as the matching tag. Ignores the tag matching with the
+ * non-zero ranges, if the tag also matches with one of the basic exclusion
+ * ranges i.e. range(s) having quality weight q=0
+ */
+ private static boolean shouldIgnoreFilterBasicMatch(
+ List<LanguageRange> zeroRange, String tag) {
+ if (zeroRange.isEmpty()) {
+ return false;
+ }
+
+ for (LanguageRange lr : zeroRange) {
+ String range = lr.getRange();
+ if (range.equals("*")) {
+ return true;
+ }
+ if (tag.startsWith(range)) {
+ int len = range.length();
+ if ((tag.length() == len || tag.charAt(len) == '-')) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+
private static List<String> filterExtended(List<LanguageRange> priorityList,
Collection<String> tags) {
+ int splitIndex = splitRanges(priorityList);
+ List<LanguageRange> nonZeroRanges;
+ List<LanguageRange> zeroRanges;
+ if (splitIndex != -1) {
+ nonZeroRanges = priorityList.subList(0, splitIndex);
+ zeroRanges = priorityList.subList(splitIndex, priorityList.size());
+ } else {
+ nonZeroRanges = priorityList;
+ zeroRanges = List.of();
+ }
+
List<String> list = new ArrayList<>();
- for (LanguageRange lr : priorityList) {
+ for (LanguageRange lr : nonZeroRanges) {
String range = lr.getRange();
if (range.equals("*")) {
+ tags = removeTagsMatchingExtendedZeroRange(zeroRanges, tags);
return new ArrayList<String>(tags);
}
String[] rangeSubtags = range.split("-");
@@ -150,33 +223,101 @@
continue;
}
- int rangeIndex = 1;
- int tagIndex = 1;
-
- while (rangeIndex < rangeSubtags.length
- && tagIndex < tagSubtags.length) {
- if (rangeSubtags[rangeIndex].equals("*")) {
- rangeIndex++;
- } else if (rangeSubtags[rangeIndex].equals(tagSubtags[tagIndex])) {
- rangeIndex++;
- tagIndex++;
- } else if (tagSubtags[tagIndex].length() == 1
- && !tagSubtags[tagIndex].equals("*")) {
- break;
- } else {
- tagIndex++;
- }
- }
-
- if (rangeSubtags.length == rangeIndex && !list.contains(tag)) {
- list.add(tag);
- }
+ int rangeIndex = matchFilterExtendedSubtags(rangeSubtags,
+ tagSubtags);
+ if (rangeSubtags.length == rangeIndex && !list.contains(tag)
+ && !shouldIgnoreFilterExtendedMatch(zeroRanges, tag)) {
+ list.add(tag);
+ }
}
}
return list;
}
+ /**
+ * Removes the tag(s) which are falling in the extended exclusion range(s)
+ * i.e range(s) with q=0 and returns the updated collection. If the extended
+ * language ranges contains '*' as one of its non zero range then instead of
+ * returning all the tags, remove those which are matching the range with
+ * quality weight q=0.
+ */
+ private static Collection<String> removeTagsMatchingExtendedZeroRange(
+ List<LanguageRange> zeroRange, Collection<String> tags) {
+ if (zeroRange.isEmpty()) {
+ return tags;
+ }
+
+ List<String> matchingTags = new ArrayList<>();
+ for (String tag : tags) {
+ tag = tag.toLowerCase(Locale.ROOT);
+ if (!shouldIgnoreFilterExtendedMatch(zeroRange, tag)) {
+ matchingTags.add(tag);
+ }
+ }
+
+ return matchingTags;
+ }
+
+ /**
+ * The tag which is falling in the extended exclusion range(s) should
+ * not be considered as the matching tag. Ignores the tag matching with the
+ * non zero range(s), if the tag also matches with one of the extended
+ * exclusion range(s) i.e. range(s) having quality weight q=0
+ */
+ private static boolean shouldIgnoreFilterExtendedMatch(
+ List<LanguageRange> zeroRange, String tag) {
+ if (zeroRange.isEmpty()) {
+ return false;
+ }
+
+ String[] tagSubtags = tag.split("-");
+ for (LanguageRange lr : zeroRange) {
+ String range = lr.getRange();
+ if (range.equals("*")) {
+ return true;
+ }
+
+ String[] rangeSubtags = range.split("-");
+
+ if (!rangeSubtags[0].equals(tagSubtags[0])
+ && !rangeSubtags[0].equals("*")) {
+ continue;
+ }
+
+ int rangeIndex = matchFilterExtendedSubtags(rangeSubtags,
+ tagSubtags);
+ if (rangeSubtags.length == rangeIndex) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ private static int matchFilterExtendedSubtags(String[] rangeSubtags,
+ String[] tagSubtags) {
+ int rangeIndex = 1;
+ int tagIndex = 1;
+
+ while (rangeIndex < rangeSubtags.length
+ && tagIndex < tagSubtags.length) {
+ if (rangeSubtags[rangeIndex].equals("*")) {
+ rangeIndex++;
+ } else if (rangeSubtags[rangeIndex]
+ .equals(tagSubtags[tagIndex])) {
+ rangeIndex++;
+ tagIndex++;
+ } else if (tagSubtags[tagIndex].length() == 1
+ && !tagSubtags[tagIndex].equals("*")) {
+ break;
+ } else {
+ tagIndex++;
+ }
+ }
+ return rangeIndex;
+ }
+
public static Locale lookup(List<LanguageRange> priorityList,
Collection<Locale> locales) {
if (priorityList.isEmpty() || locales.isEmpty()) {
@@ -205,7 +346,18 @@
return null;
}
- for (LanguageRange lr : priorityList) {
+ int splitIndex = splitRanges(priorityList);
+ List<LanguageRange> nonZeroRanges;
+ List<LanguageRange> zeroRanges;
+ if (splitIndex != -1) {
+ nonZeroRanges = priorityList.subList(0, splitIndex);
+ zeroRanges = priorityList.subList(splitIndex, priorityList.size());
+ } else {
+ nonZeroRanges = priorityList;
+ zeroRanges = List.of();
+ }
+
+ for (LanguageRange lr : nonZeroRanges) {
String range = lr.getRange();
// Special language range ("*") is ignored in lookup.
@@ -217,31 +369,83 @@
while (rangeForRegex.length() > 0) {
for (String tag : tags) {
tag = tag.toLowerCase(Locale.ROOT);
- if (tag.matches(rangeForRegex)) {
+ if (tag.matches(rangeForRegex)
+ && !shouldIgnoreLookupMatch(zeroRanges, tag)) {
return tag;
}
}
// Truncate from the end....
- int index = rangeForRegex.lastIndexOf('-');
- if (index >= 0) {
- rangeForRegex = rangeForRegex.substring(0, index);
-
- // if range ends with an extension key, truncate it.
- index = rangeForRegex.lastIndexOf('-');
- if (index >= 0 && index == rangeForRegex.length()-2) {
- rangeForRegex =
- rangeForRegex.substring(0, rangeForRegex.length()-2);
- }
- } else {
- rangeForRegex = "";
- }
+ rangeForRegex = truncateRange(rangeForRegex);
}
}
return null;
}
+ /**
+ * The tag which is falling in the exclusion range(s) should not be
+ * considered as the matching tag. Ignores the tag matching with the
+ * non zero range(s), if the tag also matches with one of the exclusion
+ * range(s) i.e. range(s) having quality weight q=0.
+ */
+ private static boolean shouldIgnoreLookupMatch(List<LanguageRange> zeroRange,
+ String tag) {
+ for (LanguageRange lr : zeroRange) {
+ String range = lr.getRange();
+
+ // Special language range ("*") is ignored in lookup.
+ if (range.equals("*")) {
+ continue;
+ }
+
+ String rangeForRegex = range.replaceAll("\\x2A", "\\\\p{Alnum}*");
+ while (rangeForRegex.length() > 0) {
+ if (tag.matches(rangeForRegex)) {
+ return true;
+ }
+ // Truncate from the end....
+ rangeForRegex = truncateRange(rangeForRegex);
+ }
+ }
+
+ return false;
+ }
+
+ /* Truncate the range from end during the lookup match */
+ private static String truncateRange(String rangeForRegex) {
+ int index = rangeForRegex.lastIndexOf('-');
+ if (index >= 0) {
+ rangeForRegex = rangeForRegex.substring(0, index);
+
+ // if range ends with an extension key, truncate it.
+ index = rangeForRegex.lastIndexOf('-');
+ if (index >= 0 && index == rangeForRegex.length() - 2) {
+ rangeForRegex
+ = rangeForRegex.substring(0, rangeForRegex.length() - 2);
+ }
+ } else {
+ rangeForRegex = "";
+ }
+
+ return rangeForRegex;
+ }
+
+ /* Returns the split index of the priority list, if it contains
+ * language range(s) with quality weight as 0 i.e. q=0, else -1
+ */
+ private static int splitRanges(List<LanguageRange> priorityList) {
+ int size = priorityList.size();
+ for (int index = 0; index < size; index++) {
+ LanguageRange range = priorityList.get(index);
+ if (range.getWeight() == 0) {
+ return index;
+ }
+ }
+
+ return -1; // no q=0 range exists
+ }
+
public static List<LanguageRange> parse(String ranges) {
ranges = ranges.replaceAll(" ", "").toLowerCase(Locale.ROOT);
if (ranges.startsWith("accept-language:")) {
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/util/Locale/Bug8035133.java Wed Aug 03 09:37:57 2016 +0900
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ * @test
+ * @bug 8035133
+ * @summary Checks that the tags matching the range with quality weight q=0
+ * e.g. en;q=0 must be elimited and must not be the part of output
+ */
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+
+
+public class Bug8035133 {
+
+ private static boolean err = false;
+
+ public static void main(String[] args) {
+
+ // checking Locale.lookup with de-ch;q=0
+ checkLookup("en;q=0.1, *-ch;q=0.5, de-ch;q=0",
+ "de-ch, en, fr-ch", "fr-CH");
+
+ /* checking Locale.lookup with *;q=0 '*' should be ignored
+ * in lookup
+ */
+ checkLookup("en;q=0.1, *-ch;q=0.5, *;q=0",
+ "de-ch, en, fr-ch", "de-CH");
+
+ // checking Locale.filter with fr-ch;q=0 in BASIC_FILTERING
+ checkFilter("en;q=0.1, fr-ch;q=0.0, de-ch;q=0.5",
+ "de-ch, en, fr-ch", "de-CH, en");
+
+ // checking Locale.filter with *;q=0 in BASIC_FILTERING
+ checkFilter("de-ch;q=0.6, *;q=0", "de-ch, fr-ch", "");
+
+ // checking Locale.filter with *;q=0 in BASIC_FILTERING
+ checkFilter("de-ch;q=0.6, de;q=0", "de-ch", "");
+
+ // checking Locale.filter with *;q=0.6, en;q=0 in BASIC_FILTERING
+ checkFilter("*;q=0.6, en;q=0", "de-ch, hi-in, en", "de-CH, hi-IN");
+
+ // checking Locale.filter with de-ch;q=0 in EXTENDED_FILTERING
+ checkFilter("en;q=0.1, *-ch;q=0.5, de-ch;q=0",
+ "de-ch, en, fr-ch", "fr-CH, en");
+
+ /* checking Locale.filter with *-ch;q=0 in EXTENDED_FILTERING which
+ * must make filter to return "" empty or no match
+ */
+ checkFilter("de-ch;q=0.5, *-ch;q=0", "de-ch, fr-ch", "");
+
+ /* checking Locale.filter with *;q=0 in EXTENDED_FILTERING which
+ * must make filter to return "" empty or no match
+ */
+ checkFilter("*-ch;q=0.5, *;q=0", "de-ch, fr-ch", "");
+
+ /* checking Locale.filter with *;q=0.6, *-Latn;q=0 in
+ * EXTENDED_FILTERING
+ */
+ checkFilter("*;q=0.6, *-Latn;q=0", "de-ch, hi-in, en-Latn",
+ "de-CH, hi-IN");
+
+ if (err) {
+ throw new RuntimeException("[LocaleMatcher method(s) failed]");
+ }
+
+ }
+
+ private static void checkLookup(String ranges, String tags,
+ String expectedLocale) {
+
+ List<Locale.LanguageRange> priorityList = Locale.LanguageRange
+ .parse(ranges);
+ List<Locale> localeList = generateLocales(tags);
+ Locale loc = Locale.lookup(priorityList, localeList);
+ String actualLocale
+ = loc.toLanguageTag();
+
+ if (!actualLocale.equals(expectedLocale)) {
+ System.err.println("Locale.lookup failed with ranges: " + ranges
+ + " Expected: " + expectedLocale
+ + " Actual: " + actualLocale);
+ err = true;
+ }
+
+ }
+
+ private static void checkFilter(String ranges, String tags,
+ String expectedLocales) {
+
+ List<Locale.LanguageRange> priorityList = Locale.LanguageRange
+ .parse(ranges);
+ List<Locale> localeList = generateLocales(tags);
+ String actualLocales = getLocalesAsString(
+ Locale.filter(priorityList, localeList));
+
+ if (!actualLocales.equals(expectedLocales)) {
+ System.err.println("Locale.filter failed with ranges: " + ranges
+ + " Expected: " + expectedLocales
+ + " Actual: " + actualLocales);
+ err = true;
+ }
+
+ }
+
+ private static List<Locale> generateLocales(String tags) {
+ if (tags == null) {
+ return null;
+ }
+
+ List<Locale> localeList = new ArrayList<>();
+ if (tags.equals("")) {
+ return localeList;
+ }
+ String[] t = tags.split(", ");
+ for (String tag : t) {
+ localeList.add(Locale.forLanguageTag(tag));
+ }
+ return localeList;
+ }
+
+ private static String getLocalesAsString(List<Locale> locales) {
+ StringBuilder sb = new StringBuilder();
+
+ Iterator<Locale> itr = locales.iterator();
+ if (itr.hasNext()) {
+ sb.append(itr.next().toLanguageTag());
+ }
+ while (itr.hasNext()) {
+ sb.append(", ");
+ sb.append(itr.next().toLanguageTag());
+ }
+
+ return sb.toString().trim();
+ }
+
+}