8032842: Locale.filterTags()/lookupTag() methods return lowercased language tags
author nishjain
Fri, 07 Jul 2017 12:19:00 +0530
changeset 45838 c6223c3b4594
parent 45837 bc6fdf743662
child 45839 6df5e24443fc
8032842: Locale.filterTags()/lookupTag() methods return lowercased language tags
8175539: Duplicate matching tags returned by Locale.filterTags() for LanguageRange("*")
Reviewed-by: naoto
Contributed-by: nishit.jain@oracle.com
jdk/src/java.base/share/classes/java/util/Locale.java
jdk/src/java.base/share/classes/sun/util/locale/LocaleMatcher.java
jdk/test/java/util/Locale/Bug7069824.java
jdk/test/java/util/Locale/Bug8032842.java
--- a/jdk/src/java.base/share/classes/java/util/Locale.java	Thu Jul 06 22:40:35 2017 -0700
+++ b/jdk/src/java.base/share/classes/java/util/Locale.java	Fri Jul 07 12:19:00 2017 +0530
@@ -3257,6 +3257,9 @@
      * Returns a list of matching {@code Locale} instances using the filtering
      * mechanism defined in RFC 4647.
      *
+     * This filter operation on the given {@code locales} ensures that only
+     * unique matching locale(s) are returned.
+     *
      * @param priorityList user's Language Priority List in which each language
      *     tag is sorted in descending order based on priority or weight
      * @param locales {@code Locale} instances used for matching
@@ -3284,6 +3287,9 @@
      * {@link #filter(List, Collection, FilteringMode)} when {@code mode} is
      * {@link FilteringMode#AUTOSELECT_FILTERING}.
      *
+     * This filter operation on the given {@code locales} ensures that only
+     * unique matching locale(s) are returned.
+     *
      * @param priorityList user's Language Priority List in which each language
      *     tag is sorted in descending order based on priority or weight
      * @param locales {@code Locale} instances used for matching
@@ -3304,6 +3310,17 @@
      * Returns a list of matching languages tags using the basic filtering
      * mechanism defined in RFC 4647.
      *
+     * This filter operation on the given {@code tags} ensures that only
+     * unique matching tag(s) are returned with preserved case. If several
+     * matching tags differ only in case, only the first one that is
+     * matched is returned, with its case preserved.
+     * For example, "de-ch" is returned out of the duplicate matching tags
+     * "de-ch" and "de-CH", if "de-ch" is checked first for matching in the
+     * given {@code tags}. Note that if the given {@code tags} is an unordered
+     * {@code Collection}, the returned matching tag out of duplicate tags is
+     * subject to change, depending on the implementation of the
+     * {@code Collection}.
+     *
      * @param priorityList user's Language Priority List in which each language
      *     tag is sorted in descending order based on priority or weight
      * @param tags language tags
@@ -3331,6 +3348,17 @@
      * {@link #filterTags(List, Collection, FilteringMode)} when {@code mode}
      * is {@link FilteringMode#AUTOSELECT_FILTERING}.
      *
+     * This filter operation on the given {@code tags} ensures that only
+     * unique matching tag(s) are returned with preserved case. If several
+     * matching tags differ only in case, only the first one that is
+     * matched is returned, with its case preserved.
+     * For example, "de-ch" is returned out of the duplicate matching tags
+     * "de-ch" and "de-CH", if "de-ch" is checked first for matching in the
+     * given {@code tags}. Note that if the given {@code tags} is an unordered
+     * {@code Collection}, the returned matching tag out of duplicate tags is
+     * subject to change, depending on the implementation of the
+     * {@code Collection}.
+     *
      * @param priorityList user's Language Priority List in which each language
      *     tag is sorted in descending order based on priority or weight
      * @param tags language tags
@@ -3370,6 +3398,9 @@
      * Returns the best-matching language tag using the lookup mechanism
      * defined in RFC 4647.
      *
+     * This lookup operation on the given {@code tags} ensures that the
+     * first matching tag with preserved case is returned.
+     *
      * @param priorityList user's Language Priority List in which each language
      *     tag is sorted in descending order based on priority or weight
      * @param tags language tangs used for matching
--- a/jdk/src/java.base/share/classes/sun/util/locale/LocaleMatcher.java	Thu Jul 06 22:40:35 2017 -0700
+++ b/jdk/src/java.base/share/classes/sun/util/locale/LocaleMatcher.java	Fri Jul 07 12:19:00 2017 +0530
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,9 @@
 import static java.util.Locale.FilteringMode.*;
 import static java.util.Locale.LanguageRange.*;
 import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
 
 /**
  * Implementation for BCP47 Locale matching
@@ -126,12 +129,16 @@
                 return new ArrayList<String>(tags);
             } else {
                 for (String tag : tags) {
-                    tag = tag.toLowerCase(Locale.ROOT);
-                    if (tag.startsWith(range)) {
+                    // change to lowercase for case-insensitive matching
+                    String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
+                    if (lowerCaseTag.startsWith(range)) {
                         int len = range.length();
-                        if ((tag.length() == len || tag.charAt(len) == '-')
-                            && !list.contains(tag)
-                            && !shouldIgnoreFilterBasicMatch(zeroRanges, tag)) {
+                        if ((lowerCaseTag.length() == len
+                                || lowerCaseTag.charAt(len) == '-')
+                            && !caseInsensitiveMatch(list, lowerCaseTag)
+                            && !shouldIgnoreFilterBasicMatch(zeroRanges,
+                                    lowerCaseTag)) {
+                            // preserving the case of the input tag
                             list.add(tag);
                         }
                     }
@@ -152,14 +159,17 @@
     private static Collection<String> removeTagsMatchingBasicZeroRange(
             List<LanguageRange> zeroRange, Collection<String> tags) {
         if (zeroRange.isEmpty()) {
+            tags = removeDuplicates(tags);
             return tags;
         }
 
         List<String> matchingTags = new ArrayList<>();
         for (String tag : tags) {
-            tag = tag.toLowerCase(Locale.ROOT);
-            if (!shouldIgnoreFilterBasicMatch(zeroRange, tag)) {
-                matchingTags.add(tag);
+            // change to lowercase for case-insensitive matching
+            String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
+            if (!shouldIgnoreFilterBasicMatch(zeroRange, lowerCaseTag)
+                    && !caseInsensitiveMatch(matchingTags, lowerCaseTag)) {
+                matchingTags.add(tag); // preserving the case of the input tag
             }
         }
 
@@ -167,6 +177,26 @@
     }
 
     /**
+     * Drops case-insensitive duplicates from the given {@code tags}, keeping
+     * the first occurrence of each tag (original case) in iteration order.
+     */
+    private static Collection<String> removeDuplicates(
+            Collection<String> tags) {
+        Set<String> distinctTags = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
+        return tags.stream().filter(x -> distinctTags.add(x))
+                .collect(Collectors.toList());
+    }
+
+    /**
+     * Returns true if any element of the given {@code list} is equal to the
+     * given {@code tag} when compared with {@code String.equalsIgnoreCase}.
+     */
+    private static boolean caseInsensitiveMatch(List<String> list, String tag) {
+        return list.stream().anyMatch((element)
+                -> (element.equalsIgnoreCase(tag)));
+    }
+
+    /**
      * The tag which is falling in the basic exclusion range(s) should not
      * be considered as the matching tag. Ignores the tag matching with the
      * non-zero ranges, if the tag also matches with one of the basic exclusion
@@ -216,8 +246,9 @@
             }
             String[] rangeSubtags = range.split("-");
             for (String tag : tags) {
-                tag = tag.toLowerCase(Locale.ROOT);
-                String[] tagSubtags = tag.split("-");
+                // change to lowercase for case-insensitive matching
+                String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
+                String[] tagSubtags = lowerCaseTag.split("-");
                 if (!rangeSubtags[0].equals(tagSubtags[0])
                     && !rangeSubtags[0].equals("*")) {
                     continue;
@@ -225,9 +256,11 @@
 
                 int rangeIndex = matchFilterExtendedSubtags(rangeSubtags,
                         tagSubtags);
-                if (rangeSubtags.length == rangeIndex && !list.contains(tag)
-                        && !shouldIgnoreFilterExtendedMatch(zeroRanges, tag)) {
-                    list.add(tag);
+                if (rangeSubtags.length == rangeIndex
+                        && !caseInsensitiveMatch(list, lowerCaseTag)
+                        && !shouldIgnoreFilterExtendedMatch(zeroRanges,
+                                lowerCaseTag)) {
+                    list.add(tag); // preserve the case of the input tag
                 }
             }
         }
@@ -245,14 +278,17 @@
     private static Collection<String> removeTagsMatchingExtendedZeroRange(
             List<LanguageRange> zeroRange, Collection<String> tags) {
         if (zeroRange.isEmpty()) {
+            tags = removeDuplicates(tags);
             return tags;
         }
 
         List<String> matchingTags = new ArrayList<>();
         for (String tag : tags) {
-            tag = tag.toLowerCase(Locale.ROOT);
-            if (!shouldIgnoreFilterExtendedMatch(zeroRange, tag)) {
-                matchingTags.add(tag);
+            // change to lowercase for case-insensitive matching
+            String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
+            if (!shouldIgnoreFilterExtendedMatch(zeroRange, lowerCaseTag)
+                    && !caseInsensitiveMatch(matchingTags, lowerCaseTag)) {
+                matchingTags.add(tag); // preserve the case of the input tag
             }
         }
 
@@ -368,10 +404,11 @@
             String rangeForRegex = range.replace("*", "\\p{Alnum}*");
             while (rangeForRegex.length() > 0) {
                 for (String tag : tags) {
-                    tag = tag.toLowerCase(Locale.ROOT);
-                    if (tag.matches(rangeForRegex)
-                            && !shouldIgnoreLookupMatch(zeroRanges, tag)) {
-                        return tag;
+                    // change to lowercase for case-insensitive matching
+                    String lowerCaseTag = tag.toLowerCase(Locale.ROOT);
+                    if (lowerCaseTag.matches(rangeForRegex)
+                            && !shouldIgnoreLookupMatch(zeroRanges, lowerCaseTag)) {
+                        return tag; // preserve the case of the input tag
                     }
                 }
 
--- a/jdk/test/java/util/Locale/Bug7069824.java	Thu Jul 06 22:40:35 2017 -0700
+++ b/jdk/test/java/util/Locale/Bug7069824.java	Fri Jul 07 12:19:00 2017 +0530
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,7 +23,7 @@
 
 /*
  * @test
- * @bug 7069824 8042360
+ * @bug 7069824 8042360 8032842 8175539
  * @summary Verify implementation for Locale matching.
  * @run main Bug7069824
  */
@@ -747,7 +747,7 @@
         priorityList = LanguageRange.parse(ranges);
         tagList = generateLanguageTags(tags);
         actualTags = showLanguageTags(Locale.filterTags(priorityList, tagList));
-        expectedTags = "ja-jp-hepburn, en";
+        expectedTags = "ja-JP-hepburn, en";
 
         if (!expectedTags.equals(actualTags)) {
             error = true;
@@ -763,7 +763,7 @@
         priorityList = LanguageRange.parse(ranges);
         tagList = generateLanguageTags(tags);
         actualTags = showLanguageTags(Locale.filterTags(priorityList, tagList, mode));
-        expectedTags = "de-de, de-de-x-goethe";
+        expectedTags = "de-DE, de-DE-x-goethe";
 
         if (!expectedTags.equals(actualTags)) {
             error = true;
@@ -779,8 +779,8 @@
         priorityList = LanguageRange.parse(ranges);
         tagList = generateLanguageTags(tags);
         actualTags = showLanguageTags(Locale.filterTags(priorityList, tagList, mode));
-        expectedTags = "de-de, de-latn-de, de-latf-de, de-de-x-goethe, "
-                       + "de-latn-de-1996, de-deva-de";
+        expectedTags = "de-DE, de-Latn-DE, de-Latf-DE, de-DE-x-goethe, "
+                       + "de-Latn-DE-1996, de-Deva-DE";
 
         if (!expectedTags.equals(actualTags)) {
             error = true;
@@ -796,8 +796,8 @@
         priorityList = LanguageRange.parse(ranges);
         tagList = generateLanguageTags(tags);
         actualTags = showLanguageTags(Locale.filterTags(priorityList, tagList, mode));
-        expectedTags = "de-de, de-latn-de, de-latf-de, de-de-x-goethe, "
-                       + "de-latn-de-1996, de-deva-de";
+        expectedTags = "de-DE, de-Latn-DE, de-Latf-DE, de-DE-x-goethe, "
+                       + "de-Latn-DE-1996, de-Deva-DE";
 
         if (!expectedTags.equals(actualTags)) {
             error = true;
@@ -884,7 +884,7 @@
         priorityList = LanguageRange.parse(ranges);
         tagList = generateLanguageTags(tags);
         actualTag = Locale.lookupTag(priorityList, tagList);
-        expectedTag = "fr-jp";
+        expectedTag = "fr-JP";
 
         if (!expectedTag.equals(actualTag)) {
             error = true;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/util/Locale/Bug8032842.java	Fri Jul 07 12:19:00 2017 +0530
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ * @test
+ * @bug 8032842 8175539
+ * @summary Checks that the filterTags() and lookupTag() methods
+ *          preserve the case of matching language tag(s).
+ *          Before the 8032842 fix these methods returned the matching
+ *          language tag(s) in lowercase.
+ *          Also, checks the filterTags() to return only unique
+ *          (ignoring case considerations) matching tags.
+ *
+ */
+
+import java.util.List;
+import java.util.Locale;
+import java.util.Locale.FilteringMode;
+import java.util.Locale.LanguageRange;
+
+public class Bug8032842 {
+
+    public static void main(String[] args) {
+        // test filterBasic() for preserving the case of matching tags for
+        // the language range '*', with no duplicates in the matching tags
+        testFilter("*", List.of("de-CH", "hi-in", "En-GB", "ja-Latn-JP",
+                "JA-JP", "en-GB"),
+                List.of("de-CH", "hi-in", "En-GB", "ja-Latn-JP", "JA-JP"),
+                FilteringMode.AUTOSELECT_FILTERING);
+
+        // test filterBasic() for preserving the case of matching tags for
+        // basic ranges other than *, with no duplicates in the matching tags
+        testFilter("mtm-RU, en-GB", List.of("En-Gb", "mTm-RU", "en-US",
+                "en-latn", "en-GB"),
+                List.of("mTm-RU", "En-Gb"), FilteringMode.AUTOSELECT_FILTERING);
+
+        // test filterExtended() for preserving the case of matching tags for
+        // the language range '*', with no duplicates in the matching tags
+        testFilter("*", List.of("de-CH", "hi-in", "En-GB", "hi-IN",
+                "ja-Latn-JP", "JA-JP"),
+                List.of("de-CH", "hi-in", "En-GB", "ja-Latn-JP", "JA-JP"),
+                FilteringMode.EXTENDED_FILTERING);
+
+        // test filterExtended() for preserving the case of matching tags for
+        // extended ranges other than *, with no duplicates in the matching tags
+        testFilter("*-ch;q=0.5, *-Latn;q=0.4", List.of("fr-CH", "de-Ch",
+                "en-latn", "en-US", "en-Latn"),
+                List.of("fr-CH", "de-Ch", "en-latn"),
+                FilteringMode.EXTENDED_FILTERING);
+
+        // test lookupTag() for preserving the case of matching tag
+        testLookup("*-ch;q=0.5", List.of("en", "fR-cH"), "fR-cH");
+    }
+
+    // Fails unless filterTags() returns exactly the expected list of tags.
+    public static void testFilter(String ranges, List<String> tags,
+            List<String> expected, FilteringMode mode) {
+        List<LanguageRange> priorityList = LanguageRange.parse(ranges);
+        List<String> actual = Locale.filterTags(priorityList, tags, mode);
+        if (!actual.equals(expected)) {
+            throw new RuntimeException("[filterTags() failed for the language"
+                    + " range: " + ranges + ", Expected: " + expected
+                    + ", Found: " + actual + "]");
+        }
+    }
+
+    // Fails unless lookupTag() returns exactly the expected tag.
+    public static void testLookup(String ranges, List<String> tags,
+            String expected) {
+        List<LanguageRange> priorityList = LanguageRange.parse(ranges);
+        String actual = Locale.lookupTag(priorityList, tags);
+        // actual is null when nothing matches; report that, not an NPE
+        if (!expected.equals(actual)) {
+            throw new RuntimeException("[lookupTag() failed for the language"
+                    + " range: " + ranges + ", Expected: " + expected
+                    + ", Found: " + actual + "]");
+        }
+    }
+}
+