8138824: java.lang.String: spec doesn't match impl when ignoring case - equalsIgnoreCase(), regionMatches()
author bchristi
Tue, 27 Oct 2015 09:20:24 -0700
changeset 33314 777bf87e5050
parent 33313 de27000120de
child 33320 acfcfc603220
8138824: java.lang.String: spec doesn't match impl when ignoring case - equalsIgnoreCase(), regionMatches() Reviewed-by: naoto, rriggs
jdk/src/java.base/share/classes/java/lang/String.java
jdk/test/java/lang/String/EqualsIgnoreCase.java
--- a/jdk/src/java.base/share/classes/java/lang/String.java	Tue Oct 27 10:14:36 2015 +0530
+++ b/jdk/src/java.base/share/classes/java/lang/String.java	Tue Oct 27 09:20:24 2015 -0700
@@ -102,6 +102,10 @@
  * Unicode code points (i.e., characters), in addition to those for
  * dealing with Unicode code units (i.e., {@code char} values).
  *
+ * <p>Unless otherwise noted, methods for comparing Strings do not take locale
+ * into account.  The {@link java.text.Collator} class provides methods for
+ * finer-grained, locale-sensitive String comparison.
+ *
  * @author  Lee Boynton
  * @author  Arthur van Hoff
  * @author  Martin Buchholz
@@ -971,6 +975,9 @@
      * String} object that represents the same sequence of characters as this
      * object.
      *
+     * <p>For finer-grained String comparison, refer to
+     * {@link java.text.Collator}.
+     *
      * @param  anObject
      *         The object to compare this {@code String} against
      *
@@ -1008,6 +1015,9 @@
      * sequence of characters as the specified {@code StringBuffer}. This method
      * synchronizes on the {@code StringBuffer}.
      *
+     * <p>For finer-grained String comparison, refer to
+     * {@link java.text.Collator}.
+     *
      * @param  sb
      *         The {@code StringBuffer} to compare this {@code String} against
      *
@@ -1043,6 +1053,9 @@
      * {@code CharSequence} is a {@code StringBuffer} then the method
      * synchronizes on it.
      *
+     * <p>For finer-grained String comparison, refer to
+     * {@link java.text.Collator}.
+     *
      * @param  cs
      *         The sequence to compare this {@code String} against
      *
@@ -1092,14 +1105,14 @@
      * <ul>
      *   <li> The two characters are the same (as compared by the
      *        {@code ==} operator)
-     *   <li> Applying the method {@link
-     *        java.lang.Character#toUpperCase(char)} to each character
-     *        produces the same result
-     *   <li> Applying the method {@link
-     *        java.lang.Character#toLowerCase(char)} to each character
-     *        produces the same result
+     *   <li> Calling {@code Character.toLowerCase(Character.toUpperCase(char))}
+     *        on each character produces the same result
      * </ul>
      *
+     * <p>Note that this method does <em>not</em> take locale into account, and
+     * will produce unsatisfactory results for certain locales.  The
+     * {@link java.text.Collator} class provides locale-sensitive comparison.
+     *
      * @param  anotherString
      *         The {@code String} to compare this {@code String} against
      *
@@ -1150,6 +1163,9 @@
      * this.length()-anotherString.length()
      * </pre></blockquote>
      *
+     * <p>For finer-grained String comparison, refer to
+     * {@link java.text.Collator}.
+     *
      * @param   anotherString   the {@code String} to be compared.
      * @return  the value {@code 0} if the argument string is equal to
      *          this string; a value less than {@code 0} if this string
@@ -1181,10 +1197,9 @@
      * <p>
      * Note that this Comparator does <em>not</em> take locale into account,
      * and will result in an unsatisfactory ordering for certain locales.
-     * The java.text package provides <em>Collators</em> to allow
-     * locale-sensitive ordering.
+     * The {@link java.text.Collator} class provides locale-sensitive comparison.
      *
-     * @see     java.text.Collator#compare(String, String)
+     * @see     java.text.Collator
      * @since   1.2
      */
     public static final Comparator<String> CASE_INSENSITIVE_ORDER
@@ -1231,14 +1246,13 @@
      * <p>
      * Note that this method does <em>not</em> take locale into account,
      * and will result in an unsatisfactory ordering for certain locales.
-     * The java.text package provides <em>collators</em> to allow
-     * locale-sensitive ordering.
+     * The {@link java.text.Collator} class provides locale-sensitive comparison.
      *
      * @param   str   the {@code String} to be compared.
      * @return  a negative integer, zero, or a positive integer as the
      *          specified String is greater than, equal to, or less
      *          than this String, ignoring case considerations.
-     * @see     java.text.Collator#compare(String, String)
+     * @see     java.text.Collator
      * @since   1.2
      */
     public int compareToIgnoreCase(String str) {
@@ -1268,6 +1282,9 @@
      * <i>k</i>{@code )}
      * </ul>
      *
+     * <p>Note that this method does <em>not</em> take locale into account.  The
+     * {@link java.text.Collator} class provides locale-sensitive comparison.
+     *
      * @param   toffset   the starting offset of the subregion in this string.
      * @param   other     the string argument.
      * @param   ooffset   the starting offset of the subregion in the string
@@ -1323,16 +1340,16 @@
      * <li>{@code ignoreCase} is {@code true} and there is some nonnegative
      * integer <i>k</i> less than {@code len} such that:
      * <blockquote><pre>
-     * Character.toLowerCase(this.charAt(toffset+k)) !=
-     Character.toLowerCase(other.charAt(ooffset+k))
-     * </pre></blockquote>
-     * and:
-     * <blockquote><pre>
-     * Character.toUpperCase(this.charAt(toffset+k)) !=
-     *         Character.toUpperCase(other.charAt(ooffset+k))
+     * Character.toLowerCase(Character.toUpperCase(this.charAt(toffset+k))) !=
+     Character.toLowerCase(Character.toUpperCase(other.charAt(ooffset+k)))
      * </pre></blockquote>
      * </ul>
      *
+     * <p>Note that this method does <em>not</em> take locale into account,
+     * and will produce unsatisfactory results for certain locales when
+     * {@code ignoreCase} is {@code true}.  The {@link java.text.Collator} class
+     * provides locale-sensitive comparison.
+     *
      * @param   ignoreCase   if {@code true}, ignore case when comparing
      *                       characters.
      * @param   toffset      the starting offset of the subregion in this
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/lang/String/EqualsIgnoreCase.java	Tue Oct 27 09:20:24 2015 -0700
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/* @test
+ * @bug 8138824
+ * @summary Test expected equalsIgnoreCase behavior for some known asymmetric case mappings
+ */
+
+public class EqualsIgnoreCase {
+    private static final String SMALL_I = "i";
+    private static final String CAPITAL_I = "I";
+    // Characters that do not map symmetrically between upper/lower case
+    private static final String SMALL_DOTLESS_I = "\u0131";
+    private static final String CAPITAL_I_WITH_DOT = "\u0130";
+    private static final String LOWER_GREEK_THETA = "\u03D1";
+    private static final String CAPITAL_GREEK_THETA = "\u03F4";
+
+    public static void main(String[] args) {
+        compareFuncs(SMALL_I, CAPITAL_I, true, true); // ordinary pair: equal under both the actual behavior and the old spec
+        compareFuncs(CAPITAL_I_WITH_DOT, SMALL_DOTLESS_I, true, false); // expected equal in practice, but not under the old spec
+        compareFuncs(LOWER_GREEK_THETA, CAPITAL_GREEK_THETA, true, false); // expected equal in practice, but not under the old spec
+    }
+
+    /**
+     * Compare the actual results of equalsIgnoreCase():
+     *   toLowerCase(toUpperCase(eachChar))
+     * to the behavior described in the equalsIgnoreCase() spec prior to 8138824:
+     *   toUpperCase(eachChar)
+     *   toLowerCase(eachChar)
+     *
+     * @param s1 A string
+     * @param s2 Another string
+     * @param expectEquals Expected result of equalsIgnoreCase()
+     * @param expectTuTl Expected result of toUpperToLowerOriginals()
+     */
+    private static void compareFuncs(String s1, String s2, boolean expectEquals, boolean expectTuTl) {
+        System.out.println(s1 + ", " + s2);
+        boolean equalsResult = s1.equalsIgnoreCase(s2);
+        System.out.println("equalsIgnoreCase:" + equalsResult);
+
+        boolean tuTlResult = toUpperToLowerOriginals(s1, s2);
+        System.out.println("tUtLO:" + tuTlResult);
+        boolean failed = false;
+
+        if (equalsResult != expectEquals) {
+            System.out.println("Expected " + expectEquals + " from equalsIgnoreCase() but got " + equalsResult);
+            failed = true;
+        }
+        if (tuTlResult != expectTuTl) {
+            System.out.println("Expected " + expectTuTl + " from toUpperToLowerOriginals() but got " + tuTlResult);
+            failed = true;
+        }
+        if (failed) { throw new RuntimeException("Test Failed"); } // thrown only after both checks, so every mismatch is printed first
+    }
+
+    /**
+     * Models the equalsIgnoreCase() spec prior to 8138824.  Returns true if
+     * the String lengths are equal and each pair of corresponding chars is:
+     *   1. == after both are converted to upper case
+     * or
+     *   2. == after both are converted to lower case
+     * (each conversion is applied to the original char, never chained).
+     */
+    private static boolean toUpperToLowerOriginals(String str1, String str2) {
+        if (str1.length() != str2.length()) { return false; }
+        for (int i = 0; i < str1.length(); i++) {
+            char c1 = str1.charAt(i);
+            char c2 = str2.charAt(i);
+
+            char uc1 = Character.toUpperCase(c1);
+            char uc2 = Character.toUpperCase(c2);
+            boolean upperMatch = uc1 == uc2;
+
+            char lc1 = Character.toLowerCase(c1);
+            char lc2 = Character.toLowerCase(c2);
+            boolean lowerMatch = lc1 == lc2;
+
+            if (!(upperMatch || lowerMatch)) {
+                return false;
+            }
+        }
+        return true;
+    }
+}