# HG changeset patch # User naoto # Date 1400089971 25200 # Node ID a38282cba2fc9b3963074b6234ec6f85ef0515f6 # Parent d20f4a10c235b6f1467b608de2a1c40aeac00395 8041791: String.toLowerCase regression - violates Unicode standard Reviewed-by: peytoia diff -r d20f4a10c235 -r a38282cba2fc jdk/src/share/classes/java/lang/ConditionalSpecialCasing.java --- a/jdk/src/share/classes/java/lang/ConditionalSpecialCasing.java Wed May 14 16:40:53 2014 +0100 +++ b/jdk/src/share/classes/java/lang/ConditionalSpecialCasing.java Wed May 14 10:52:51 2014 -0700 @@ -62,6 +62,7 @@ //# Conditional mappings //# ================================================================================ new Entry(0x03A3, new char[]{0x03C2}, new char[]{0x03A3}, null, FINAL_CASED), // # GREEK CAPITAL LETTER SIGMA + new Entry(0x0130, new char[]{0x0069, 0x0307}, new char[]{0x0130}, null, 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE //# ================================================================================ //# Locale-sensitive mappings @@ -77,8 +78,8 @@ //# ================================================================================ //# Turkish and Azeri -// new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE -// new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE + new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE + new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE new Entry(0x0307, new char[]{}, new char[]{0x0307}, "tr", AFTER_I), // # COMBINING DOT ABOVE new Entry(0x0307, new char[]{}, new char[]{0x0307}, "az", AFTER_I), // # COMBINING DOT ABOVE new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "tr", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I @@ -147,21 +148,25 @@ private static char[] lookUpTable(String src, int index, Locale locale, boolean bLowerCasing) { HashSet set = entryTable.get(new Integer(src.codePointAt(index))); + char[] ret = null; if (set != null) { Iterator iter = set.iterator(); String currentLang = locale.getLanguage(); while (iter.hasNext()) { Entry entry = iter.next(); - String conditionLang= entry.getLanguage(); + String conditionLang = entry.getLanguage(); if (((conditionLang == null) || (conditionLang.equals(currentLang))) && isConditionMet(src, index, locale, entry.getCondition())) { - return (bLowerCasing ? entry.getLowerCase() : entry.getUpperCase()); + ret = bLowerCasing ? entry.getLowerCase() : entry.getUpperCase(); + if (conditionLang != null) { + break; + } } } } - return null; + return ret; } private static boolean isConditionMet(String src, int index, Locale locale, int condition) { diff -r d20f4a10c235 -r a38282cba2fc jdk/src/share/classes/java/lang/String.java --- a/jdk/src/share/classes/java/lang/String.java Wed May 14 16:40:53 2014 +0100 +++ b/jdk/src/share/classes/java/lang/String.java Wed May 14 10:52:51 2014 -0700 @@ -2583,6 +2583,9 @@ if (cp == '\u03A3') { // GREEK CAPITAL LETTER SIGMA return toLowerCaseEx(result, i, locale, false); } + if (cp == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE + return toLowerCaseEx(result, i, locale, true); + } cp = Character.toLowerCase(cp); if (!Character.isBmpCodePoint(cp)) { return toLowerCaseEx(result, i, locale, false); diff -r d20f4a10c235 -r a38282cba2fc jdk/test/java/lang/String/ToLowerCase.java --- a/jdk/test/java/lang/String/ToLowerCase.java Wed May 14 16:40:53 2014 +0100 +++ b/jdk/test/java/lang/String/ToLowerCase.java Wed May 14 10:52:51 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,7 +23,7 @@ /* @test - @bug 4217441 4533872 4900935 8020037 8032012 + @bug 4217441 4533872 4900935 8020037 8032012 8041791 @summary toLowerCase should lower-case Greek Sigma correctly depending on the context (final/non-final). Also it should handle Locale specific (lt, tr, and az) lowercasings and supplementary @@ -72,8 +72,10 @@ // I-dot tests test("\u0130", turkish, "i"); test("\u0130", az, "i"); - test("\u0130", lt, "i"); - test("\u0130", Locale.US, "i"); + test("\u0130", lt, "\u0069\u0307"); + test("\u0130", Locale.US, "\u0069\u0307"); + test("\u0130", Locale.JAPAN, "\u0069\u0307"); + test("\u0130", Locale.ROOT, "\u0069\u0307"); // Remove dot_above in the sequence I + dot_above (Turkish and Azeri) test("I\u0307", turkish, "i"); @@ -111,6 +113,12 @@ if (cp >= Character.MIN_HIGH_SURROGATE && cp <= Character.MAX_HIGH_SURROGATE) { continue; } + if (cp == 0x0130) { + // Although UnicodeData.txt has the lower case char as \u0069, it should be + // handled with the rules in SpecialCasing.txt, i.e., \u0069\u0307 in + // non Turkic locales. + continue; + } int lowerCase = Character.toLowerCase(cp); if (lowerCase == -1) { //Character.ERROR continue;