# HG changeset patch # User sherman # Date 1324332854 28800 # Node ID 3db172a5433cb5a435c3455e5aa1bcb403082072 # Parent a49be3718db90bca11e3507e48e97ae915165569 6990617: Regular expression doesn't match if unicode character next to a digit. Summary: updated RemoveQEQuotation() to deal with this case correctly Reviewed-by: sherman Contributed-by: stephen.flores@oracle.com diff -r a49be3718db9 -r 3db172a5433c jdk/src/share/classes/java/util/regex/Pattern.java --- a/jdk/src/share/classes/java/util/regex/Pattern.java Sat Dec 17 20:07:14 2011 +0000 +++ b/jdk/src/share/classes/java/util/regex/Pattern.java Mon Dec 19 14:14:14 2011 -0800 @@ -1583,13 +1583,26 @@ return; int j = i; i += 2; - int[] newtemp = new int[j + 2*(pLen-i) + 2]; + int[] newtemp = new int[j + 3*(pLen-i) + 2]; System.arraycopy(temp, 0, newtemp, 0, j); boolean inQuote = true; + boolean beginQuote = true; while (i < pLen) { int c = temp[i++]; - if (! ASCII.isAscii(c) || ASCII.isAlnum(c)) { + if (!ASCII.isAscii(c) || ASCII.isAlpha(c)) { + newtemp[j++] = c; + } else if (ASCII.isDigit(c)) { + if (beginQuote) { + /* + * A unicode escape \[0xu] could be before this quote, + * and we don't want this numeric char to be processed as + * part of the escape. 
+ */ + newtemp[j++] = '\\'; + newtemp[j++] = 'x'; + newtemp[j++] = '3'; + } newtemp[j++] = c; } else if (c != '\\') { if (inQuote) newtemp[j++] = '\\'; @@ -1606,12 +1619,16 @@ if (temp[i] == 'Q') { i++; inQuote = true; + beginQuote = true; + continue; } else { newtemp[j++] = c; if (i != pLen) newtemp[j++] = temp[i++]; } } + + beginQuote = false; } patternLength = j; diff -r a49be3718db9 -r 3db172a5433c jdk/test/java/util/regex/RegExTest.java --- a/jdk/test/java/util/regex/RegExTest.java Sat Dec 17 20:07:14 2011 +0000 +++ b/jdk/test/java/util/regex/RegExTest.java Mon Dec 19 14:14:14 2011 -0800 @@ -32,7 +32,7 @@ * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 - * 6350801 6676425 6878475 6919132 6931676 6948903 7014645 7039066 + * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 */ import java.util.regex.*; @@ -50,6 +50,7 @@ private static Random generator = new Random(); private static boolean failure = false; private static int failCount = 0; + private static String firstFailure = null; /** * Main to interpret arguments and run several tests. @@ -133,15 +134,19 @@ hitEndTest(); toMatchResultTest(); surrogatesInClassTest(); + removeQEQuotingTest(); namedGroupCaptureTest(); nonBmpClassComplementTest(); unicodePropertiesTest(); unicodeHexNotationTest(); unicodeClassesTest(); - if (failure) - throw new RuntimeException("Failure in the RE handling."); - else + if (failure) { + throw new + RuntimeException("RegExTest failed, 1st failure: " + + firstFailure); + } else { System.err.println("OKAY: All tests passed."); + } } // Utility functions @@ -215,8 +220,14 @@ String paddedName = paddedNameBuffer.toString(); System.err.println(paddedName + ": " + (failCount==0 ? 
"Passed":"Failed("+failCount+")")); - if (failCount > 0) + if (failCount > 0) { failure = true; + + if (firstFailure == null) { + firstFailure = testName; + } + } + failCount = 0; } @@ -295,6 +306,22 @@ Matcher matcher = pattern.matcher("\ud834\udd22"); if (!matcher.find()) failCount++; + + report("Surrogate pair in Unicode escape"); + } + + // This is for bug6990617 + // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode + // char encoding is only 2 or 3 digits instead of 4 and the first quoted + // char is an octal digit. + private static void removeQEQuotingTest() throws Exception { + Pattern pattern = + Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); + Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); + if (!matcher.find()) + failCount++; + + report("Remove Q/E Quoting"); } // This is for bug 4988891