6635133: Exception thrown when using a Unicode escape
Summary: Update the regex engine to handle Unicode escapes correctly in character classes
Reviewed-by: okutsu
--- a/jdk/src/share/classes/java/util/regex/Pattern.java Fri Apr 11 10:26:26 2008 -0700
+++ b/jdk/src/share/classes/java/util/regex/Pattern.java Mon Apr 14 21:45:45 2008 -0700
@@ -2844,7 +2844,15 @@
/**
* Utility method for parsing unicode escape sequences.
*/
- private int u() {
+ private int cursor() { // accessor: returns the current value of the cursor field
+ return cursor;
+ }
+
+ private void setcursor(int pos) { // mutator: overwrites the cursor field with pos (no bounds check here)
+ cursor = pos;
+ }
+
+ private int uxxxx() {
int n = 0;
for (int i = 0; i < 4; i++) {
int ch = read();
@@ -2856,6 +2864,20 @@
return n;
}
+ private int u() { // returns the value from uxxxx(), or a combined code point when a high surrogate is directly followed by a backslash-u low-surrogate escape
+ int n = uxxxx(); // value of the first escape
+ if (Character.isHighSurrogate((char)n)) {
+ int cur = cursor(); // save the cursor so the look-ahead below can be undone
+ if (read() == '\\' && read() == 'u') { // look ahead for a second escape introducer (backslash followed by 'u')
+ int n2 = uxxxx(); // value of the second escape
+ if (Character.isLowSurrogate((char)n2))
+ return Character.toCodePoint((char)n, (char)n2); // valid high/low pair: return the single combined code point
+ }
+ setcursor(cur); // no low surrogate followed: restore the saved cursor and fall through to return n alone
+ }
+ return n;
+ }
+
//
// Utility methods for code point support
//