8048805: Request to investigate and update lexer error recovery in javac
author jlahoda
Mon, 14 Jul 2014 12:02:58 +0200
changeset 25600 7f93cb0536fd
parent 25456 39cfdc2dcaf3
child 25601 acbea3f287fe
8048805: Request to investigate and update lexer error recovery in javac
8046620: Further investigation needed for few error messages for negative unicode tests in langtools regression ws
8048803: javac should report complete character code in the error messages
Summary: Improving error reporting and recovery in the lexer
Reviewed-by: jjg, mcimadamore
Contributed-by: jan.lahoda@oracle.com, sonali.goel@oracle.com
langtools/src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java
langtools/src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java
langtools/test/tools/javac/diags/examples/EmptyCharLiteral.java
langtools/test/tools/javac/unicode/NonasciiDigit.java
langtools/test/tools/javac/unicode/NonasciiDigit.out
langtools/test/tools/javac/unicode/NonasciiDigit2.java
langtools/test/tools/javac/unicode/NonasciiDigit2.out
langtools/test/tools/javac/unicode/SupplementaryJavaID2.out
langtools/test/tools/javac/unicode/SupplementaryJavaID3.out
langtools/test/tools/javac/unicode/SupplementaryJavaID4.java
langtools/test/tools/javac/unicode/SupplementaryJavaID4.out
langtools/test/tools/javac/unicode/SupplementaryJavaID5.java
langtools/test/tools/javac/unicode/SupplementaryJavaID5.out
langtools/test/tools/javac/unicode/TripleQuote.java
langtools/test/tools/javac/unicode/TripleQuote.out
--- a/langtools/src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java	Wed Jul 05 19:50:06 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java	Mon Jul 14 12:02:58 2014 +0200
@@ -213,7 +213,7 @@
                 reader.putChar(true);
             }
             skipIllegalUnderscores();
-            if ('0' <= reader.ch && reader.ch <= '9') {
+            if (reader.digit(pos, 10) >= 0) {
                 scanDigits(pos, 10);
                 if (!hexFloatsWork)
                     lexError(pos, "unsupported.cross.fp.lit");
@@ -239,7 +239,7 @@
      */
     private void scanFraction(int pos) {
         skipIllegalUnderscores();
-        if ('0' <= reader.ch && reader.ch <= '9') {
+        if (reader.digit(pos, 10) >= 0) {
             scanDigits(pos, 10);
         }
         int sp1 = reader.sp;
@@ -250,7 +250,7 @@
                 reader.putChar(true);
             }
             skipIllegalUnderscores();
-            if ('0' <= reader.ch && reader.ch <= '9') {
+            if (reader.digit(pos, 10) >= 0) {
                 scanDigits(pos, 10);
                 return;
             }
@@ -384,11 +384,11 @@
                         reader.scanChar();
                         continue;
                     } else {
-                        high = reader.scanSurrogates();
-                        if (high != 0) {
-                            reader.putChar(high);
-                            isJavaIdentifierPart = Character.isJavaIdentifierPart(
-                                Character.toCodePoint(high, reader.ch));
+                        int codePoint = reader.peekSurrogates();
+                        if (codePoint >= 0) {
+                            if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
+                                reader.putChar(true);
+                            }
                         } else {
                             isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
                         }
@@ -530,7 +530,7 @@
                     break loop;
                 case '.':
                     reader.scanChar();
-                    if ('0' <= reader.ch && reader.ch <= '9') {
+                    if (reader.digit(pos, 10) >= 0) {
                         reader.putChar('.');
                         scanFractionAndSuffix(pos);
                     } else if (reader.ch == '.') {
@@ -613,11 +613,11 @@
                     reader.scanChar();
                     if (reader.ch == '\'') {
                         lexError(pos, "empty.char.lit");
+                        reader.scanChar();
                     } else {
                         if (reader.ch == CR || reader.ch == LF)
                             lexError(pos, "illegal.line.end.in.char.lit");
                         scanLitChar(pos);
-                        char ch2 = reader.ch;
                         if (reader.ch == '\'') {
                             reader.scanChar();
                             tk = TokenKind.CHARLITERAL;
@@ -642,29 +642,39 @@
                         scanOperator();
                     } else {
                         boolean isJavaIdentifierStart;
+                        int codePoint = -1;
                         if (reader.ch < '\u0080') {
                             // all ASCII range chars already handled, above
                             isJavaIdentifierStart = false;
                         } else {
-                            char high = reader.scanSurrogates();
-                            if (high != 0) {
-                                reader.putChar(high);
-
-                                isJavaIdentifierStart = Character.isJavaIdentifierStart(
-                                    Character.toCodePoint(high, reader.ch));
+                            codePoint = reader.peekSurrogates();
+                            if (codePoint >= 0) {
+                                if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
+                                    reader.putChar(true);
+                                }
                             } else {
                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
                             }
                         }
                         if (isJavaIdentifierStart) {
                             scanIdent();
+                        } else if (reader.digit(pos, 10) >= 0) {
+                            scanNumber(pos, 10);
                         } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
                             tk = TokenKind.EOF;
                             pos = reader.buflen;
                         } else {
-                            String arg = (32 < reader.ch && reader.ch < 127) ?
-                                            String.format("%s", reader.ch) :
-                                            String.format("\\u%04x", (int)reader.ch);
+                            String arg;
+
+                            if (codePoint >= 0) {
+                                char high = reader.ch;
+                                reader.scanChar();
+                                arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
+                            } else {
+                                arg = (32 < reader.ch && reader.ch < 127) ?
+                                                String.format("%s", reader.ch) :
+                                                String.format("\\u%04x", (int)reader.ch);
+                            }
                             lexError(pos, "illegal.char", arg);
                             reader.scanChar();
                         }
--- a/langtools/src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java	Wed Jul 05 19:50:06 2017 +0200
+++ b/langtools/src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java	Mon Jul 14 12:02:58 2014 +0200
@@ -197,24 +197,28 @@
     }
 
     /** Scan surrogate pairs.  If 'ch' is a high surrogate and
-     *  the next character is a low surrogate, then put the low
-     *  surrogate in 'ch', and return the high surrogate.
-     *  otherwise, just return 0.
+     *  the next character is a low surrogate, returns the code point
+     *  constructed from these surrogates. Otherwise, returns -1.
+     *  This method will not consume any of the characters.
      */
-    protected char scanSurrogates() {
+    protected int peekSurrogates() {
         if (surrogatesSupported && Character.isHighSurrogate(ch)) {
             char high = ch;
+            int prevBP = bp;
 
             scanChar();
 
-            if (Character.isLowSurrogate(ch)) {
-                return high;
-            }
+            char low = ch;
 
             ch = high;
+            bp = prevBP;
+
+            if (Character.isLowSurrogate(low)) {
+                return Character.toCodePoint(high, low);
+            }
         }
 
-        return 0;
+        return -1;
     }
 
     /** Convert an ASCII digit from its base (8, 10, or 16)
@@ -222,9 +226,14 @@
      */
     protected int digit(int pos, int base) {
         char c = ch;
-        int result = Character.digit(c, base);
+        if ('0' <= c && c <= '9')
+            return Character.digit(c, base); //a fast common case
+        int codePoint = peekSurrogates();
+        int result = codePoint >= 0 ? Character.digit(codePoint, base) : Character.digit(c, base);
         if (result >= 0 && c > 0x7f) {
             log.error(pos + 1, "illegal.nonascii.digit");
+            if (codePoint >= 0)
+                scanChar();
             ch = "0123456789abcdef".charAt(result);
         }
         return result;
--- a/langtools/test/tools/javac/diags/examples/EmptyCharLiteral.java	Wed Jul 05 19:50:06 2017 +0200
+++ b/langtools/test/tools/javac/diags/examples/EmptyCharLiteral.java	Mon Jul 14 12:02:58 2014 +0200
@@ -22,7 +22,6 @@
  */
 
 // key: compiler.err.empty.char.lit
-// key: compiler.err.unclosed.char.lit
 
 class X {
     char c = '';
--- a/langtools/test/tools/javac/unicode/NonasciiDigit.java	Wed Jul 05 19:50:06 2017 +0200
+++ b/langtools/test/tools/javac/unicode/NonasciiDigit.java	Mon Jul 14 12:02:58 2014 +0200
@@ -1,6 +1,6 @@
 /*
  * @test /nodynamiccopyright/
- * @bug 4707960 6183529
+ * @bug 4707960 6183529 8046620
  * @summary javac accepts unicode digits - sometimes crashing
  * @author gafter
  *
@@ -8,7 +8,16 @@
  */
 public class NonasciiDigit {
     public static void main(String[] args) {
+        // error: only ASCII allowed in constants
+        int i1 = \uff11;
+        int i2 = 1\uff11;
+        int i3 = \ud835\udfff;
         // error: floating literals use ascii only
-        float f = 0.\uff11;
+        double d1 = \uff11.0;
+        double d2 = 0.\uff11;
+        double d3 = 0x0P\uff11;
+        double d4 = 0E\uff11;
+        double d5 = .\uff11;
+        double d6 = \ud835\udfff.0;
     }
 }
--- a/langtools/test/tools/javac/unicode/NonasciiDigit.out	Wed Jul 05 19:50:06 2017 +0200
+++ b/langtools/test/tools/javac/unicode/NonasciiDigit.out	Mon Jul 14 12:02:58 2014 +0200
@@ -1,2 +1,10 @@
-NonasciiDigit.java:12:26: compiler.err.illegal.char: \uff11
-1 error
+NonasciiDigit.java:12:24: compiler.err.illegal.nonascii.digit
+NonasciiDigit.java:13:19: compiler.err.illegal.nonascii.digit
+NonasciiDigit.java:14:24: compiler.err.illegal.nonascii.digit
+NonasciiDigit.java:16:27: compiler.err.illegal.nonascii.digit
+NonasciiDigit.java:17:22: compiler.err.illegal.nonascii.digit
+NonasciiDigit.java:18:22: compiler.err.illegal.nonascii.digit
+NonasciiDigit.java:19:22: compiler.err.illegal.nonascii.digit
+NonasciiDigit.java:20:22: compiler.err.illegal.nonascii.digit
+NonasciiDigit.java:21:27: compiler.err.illegal.nonascii.digit
+9 errors
--- a/langtools/test/tools/javac/unicode/NonasciiDigit2.java	Wed Jul 05 19:50:06 2017 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-/*
- * @test /nodynamiccopyright/
- * @bug 4707960 6183529
- * @summary javac accepts unicode digits - sometimes crashing
- * @author gafter
- *
- * @compile/fail/ref=NonasciiDigit2.out -XDrawDiagnostics  NonasciiDigit2.java
- */
-public class NonasciiDigit2 {
-    public static void main(String[] args) {
-        // error: only ASCII allowed in constants
-        int i = 1\uff11;
-    }
-}
--- a/langtools/test/tools/javac/unicode/NonasciiDigit2.out	Wed Jul 05 19:50:06 2017 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-NonasciiDigit2.java:12:18: compiler.err.illegal.nonascii.digit
-1 error
--- a/langtools/test/tools/javac/unicode/SupplementaryJavaID2.out	Wed Jul 05 19:50:06 2017 +0200
+++ b/langtools/test/tools/javac/unicode/SupplementaryJavaID2.out	Mon Jul 14 12:02:58 2014 +0200
@@ -1,3 +1,4 @@
 SupplementaryJavaID2.java:12:14: compiler.err.illegal.char: \ud801
+SupplementaryJavaID2.java:12:20: compiler.err.illegal.char: \ud801
 SupplementaryJavaID2.java:12:24: compiler.err.expected: token.identifier
-2 errors
+3 errors
--- a/langtools/test/tools/javac/unicode/SupplementaryJavaID3.out	Wed Jul 05 19:50:06 2017 +0200
+++ b/langtools/test/tools/javac/unicode/SupplementaryJavaID3.out	Mon Jul 14 12:02:58 2014 +0200
@@ -1,2 +1,3 @@
+SupplementaryJavaID3.java:12:17: compiler.err.illegal.char: \ud801
 SupplementaryJavaID3.java:12:23: compiler.err.illegal.char: \ud801
-1 error
+2 errors
--- a/langtools/test/tools/javac/unicode/SupplementaryJavaID4.java	Wed Jul 05 19:50:06 2017 +0200
+++ b/langtools/test/tools/javac/unicode/SupplementaryJavaID4.java	Mon Jul 14 12:02:58 2014 +0200
@@ -1,35 +1,12 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/*
- * @test
- * @bug 4914724
+ * @test /nodynamiccopyright/
+ * @bug 4914724 8048803
  * @summary Ensure that a supplementary character that cannot be the start of a Java
  *          identifier causes a compilation failure, if it is used as the start of an
  *          identifier
  * @author Naoto Sato
  *
- * @compile/fail SupplementaryJavaID4.java
+ * @compile/fail/ref=SupplementaryJavaID4.out -XDrawDiagnostics  SupplementaryJavaID4.java
  */
 
 public class SupplementaryJavaID4 {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/langtools/test/tools/javac/unicode/SupplementaryJavaID4.out	Mon Jul 14 12:02:58 2014 +0200
@@ -0,0 +1,2 @@
+SupplementaryJavaID4.java:14:14: compiler.err.illegal.char: \ud834\udd7b
+1 error
--- a/langtools/test/tools/javac/unicode/SupplementaryJavaID5.java	Wed Jul 05 19:50:06 2017 +0200
+++ b/langtools/test/tools/javac/unicode/SupplementaryJavaID5.java	Mon Jul 14 12:02:58 2014 +0200
@@ -1,35 +1,12 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/*
- * @test
- * @bug 4914724
+ * @test /nodynamiccopyright/
+ * @bug 4914724 8048803
  * @summary Ensure that a supplementary character that cannot be the part of a Java
  *          identifier causes a compilation failure, if it is used as the part of an
  *          identifier
  * @author Naoto Sato
  *
- * @compile/fail SupplementaryJavaID5.java
+ * @compile/fail/ref=SupplementaryJavaID5.out -XDrawDiagnostics  SupplementaryJavaID5.java
  */
 
 public class SupplementaryJavaID5 {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/langtools/test/tools/javac/unicode/SupplementaryJavaID5.out	Mon Jul 14 12:02:58 2014 +0200
@@ -0,0 +1,2 @@
+SupplementaryJavaID5.java:14:17: compiler.err.illegal.char: \ud834\udd00
+1 error
--- a/langtools/test/tools/javac/unicode/TripleQuote.java	Wed Jul 05 19:50:06 2017 +0200
+++ b/langtools/test/tools/javac/unicode/TripleQuote.java	Mon Jul 14 12:02:58 2014 +0200
@@ -1,6 +1,6 @@
 /*
  * @test /nodynamiccopyright/
- * @bug 1265387
+ * @bug 1265387 8048805
  * @summary ''' and '\u0027' are not legal char literals.
  * @author turnidge
  *
--- a/langtools/test/tools/javac/unicode/TripleQuote.out	Wed Jul 05 19:50:06 2017 +0200
+++ b/langtools/test/tools/javac/unicode/TripleQuote.out	Mon Jul 14 12:02:58 2014 +0200
@@ -1,7 +1,5 @@
 TripleQuote.java:12:14: compiler.err.empty.char.lit
-TripleQuote.java:12:20: compiler.err.empty.char.lit
 TripleQuote.java:12:21: compiler.err.unclosed.char.lit
 TripleQuote.java:13:14: compiler.err.empty.char.lit
-TripleQuote.java:13:15: compiler.err.empty.char.lit
 TripleQuote.java:13:16: compiler.err.unclosed.char.lit
-6 errors
+4 errors