# HG changeset patch # User jlahoda # Date 1405332178 -7200 # Node ID 7f93cb0536fd72a9b8ab4840a7f1cd0d01fe115a # Parent 39cfdc2dcaf3f195c55398e4e677ab053b07e3d2 8048805: Request to investigate and update lexer error recovery in javac 8046620: Further investigation needed for few error messages for negative unicode tests in langtools regression ws 8048803: javac should report complete character code in the error messages Summary: Improving error reporting and recovery in the lexer Reviewed-by: jjg, mcimadamore Contributed-by: jan.lahoda@oracle.com, sonali.goel@oracle.com diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java --- a/langtools/src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java Wed Jul 05 19:50:06 2017 +0200 +++ b/langtools/src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java Mon Jul 14 12:02:58 2014 +0200 @@ -213,7 +213,7 @@ reader.putChar(true); } skipIllegalUnderscores(); - if ('0' <= reader.ch && reader.ch <= '9') { + if (reader.digit(pos, 10) >= 0) { scanDigits(pos, 10); if (!hexFloatsWork) lexError(pos, "unsupported.cross.fp.lit"); @@ -239,7 +239,7 @@ */ private void scanFraction(int pos) { skipIllegalUnderscores(); - if ('0' <= reader.ch && reader.ch <= '9') { + if (reader.digit(pos, 10) >= 0) { scanDigits(pos, 10); } int sp1 = reader.sp; @@ -250,7 +250,7 @@ reader.putChar(true); } skipIllegalUnderscores(); - if ('0' <= reader.ch && reader.ch <= '9') { + if (reader.digit(pos, 10) >= 0) { scanDigits(pos, 10); return; } @@ -384,11 +384,11 @@ reader.scanChar(); continue; } else { - high = reader.scanSurrogates(); - if (high != 0) { - reader.putChar(high); - isJavaIdentifierPart = Character.isJavaIdentifierPart( - Character.toCodePoint(high, reader.ch)); + int codePoint = reader.peekSurrogates(); + if (codePoint >= 0) { + if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) { + reader.putChar(true); + } } else { isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch); } @@ -530,7 +530,7 @@ break loop; case '.': reader.scanChar(); - if ('0' <= reader.ch && reader.ch <= '9') { + if (reader.digit(pos, 10) >= 0) { reader.putChar('.'); scanFractionAndSuffix(pos); } else if (reader.ch == '.') { @@ -613,11 +613,11 @@ reader.scanChar(); if (reader.ch == '\'') { lexError(pos, "empty.char.lit"); + reader.scanChar(); } else { if (reader.ch == CR || reader.ch == LF) lexError(pos, "illegal.line.end.in.char.lit"); scanLitChar(pos); - char ch2 = reader.ch; if (reader.ch == '\'') { reader.scanChar(); tk = TokenKind.CHARLITERAL; @@ -642,29 +642,39 @@ scanOperator(); } else { boolean isJavaIdentifierStart; + int codePoint = -1; if (reader.ch < '\u0080') { // all ASCII range chars already handled, above isJavaIdentifierStart = false; } else { - char high = reader.scanSurrogates(); - if (high != 0) { - reader.putChar(high); - - isJavaIdentifierStart = Character.isJavaIdentifierStart( - Character.toCodePoint(high, reader.ch)); + codePoint = reader.peekSurrogates(); + if (codePoint >= 0) { + if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) { + reader.putChar(true); + } } else { isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch); } } if (isJavaIdentifierStart) { scanIdent(); + } else if (reader.digit(pos, 10) >= 0) { + scanNumber(pos, 10); } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5 tk = TokenKind.EOF; pos = reader.buflen; } else { - String arg = (32 < reader.ch && reader.ch < 127) ? - String.format("%s", reader.ch) : - String.format("\\u%04x", (int)reader.ch); + String arg; + + if (codePoint >= 0) { + char high = reader.ch; + reader.scanChar(); + arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch); + } else { + arg = (32 < reader.ch && reader.ch < 127) ? + String.format("%s", reader.ch) : + String.format("\\u%04x", (int)reader.ch); + } lexError(pos, "illegal.char", arg); reader.scanChar(); } diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java --- a/langtools/src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java Wed Jul 05 19:50:06 2017 +0200 +++ b/langtools/src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java Mon Jul 14 12:02:58 2014 +0200 @@ -197,24 +197,28 @@ } /** Scan surrogate pairs. If 'ch' is a high surrogate and - * the next character is a low surrogate, then put the low - * surrogate in 'ch', and return the high surrogate. - * otherwise, just return 0. + * the next character is a low surrogate, returns the code point + * constructed from these surrogates. Otherwise, returns -1. + * This method will not consume any of the characters. */ - protected char scanSurrogates() { + protected int peekSurrogates() { if (surrogatesSupported && Character.isHighSurrogate(ch)) { char high = ch; + int prevBP = bp; scanChar(); - if (Character.isLowSurrogate(ch)) { - return high; - } + char low = ch; ch = high; + bp = prevBP; + + if (Character.isLowSurrogate(low)) { + return Character.toCodePoint(high, low); + } } - return 0; + return -1; } /** Convert an ASCII digit from its base (8, 10, or 16) @@ -222,9 +226,14 @@ */ protected int digit(int pos, int base) { char c = ch; - int result = Character.digit(c, base); + if ('0' <= c && c <= '9') + return Character.digit(c, base); //a fast common case + int codePoint = peekSurrogates(); + int result = codePoint >= 0 ? Character.digit(codePoint, base) : Character.digit(c, base); if (result >= 0 && c > 0x7f) { log.error(pos + 1, "illegal.nonascii.digit"); + if (codePoint >= 0) + scanChar(); ch = "0123456789abcdef".charAt(result); } return result; diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/test/tools/javac/diags/examples/EmptyCharLiteral.java --- a/langtools/test/tools/javac/diags/examples/EmptyCharLiteral.java Wed Jul 05 19:50:06 2017 +0200 +++ b/langtools/test/tools/javac/diags/examples/EmptyCharLiteral.java Mon Jul 14 12:02:58 2014 +0200 @@ -22,7 +22,6 @@ */ // key: compiler.err.empty.char.lit -// key: compiler.err.unclosed.char.lit class X { char c = ''; diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/test/tools/javac/unicode/NonasciiDigit.java --- a/langtools/test/tools/javac/unicode/NonasciiDigit.java Wed Jul 05 19:50:06 2017 +0200 +++ b/langtools/test/tools/javac/unicode/NonasciiDigit.java Mon Jul 14 12:02:58 2014 +0200 @@ -1,6 +1,6 @@ /* * @test /nodynamiccopyright/ - * @bug 4707960 6183529 + * @bug 4707960 6183529 8046620 * @summary javac accepts unicode digits - sometimes crashing * @author gafter * @@ -8,7 +8,16 @@ */ public class NonasciiDigit { public static void main(String[] args) { + // error: only ASCII allowed in constants + int i1 = \uff11; + int i2 = 1\uff11; + int i3 = \ud835\udfff; // error: floating literals use ascii only - float f = 0.\uff11; + double d1 = \uff11.0; + double d2 = 0.\uff11; + double d3 = 0x0P\uff11; + double d4 = 0E\uff11; + double d5 = .\uff11; + double d6 = \ud835\udfff.0; } } diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/test/tools/javac/unicode/NonasciiDigit.out --- a/langtools/test/tools/javac/unicode/NonasciiDigit.out Wed Jul 05 19:50:06 2017 +0200 +++ b/langtools/test/tools/javac/unicode/NonasciiDigit.out Mon Jul 14 12:02:58 2014 +0200 @@ -1,2 +1,10 @@ -NonasciiDigit.java:12:26: compiler.err.illegal.char: \uff11 -1 error +NonasciiDigit.java:12:24: compiler.err.illegal.nonascii.digit +NonasciiDigit.java:13:19: compiler.err.illegal.nonascii.digit +NonasciiDigit.java:14:24: compiler.err.illegal.nonascii.digit +NonasciiDigit.java:16:27: compiler.err.illegal.nonascii.digit +NonasciiDigit.java:17:22: compiler.err.illegal.nonascii.digit +NonasciiDigit.java:18:22: compiler.err.illegal.nonascii.digit +NonasciiDigit.java:19:22: compiler.err.illegal.nonascii.digit +NonasciiDigit.java:20:22: compiler.err.illegal.nonascii.digit +NonasciiDigit.java:21:27: compiler.err.illegal.nonascii.digit +9 errors diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/test/tools/javac/unicode/NonasciiDigit2.java --- a/langtools/test/tools/javac/unicode/NonasciiDigit2.java Wed Jul 05 19:50:06 2017 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -/* - * @test /nodynamiccopyright/ - * @bug 4707960 6183529 - * @summary javac accepts unicode digits - sometimes crashing - * @author gafter - * - * @compile/fail/ref=NonasciiDigit2.out -XDrawDiagnostics NonasciiDigit2.java - */ -public class NonasciiDigit2 { - public static void main(String[] args) { - // error: only ASCII allowed in constants - int i = 1\uff11; - } -} diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/test/tools/javac/unicode/NonasciiDigit2.out --- a/langtools/test/tools/javac/unicode/NonasciiDigit2.out Wed Jul 05 19:50:06 2017 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -NonasciiDigit2.java:12:18: compiler.err.illegal.nonascii.digit -1 error diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/test/tools/javac/unicode/SupplementaryJavaID2.out --- a/langtools/test/tools/javac/unicode/SupplementaryJavaID2.out Wed Jul 05 19:50:06 2017 +0200 +++ b/langtools/test/tools/javac/unicode/SupplementaryJavaID2.out Mon Jul 14 12:02:58 2014 +0200 @@ -1,3 +1,4 @@ SupplementaryJavaID2.java:12:14: compiler.err.illegal.char: \ud801 +SupplementaryJavaID2.java:12:20: compiler.err.illegal.char: \ud801 SupplementaryJavaID2.java:12:24: compiler.err.expected: token.identifier -2 errors +3 errors diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/test/tools/javac/unicode/SupplementaryJavaID3.out --- a/langtools/test/tools/javac/unicode/SupplementaryJavaID3.out Wed Jul 05 19:50:06 2017 +0200 +++ b/langtools/test/tools/javac/unicode/SupplementaryJavaID3.out Mon Jul 14 12:02:58 2014 +0200 @@ -1,2 +1,3 @@ +SupplementaryJavaID3.java:12:17: compiler.err.illegal.char: \ud801 SupplementaryJavaID3.java:12:23: compiler.err.illegal.char: \ud801 -1 error +2 errors diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/test/tools/javac/unicode/SupplementaryJavaID4.java --- a/langtools/test/tools/javac/unicode/SupplementaryJavaID4.java Wed Jul 05 19:50:06 2017 +0200 +++ b/langtools/test/tools/javac/unicode/SupplementaryJavaID4.java Mon Jul 14 12:02:58 2014 +0200 @@ -1,35 +1,12 @@ /* - * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -/* - * @test - * @bug 4914724 + * @test /nodynamiccopyright/ + * @bug 4914724 8048803 * @summary Ensure that a supplementary character that cannot be the start of a Java * identifier causes a compilation failure, if it is used as the start of an * identifier * @author Naoto Sato * - * @compile/fail SupplementaryJavaID4.java + * @compile/fail/ref=SupplementaryJavaID4.out -XDrawDiagnostics SupplementaryJavaID4.java */ public class SupplementaryJavaID4 { diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/test/tools/javac/unicode/SupplementaryJavaID4.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/langtools/test/tools/javac/unicode/SupplementaryJavaID4.out Mon Jul 14 12:02:58 2014 +0200 @@ -0,0 +1,2 @@ +SupplementaryJavaID4.java:14:14: compiler.err.illegal.char: \ud834\udd7b +1 error diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/test/tools/javac/unicode/SupplementaryJavaID5.java --- a/langtools/test/tools/javac/unicode/SupplementaryJavaID5.java Wed Jul 05 19:50:06 2017 +0200 +++ b/langtools/test/tools/javac/unicode/SupplementaryJavaID5.java Mon Jul 14 12:02:58 2014 +0200 @@ -1,35 +1,12 @@ /* - * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -/* - * @test - * @bug 4914724 + * @test /nodynamiccopyright/ + * @bug 4914724 8048803 * @summary Ensure that a supplementary character that cannot be the part of a Java * identifier causes a compilation failure, if it is used as the part of an * identifier * @author Naoto Sato * - * @compile/fail SupplementaryJavaID5.java + * @compile/fail/ref=SupplementaryJavaID5.out -XDrawDiagnostics SupplementaryJavaID5.java */ public class SupplementaryJavaID5 { diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/test/tools/javac/unicode/SupplementaryJavaID5.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/langtools/test/tools/javac/unicode/SupplementaryJavaID5.out Mon Jul 14 12:02:58 2014 +0200 @@ -0,0 +1,2 @@ +SupplementaryJavaID5.java:14:17: compiler.err.illegal.char: \ud834\udd00 +1 error diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/test/tools/javac/unicode/TripleQuote.java --- a/langtools/test/tools/javac/unicode/TripleQuote.java Wed Jul 05 19:50:06 2017 +0200 +++ b/langtools/test/tools/javac/unicode/TripleQuote.java Mon Jul 14 12:02:58 2014 +0200 @@ -1,6 +1,6 @@ /* * @test /nodynamiccopyright/ - * @bug 1265387 + * @bug 1265387 8048805 * @summary ''' and '\u0027' are not legal char literals. * @author turnidge * diff -r 39cfdc2dcaf3 -r 7f93cb0536fd langtools/test/tools/javac/unicode/TripleQuote.out --- a/langtools/test/tools/javac/unicode/TripleQuote.out Wed Jul 05 19:50:06 2017 +0200 +++ b/langtools/test/tools/javac/unicode/TripleQuote.out Mon Jul 14 12:02:58 2014 +0200 @@ -1,7 +1,5 @@ TripleQuote.java:12:14: compiler.err.empty.char.lit -TripleQuote.java:12:20: compiler.err.empty.char.lit TripleQuote.java:12:21: compiler.err.unclosed.char.lit TripleQuote.java:13:14: compiler.err.empty.char.lit -TripleQuote.java:13:15: compiler.err.empty.char.lit TripleQuote.java:13:16: compiler.err.unclosed.char.lit -6 errors +4 errors