/*
* Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.tools.javac.parser;
import com.sun.tools.javac.code.Lint;
import com.sun.tools.javac.code.Lint.LintCategory;
import com.sun.tools.javac.code.Preview;
import com.sun.tools.javac.code.Source;
import com.sun.tools.javac.code.Source.Feature;
import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
import com.sun.tools.javac.resources.CompilerProperties.Errors;
import com.sun.tools.javac.resources.CompilerProperties.Warnings;
import com.sun.tools.javac.util.*;
import com.sun.tools.javac.util.JCDiagnostic.*;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.nio.CharBuffer;
import java.util.HashSet;
import java.util.Set;
import static com.sun.tools.javac.parser.Tokens.*;
import static com.sun.tools.javac.util.LayoutCharacters.*;
/** The lexical analyzer maps an input stream consisting of
* ASCII characters and Unicode escapes into a token sequence.
*
* <p><b>This is NOT part of any supported API.
* If you write code that depends on this, you do so at your own risk.
* This code and its internal interfaces are subject to change or
* deletion without notice.</b>
*/
public class JavaTokenizer {
private static final boolean scannerDebug = false;
/** The source language setting.
*/
private Source source;
/** The preview language setting. */
private Preview preview;
/** The log to be used for error reporting.
*/
private final Log log;
/** The token factory. */
private final Tokens tokens;
/** The token kind, set by nextToken().
*/
protected TokenKind tk;
/** The token's radix, set by nextToken().
*/
protected int radix;
/** The token's name, set by nextToken().
*/
protected Name name;
/** The position where a lexical error occurred;
*/
protected int errPos = Position.NOPOS;
/** The Unicode reader (low-level stream reader).
*/
protected UnicodeReader reader;
/** Should the string stripped of indentation?
*/
protected boolean shouldStripIndent;
/** Should the string's escapes be translated?
*/
protected boolean shouldTranslateEscapes;
/** Has the string broken escapes?
*/
protected boolean hasBrokenEscapes;
protected ScannerFactory fac;
// The set of lint options currently in effect. It is initialized
// from the context, and then is set/reset as needed by Attr as it
// visits all the various parts of the trees during attribution.
protected Lint lint;
private static final boolean hexFloatsWork = hexFloatsWork();
private static boolean hexFloatsWork() {
try {
Float.valueOf("0x1.0p1");
return true;
} catch (NumberFormatException ex) {
return false;
}
}
/**
* Create a scanner from the input array. This method might
* modify the array. To avoid copying the input array, ensure
* that {@code inputLength < input.length} or
* {@code input[input.length -1]} is a white space character.
*
* @param fac the factory which created this Scanner
* @param buf the input, might be modified
* Must be positive and less than or equal to input.length.
*/
protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
this(fac, new UnicodeReader(fac, buf));
}
protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
this(fac, new UnicodeReader(fac, buf, inputLength));
}
protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
this.fac = fac;
this.log = fac.log;
this.tokens = fac.tokens;
this.source = fac.source;
this.preview = fac.preview;
this.reader = reader;
this.lint = fac.lint;
}
protected void checkSourceLevel(int pos, Feature feature) {
if (preview.isPreview(feature) && !preview.isEnabled()) {
//preview feature without --preview flag, error
lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
} else if (!feature.allowedInSource(source)) {
//incompatible source level, error
lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name));
} else if (preview.isPreview(feature)) {
//use of preview feature, warn
preview.warnPreview(pos, feature);
}
}
/** Report an error at the given position using the provided arguments.
*/
protected void lexError(int pos, JCDiagnostic.Error key) {
log.error(pos, key);
tk = TokenKind.ERROR;
errPos = pos;
}
protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
log.error(flags, pos, key);
tk = TokenKind.ERROR;
errPos = pos;
}
protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) {
DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ;
log.warning(lc, dp, key);
}
/** Read next character in character or string literal and copy into sbuf.
*/
private void scanLitChar(int pos) {
if (reader.ch == '\\') {
if (reader.peekChar() == '\\' && !reader.isUnicode()) {
reader.skipChar();
reader.putChar('\\', true);
} else {
reader.scanChar();
switch (reader.ch) {
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
char leadch = reader.ch;
int oct = reader.digit(pos, 8);
reader.scanChar();
if ('0' <= reader.ch && reader.ch <= '7') {
oct = oct * 8 + reader.digit(pos, 8);
reader.scanChar();
if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
oct = oct * 8 + reader.digit(pos, 8);
reader.scanChar();
}
}
reader.putChar((char)oct);
break;
case 'b':
reader.putChar('\b', true); break;
case 't':
reader.putChar('\t', true); break;
case 'n':
reader.putChar('\n', true); break;
case 'f':
reader.putChar('\f', true); break;
case 'r':
reader.putChar('\r', true); break;
case '\'':
reader.putChar('\'', true); break;
case '\"':
reader.putChar('\"', true); break;
case '\\':
reader.putChar('\\', true); break;
default:
lexError(reader.bp, Errors.IllegalEscChar);
}
}
} else if (reader.bp != reader.buflen) {
reader.putChar(true);
}
}
/** Read next character in character or string literal and copy into sbuf
* without translating escapes. Used by text blocks to preflight verify
* escapes sequences.
*/
private void scanLitCharRaw(int pos) {
if (reader.ch == '\\') {
if (reader.peekChar() == '\\' && !reader.isUnicode()) {
reader.skipChar();
reader.putChar('\\', false);
reader.putChar('\\', true);
} else {
reader.putChar('\\', true);
switch (reader.ch) {
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
char leadch = reader.ch;
reader.putChar(true);
if ('0' <= reader.ch && reader.ch <= '7') {
reader.putChar(true);
if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
reader.putChar(true);
}
}
break;
// Effectively list of valid escape sequences.
case 'b':
case 't':
case 'n':
case 'f':
case 'r':
case '\'':
case '\"':
case '\\':
reader.putChar(true); break;
default:
hasBrokenEscapes = true;
lexError(reader.bp, Errors.IllegalEscChar);
}
}
} else if (reader.bp != reader.buflen) {
reader.putChar(true);
}
}
/** Interim access to String methods used to support text blocks.
* Required to handle bootstrapping with pre-text block jdks.
* Could be reworked in the 'next' jdk.
*/
static class TextBlockSupport {
/** Reflection method to remove incidental indentation.
*/
private static final Method stripIndent;
/** Reflection method to translate escape sequences.
*/
private static final Method translateEscapes;
/** true if stripIndent and translateEscapes are available in the bootstrap jdk.
*/
private static final boolean hasSupport;
/** Get a string method via refection or null if not available.
*/
private static Method getStringMethodOrNull(String name) {
try {
return String.class.getMethod(name);
} catch (Exception ex) {
// Method not available, return null.
}
return null;
}
static {
// Get text block string methods.
stripIndent = getStringMethodOrNull("stripIndent");
translateEscapes = getStringMethodOrNull("translateEscapes");
// true if stripIndent and translateEscapes are available in the bootstrap jdk.
hasSupport = stripIndent != null && translateEscapes != null;
}
/** Return true if stripIndent and translateEscapes are available in the bootstrap jdk.
*/
static boolean hasSupport() {
return hasSupport;
}
/** Return the leading whitespace count (indentation) of the line.
*/
private static int indent(String line) {
return line.length() - line.stripLeading().length();
}
enum WhitespaceChecks {
INCONSISTENT,
TRAILING
};
/** Check that the use of white space in content is not problematic.
*/
static Set<WhitespaceChecks> checkWhitespace(String string) {
// Start with empty result set.
Set<WhitespaceChecks> checks = new HashSet<>();
// No need to check empty strings.
if (string.isEmpty()) {
return checks;
}
// Maximum common indentation.
int outdent = 0;
// No need to check indentation if opting out (last line is empty.)
char lastChar = string.charAt(string.length() - 1);
boolean optOut = lastChar == '\n' || lastChar == '\r';
// Split string based at line terminators.
String[] lines = string.split("\\R");
int length = lines.length;
// Extract last line.
String lastLine = lines[length - 1];
if (!optOut) {
// Prime with the last line indentation (may be blank.)
outdent = indent(lastLine);
for (String line : lines) {
// Blanks lines have no influence (last line accounted for.)
if (!line.isBlank()) {
outdent = Integer.min(outdent, indent(line));
if (outdent == 0) {
break;
}
}
}
}
// Last line is representative.
String start = lastLine.substring(0, outdent);
for (String line : lines) {
// Fail if a line does not have the same indentation.
if (!line.isBlank() && !line.startsWith(start)) {
// Mix of different white space
checks.add(WhitespaceChecks.INCONSISTENT);
}
// Line has content even after indent is removed.
if (outdent < line.length()) {
// Is the last character a white space.
lastChar = line.charAt(line.length() - 1);
if (Character.isWhitespace(lastChar)) {
// Has trailing white space.
checks.add(WhitespaceChecks.TRAILING);
}
}
}
return checks;
}
/** Invoke String::stripIndent through reflection.
*/
static String stripIndent(String string) {
try {
string = (String)stripIndent.invoke(string);
} catch (InvocationTargetException | IllegalAccessException ex) {
throw new RuntimeException(ex);
}
return string;
}
/** Invoke String::translateEscapes through reflection.
*/
static String translateEscapes(String string) {
try {
string = (String)translateEscapes.invoke(string);
} catch (InvocationTargetException | IllegalAccessException ex) {
throw new RuntimeException(ex);
}
return string;
}
}
/** Test for EOLN.
*/
private boolean isEOLN() {
return reader.ch == LF || reader.ch == CR;
}
/** Test for CRLF.
*/
private boolean isCRLF() {
return reader.ch == CR && reader.peekChar() == LF;
}
/** Count and skip repeated occurances of the specified character.
*/
private int countChar(char ch, int max) {
int count = 0;
for ( ; count < max && reader.bp < reader.buflen && reader.ch == ch; count++) {
reader.scanChar();
}
return count;
}
/** Scan a string literal or text block.
*/
private void scanString(int pos) {
// Clear flags.
shouldStripIndent = false;
shouldTranslateEscapes = false;
hasBrokenEscapes = false;
// Check if text block string methods are present.
boolean hasTextBlockSupport = TextBlockSupport.hasSupport();
// Track the end of first line for error recovery.
int firstEOLN = -1;
// Attempt to scan for up to 3 double quotes.
int openCount = countChar('\"', 3);
switch (openCount) {
case 1: // Starting a string literal.
break;
case 2: // Starting an empty string literal.
// Start again but only consume one quote.
reader.reset(pos);
openCount = countChar('\"', 1);
break;
case 3: // Starting a text block.
// Check if preview feature is enabled for text blocks.
checkSourceLevel(pos, Feature.TEXT_BLOCKS);
// Only proceed if text block string methods are present.
if (hasTextBlockSupport) {
// Indicate that the final string should have incidental indentation removed.
shouldStripIndent = true;
// Verify the open delimiter sequence.
boolean hasOpenEOLN = false;
while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) {
hasOpenEOLN = isEOLN();
if (hasOpenEOLN) {
break;
}
reader.scanChar();
}
// Error if the open delimiter sequence not is """<Whitespace>*<LineTerminator>.
if (!hasOpenEOLN) {
lexError(reader.bp, Errors.IllegalTextBlockOpen);
return;
}
// Skip line terminator.
int start = reader.bp;
if (isCRLF()) {
reader.scanChar();
}
reader.scanChar();
processLineTerminator(start, reader.bp);
} else {
// No text block string methods are present, so reset and treat like string literal.
reader.reset(pos);
openCount = countChar('\"', 1);
}
break;
}
// While characters are available.
while (reader.bp < reader.buflen) {
// If possible close delimiter sequence.
if (reader.ch == '\"') {
// Check to see if enough double quotes are present.
int closeCount = countChar('\"', openCount);
if (openCount == closeCount) {
// Good result.
tk = Tokens.TokenKind.STRINGLITERAL;
return;
}
// False alarm, add double quotes to string buffer.
reader.repeat('\"', closeCount);
} else if (isEOLN()) {
// Line terminator in string literal is an error.
// Fall out to unclosed string literal error.
if (openCount == 1) {
break;
}
// Add line terminator to string buffer.
int start = reader.bp;
if (isCRLF()) {
reader.scanChar();
}
reader.putChar('\n', true);
processLineTerminator(start, reader.bp);
// Record first line terminator for error recovery.
if (firstEOLN == -1) {
firstEOLN = reader.bp;
}
} else if (reader.ch == '\\') {
// Handle escape sequences.
if (hasTextBlockSupport) {
// Indicate that the final string should have escapes translated.
shouldTranslateEscapes = true;
// Validate escape sequence and add to string buffer.
scanLitCharRaw(pos);
} else {
// Translate escape sequence and add result to string buffer.
scanLitChar(pos);
}
} else {
// Add character to string buffer.
reader.putChar(true);
}
}
// String ended without close delimiter sequence.
lexError(pos, openCount == 1 ? Errors.UnclosedStrLit : Errors.UnclosedTextBlock);
if (firstEOLN != -1) {
// Reset recovery position to point after open delimiter sequence.
reader.reset(firstEOLN);
}
}
private void scanDigits(int pos, int digitRadix) {
char saveCh;
int savePos;
do {
if (reader.ch != '_') {
reader.putChar(false);
}
saveCh = reader.ch;
savePos = reader.bp;
reader.scanChar();
} while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
if (saveCh == '_')
lexError(savePos, Errors.IllegalUnderscore);
}
/** Read fractional part of hexadecimal floating point number.
*/
private void scanHexExponentAndSuffix(int pos) {
if (reader.ch == 'p' || reader.ch == 'P') {
reader.putChar(true);
skipIllegalUnderscores();
if (reader.ch == '+' || reader.ch == '-') {
reader.putChar(true);
}
skipIllegalUnderscores();
if (reader.digit(pos, 10) >= 0) {
scanDigits(pos, 10);
if (!hexFloatsWork)
lexError(pos, Errors.UnsupportedCrossFpLit);
} else
lexError(pos, Errors.MalformedFpLit);
} else {
lexError(pos, Errors.MalformedFpLit);
}
if (reader.ch == 'f' || reader.ch == 'F') {
reader.putChar(true);
tk = TokenKind.FLOATLITERAL;
radix = 16;
} else {
if (reader.ch == 'd' || reader.ch == 'D') {
reader.putChar(true);
}
tk = TokenKind.DOUBLELITERAL;
radix = 16;
}
}
/** Read fractional part of floating point number.
*/
private void scanFraction(int pos) {
skipIllegalUnderscores();
if (reader.digit(pos, 10) >= 0) {
scanDigits(pos, 10);
}
int sp1 = reader.sp;
if (reader.ch == 'e' || reader.ch == 'E') {
reader.putChar(true);
skipIllegalUnderscores();
if (reader.ch == '+' || reader.ch == '-') {
reader.putChar(true);
}
skipIllegalUnderscores();
if (reader.digit(pos, 10) >= 0) {
scanDigits(pos, 10);
return;
}
lexError(pos, Errors.MalformedFpLit);
reader.sp = sp1;
}
}
/** Read fractional part and 'd' or 'f' suffix of floating point number.
*/
private void scanFractionAndSuffix(int pos) {
radix = 10;
scanFraction(pos);
if (reader.ch == 'f' || reader.ch == 'F') {
reader.putChar(true);
tk = TokenKind.FLOATLITERAL;
} else {
if (reader.ch == 'd' || reader.ch == 'D') {
reader.putChar(true);
}
tk = TokenKind.DOUBLELITERAL;
}
}
/** Read fractional part and 'd' or 'f' suffix of floating point number.
*/
private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
radix = 16;
Assert.check(reader.ch == '.');
reader.putChar(true);
skipIllegalUnderscores();
if (reader.digit(pos, 16) >= 0) {
seendigit = true;
scanDigits(pos, 16);
}
if (!seendigit)
lexError(pos, Errors.InvalidHexNumber);
else
scanHexExponentAndSuffix(pos);
}
private void skipIllegalUnderscores() {
if (reader.ch == '_') {
lexError(reader.bp, Errors.IllegalUnderscore);
while (reader.ch == '_')
reader.scanChar();
}
}
/** Read a number.
* @param radix The radix of the number; one of 2, 8, 10, 16.
*/
private void scanNumber(int pos, int radix) {
// for octal, allow base-10 digit in case it's a float literal
this.radix = radix;
int digitRadix = (radix == 8 ? 10 : radix);
int firstDigit = reader.digit(pos, Math.max(10, digitRadix));
boolean seendigit = firstDigit >= 0;
boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
if (seendigit) {
scanDigits(pos, digitRadix);
}
if (radix == 16 && reader.ch == '.') {
scanHexFractionAndSuffix(pos, seendigit);
} else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
scanHexExponentAndSuffix(pos);
} else if (digitRadix == 10 && reader.ch == '.') {
reader.putChar(true);
scanFractionAndSuffix(pos);
} else if (digitRadix == 10 &&
(reader.ch == 'e' || reader.ch == 'E' ||
reader.ch == 'f' || reader.ch == 'F' ||
reader.ch == 'd' || reader.ch == 'D')) {
scanFractionAndSuffix(pos);
} else {
if (!seenValidDigit) {
switch (radix) {
case 2:
lexError(pos, Errors.InvalidBinaryNumber);
break;
case 16:
lexError(pos, Errors.InvalidHexNumber);
break;
}
}
if (reader.ch == 'l' || reader.ch == 'L') {
reader.scanChar();
tk = TokenKind.LONGLITERAL;
} else {
tk = TokenKind.INTLITERAL;
}
}
}
/** Read an identifier.
*/
private void scanIdent() {
boolean isJavaIdentifierPart;
char high;
reader.putChar(true);
do {
switch (reader.ch) {
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
case 'Z':
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f': case 'g': case 'h': case 'i': case 'j':
case 'k': case 'l': case 'm': case 'n': case 'o':
case 'p': case 'q': case 'r': case 's': case 't':
case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z':
case '$': case '_':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
break;
case '\u0000': case '\u0001': case '\u0002': case '\u0003':
case '\u0004': case '\u0005': case '\u0006': case '\u0007':
case '\u0008': case '\u000E': case '\u000F': case '\u0010':
case '\u0011': case '\u0012': case '\u0013': case '\u0014':
case '\u0015': case '\u0016': case '\u0017':
case '\u0018': case '\u0019': case '\u001B':
case '\u007F':
reader.scanChar();
continue;
case '\u001A': // EOI is also a legal identifier part
if (reader.bp >= reader.buflen) {
name = reader.name();
tk = tokens.lookupKind(name);
return;
}
reader.scanChar();
continue;
default:
if (reader.ch < '\u0080') {
// all ASCII range chars already handled, above
isJavaIdentifierPart = false;
} else {
if (Character.isIdentifierIgnorable(reader.ch)) {
reader.scanChar();
continue;
} else {
int codePoint = reader.peekSurrogates();
if (codePoint >= 0) {
if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
reader.putChar(true);
}
} else {
isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
}
}
}
if (!isJavaIdentifierPart) {
name = reader.name();
tk = tokens.lookupKind(name);
return;
}
}
reader.putChar(true);
} while (true);
}
/** Return true if reader.ch can be part of an operator.
*/
private boolean isSpecial(char ch) {
switch (ch) {
case '!': case '%': case '&': case '*': case '?':
case '+': case '-': case ':': case '<': case '=':
case '>': case '^': case '|': case '~':
case '@':
return true;
default:
return false;
}
}
/** Read longest possible sequence of special characters and convert
* to token.
*/
private void scanOperator() {
while (true) {
reader.putChar(false);
Name newname = reader.name();
TokenKind tk1 = tokens.lookupKind(newname);
if (tk1 == TokenKind.IDENTIFIER) {
reader.sp--;
break;
}
tk = tk1;
reader.scanChar();
if (!isSpecial(reader.ch)) break;
}
}
/** Read token.
*/
public Token readToken() {
reader.sp = 0;
name = null;
radix = 0;
int pos = 0;
int endPos = 0;
List<Comment> comments = null;
try {
loop: while (true) {
pos = reader.bp;
switch (reader.ch) {
case ' ': // (Spec 3.6)
case '\t': // (Spec 3.6)
case FF: // (Spec 3.6)
do {
reader.scanChar();
} while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
processWhiteSpace(pos, reader.bp);
break;
case LF: // (Spec 3.4)
reader.scanChar();
processLineTerminator(pos, reader.bp);
break;
case CR: // (Spec 3.4)
reader.scanChar();
if (reader.ch == LF) {
reader.scanChar();
}
processLineTerminator(pos, reader.bp);
break;
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
case 'Z':
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f': case 'g': case 'h': case 'i': case 'j':
case 'k': case 'l': case 'm': case 'n': case 'o':
case 'p': case 'q': case 'r': case 's': case 't':
case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z':
case '$': case '_':
scanIdent();
break loop;
case '0':
reader.scanChar();
if (reader.ch == 'x' || reader.ch == 'X') {
reader.scanChar();
skipIllegalUnderscores();
scanNumber(pos, 16);
} else if (reader.ch == 'b' || reader.ch == 'B') {
reader.scanChar();
skipIllegalUnderscores();
scanNumber(pos, 2);
} else {
reader.putChar('0');
if (reader.ch == '_') {
int savePos = reader.bp;
do {
reader.scanChar();
} while (reader.ch == '_');
if (reader.digit(pos, 10) < 0) {
lexError(savePos, Errors.IllegalUnderscore);
}
}
scanNumber(pos, 8);
}
break loop;
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
scanNumber(pos, 10);
break loop;
case '.':
reader.scanChar();
if (reader.digit(pos, 10) >= 0) {
reader.putChar('.');
scanFractionAndSuffix(pos);
} else if (reader.ch == '.') {
int savePos = reader.bp;
reader.putChar('.'); reader.putChar('.', true);
if (reader.ch == '.') {
reader.scanChar();
reader.putChar('.');
tk = TokenKind.ELLIPSIS;
} else {
lexError(savePos, Errors.IllegalDot);
}
} else {
tk = TokenKind.DOT;
}
break loop;
case ',':
reader.scanChar(); tk = TokenKind.COMMA; break loop;
case ';':
reader.scanChar(); tk = TokenKind.SEMI; break loop;
case '(':
reader.scanChar(); tk = TokenKind.LPAREN; break loop;
case ')':
reader.scanChar(); tk = TokenKind.RPAREN; break loop;
case '[':
reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
case ']':
reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
case '{':
reader.scanChar(); tk = TokenKind.LBRACE; break loop;
case '}':
reader.scanChar(); tk = TokenKind.RBRACE; break loop;
case '/':
reader.scanChar();
if (reader.ch == '/') {
do {
reader.scanCommentChar();
} while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
if (reader.bp < reader.buflen) {
comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
}
break;
} else if (reader.ch == '*') {
boolean isEmpty = false;
reader.scanChar();
CommentStyle style;
if (reader.ch == '*') {
style = CommentStyle.JAVADOC;
reader.scanCommentChar();
if (reader.ch == '/') {
isEmpty = true;
}
} else {
style = CommentStyle.BLOCK;
}
while (!isEmpty && reader.bp < reader.buflen) {
if (reader.ch == '*') {
reader.scanChar();
if (reader.ch == '/') break;
} else {
reader.scanCommentChar();
}
}
if (reader.ch == '/') {
reader.scanChar();
comments = addComment(comments, processComment(pos, reader.bp, style));
break;
} else {
lexError(pos, Errors.UnclosedComment);
break loop;
}
} else if (reader.ch == '=') {
tk = TokenKind.SLASHEQ;
reader.scanChar();
} else {
tk = TokenKind.SLASH;
}
break loop;
case '\'':
reader.scanChar();
if (reader.ch == '\'') {
lexError(pos, Errors.EmptyCharLit);
reader.scanChar();
} else {
if (isEOLN())
lexError(pos, Errors.IllegalLineEndInCharLit);
scanLitChar(pos);
if (reader.ch == '\'') {
reader.scanChar();
tk = TokenKind.CHARLITERAL;
} else {
lexError(pos, Errors.UnclosedCharLit);
}
}
break loop;
case '\"':
scanString(pos);
break loop;
default:
if (isSpecial(reader.ch)) {
scanOperator();
} else {
boolean isJavaIdentifierStart;
int codePoint = -1;
if (reader.ch < '\u0080') {
// all ASCII range chars already handled, above
isJavaIdentifierStart = false;
} else {
codePoint = reader.peekSurrogates();
if (codePoint >= 0) {
if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
reader.putChar(true);
}
} else {
isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
}
}
if (isJavaIdentifierStart) {
scanIdent();
} else if (reader.digit(pos, 10) >= 0) {
scanNumber(pos, 10);
} else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
tk = TokenKind.EOF;
pos = reader.realLength;
} else {
String arg;
if (codePoint >= 0) {
char high = reader.ch;
reader.scanChar();
arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
} else {
arg = (32 < reader.ch && reader.ch < 127) ?
String.format("%s", reader.ch) :
String.format("\\u%04x", (int)reader.ch);
}
lexError(pos, Errors.IllegalChar(arg));
reader.scanChar();
}
}
break loop;
}
}
endPos = reader.bp;
switch (tk.tag) {
case DEFAULT: return new Token(tk, pos, endPos, comments);
case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
case STRING: {
// Get characters from string buffer.
String string = reader.chars();
// If a text block.
if (shouldStripIndent) {
// Verify that the incidental indentation is consistent.
if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
Set<TextBlockSupport.WhitespaceChecks> checks =
TextBlockSupport.checkWhitespace(string);
if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {
lexWarning(LintCategory.TEXT_BLOCKS, pos,
Warnings.InconsistentWhiteSpaceIndentation);
}
if (checks.contains(TextBlockSupport.WhitespaceChecks.TRAILING)) {
lexWarning(LintCategory.TEXT_BLOCKS, pos,
Warnings.TrailingWhiteSpaceWillBeRemoved);
}
}
// Remove incidental indentation.
string = TextBlockSupport.stripIndent(string);
}
// Translate escape sequences if present.
if (shouldTranslateEscapes && !hasBrokenEscapes) {
string = TextBlockSupport.translateEscapes(string);
}
// Build string token.
return new StringToken(tk, pos, endPos, string, comments);
}
case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
default: throw new AssertionError();
}
}
finally {
if (scannerDebug) {
System.out.println("nextToken(" + pos
+ "," + endPos + ")=|" +
new String(reader.getRawCharacters(pos, endPos))
+ "|");
}
}
}
//where
List<Comment> addComment(List<Comment> comments, Comment comment) {
return comments == null ?
List.of(comment) :
comments.prepend(comment);
}
/** Return the position where a lexical error occurred;
*/
public int errPos() {
return errPos;
}
/** Set the position where a lexical error occurred;
*/
public void errPos(int pos) {
errPos = pos;
}
/**
* Called when a complete comment has been scanned. pos and endPos
* will mark the comment boundary.
*/
protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
if (scannerDebug)
System.out.println("processComment(" + pos
+ "," + endPos + "," + style + ")=|"
+ new String(reader.getRawCharacters(pos, endPos))
+ "|");
char[] buf = reader.getRawCharacters(pos, endPos);
return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);
}
/**
* Called when a complete whitespace run has been scanned. pos and endPos
* will mark the whitespace boundary.
*/
protected void processWhiteSpace(int pos, int endPos) {
if (scannerDebug)
System.out.println("processWhitespace(" + pos
+ "," + endPos + ")=|" +
new String(reader.getRawCharacters(pos, endPos))
+ "|");
}
/**
* Called when a line terminator has been processed.
*/
protected void processLineTerminator(int pos, int endPos) {
if (scannerDebug)
System.out.println("processTerminator(" + pos
+ "," + endPos + ")=|" +
new String(reader.getRawCharacters(pos, endPos))
+ "|");
}
/** Build a map for translating between line numbers and
* positions in the input.
*
* @return a LineMap */
public Position.LineMap getLineMap() {
return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
}
/**
* Scan a documentation comment; determine if a deprecated tag is present.
* Called once the initial /, * have been skipped, positioned at the second *
* (which is treated as the beginning of the first line).
* Stops positioned at the closing '/'.
*/
protected static class BasicComment<U extends UnicodeReader> implements Comment {
CommentStyle cs;
U comment_reader;
protected boolean deprecatedFlag = false;
protected boolean scanned = false;
protected BasicComment(U comment_reader, CommentStyle cs) {
this.comment_reader = comment_reader;
this.cs = cs;
}
public String getText() {
return null;
}
public int getSourcePos(int pos) {
return -1;
}
public CommentStyle getStyle() {
return cs;
}
public boolean isDeprecated() {
if (!scanned && cs == CommentStyle.JAVADOC) {
scanDocComment();
}
return deprecatedFlag;
}
@SuppressWarnings("fallthrough")
protected void scanDocComment() {
try {
boolean deprecatedPrefix = false;
comment_reader.bp += 3; // '/**'
comment_reader.ch = comment_reader.buf[comment_reader.bp];
forEachLine:
while (comment_reader.bp < comment_reader.buflen) {
// Skip optional WhiteSpace at beginning of line
while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
comment_reader.scanCommentChar();
}
// Skip optional consecutive Stars
while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
comment_reader.scanCommentChar();
if (comment_reader.ch == '/') {
return;
}
}
// Skip optional WhiteSpace after Stars
while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
comment_reader.scanCommentChar();
}
deprecatedPrefix = false;
// At beginning of line in the JavaDoc sense.
if (!deprecatedFlag) {
String deprecated = "@deprecated";
int i = 0;
while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
comment_reader.scanCommentChar();
i++;
if (i == deprecated.length()) {
deprecatedPrefix = true;
break;
}
}
}
if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
if (Character.isWhitespace(comment_reader.ch)) {
deprecatedFlag = true;
} else if (comment_reader.ch == '*') {
comment_reader.scanCommentChar();
if (comment_reader.ch == '/') {
deprecatedFlag = true;
return;
}
}
}
// Skip rest of line
while (comment_reader.bp < comment_reader.buflen) {
switch (comment_reader.ch) {
case '*':
comment_reader.scanCommentChar();
if (comment_reader.ch == '/') {
return;
}
break;
case CR: // (Spec 3.4)
comment_reader.scanCommentChar();
if (comment_reader.ch != LF) {
continue forEachLine;
}
/* fall through to LF case */
case LF: // (Spec 3.4)
comment_reader.scanCommentChar();
continue forEachLine;
default:
comment_reader.scanCommentChar();
}
} // rest of line
} // forEachLine
return;
} finally {
scanned = true;
}
}
}
}