src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java
changeset 47216 71c04702a3d5
parent 34997 8174a7d851fb
child 48054 702043a4cdeb
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java	Tue Sep 12 19:03:39 2017 +0200
@@ -0,0 +1,892 @@
+/*
+ * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package com.sun.tools.javac.parser;
+
+import com.sun.tools.javac.code.Source;
+import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
+import com.sun.tools.javac.util.*;
+
+import java.nio.CharBuffer;
+
+import static com.sun.tools.javac.parser.Tokens.*;
+import static com.sun.tools.javac.util.LayoutCharacters.*;
+
+/** The lexical analyzer maps an input stream consisting of
+ *  ASCII characters and Unicode escapes into a token sequence.
+ *
+ *  <p><b>This is NOT part of any supported API.
+ *  If you write code that depends on this, you do so at your own risk.
+ *  This code and its internal interfaces are subject to change or
+ *  deletion without notice.</b>
+ */
+public class JavaTokenizer {
+
+    private static final boolean scannerDebug = false; // when true, scanned tokens, comments and whitespace are traced to System.out
+
+    /** Allow binary literals.
+     *  Initialised from the source level; set to true after the first
+     *  "unsupported.binary.lit" error so that the error is reported only once.
+     */
+    private boolean allowBinaryLiterals;
+
+    /** Allow underscores in literals.
+     *  Initialised from the source level; set to true after the first
+     *  "unsupported.underscore.lit" error so that the error is reported only once.
+     */
+    private boolean allowUnderscoresInLiterals;
+
+    /** The source language setting.
+     */
+    private Source source;
+
+    /** The log to be used for error reporting.
+     */
+    private final Log log;
+
+    /** The token factory. */
+    private final Tokens tokens;
+
+    /** The token kind, set by readToken().
+     */
+    protected TokenKind tk;
+
+    /** The token's radix, set by readToken().
+     */
+    protected int radix;
+
+    /** The token's name, set by readToken().
+     */
+    protected Name name;
+
+    /** The position where a lexical error occurred.
+     */
+    protected int errPos = Position.NOPOS;
+
+    /** The Unicode reader (low-level stream reader).
+     */
+    protected UnicodeReader reader;
+    /** The factory which created this tokenizer; also passed to the readers created for comments. */
+    protected ScannerFactory fac;
+
+    /** Whether Float.valueOf accepts hexadecimal floating point syntax on
+     *  the running platform; probed once when this class is initialised.
+     */
+    private static final boolean hexFloatsWork = hexFloatsWork();
+
+    /** Probe the runtime by parsing a sample hexadecimal floating point literal.
+     */
+    private static boolean hexFloatsWork() {
+        boolean parsed;
+        try {
+            Float.valueOf("0x1.0p1");
+            parsed = true;
+        } catch (NumberFormatException ex) {
+            parsed = false;
+        }
+        return parsed;
+    }
+
+    /**
+     * Create a tokenizer that reads its input from a character buffer.
+     * The buffer is handed to a new {@code UnicodeReader}, which is then
+     * passed to the reader-based constructor.
+     *
+     * @param fac the factory which created this tokenizer
+     * @param buf the input characters
+     */
+    protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
+        this(fac, new UnicodeReader(fac, buf));
+    }
+    /**
+     * Create a tokenizer from the input array.  This constructor might
+     * modify the array.  To avoid copying the input array, ensure
+     * that {@code inputLength < buf.length} or
+     * {@code buf[buf.length -1]} is a white space character.
+     *
+     * @param fac the factory which created this tokenizer
+     * @param buf the input, might be modified
+     * @param inputLength the length of the input within {@code buf};
+     *        must be positive and less than or equal to {@code buf.length}
+     */
+    protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
+        this(fac, new UnicodeReader(fac, buf, inputLength));
+    }
+
+    protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
+        this.fac = fac;
+        this.log = fac.log;
+        this.tokens = fac.tokens;
+        this.source = fac.source;
+        this.reader = reader;
+        this.allowBinaryLiterals = source.allowBinaryLiterals();
+        this.allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
+    }
+
+    /** Report an error at the given position using the provided arguments.
+     */
+    protected void lexError(int pos, String key, Object... args) {
+        log.error(pos, key, args);
+        tk = TokenKind.ERROR;
+        errPos = pos;
+    }
+
+    /** Read next character in character or string literal and copy into sbuf.
+     */
+    private void scanLitChar(int pos) {
+        if (reader.ch == '\\') {
+            if (reader.peekChar() == '\\' && !reader.isUnicode()) {
+                reader.skipChar();
+                reader.putChar('\\', true);
+            } else {
+                reader.scanChar();
+                switch (reader.ch) {
+                case '0': case '1': case '2': case '3':
+                case '4': case '5': case '6': case '7':
+                    char leadch = reader.ch;
+                    int oct = reader.digit(pos, 8);
+                    reader.scanChar();
+                    if ('0' <= reader.ch && reader.ch <= '7') {
+                        oct = oct * 8 + reader.digit(pos, 8);
+                        reader.scanChar();
+                        if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
+                            oct = oct * 8 + reader.digit(pos, 8);
+                            reader.scanChar();
+                        }
+                    }
+                    reader.putChar((char)oct);
+                    break;
+                case 'b':
+                    reader.putChar('\b', true); break;
+                case 't':
+                    reader.putChar('\t', true); break;
+                case 'n':
+                    reader.putChar('\n', true); break;
+                case 'f':
+                    reader.putChar('\f', true); break;
+                case 'r':
+                    reader.putChar('\r', true); break;
+                case '\'':
+                    reader.putChar('\'', true); break;
+                case '\"':
+                    reader.putChar('\"', true); break;
+                case '\\':
+                    reader.putChar('\\', true); break;
+                default:
+                    lexError(reader.bp, "illegal.esc.char");
+                }
+            }
+        } else if (reader.bp != reader.buflen) {
+            reader.putChar(true);
+        }
+    }
+
+    /** Scan a run of digits and underscores.  Digits are appended to the
+     *  reader's buffer; underscores are dropped.  An underscore at the end
+     *  of the run is reported as "illegal.underscore", and the first
+     *  underscore seen while underscores are not allowed is reported as
+     *  "unsupported.underscore.lit".
+     *
+     *  @param pos        position used for error reporting
+     *  @param digitRadix the radix in which the digits are recognised
+     */
+    private void scanDigits(int pos, int digitRadix) {
+        char previousCh;
+        int previousPos;
+        boolean more;
+        do {
+            if (reader.ch == '_') {
+                if (!allowUnderscoresInLiterals) {
+                    lexError(pos, "unsupported.underscore.lit", source.name);
+                    // Report the unsupported feature only once.
+                    allowUnderscoresInLiterals = true;
+                }
+            } else {
+                reader.putChar(false);
+            }
+            previousCh = reader.ch;
+            previousPos = reader.bp;
+            reader.scanChar();
+            more = reader.digit(pos, digitRadix) >= 0 || reader.ch == '_';
+        } while (more);
+
+        if (previousCh == '_') {
+            lexError(previousPos, "illegal.underscore");
+        }
+    }
+
+    /** Read fractional part of hexadecimal floating point number.
+     */
+    private void scanHexExponentAndSuffix(int pos) {
+        if (reader.ch == 'p' || reader.ch == 'P') {
+            reader.putChar(true);
+            skipIllegalUnderscores();
+            if (reader.ch == '+' || reader.ch == '-') {
+                reader.putChar(true);
+            }
+            skipIllegalUnderscores();
+            if (reader.digit(pos, 10) >= 0) {
+                scanDigits(pos, 10);
+                if (!hexFloatsWork)
+                    lexError(pos, "unsupported.cross.fp.lit");
+            } else
+                lexError(pos, "malformed.fp.lit");
+        } else {
+            lexError(pos, "malformed.fp.lit");
+        }
+        if (reader.ch == 'f' || reader.ch == 'F') {
+            reader.putChar(true);
+            tk = TokenKind.FLOATLITERAL;
+            radix = 16;
+        } else {
+            if (reader.ch == 'd' || reader.ch == 'D') {
+                reader.putChar(true);
+            }
+            tk = TokenKind.DOUBLELITERAL;
+            radix = 16;
+        }
+    }
+
+    /** Read fractional part of floating point number.
+     */
+    private void scanFraction(int pos) {
+        skipIllegalUnderscores();
+        if (reader.digit(pos, 10) >= 0) {
+            scanDigits(pos, 10);
+        }
+        int sp1 = reader.sp;
+        if (reader.ch == 'e' || reader.ch == 'E') {
+            reader.putChar(true);
+            skipIllegalUnderscores();
+            if (reader.ch == '+' || reader.ch == '-') {
+                reader.putChar(true);
+            }
+            skipIllegalUnderscores();
+            if (reader.digit(pos, 10) >= 0) {
+                scanDigits(pos, 10);
+                return;
+            }
+            lexError(pos, "malformed.fp.lit");
+            reader.sp = sp1;
+        }
+    }
+
+    /** Read fractional part and 'd' or 'f' suffix of floating point number.
+     */
+    private void scanFractionAndSuffix(int pos) {
+        radix = 10;
+        scanFraction(pos);
+        if (reader.ch == 'f' || reader.ch == 'F') {
+            reader.putChar(true);
+            tk = TokenKind.FLOATLITERAL;
+        } else {
+            if (reader.ch == 'd' || reader.ch == 'D') {
+                reader.putChar(true);
+            }
+            tk = TokenKind.DOUBLELITERAL;
+        }
+    }
+
+    /** Read the fractional part, the exponent and the optional 'f' or 'd'
+     *  suffix of a hexadecimal floating point number.  Must be called with
+     *  the reader positioned at the '.' character.
+     *
+     *  @param pos       position used for error reporting
+     *  @param seendigit whether a digit was already seen before the '.'
+     */
+    private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
+        radix = 16;
+        Assert.check(reader.ch == '.');
+        reader.putChar(true);
+        skipIllegalUnderscores();
+
+        boolean anyDigit = seendigit;
+        if (reader.digit(pos, 16) >= 0) {
+            anyDigit = true;
+            scanDigits(pos, 16);
+        }
+
+        if (anyDigit) {
+            scanHexExponentAndSuffix(pos);
+        } else {
+            // Neither side of the '.' carries a digit.
+            lexError(pos, "invalid.hex.number");
+        }
+    }
+
+    /** If the reader is positioned at an underscore, report
+     *  "illegal.underscore" once and skip the whole run of underscores.
+     */
+    private void skipIllegalUnderscores() {
+        if (reader.ch != '_') {
+            return;
+        }
+        lexError(reader.bp, "illegal.underscore");
+        do {
+            reader.scanChar();
+        } while (reader.ch == '_');
+    }
+
+    /** Read a number.
+     *  @param radix  The radix of the number; one of 2, 8, 10, 16.
+     */
+    private void scanNumber(int pos, int radix) {
+        // for octal, allow base-10 digit in case it's a float literal
+        this.radix = radix;
+        int digitRadix = (radix == 8 ? 10 : radix);
+        int firstDigit = reader.digit(pos, Math.max(10, digitRadix));
+        boolean seendigit = firstDigit >= 0;
+        boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
+        if (seendigit) {
+            scanDigits(pos, digitRadix);
+        }
+        if (radix == 16 && reader.ch == '.') {
+            scanHexFractionAndSuffix(pos, seendigit);
+        } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
+            scanHexExponentAndSuffix(pos);
+        } else if (digitRadix == 10 && reader.ch == '.') {
+            reader.putChar(true);
+            scanFractionAndSuffix(pos);
+        } else if (digitRadix == 10 &&
+                   (reader.ch == 'e' || reader.ch == 'E' ||
+                    reader.ch == 'f' || reader.ch == 'F' ||
+                    reader.ch == 'd' || reader.ch == 'D')) {
+            scanFractionAndSuffix(pos);
+        } else {
+            if (!seenValidDigit) {
+                switch (radix) {
+                case 2:
+                    lexError(pos, "invalid.binary.number");
+                    break;
+                case 16:
+                    lexError(pos, "invalid.hex.number");
+                    break;
+                }
+            }
+            if (reader.ch == 'l' || reader.ch == 'L') {
+                reader.scanChar();
+                tk = TokenKind.LONGLITERAL;
+            } else {
+                tk = TokenKind.INTLITERAL;
+            }
+        }
+    }
+
+    /** Read an identifier.
+     *  The current character is appended to the reader's buffer, followed by
+     *  every subsequent character that is a Java identifier part.
+     *  Identifier-ignorable characters are skipped without being stored.
+     *  When the identifier ends, name is set from the reader's buffer and
+     *  tk to the kind that tokens.lookupKind returns for that name.
+     */
+    private void scanIdent() {
+        boolean isJavaIdentifierPart;
+        reader.putChar(true);
+        do {
+            switch (reader.ch) {
+            // ASCII identifier parts: fall out of the switch and get stored below.
+            case 'A': case 'B': case 'C': case 'D': case 'E':
+            case 'F': case 'G': case 'H': case 'I': case 'J':
+            case 'K': case 'L': case 'M': case 'N': case 'O':
+            case 'P': case 'Q': case 'R': case 'S': case 'T':
+            case 'U': case 'V': case 'W': case 'X': case 'Y':
+            case 'Z':
+            case 'a': case 'b': case 'c': case 'd': case 'e':
+            case 'f': case 'g': case 'h': case 'i': case 'j':
+            case 'k': case 'l': case 'm': case 'n': case 'o':
+            case 'p': case 'q': case 'r': case 's': case 't':
+            case 'u': case 'v': case 'w': case 'x': case 'y':
+            case 'z':
+            case '$': case '_':
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9':
+                break;
+            // ASCII control characters: skipped, not stored.
+            case '\u0000': case '\u0001': case '\u0002': case '\u0003':
+            case '\u0004': case '\u0005': case '\u0006': case '\u0007':
+            case '\u0008': case '\u000E': case '\u000F': case '\u0010':
+            case '\u0011': case '\u0012': case '\u0013': case '\u0014':
+            case '\u0015': case '\u0016': case '\u0017':
+            case '\u0018': case '\u0019': case '\u001B':
+            case '\u007F':
+                reader.scanChar();
+                continue;
+            case '\u001A': // EOI is also a legal identifier part
+                if (reader.bp >= reader.buflen) {
+                    // Genuine end of input: the identifier ends here.
+                    name = reader.name();
+                    tk = tokens.lookupKind(name);
+                    return;
+                }
+                reader.scanChar();
+                continue;
+            default:
+                if (reader.ch < '\u0080') {
+                    // all ASCII range chars already handled, above
+                    isJavaIdentifierPart = false;
+                } else {
+                    if (Character.isIdentifierIgnorable(reader.ch)) {
+                        reader.scanChar();
+                        continue;
+                    } else {
+                        int codePoint = reader.peekSurrogates();
+                        if (codePoint >= 0) {
+                            // Surrogate pair: store one extra character here; the
+                            // putChar at the bottom of the loop stores the other.
+                            if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
+                                reader.putChar(true);
+                            }
+                        } else {
+                            isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
+                        }
+                    }
+                }
+                if (!isJavaIdentifierPart) {
+                    name = reader.name();
+                    tk = tokens.lookupKind(name);
+                    return;
+                }
+            }
+            reader.putChar(true);
+        } while (true);
+    }
+
+    /** Return true if the given character can be part of an operator.
+     *
+     *  @param ch the character to test
+     */
+    private boolean isSpecial(char ch) {
+        return "!%&*?+-:<=>^|~@".indexOf(ch) >= 0;
+    }
+
+    /** Read longest possible sequence of special characters and convert
+     *  to token.
+     */
+    private void scanOperator() {
+        while (true) {
+            reader.putChar(false);
+            Name newname = reader.name();
+            TokenKind tk1 = tokens.lookupKind(newname);
+            if (tk1 == TokenKind.IDENTIFIER) {
+                reader.sp--;
+                break;
+            }
+            tk = tk1;
+            reader.scanChar();
+            if (!isSpecial(reader.ch)) break;
+        }
+    }
+
+    /** Read the next token from the reader.
+     *  Whitespace, line terminators and comments in front of the token are
+     *  consumed first and reported through processWhiteSpace,
+     *  processLineTerminator and processComment; the comments returned by
+     *  processComment are collected and attached to the token.
+     *  Lexical errors are reported through lexError.
+     *
+     *  @return a Token, NamedToken, StringToken or NumericToken, selected by
+     *          the tag of the token kind that was scanned
+     */
+    public Token readToken() {
+        // Reset the per-token state.
+        reader.sp = 0;
+        name = null;
+        radix = 0;
+
+        int pos = 0;
+        int endPos = 0;
+        List<Comment> comments = null;
+
+        try {
+            loop: while (true) { // a plain 'break' skips whitespace or a comment and retries; 'break loop' ends the scan with tk set
+                pos = reader.bp; // candidate start position of the token
+                switch (reader.ch) {
+                case ' ': // (Spec 3.6)
+                case '\t': // (Spec 3.6)
+                case FF: // (Spec 3.6)
+                    do {
+                        reader.scanChar();
+                    } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
+                    processWhiteSpace(pos, reader.bp);
+                    break;
+                case LF: // (Spec 3.4)
+                    reader.scanChar();
+                    processLineTerminator(pos, reader.bp);
+                    break;
+                case CR: // (Spec 3.4)
+                    reader.scanChar();
+                    if (reader.ch == LF) { // CR LF counts as a single line terminator
+                        reader.scanChar();
+                    }
+                    processLineTerminator(pos, reader.bp);
+                    break;
+                case 'A': case 'B': case 'C': case 'D': case 'E':
+                case 'F': case 'G': case 'H': case 'I': case 'J':
+                case 'K': case 'L': case 'M': case 'N': case 'O':
+                case 'P': case 'Q': case 'R': case 'S': case 'T':
+                case 'U': case 'V': case 'W': case 'X': case 'Y':
+                case 'Z':
+                case 'a': case 'b': case 'c': case 'd': case 'e':
+                case 'f': case 'g': case 'h': case 'i': case 'j':
+                case 'k': case 'l': case 'm': case 'n': case 'o':
+                case 'p': case 'q': case 'r': case 's': case 't':
+                case 'u': case 'v': case 'w': case 'x': case 'y':
+                case 'z':
+                case '$': case '_':
+                    scanIdent();
+                    break loop;
+                case '0': // the next character selects hex (x), binary (b) or octal/decimal scanning
+                    reader.scanChar();
+                    if (reader.ch == 'x' || reader.ch == 'X') {
+                        reader.scanChar();
+                        skipIllegalUnderscores();
+                        scanNumber(pos, 16);
+                    } else if (reader.ch == 'b' || reader.ch == 'B') {
+                        if (!allowBinaryLiterals) {
+                            lexError(pos, "unsupported.binary.lit", source.name);
+                            allowBinaryLiterals = true; // report the unsupported feature only once
+                        }
+                        reader.scanChar();
+                        skipIllegalUnderscores();
+                        scanNumber(pos, 2);
+                    } else {
+                        reader.putChar('0');
+                        if (reader.ch == '_') {
+                            int savePos = reader.bp;
+                            do {
+                                reader.scanChar();
+                            } while (reader.ch == '_');
+                            if (reader.digit(pos, 10) < 0) { // underscores after the leading 0 must be followed by a digit
+                                lexError(savePos, "illegal.underscore");
+                            }
+                        }
+                        scanNumber(pos, 8);
+                    }
+                    break loop;
+                case '1': case '2': case '3': case '4':
+                case '5': case '6': case '7': case '8': case '9':
+                    scanNumber(pos, 10);
+                    break loop;
+                case '.': // start of a floating point literal, an ellipsis, or a plain dot
+                    reader.scanChar();
+                    if (reader.digit(pos, 10) >= 0) {
+                        reader.putChar('.');
+                        scanFractionAndSuffix(pos);
+                    } else if (reader.ch == '.') {
+                        int savePos = reader.bp;
+                        reader.putChar('.'); reader.putChar('.', true);
+                        if (reader.ch == '.') {
+                            reader.scanChar();
+                            reader.putChar('.');
+                            tk = TokenKind.ELLIPSIS;
+                        } else { // exactly two dots
+                            lexError(savePos, "illegal.dot");
+                        }
+                    } else {
+                        tk = TokenKind.DOT;
+                    }
+                    break loop;
+                case ',':
+                    reader.scanChar(); tk = TokenKind.COMMA; break loop;
+                case ';':
+                    reader.scanChar(); tk = TokenKind.SEMI; break loop;
+                case '(':
+                    reader.scanChar(); tk = TokenKind.LPAREN; break loop;
+                case ')':
+                    reader.scanChar(); tk = TokenKind.RPAREN; break loop;
+                case '[':
+                    reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
+                case ']':
+                    reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
+                case '{':
+                    reader.scanChar(); tk = TokenKind.LBRACE; break loop;
+                case '}':
+                    reader.scanChar(); tk = TokenKind.RBRACE; break loop;
+                case '/': // line comment, block or javadoc comment, '/=' or '/'
+                    reader.scanChar();
+                    if (reader.ch == '/') {
+                        do {
+                            reader.scanCommentChar();
+                        } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
+                        if (reader.bp < reader.buflen) { // a line comment that runs up to the end of the input is not recorded
+                            comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
+                        }
+                        break; // comment consumed; keep looking for a token
+                    } else if (reader.ch == '*') {
+                        boolean isEmpty = false; // set when the comment is closed right after its second '*'
+                        reader.scanChar();
+                        CommentStyle style;
+                        if (reader.ch == '*') {
+                            style = CommentStyle.JAVADOC;
+                            reader.scanCommentChar();
+                            if (reader.ch == '/') {
+                                isEmpty = true;
+                            }
+                        } else {
+                            style = CommentStyle.BLOCK;
+                        }
+                        while (!isEmpty && reader.bp < reader.buflen) { // search for the closing star-slash pair
+                            if (reader.ch == '*') {
+                                reader.scanChar();
+                                if (reader.ch == '/') break;
+                            } else {
+                                reader.scanCommentChar();
+                            }
+                        }
+                        if (reader.ch == '/') {
+                            reader.scanChar();
+                            comments = addComment(comments, processComment(pos, reader.bp, style));
+                            break; // comment consumed; keep looking for a token
+                        } else { // input ended inside the comment
+                            lexError(pos, "unclosed.comment");
+                            break loop;
+                        }
+                    } else if (reader.ch == '=') {
+                        tk = TokenKind.SLASHEQ;
+                        reader.scanChar();
+                    } else {
+                        tk = TokenKind.SLASH;
+                    }
+                    break loop;
+                case '\'': // character literal
+                    reader.scanChar();
+                    if (reader.ch == '\'') {
+                        lexError(pos, "empty.char.lit");
+                        reader.scanChar();
+                    } else {
+                        if (reader.ch == CR || reader.ch == LF)
+                            lexError(pos, "illegal.line.end.in.char.lit");
+                        scanLitChar(pos);
+                        if (reader.ch == '\'') {
+                            reader.scanChar();
+                            tk = TokenKind.CHARLITERAL;
+                        } else {
+                            lexError(pos, "unclosed.char.lit");
+                        }
+                    }
+                    break loop;
+                case '\"': // string literal; must be closed before the end of the line and of the input
+                    reader.scanChar();
+                    while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
+                        scanLitChar(pos);
+                    if (reader.ch == '\"') {
+                        tk = TokenKind.STRINGLITERAL;
+                        reader.scanChar();
+                    } else {
+                        lexError(pos, "unclosed.str.lit");
+                    }
+                    break loop;
+                default: // operators, non-ASCII identifiers and digits, end of input, illegal characters
+                    if (isSpecial(reader.ch)) {
+                        scanOperator();
+                    } else {
+                        boolean isJavaIdentifierStart;
+                        int codePoint = -1; // stays negative unless peekSurrogates() reports a code point
+                        if (reader.ch < '\u0080') {
+                            // all ASCII range chars already handled, above
+                            isJavaIdentifierStart = false;
+                        } else {
+                            codePoint = reader.peekSurrogates();
+                            if (codePoint >= 0) {
+                                if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
+                                    reader.putChar(true);
+                                }
+                            } else {
+                                isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
+                            }
+                        }
+                        if (isJavaIdentifierStart) {
+                            scanIdent();
+                        } else if (reader.digit(pos, 10) >= 0) {
+                            scanNumber(pos, 10);
+                        } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
+                            tk = TokenKind.EOF;
+                            pos = reader.buflen;
+                        } else {
+                            String arg; // printable form of the offending character for the error message
+
+                            if (codePoint >= 0) {
+                                char high = reader.ch;
+                                reader.scanChar();
+                                arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
+                            } else {
+                                arg = (32 < reader.ch && reader.ch < 127) ?
+                                                String.format("%s", reader.ch) :
+                                                String.format("\\u%04x", (int)reader.ch);
+                            }
+                            lexError(pos, "illegal.char", arg);
+                            reader.scanChar();
+                        }
+                    }
+                    break loop;
+                }
+            }
+            endPos = reader.bp;
+            switch (tk.tag) { // the tag of the token kind selects the Token subclass to create
+                case DEFAULT: return new Token(tk, pos, endPos, comments);
+                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
+                case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
+                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
+                default: throw new AssertionError();
+            }
+        }
+        finally {
+            if (scannerDebug) { // trace the raw characters of the token just scanned
+                    System.out.println("nextToken(" + pos
+                                       + "," + endPos + ")=|" +
+                                       new String(reader.getRawCharacters(pos, endPos))
+                                       + "|");
+            }
+        }
+    }
+    //where
+        /** Add a comment to the comments collected for the current token:
+         *  starts a new list if there is none yet, otherwise prepends.
+         */
+        List<Comment> addComment(List<Comment> comments, Comment comment) {
+            if (comments == null) {
+                return List.of(comment);
+            }
+            return comments.prepend(comment);
+        }
+
+    /** Return the position where a lexical error occurred;
+     */
+    public int errPos() {
+        return errPos;
+    }
+
+    /** Set the position where a lexical error occurred;
+     */
+    public void errPos(int pos) {
+        errPos = pos;
+    }
+
+    /**
+     * Called when a complete comment has been scanned. pos and endPos
+     * will mark the comment boundary.  The raw characters of the comment
+     * are wrapped in a reader of their own, which backs the returned comment.
+     *
+     * @param pos    start position of the comment
+     * @param endPos end position of the comment
+     * @param style  the style of the comment
+     * @return the comment object for the scanned comment
+     */
+    protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
+        if (scannerDebug) {
+            String raw = new String(reader.getRawCharacters(pos, endPos));
+            System.out.println("processComment(" + pos
+                               + "," + endPos + "," + style + ")=|"
+                               + raw
+                               + "|");
+        }
+        char[] text = reader.getRawCharacters(pos, endPos);
+        UnicodeReader commentReader = new UnicodeReader(fac, text, text.length);
+        return new BasicComment<>(commentReader, style);
+    }
+
+    /**
+     * Called when a complete whitespace run has been scanned. pos and endPos
+     * will mark the whitespace boundary.  This implementation only traces
+     * the run when scanner debugging is enabled.
+     *
+     * @param pos    start position of the whitespace run
+     * @param endPos end position of the whitespace run
+     */
+    protected void processWhiteSpace(int pos, int endPos) {
+        if (!scannerDebug) {
+            return;
+        }
+        String raw = new String(reader.getRawCharacters(pos, endPos));
+        System.out.println("processWhitespace(" + pos
+                           + "," + endPos + ")=|" +
+                           raw
+                           + "|");
+    }
+
+    /**
+     * Called when a line terminator has been processed.  This
+     * implementation only traces the terminator when scanner debugging
+     * is enabled.
+     *
+     * @param pos    start position of the line terminator
+     * @param endPos end position of the line terminator
+     */
+    protected void processLineTerminator(int pos, int endPos) {
+        if (!scannerDebug) {
+            return;
+        }
+        String raw = new String(reader.getRawCharacters(pos, endPos));
+        System.out.println("processTerminator(" + pos
+                           + "," + endPos + ")=|" +
+                           raw
+                           + "|");
+    }
+
+    /** Build a map for translating between line numbers and
+     * positions in the input.
+     *
+     * @return a LineMap */
+    public Position.LineMap getLineMap() {
+        return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
+    }
+
+
+    /**
+    * Basic implementation of Comment, backed by a reader over the raw
+    * characters of a single comment.  It records the comment style and
+    * determines lazily, on the first call to isDeprecated(), whether a
+    * javadoc comment carries a {@code @deprecated} tag.  getText() and
+    * getSourcePos() are not supported here and return null and -1.
+    */
+    protected static class BasicComment<U extends UnicodeReader> implements Comment {
+
+        CommentStyle cs; // the style this comment was created with
+        U comment_reader; // reader over the raw characters of the comment
+
+        protected boolean deprecatedFlag = false; // set by scanDocComment() when a deprecated tag is found
+        protected boolean scanned = false; // set once scanDocComment() has run
+        /** Create a comment of the given style over the given reader. */
+        protected BasicComment(U comment_reader, CommentStyle cs) {
+            this.comment_reader = comment_reader;
+            this.cs = cs;
+        }
+        /** Not supported by this basic implementation; always returns null. */
+        public String getText() {
+            return null;
+        }
+        /** Not supported by this basic implementation; always returns -1. */
+        public int getSourcePos(int pos) {
+            return -1;
+        }
+        /** Return the style this comment was created with. */
+        public CommentStyle getStyle() {
+            return cs;
+        }
+        /** Return the deprecated flag; a JAVADOC comment is scanned for the tag on the first call. */
+        public boolean isDeprecated() {
+            if (!scanned && cs == CommentStyle.JAVADOC) {
+                scanDocComment();
+            }
+            return deprecatedFlag;
+        }
+        /**
+         * Scan a documentation comment; determine if a deprecated tag is present.
+         * The three characters of the comment opener are skipped first.  The
+         * comment is then processed line by line: leading whitespace, stars and
+         * whitespace after the stars are skipped, and the rest of the line is
+         * compared with the text of the deprecated tag.  The tag only counts when
+         * it is followed by whitespace or by the end of the comment.  Scanning
+         * stops at the end of the comment or of the input; the scanned field is
+         * set in every case.
+         */
+        @SuppressWarnings("fallthrough")
+        protected void scanDocComment() {
+            try {
+                boolean deprecatedPrefix = false; // true while the current line starts with the complete tag text
+
+                comment_reader.bp += 3; // '/**'
+                comment_reader.ch = comment_reader.buf[comment_reader.bp];
+
+                forEachLine:
+                while (comment_reader.bp < comment_reader.buflen) {
+
+                    // Skip optional WhiteSpace at beginning of line
+                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
+                        comment_reader.scanCommentChar();
+                    }
+
+                    // Skip optional consecutive Stars
+                    while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
+                        comment_reader.scanCommentChar();
+                        if (comment_reader.ch == '/') { // end of the comment
+                            return;
+                        }
+                    }
+
+                    // Skip optional WhiteSpace after Stars
+                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
+                        comment_reader.scanCommentChar();
+                    }
+
+                    deprecatedPrefix = false;
+                    // At beginning of line in the JavaDoc sense.
+                    if (!deprecatedFlag) {
+                        String deprecated = "@deprecated";
+                        int i = 0; // number of characters of the tag text matched so far
+                        while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
+                            comment_reader.scanCommentChar();
+                            i++;
+                            if (i == deprecated.length()) {
+                                deprecatedPrefix = true;
+                                break;
+                            }
+                        }
+                    }
+                    // The tag text only counts when followed by whitespace or by the end of the comment.
+                    if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
+                        if (Character.isWhitespace(comment_reader.ch)) {
+                            deprecatedFlag = true;
+                        } else if (comment_reader.ch == '*') {
+                            comment_reader.scanCommentChar();
+                            if (comment_reader.ch == '/') {
+                                deprecatedFlag = true;
+                                return;
+                            }
+                        }
+                    }
+
+                    // Skip rest of line
+                    while (comment_reader.bp < comment_reader.buflen) {
+                        switch (comment_reader.ch) {
+                            case '*':
+                                comment_reader.scanCommentChar();
+                                if (comment_reader.ch == '/') { // end of the comment
+                                    return;
+                                }
+                                break;
+                            case CR: // (Spec 3.4)
+                                comment_reader.scanCommentChar();
+                                if (comment_reader.ch != LF) { // a lone CR ends the line
+                                    continue forEachLine;
+                                }
+                            /* fall through to LF case */
+                            case LF: // (Spec 3.4)
+                                comment_reader.scanCommentChar();
+                                continue forEachLine;
+                            default:
+                                comment_reader.scanCommentChar();
+                        }
+                    } // rest of line
+                } // forEachLine
+                return;
+            } finally {
+                scanned = true; // set on every exit path so that isDeprecated() does not scan again
+            }
+        }
+    }
+}