langtools/src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java
changeset 10948 063463f6535f
parent 10815 a719aa5f1631
child 11144 8a4ae514eedf
--- a/langtools/src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java	Tue Nov 01 15:49:45 2011 -0700
+++ b/langtools/src/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java	Fri Nov 04 12:36:40 2011 +0000
@@ -25,10 +25,11 @@
 
 package com.sun.tools.javac.parser;
 
-import java.nio.CharBuffer;
 import com.sun.tools.javac.code.Source;
+import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
 import com.sun.tools.javac.util.*;
 
+import java.nio.CharBuffer;
 
 import static com.sun.tools.javac.parser.Tokens.*;
 import static com.sun.tools.javac.util.LayoutCharacters.*;
@@ -65,9 +66,6 @@
      */
     private final Log log;
 
-    /** The name table. */
-    private final Names names;
-
     /** The token factory. */
     private final Tokens tokens;
 
@@ -87,17 +85,11 @@
      */
     protected int errPos = Position.NOPOS;
 
-    /** Has a @deprecated been encountered in last doc comment?
-     *  this needs to be reset by client.
+    /** The Unicode reader (low-level stream reader).
      */
-    protected boolean deprecatedFlag = false;
+    protected UnicodeReader reader;
 
-    /** A character buffer for saved chars.
-     */
-    protected char[] sbuf = new char[128];
-    protected int sp;
-
-    protected UnicodeReader reader;
+    protected ScannerFactory fac;
 
     private static final boolean hexFloatsWork = hexFloatsWork();
     private static boolean hexFloatsWork() {
@@ -129,14 +121,14 @@
     }
 
     protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
-        log = fac.log;
-        names = fac.names;
-        tokens = fac.tokens;
-        source = fac.source;
+        this.fac = fac;
+        this.log = fac.log;
+        this.tokens = fac.tokens;
+        this.source = fac.source;
         this.reader = reader;
-        allowBinaryLiterals = source.allowBinaryLiterals();
-        allowHexFloats = source.allowHexFloats();
-        allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
+        this.allowBinaryLiterals = source.allowBinaryLiterals();
+        this.allowHexFloats = source.allowHexFloats();
+        this.allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
     }
 
     /** Report an error at the given position using the provided arguments.
@@ -147,38 +139,13 @@
         errPos = pos;
     }
 
-    /** Read next character in comment, skipping over double '\' characters.
-     */
-    protected void scanCommentChar() {
-        reader.scanChar();
-        if (reader.ch == '\\') {
-            if (reader.peekChar() == '\\' && !reader.isUnicode()) {
-                reader.skipChar();
-            } else {
-                reader.convertUnicode();
-            }
-        }
-    }
-
-    /** Append a character to sbuf.
-     */
-    private void putChar(char ch) {
-        if (sp == sbuf.length) {
-            char[] newsbuf = new char[sbuf.length * 2];
-            System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
-            sbuf = newsbuf;
-        }
-        sbuf[sp++] = ch;
-    }
-
     /** Read next character in character or string literal and copy into sbuf.
      */
     private void scanLitChar(int pos) {
         if (reader.ch == '\\') {
             if (reader.peekChar() == '\\' && !reader.isUnicode()) {
                 reader.skipChar();
-                putChar('\\');
-                reader.scanChar();
+                reader.putChar('\\', true);
             } else {
                 reader.scanChar();
                 switch (reader.ch) {
@@ -195,30 +162,30 @@
                             reader.scanChar();
                         }
                     }
-                    putChar((char)oct);
+                    reader.putChar((char)oct);
                     break;
                 case 'b':
-                    putChar('\b'); reader.scanChar(); break;
+                    reader.putChar('\b', true); break;
                 case 't':
-                    putChar('\t'); reader.scanChar(); break;
+                    reader.putChar('\t', true); break;
                 case 'n':
-                    putChar('\n'); reader.scanChar(); break;
+                    reader.putChar('\n', true); break;
                 case 'f':
-                    putChar('\f'); reader.scanChar(); break;
+                    reader.putChar('\f', true); break;
                 case 'r':
-                    putChar('\r'); reader.scanChar(); break;
+                    reader.putChar('\r', true); break;
                 case '\'':
-                    putChar('\''); reader.scanChar(); break;
+                    reader.putChar('\'', true); break;
                 case '\"':
-                    putChar('\"'); reader.scanChar(); break;
+                    reader.putChar('\"', true); break;
                 case '\\':
-                    putChar('\\'); reader.scanChar(); break;
+                    reader.putChar('\\', true); break;
                 default:
                     lexError(reader.bp, "illegal.esc.char");
                 }
             }
         } else if (reader.bp != reader.buflen) {
-            putChar(reader.ch); reader.scanChar();
+            reader.putChar(true);
         }
     }
 
@@ -227,7 +194,7 @@
         int savePos;
         do {
             if (reader.ch != '_') {
-                putChar(reader.ch);
+                reader.putChar(false);
             } else {
                 if (!allowUnderscoresInLiterals) {
                     lexError(pos, "unsupported.underscore.lit", source.name);
@@ -246,12 +213,10 @@
      */
     private void scanHexExponentAndSuffix(int pos) {
         if (reader.ch == 'p' || reader.ch == 'P') {
-            putChar(reader.ch);
-            reader.scanChar();
+            reader.putChar(true);
             skipIllegalUnderscores();
             if (reader.ch == '+' || reader.ch == '-') {
-                putChar(reader.ch);
-                reader.scanChar();
+                reader.putChar(true);
             }
             skipIllegalUnderscores();
             if ('0' <= reader.ch && reader.ch <= '9') {
@@ -268,14 +233,12 @@
             lexError(pos, "malformed.fp.lit");
         }
         if (reader.ch == 'f' || reader.ch == 'F') {
-            putChar(reader.ch);
-            reader.scanChar();
+            reader.putChar(true);
             tk = TokenKind.FLOATLITERAL;
             radix = 16;
         } else {
             if (reader.ch == 'd' || reader.ch == 'D') {
-                putChar(reader.ch);
-                reader.scanChar();
+                reader.putChar(true);
             }
             tk = TokenKind.DOUBLELITERAL;
             radix = 16;
@@ -289,14 +252,12 @@
         if ('0' <= reader.ch && reader.ch <= '9') {
             scanDigits(pos, 10);
         }
-        int sp1 = sp;
+        int sp1 = reader.sp;
         if (reader.ch == 'e' || reader.ch == 'E') {
-            putChar(reader.ch);
-            reader.scanChar();
+            reader.putChar(true);
             skipIllegalUnderscores();
             if (reader.ch == '+' || reader.ch == '-') {
-                putChar(reader.ch);
-                reader.scanChar();
+                reader.putChar(true);
             }
             skipIllegalUnderscores();
             if ('0' <= reader.ch && reader.ch <= '9') {
@@ -304,7 +265,7 @@
                 return;
             }
             lexError(pos, "malformed.fp.lit");
-            sp = sp1;
+            reader.sp = sp1;
         }
     }
 
@@ -314,13 +275,11 @@
         radix = 10;
         scanFraction(pos);
         if (reader.ch == 'f' || reader.ch == 'F') {
-            putChar(reader.ch);
-            reader.scanChar();
+            reader.putChar(true);
             tk = TokenKind.FLOATLITERAL;
         } else {
             if (reader.ch == 'd' || reader.ch == 'D') {
-                putChar(reader.ch);
-                reader.scanChar();
+                reader.putChar(true);
             }
             tk = TokenKind.DOUBLELITERAL;
         }
@@ -331,8 +290,7 @@
     private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
         radix = 16;
         Assert.check(reader.ch == '.');
-        putChar(reader.ch);
-        reader.scanChar();
+        reader.putChar(true);
         skipIllegalUnderscores();
         if (reader.digit(pos, 16) >= 0) {
             seendigit = true;
@@ -369,8 +327,7 @@
         } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
             scanHexExponentAndSuffix(pos);
         } else if (digitRadix == 10 && reader.ch == '.') {
-            putChar(reader.ch);
-            reader.scanChar();
+            reader.putChar(true);
             scanFractionAndSuffix(pos);
         } else if (digitRadix == 10 &&
                    (reader.ch == 'e' || reader.ch == 'E' ||
@@ -393,10 +350,7 @@
         boolean isJavaIdentifierPart;
         char high;
         do {
-            if (sp == sbuf.length) putChar(reader.ch); else sbuf[sp++] = reader.ch;
-            // optimization, was: putChar(reader.ch);
-
-            reader.scanChar();
+            reader.putChar(true);
             switch (reader.ch) {
             case 'A': case 'B': case 'C': case 'D': case 'E':
             case 'F': case 'G': case 'H': case 'I': case 'J':
@@ -423,7 +377,7 @@
                 break;
             case '\u001A': // EOI is also a legal identifier part
                 if (reader.bp >= reader.buflen) {
-                    name = names.fromChars(sbuf, 0, sp);
+                    name = reader.name();
                     tk = tokens.lookupKind(name);
                     return;
                 }
@@ -435,11 +389,7 @@
                 } else {
                     high = reader.scanSurrogates();
                     if (high != 0) {
-                        if (sp == sbuf.length) {
-                            putChar(high);
-                        } else {
-                            sbuf[sp++] = high;
-                        }
+                        reader.putChar(high);
                         isJavaIdentifierPart = Character.isJavaIdentifierPart(
                             Character.toCodePoint(high, reader.ch));
                     } else {
@@ -447,7 +397,7 @@
                     }
                 }
                 if (!isJavaIdentifierPart) {
-                    name = names.fromChars(sbuf, 0, sp);
+                    name = reader.name();
                     tk = tokens.lookupKind(name);
                     return;
                 }
@@ -474,11 +424,11 @@
      */
     private void scanOperator() {
         while (true) {
-            putChar(reader.ch);
-            Name newname = names.fromChars(sbuf, 0, sp);
+            reader.putChar(false);
+            Name newname = reader.name();
             TokenKind tk1 = tokens.lookupKind(newname);
             if (tk1 == TokenKind.IDENTIFIER) {
-                sp--;
+                reader.sp--;
                 break;
             }
             tk = tk1;
@@ -487,111 +437,17 @@
         }
     }
 
-    /**
-     * Scan a documentation comment; determine if a deprecated tag is present.
-     * Called once the initial /, * have been skipped, positioned at the second *
-     * (which is treated as the beginning of the first line).
-     * Stops positioned at the closing '/'.
-     */
-    @SuppressWarnings("fallthrough")
-    private void scanDocComment() {
-        boolean deprecatedPrefix = false;
-
-        forEachLine:
-        while (reader.bp < reader.buflen) {
-
-            // Skip optional WhiteSpace at beginning of line
-            while (reader.bp < reader.buflen && (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF)) {
-                scanCommentChar();
-            }
-
-            // Skip optional consecutive Stars
-            while (reader.bp < reader.buflen && reader.ch == '*') {
-                scanCommentChar();
-                if (reader.ch == '/') {
-                    return;
-                }
-            }
-
-            // Skip optional WhiteSpace after Stars
-            while (reader.bp < reader.buflen && (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF)) {
-                scanCommentChar();
-            }
-
-            deprecatedPrefix = false;
-            // At beginning of line in the JavaDoc sense.
-            if (reader.bp < reader.buflen && reader.ch == '@' && !deprecatedFlag) {
-                scanCommentChar();
-                if (reader.bp < reader.buflen && reader.ch == 'd') {
-                    scanCommentChar();
-                    if (reader.bp < reader.buflen && reader.ch == 'e') {
-                        scanCommentChar();
-                        if (reader.bp < reader.buflen && reader.ch == 'p') {
-                            scanCommentChar();
-                            if (reader.bp < reader.buflen && reader.ch == 'r') {
-                                scanCommentChar();
-                                if (reader.bp < reader.buflen && reader.ch == 'e') {
-                                    scanCommentChar();
-                                    if (reader.bp < reader.buflen && reader.ch == 'c') {
-                                        scanCommentChar();
-                                        if (reader.bp < reader.buflen && reader.ch == 'a') {
-                                            scanCommentChar();
-                                            if (reader.bp < reader.buflen && reader.ch == 't') {
-                                                scanCommentChar();
-                                                if (reader.bp < reader.buflen && reader.ch == 'e') {
-                                                    scanCommentChar();
-                                                    if (reader.bp < reader.buflen && reader.ch == 'd') {
-                                                        deprecatedPrefix = true;
-                                                        scanCommentChar();
-                                                    }}}}}}}}}}}
-            if (deprecatedPrefix && reader.bp < reader.buflen) {
-                if (Character.isWhitespace(reader.ch)) {
-                    deprecatedFlag = true;
-                } else if (reader.ch == '*') {
-                    scanCommentChar();
-                    if (reader.ch == '/') {
-                        deprecatedFlag = true;
-                        return;
-                    }
-                }
-            }
-
-            // Skip rest of line
-            while (reader.bp < reader.buflen) {
-                switch (reader.ch) {
-                case '*':
-                    scanCommentChar();
-                    if (reader.ch == '/') {
-                        return;
-                    }
-                    break;
-                case CR: // (Spec 3.4)
-                    scanCommentChar();
-                    if (reader.ch != LF) {
-                        continue forEachLine;
-                    }
-                    /* fall through to LF case */
-                case LF: // (Spec 3.4)
-                    scanCommentChar();
-                    continue forEachLine;
-                default:
-                    scanCommentChar();
-                }
-            } // rest of line
-        } // forEachLine
-        return;
-    }
-
     /** Read token.
      */
     public Token readToken() {
 
-        sp = 0;
+        reader.sp = 0;
         name = null;
-        deprecatedFlag = false;
         radix = 0;
+
         int pos = 0;
         int endPos = 0;
+        List<Comment> comments = null;
 
         try {
             loop: while (true) {
@@ -656,7 +512,7 @@
                             scanNumber(pos, 2);
                         }
                     } else {
-                        putChar('0');
+                        reader.putChar('0');
                         if (reader.ch == '_') {
                             int savePos = reader.bp;
                             do {
@@ -676,14 +532,13 @@
                 case '.':
                     reader.scanChar();
                     if ('0' <= reader.ch && reader.ch <= '9') {
-                        putChar('.');
+                        reader.putChar('.');
                         scanFractionAndSuffix(pos);
                     } else if (reader.ch == '.') {
-                        putChar('.'); putChar('.');
-                        reader.scanChar();
+                        reader.putChar('.'); reader.putChar('.', true);
                         if (reader.ch == '.') {
                             reader.scanChar();
-                            putChar('.');
+                            reader.putChar('.');
                             tk = TokenKind.ELLIPSIS;
                         } else {
                             lexError(pos, "malformed.fp.lit");
@@ -712,32 +567,36 @@
                     reader.scanChar();
                     if (reader.ch == '/') {
                         do {
-                            scanCommentChar();
+                            reader.scanCommentChar();
                         } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
                         if (reader.bp < reader.buflen) {
-                            processComment(pos, reader.bp, CommentStyle.LINE);
+                            comments = addDocReader(comments, processComment(pos, reader.bp, CommentStyle.LINE));
                         }
                         break;
                     } else if (reader.ch == '*') {
+                        boolean isEmpty = false;
                         reader.scanChar();
                         CommentStyle style;
                         if (reader.ch == '*') {
                             style = CommentStyle.JAVADOC;
-                            scanDocComment();
+                            reader.scanCommentChar();
+                            if (reader.ch == '/') {
+                                isEmpty = true;
+                            }
                         } else {
                             style = CommentStyle.BLOCK;
-                            while (reader.bp < reader.buflen) {
-                                if (reader.ch == '*') {
-                                    reader.scanChar();
-                                    if (reader.ch == '/') break;
-                                } else {
-                                    scanCommentChar();
-                                }
+                        }
+                        while (!isEmpty && reader.bp < reader.buflen) {
+                            if (reader.ch == '*') {
+                                reader.scanChar();
+                                if (reader.ch == '/') break;
+                            } else {
+                                reader.scanCommentChar();
                             }
                         }
                         if (reader.ch == '/') {
                             reader.scanChar();
-                            processComment(pos, reader.bp, style);
+                            comments = addDocReader(comments, processComment(pos, reader.bp, style));
                             break;
                         } else {
                             lexError(pos, "unclosed.comment");
@@ -789,11 +648,7 @@
                         } else {
                             char high = reader.scanSurrogates();
                             if (high != 0) {
-                                if (sp == sbuf.length) {
-                                    putChar(high);
-                                } else {
-                                    sbuf[sp++] = high;
-                                }
+                                reader.putChar(high);
 
                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(
                                     Character.toCodePoint(high, reader.ch));
@@ -816,10 +671,10 @@
             }
             endPos = reader.bp;
             switch (tk.tag) {
-                case DEFAULT: return new Token(tk, pos, endPos, deprecatedFlag);
-                case NAMED: return new NamedToken(tk, pos, endPos, name, deprecatedFlag);
-                case STRING: return new StringToken(tk, pos, endPos, new String(sbuf, 0, sp), deprecatedFlag);
-                case NUMERIC: return new NumericToken(tk, pos, endPos, new String(sbuf, 0, sp), radix, deprecatedFlag);
+                case DEFAULT: return new Token(tk, pos, endPos, comments);
+                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
+                case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
+                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
                 default: throw new AssertionError();
             }
         }
@@ -832,6 +687,12 @@
             }
         }
     }
+    //where
+        List<Comment> addDocReader(List<Comment> docReaders, Comment docReader) {
+            return docReaders == null ?
+                    List.of(docReader) :
+                    docReaders.prepend(docReader);
+        }
 
     /** Return the position where a lexical error occurred;
      */
@@ -845,22 +706,18 @@
         errPos = pos;
     }
 
-    public enum CommentStyle {
-        LINE,
-        BLOCK,
-        JAVADOC,
-    }
-
     /**
      * Called when a complete comment has been scanned. pos and endPos
      * will mark the comment boundary.
      */
-    protected void processComment(int pos, int endPos, CommentStyle style) {
+    protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
         if (scannerDebug)
             System.out.println("processComment(" + pos
                                + "," + endPos + "," + style + ")=|"
                                + new String(reader.getRawCharacters(pos, endPos))
                                + "|");
+        char[] buf = reader.getRawCharacters(pos, endPos);
+        return new BasicComment<UnicodeReader>(new UnicodeReader(fac, buf, buf.length), style);
     }
 
     /**
@@ -893,4 +750,125 @@
     public Position.LineMap getLineMap() {
         return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
     }
+
+
+    /**
+    * Scan a documentation comment; determine if a deprecated tag is present.
+    * Called once the initial /, * have been skipped, positioned at the second *
+    * (which is treated as the beginning of the first line).
+    * Stops positioned at the closing '/'.
+    */
+    protected class BasicComment<U extends UnicodeReader> implements Comment {
+
+        CommentStyle cs;
+        U comment_reader;
+
+        protected boolean deprecatedFlag = false;
+        protected boolean scanned = false;
+
+        protected BasicComment(U comment_reader, CommentStyle cs) {
+            this.comment_reader = comment_reader;
+            this.cs = cs;
+        }
+
+        public String getText() {
+            return null;
+        }
+
+        public CommentStyle getStyle() {
+            return cs;
+        }
+
+        public boolean isDeprecated() {
+            if (!scanned && cs == CommentStyle.JAVADOC) {
+                scanDocComment();
+            }
+            return deprecatedFlag;
+        }
+
+        @SuppressWarnings("fallthrough")
+        protected void scanDocComment() {
+            try {
+                boolean deprecatedPrefix = false;
+
+                comment_reader.bp += 3; // '/**'
+                comment_reader.ch = comment_reader.buf[comment_reader.bp];
+
+                forEachLine:
+                while (comment_reader.bp < comment_reader.buflen) {
+
+                    // Skip optional WhiteSpace at beginning of line
+                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
+                        comment_reader.scanCommentChar();
+                    }
+
+                    // Skip optional consecutive Stars
+                    while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
+                        comment_reader.scanCommentChar();
+                        if (comment_reader.ch == '/') {
+                            return;
+                        }
+                    }
+
+                    // Skip optional WhiteSpace after Stars
+                    while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
+                        comment_reader.scanCommentChar();
+                    }
+
+                    deprecatedPrefix = false;
+                    // At beginning of line in the JavaDoc sense.
+                    if (!deprecatedFlag) {
+                        String deprecated = "@deprecated";
+                        int i = 0;
+                        while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
+                            comment_reader.scanCommentChar();
+                            i++;
+                            if (i == deprecated.length()) {
+                                deprecatedPrefix = true;
+                                break;
+                            }
+                        }
+                    }
+
+                    if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
+                        if (Character.isWhitespace(comment_reader.ch)) {
+                            deprecatedFlag = true;
+                        } else if (comment_reader.ch == '*') {
+                            comment_reader.scanCommentChar();
+                            if (comment_reader.ch == '/') {
+                                deprecatedFlag = true;
+                                return;
+                            }
+                        }
+                    }
+
+                    // Skip rest of line
+                    while (comment_reader.bp < comment_reader.buflen) {
+                        switch (comment_reader.ch) {
+                            case '*':
+                                comment_reader.scanCommentChar();
+                                if (comment_reader.ch == '/') {
+                                    return;
+                                }
+                                break;
+                            case CR: // (Spec 3.4)
+                                comment_reader.scanCommentChar();
+                                if (comment_reader.ch != LF) {
+                                    continue forEachLine;
+                                }
+                            /* fall through to LF case */
+                            case LF: // (Spec 3.4)
+                                comment_reader.scanCommentChar();
+                                continue forEachLine;
+                            default:
+                                comment_reader.scanCommentChar();
+                        }
+                    } // rest of line
+                } // forEachLine
+                return;
+            } finally {
+                scanned = true;
+            }
+        }
+    }
 }