langtools/src/share/classes/com/sun/tools/javac/parser/DocCommentScanner.java
changeset 10821 5ec6698ec5a9
parent 10809 8311968cdce7
parent 10820 08597fb7c902
child 10822 0294e016d9b1
equal deleted inserted replaced
10809:8311968cdce7 10821:5ec6698ec5a9
     1 /*
       
     2  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package com.sun.tools.javac.parser;
       
    27 
       
    28 import java.nio.*;
       
    29 
       
    30 import com.sun.tools.javac.util.*;
       
    31 import static com.sun.tools.javac.util.LayoutCharacters.*;
       
    32 
       
    33 /** An extension to the base lexical analyzer that captures
       
    34  *  and processes the contents of doc comments.  It does so by
       
    35  *  translating Unicode escape sequences and by stripping the
       
    36  *  leading whitespace and starts from each line of the comment.
       
    37  *
       
    38  *  <p><b>This is NOT part of any supported API.
       
    39  *  If you write code that depends on this, you do so at your own risk.
       
    40  *  This code and its internal interfaces are subject to change or
       
    41  *  deletion without notice.</b>
       
    42  */
       
    43 public class DocCommentScanner extends Scanner {
       
    44 
       
    45     /** Create a scanner from the input buffer.  buffer must implement
       
    46      *  array() and compact(), and remaining() must be less than limit().
       
    47      */
       
    48     protected DocCommentScanner(ScannerFactory fac, CharBuffer buffer) {
       
    49         super(fac, buffer);
       
    50     }
       
    51 
       
    52     /** Create a scanner from the input array.  The array must have at
       
    53      *  least a single character of extra space.
       
    54      */
       
    55     protected DocCommentScanner(ScannerFactory fac, char[] input, int inputLength) {
       
    56         super(fac, input, inputLength);
       
    57     }
       
    58 
       
    59     /** Starting position of the comment in original source
       
    60      */
       
    61     private int pos;
       
    62 
       
    63     /** The comment input buffer, index of next chacter to be read,
       
    64      *  index of one past last character in buffer.
       
    65      */
       
    66     private char[] buf;
       
    67     private int bp;
       
    68     private int buflen;
       
    69 
       
    70     /** The current character.
       
    71      */
       
    72     private char ch;
       
    73 
       
    74     /** The column number position of the current character.
       
    75      */
       
    76     private int col;
       
    77 
       
    78     /** The buffer index of the last converted Unicode character
       
    79      */
       
    80     private int unicodeConversionBp = 0;
       
    81 
       
    82     /**
       
    83      * Buffer for doc comment.
       
    84      */
       
    85     private char[] docCommentBuffer = new char[1024];
       
    86 
       
    87     /**
       
    88      * Number of characters in doc comment buffer.
       
    89      */
       
    90     private int docCommentCount;
       
    91 
       
    92     /**
       
    93      * Translated and stripped contents of doc comment
       
    94      */
       
    95     private String docComment = null;
       
    96 
       
    97 
       
    98     /** Unconditionally expand the comment buffer.
       
    99      */
       
   100     private void expandCommentBuffer() {
       
   101         char[] newBuffer = new char[docCommentBuffer.length * 2];
       
   102         System.arraycopy(docCommentBuffer, 0, newBuffer,
       
   103                          0, docCommentBuffer.length);
       
   104         docCommentBuffer = newBuffer;
       
   105     }
       
   106 
       
   107     /** Convert an ASCII digit from its base (8, 10, or 16)
       
   108      *  to its value.
       
   109      */
       
   110     private int digit(int base) {
       
   111         char c = ch;
       
   112         int result = Character.digit(c, base);
       
   113         if (result >= 0 && c > 0x7f) {
       
   114             ch = "0123456789abcdef".charAt(result);
       
   115         }
       
   116         return result;
       
   117     }
       
   118 
       
   119     /** Convert Unicode escape; bp points to initial '\' character
       
   120      *  (Spec 3.3).
       
   121      */
       
   122     private void convertUnicode() {
       
   123         if (ch == '\\' && unicodeConversionBp != bp) {
       
   124             bp++; ch = buf[bp]; col++;
       
   125             if (ch == 'u') {
       
   126                 do {
       
   127                     bp++; ch = buf[bp]; col++;
       
   128                 } while (ch == 'u');
       
   129                 int limit = bp + 3;
       
   130                 if (limit < buflen) {
       
   131                     int d = digit(16);
       
   132                     int code = d;
       
   133                     while (bp < limit && d >= 0) {
       
   134                         bp++; ch = buf[bp]; col++;
       
   135                         d = digit(16);
       
   136                         code = (code << 4) + d;
       
   137                     }
       
   138                     if (d >= 0) {
       
   139                         ch = (char)code;
       
   140                         unicodeConversionBp = bp;
       
   141                         return;
       
   142                     }
       
   143                 }
       
   144                 // "illegal.Unicode.esc", reported by base scanner
       
   145             } else {
       
   146                 bp--;
       
   147                 ch = '\\';
       
   148                 col--;
       
   149             }
       
   150         }
       
   151     }
       
   152 
       
   153 
       
   154     /** Read next character.
       
   155      */
       
   156     private void scanChar() {
       
   157         bp++;
       
   158         ch = buf[bp];
       
   159         switch (ch) {
       
   160         case '\r': // return
       
   161             col = 0;
       
   162             break;
       
   163         case '\n': // newline
       
   164             if (bp == 0 || buf[bp-1] != '\r') {
       
   165                 col = 0;
       
   166             }
       
   167             break;
       
   168         case '\t': // tab
       
   169             col = (col / TabInc * TabInc) + TabInc;
       
   170             break;
       
   171         case '\\': // possible Unicode
       
   172             col++;
       
   173             convertUnicode();
       
   174             break;
       
   175         default:
       
   176             col++;
       
   177             break;
       
   178         }
       
   179     }
       
   180 
       
   181     /**
       
   182      * Read next character in doc comment, skipping over double '\' characters.
       
   183      * If a double '\' is skipped, put in the buffer and update buffer count.
       
   184      */
       
   185     private void scanDocCommentChar() {
       
   186         scanChar();
       
   187         if (ch == '\\') {
       
   188             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
       
   189                 if (docCommentCount == docCommentBuffer.length)
       
   190                     expandCommentBuffer();
       
   191                 docCommentBuffer[docCommentCount++] = ch;
       
   192                 bp++; col++;
       
   193             } else {
       
   194                 convertUnicode();
       
   195             }
       
   196         }
       
   197     }
       
   198 
       
   199     /* Reset doc comment before reading each new token
       
   200      */
       
   201     public void nextToken() {
       
   202         docComment = null;
       
   203         super.nextToken();
       
   204     }
       
   205 
       
   206     /**
       
   207      * Returns the documentation string of the current token.
       
   208      */
       
   209     public String docComment() {
       
   210         return docComment;
       
   211     }
       
   212 
       
   213     /**
       
   214      * Process a doc comment and make the string content available.
       
   215      * Strips leading whitespace and stars.
       
   216      */
       
   217     @SuppressWarnings("fallthrough")
       
   218     protected void processComment(CommentStyle style) {
       
   219         if (style != CommentStyle.JAVADOC) {
       
   220             return;
       
   221         }
       
   222 
       
   223         pos = pos();
       
   224         buf = getRawCharacters(pos, endPos());
       
   225         buflen = buf.length;
       
   226         bp = 0;
       
   227         col = 0;
       
   228 
       
   229         docCommentCount = 0;
       
   230 
       
   231         boolean firstLine = true;
       
   232 
       
   233         // Skip over first slash
       
   234         scanDocCommentChar();
       
   235         // Skip over first star
       
   236         scanDocCommentChar();
       
   237 
       
   238         // consume any number of stars
       
   239         while (bp < buflen && ch == '*') {
       
   240             scanDocCommentChar();
       
   241         }
       
   242         // is the comment in the form /**/, /***/, /****/, etc. ?
       
   243         if (bp < buflen && ch == '/') {
       
   244             docComment = "";
       
   245             return;
       
   246         }
       
   247 
       
   248         // skip a newline on the first line of the comment.
       
   249         if (bp < buflen) {
       
   250             if (ch == LF) {
       
   251                 scanDocCommentChar();
       
   252                 firstLine = false;
       
   253             } else if (ch == CR) {
       
   254                 scanDocCommentChar();
       
   255                 if (ch == LF) {
       
   256                     scanDocCommentChar();
       
   257                     firstLine = false;
       
   258                 }
       
   259             }
       
   260         }
       
   261 
       
   262     outerLoop:
       
   263 
       
   264         // The outerLoop processes the doc comment, looping once
       
   265         // for each line.  For each line, it first strips off
       
   266         // whitespace, then it consumes any stars, then it
       
   267         // puts the rest of the line into our buffer.
       
   268         while (bp < buflen) {
       
   269 
       
   270             // The wsLoop consumes whitespace from the beginning
       
   271             // of each line.
       
   272         wsLoop:
       
   273 
       
   274             while (bp < buflen) {
       
   275                 switch(ch) {
       
   276                 case ' ':
       
   277                     scanDocCommentChar();
       
   278                     break;
       
   279                 case '\t':
       
   280                     col = ((col - 1) / TabInc * TabInc) + TabInc;
       
   281                     scanDocCommentChar();
       
   282                     break;
       
   283                 case FF:
       
   284                     col = 0;
       
   285                     scanDocCommentChar();
       
   286                     break;
       
   287 // Treat newline at beginning of line (blank line, no star)
       
   288 // as comment text.  Old Javadoc compatibility requires this.
       
   289 /*---------------------------------*
       
   290                 case CR: // (Spec 3.4)
       
   291                     scanDocCommentChar();
       
   292                     if (ch == LF) {
       
   293                         col = 0;
       
   294                         scanDocCommentChar();
       
   295                     }
       
   296                     break;
       
   297                 case LF: // (Spec 3.4)
       
   298                     scanDocCommentChar();
       
   299                     break;
       
   300 *---------------------------------*/
       
   301                 default:
       
   302                     // we've seen something that isn't whitespace;
       
   303                     // jump out.
       
   304                     break wsLoop;
       
   305                 }
       
   306             }
       
   307 
       
   308             // Are there stars here?  If so, consume them all
       
   309             // and check for the end of comment.
       
   310             if (ch == '*') {
       
   311                 // skip all of the stars
       
   312                 do {
       
   313                     scanDocCommentChar();
       
   314                 } while (ch == '*');
       
   315 
       
   316                 // check for the closing slash.
       
   317                 if (ch == '/') {
       
   318                     // We're done with the doc comment
       
   319                     // scanChar() and breakout.
       
   320                     break outerLoop;
       
   321                 }
       
   322             } else if (! firstLine) {
       
   323                 //The current line does not begin with a '*' so we will indent it.
       
   324                 for (int i = 1; i < col; i++) {
       
   325                     if (docCommentCount == docCommentBuffer.length)
       
   326                         expandCommentBuffer();
       
   327                     docCommentBuffer[docCommentCount++] = ' ';
       
   328                 }
       
   329             }
       
   330 
       
   331             // The textLoop processes the rest of the characters
       
   332             // on the line, adding them to our buffer.
       
   333         textLoop:
       
   334             while (bp < buflen) {
       
   335                 switch (ch) {
       
   336                 case '*':
       
   337                     // Is this just a star?  Or is this the
       
   338                     // end of a comment?
       
   339                     scanDocCommentChar();
       
   340                     if (ch == '/') {
       
   341                         // This is the end of the comment,
       
   342                         // set ch and return our buffer.
       
   343                         break outerLoop;
       
   344                     }
       
   345                     // This is just an ordinary star.  Add it to
       
   346                     // the buffer.
       
   347                     if (docCommentCount == docCommentBuffer.length)
       
   348                         expandCommentBuffer();
       
   349                     docCommentBuffer[docCommentCount++] = '*';
       
   350                     break;
       
   351                 case ' ':
       
   352                 case '\t':
       
   353                     if (docCommentCount == docCommentBuffer.length)
       
   354                         expandCommentBuffer();
       
   355                     docCommentBuffer[docCommentCount++] = ch;
       
   356                     scanDocCommentChar();
       
   357                     break;
       
   358                 case FF:
       
   359                     scanDocCommentChar();
       
   360                     break textLoop; // treat as end of line
       
   361                 case CR: // (Spec 3.4)
       
   362                     scanDocCommentChar();
       
   363                     if (ch != LF) {
       
   364                         // Canonicalize CR-only line terminator to LF
       
   365                         if (docCommentCount == docCommentBuffer.length)
       
   366                             expandCommentBuffer();
       
   367                         docCommentBuffer[docCommentCount++] = (char)LF;
       
   368                         break textLoop;
       
   369                     }
       
   370                     /* fall through to LF case */
       
   371                 case LF: // (Spec 3.4)
       
   372                     // We've seen a newline.  Add it to our
       
   373                     // buffer and break out of this loop,
       
   374                     // starting fresh on a new line.
       
   375                     if (docCommentCount == docCommentBuffer.length)
       
   376                         expandCommentBuffer();
       
   377                     docCommentBuffer[docCommentCount++] = ch;
       
   378                     scanDocCommentChar();
       
   379                     break textLoop;
       
   380                 default:
       
   381                     // Add the character to our buffer.
       
   382                     if (docCommentCount == docCommentBuffer.length)
       
   383                         expandCommentBuffer();
       
   384                     docCommentBuffer[docCommentCount++] = ch;
       
   385                     scanDocCommentChar();
       
   386                 }
       
   387             } // end textLoop
       
   388             firstLine = false;
       
   389         } // end outerLoop
       
   390 
       
   391         if (docCommentCount > 0) {
       
   392             int i = docCommentCount - 1;
       
   393         trailLoop:
       
   394             while (i > -1) {
       
   395                 switch (docCommentBuffer[i]) {
       
   396                 case '*':
       
   397                     i--;
       
   398                     break;
       
   399                 default:
       
   400                     break trailLoop;
       
   401                 }
       
   402             }
       
   403             docCommentCount = i + 1;
       
   404 
       
   405             // Store the text of the doc comment
       
   406             docComment = new String(docCommentBuffer, 0 , docCommentCount);
       
   407         } else {
       
   408             docComment = "";
       
   409         }
       
   410     }
       
   411 
       
   412     /** Build a map for translating between line numbers and
       
   413      * positions in the input.
       
   414      *
       
   415      * @return a LineMap */
       
   416     public Position.LineMap getLineMap() {
       
   417         char[] buf = getRawCharacters();
       
   418         return Position.makeLineMap(buf, buf.length, true);
       
   419     }
       
   420 }