src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java
changeset 55263 830ca7b43b95
parent 53227 f15d443f9731
child 55700 1bb102c1cf27
equal deleted inserted replaced
55262:7d83cf1cfa74 55263:830ca7b43b95
    23  * questions.
    23  * questions.
    24  */
    24  */
    25 
    25 
    26 package com.sun.tools.javac.parser;
    26 package com.sun.tools.javac.parser;
    27 
    27 
       
    28 import com.sun.tools.javac.code.Lint;
       
    29 import com.sun.tools.javac.code.Lint.LintCategory;
    28 import com.sun.tools.javac.code.Preview;
    30 import com.sun.tools.javac.code.Preview;
    29 import com.sun.tools.javac.code.Source;
    31 import com.sun.tools.javac.code.Source;
    30 import com.sun.tools.javac.code.Source.Feature;
    32 import com.sun.tools.javac.code.Source.Feature;
    31 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
    33 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
    32 import com.sun.tools.javac.resources.CompilerProperties.Errors;
    34 import com.sun.tools.javac.resources.CompilerProperties.Errors;
       
    35 import com.sun.tools.javac.resources.CompilerProperties.Warnings;
    33 import com.sun.tools.javac.util.*;
    36 import com.sun.tools.javac.util.*;
    34 import com.sun.tools.javac.util.JCDiagnostic.DiagnosticFlag;
    37 import com.sun.tools.javac.util.JCDiagnostic.*;
    35 
    38 
       
    39 import java.lang.reflect.InvocationTargetException;
       
    40 import java.lang.reflect.Method;
    36 import java.nio.CharBuffer;
    41 import java.nio.CharBuffer;
       
    42 import java.util.HashSet;
       
    43 import java.util.Set;
    37 
    44 
    38 import static com.sun.tools.javac.parser.Tokens.*;
    45 import static com.sun.tools.javac.parser.Tokens.*;
    39 import static com.sun.tools.javac.util.LayoutCharacters.*;
    46 import static com.sun.tools.javac.util.LayoutCharacters.*;
    40 
    47 
    41 /** The lexical analyzer maps an input stream consisting of
    48 /** The lexical analyzer maps an input stream consisting of
    82 
    89 
    83     /** The Unicode reader (low-level stream reader).
    90     /** The Unicode reader (low-level stream reader).
    84      */
    91      */
    85     protected UnicodeReader reader;
    92     protected UnicodeReader reader;
    86 
    93 
       
    94     /** Should the string be stripped of indentation?
       
    95      */
       
    96     protected boolean shouldStripIndent;
       
    97 
       
    98     /** Should the string's escapes be translated?
       
    99      */
       
   100     protected boolean shouldTranslateEscapes;
       
   101 
    87     protected ScannerFactory fac;
   102     protected ScannerFactory fac;
       
   103 
       
   104     // The set of lint options currently in effect. It is initialized
       
   105     // from the context, and then is set/reset as needed by Attr as it
       
   106     // visits all the various parts of the trees during attribution.
       
   107     protected Lint lint;
    88 
   108 
    89     private static final boolean hexFloatsWork = hexFloatsWork();
   109     private static final boolean hexFloatsWork = hexFloatsWork();
    90     private static boolean hexFloatsWork() {
   110     private static boolean hexFloatsWork() {
    91         try {
   111         try {
    92             Float.valueOf("0x1.0p1");
   112             Float.valueOf("0x1.0p1");
   119         this.log = fac.log;
   139         this.log = fac.log;
   120         this.tokens = fac.tokens;
   140         this.tokens = fac.tokens;
   121         this.source = fac.source;
   141         this.source = fac.source;
   122         this.preview = fac.preview;
   142         this.preview = fac.preview;
   123         this.reader = reader;
   143         this.reader = reader;
       
   144         this.lint = fac.lint;
   124     }
   145     }
   125 
   146 
   126     protected void checkSourceLevel(int pos, Feature feature) {
   147     protected void checkSourceLevel(int pos, Feature feature) {
   127         if (preview.isPreview(feature) && !preview.isEnabled()) {
   148         if (preview.isPreview(feature) && !preview.isEnabled()) {
   128             //preview feature without --preview flag, error
   149             //preview feature without --preview flag, error
   146 
   167 
   147     protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
   168     protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
   148         log.error(flags, pos, key);
   169         log.error(flags, pos, key);
   149         tk = TokenKind.ERROR;
   170         tk = TokenKind.ERROR;
   150         errPos = pos;
   171         errPos = pos;
       
   172     }
       
   173 
       
   174     protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) {
       
   175         DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ;
       
   176         log.warning(lc, dp, key);
   151     }
   177     }
   152 
   178 
   153     /** Read next character in character or string literal and copy into sbuf.
   179     /** Read next character in character or string literal and copy into sbuf.
   154      */
   180      */
   155     private void scanLitChar(int pos) {
   181     private void scanLitChar(int pos) {
   195                     lexError(reader.bp, Errors.IllegalEscChar);
   221                     lexError(reader.bp, Errors.IllegalEscChar);
   196                 }
   222                 }
   197             }
   223             }
   198         } else if (reader.bp != reader.buflen) {
   224         } else if (reader.bp != reader.buflen) {
   199             reader.putChar(true);
   225             reader.putChar(true);
       
   226         }
       
   227     }
       
   228 
       
   229     /** Read next character in character or string literal and copy into sbuf
       
   230      *  without translating escapes. Used by text blocks to preflight verify
       
   231      *  escapes sequences.
       
   232      */
       
   233     private void scanLitCharRaw(int pos) {
       
   234         if (reader.ch == '\\') {
       
   235             if (reader.peekChar() == '\\' && !reader.isUnicode()) {
       
   236                 reader.skipChar();
       
   237                 reader.putChar('\\', false);
       
   238                 reader.putChar('\\', true);
       
   239             } else {
       
   240                 reader.putChar('\\', true);
       
   241                 switch (reader.ch) {
       
   242                 case '0': case '1': case '2': case '3':
       
   243                 case '4': case '5': case '6': case '7':
       
   244                     char leadch = reader.ch;
       
   245                     reader.putChar(true);
       
   246                     if ('0' <= reader.ch && reader.ch <= '7') {
       
   247                         reader.putChar(true);
       
   248                         if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
       
   249                             reader.putChar(true);
       
   250                         }
       
   251                     }
       
   252                     break;
       
   253                 // Effectively list of valid escape sequences.
       
   254                 case 'b':
       
   255                 case 't':
       
   256                 case 'n':
       
   257                 case 'f':
       
   258                 case 'r':
       
   259                 case '\'':
       
   260                 case '\"':
       
   261                 case '\\':
       
   262                     reader.putChar(true); break;
       
   263                 default:
       
   264                     lexError(reader.bp, Errors.IllegalEscChar);
       
   265                 }
       
   266             }
       
   267         } else if (reader.bp != reader.buflen) {
       
   268             reader.putChar(true);
       
   269         }
       
   270     }
       
   271 
       
   272     /** Interim access to String methods used to support text blocks.
       
   273      *  Required to handle bootstrapping with pre-text block jdks.
       
   274      *  Could be reworked in the 'next' jdk.
       
   275      */
       
   276     static class TextBlockSupport {
       
   277         /** Reflection method to remove incidental indentation.
       
   278          */
       
   279         private static final Method stripIndent;
       
   280 
       
   281         /** Reflection method to translate escape sequences.
       
   282          */
       
   283         private static final Method translateEscapes;
       
   284 
       
   285         /** true if stripIndent and translateEscapes are available in the bootstrap jdk.
       
   286          */
       
   287         private static final boolean hasSupport;
       
   288 
       
   289         /** Get a string method via refection or null if not available.
       
   290          */
       
   291         private static Method getStringMethodOrNull(String name) {
       
   292             try {
       
   293                 return String.class.getMethod(name);
       
   294             } catch (Exception ex) {
       
   295                 // Method not available, return null.
       
   296             }
       
   297             return null;
       
   298         }
       
   299 
       
   300         static {
       
   301             // Get text block string methods.
       
   302             stripIndent = getStringMethodOrNull("stripIndent");
       
   303             translateEscapes = getStringMethodOrNull("translateEscapes");
       
   304             // true if stripIndent and translateEscapes are available in the bootstrap jdk.
       
   305             hasSupport = stripIndent != null && translateEscapes != null;
       
   306         }
       
   307 
       
   308         /** Return true if stripIndent and translateEscapes are available in the bootstrap jdk.
       
   309          */
       
   310         static boolean hasSupport() {
       
   311             return hasSupport;
       
   312         }
       
   313 
       
   314         /** Return the leading whitespace count (indentation) of the line.
       
   315          */
       
   316         private static int indent(String line) {
       
   317             return line.length() - line.stripLeading().length();
       
   318         }
       
   319 
       
   320         enum WhitespaceChecks {
       
   321             INCONSISTENT,
       
   322             TRAILING
       
   323         };
       
   324 
       
   325         /** Check that the use of white space in content is not problematic.
       
   326          */
       
   327         static Set<WhitespaceChecks> checkWhitespace(String string) {
       
   328             // Start with empty result set.
       
   329             Set<WhitespaceChecks> checks = new HashSet<>();
       
   330             // No need to check empty strings.
       
   331             if (string.isEmpty()) {
       
   332                 return checks;
       
   333             }
       
   334             // Maximum common indentation.
       
   335             int outdent = 0;
       
   336             // No need to check indentation if opting out (last line is empty.)
       
   337             char lastChar = string.charAt(string.length() - 1);
       
   338             boolean optOut = lastChar == '\n' || lastChar == '\r';
       
   339             // Split string based at line terminators.
       
   340             String[] lines = string.split("\\R");
       
   341             int length = lines.length;
       
   342             // Extract last line.
       
   343             String lastLine = lines[length - 1];
       
   344             if (!optOut) {
       
   345                 // Prime with the last line indentation (may be blank.)
       
   346                 outdent = indent(lastLine);
       
   347                 for (String line : lines) {
       
   348                     // Blanks lines have no influence (last line accounted for.)
       
   349                     if (!line.isBlank()) {
       
   350                         outdent = Integer.min(outdent, indent(line));
       
   351                         if (outdent == 0) {
       
   352                             break;
       
   353                         }
       
   354                     }
       
   355                 }
       
   356             }
       
   357             // Last line is representative.
       
   358             String start = lastLine.substring(0, outdent);
       
   359             for (String line : lines) {
       
   360                 // Fail if a line does not have the same indentation.
       
   361                 if (!line.isBlank() && !line.startsWith(start)) {
       
   362                     // Mix of different white space
       
   363                     checks.add(WhitespaceChecks.INCONSISTENT);
       
   364                 }
       
   365                 // Line has content even after indent is removed.
       
   366                 if (outdent < line.length()) {
       
   367                     // Is the last character a white space.
       
   368                     lastChar = line.charAt(line.length() - 1);
       
   369                     if (Character.isWhitespace(lastChar)) {
       
   370                         // Has trailing white space.
       
   371                         checks.add(WhitespaceChecks.TRAILING);
       
   372                     }
       
   373                 }
       
   374             }
       
   375             return checks;
       
   376         }
       
   377 
       
   378         /** Invoke String::stripIndent through reflection.
       
   379          */
       
   380         static String stripIndent(String string) {
       
   381             try {
       
   382                 string = (String)stripIndent.invoke(string);
       
   383             } catch (InvocationTargetException | IllegalAccessException ex) {
       
   384                 throw new RuntimeException(ex);
       
   385             }
       
   386             return string;
       
   387         }
       
   388 
       
   389         /** Invoke String::translateEscapes through reflection.
       
   390          */
       
   391         static String translateEscapes(String string) {
       
   392             try {
       
   393                 string = (String)translateEscapes.invoke(string);
       
   394             } catch (InvocationTargetException | IllegalAccessException ex) {
       
   395                 throw new RuntimeException(ex);
       
   396             }
       
   397             return string;
       
   398         }
       
   399     }
       
   400 
       
   401     /** Test for EOLN.
       
   402      */
       
   403     private boolean isEOLN() {
       
   404         return reader.ch == LF || reader.ch == CR;
       
   405     }
       
   406 
       
   407     /** Test for CRLF.
       
   408      */
       
   409     private boolean isCRLF() {
       
   410         return reader.ch == CR && reader.peekChar() == LF;
       
   411     }
       
   412 
       
   413     /** Count and skip repeated occurances of the specified character.
       
   414      */
       
   415     private int countChar(char ch, int max) {
       
   416         int count = 0;
       
   417         for ( ; count < max && reader.bp < reader.buflen && reader.ch == ch; count++) {
       
   418             reader.scanChar();
       
   419         }
       
   420         return count;
       
   421     }
       
   422 
       
   423     /** Scan a string literal or text block.
       
   424      */
       
   425     private void scanString(int pos) {
       
   426         // Clear flags.
       
   427         shouldStripIndent = false;
       
   428         shouldTranslateEscapes = false;
       
   429         // Check if text block string methods are present.
       
   430         boolean hasTextBlockSupport = TextBlockSupport.hasSupport();
       
   431         // Track the end of first line for error recovery.
       
   432         int firstEOLN = -1;
       
   433         // Attempt to scan for up to 3 double quotes.
       
   434         int openCount = countChar('\"', 3);
       
   435         switch (openCount) {
       
   436         case 1: // Starting a string literal.
       
   437             break;
       
   438         case 2: // Starting an empty string literal.
       
   439             // Start again but only consume one quote.
       
   440             reader.reset(pos);
       
   441             openCount = countChar('\"', 1);
       
   442             break;
       
   443         case 3: // Starting a text block.
       
   444             // Check if preview feature is enabled for text blocks.
       
   445             checkSourceLevel(pos, Feature.TEXT_BLOCKS);
       
   446             // Only proceed if text block string methods are present.
       
   447             if (hasTextBlockSupport) {
       
   448                 // Indicate that the final string should have incidental indentation removed.
       
   449                 shouldStripIndent = true;
       
   450                 // Verify the open delimiter sequence.
       
   451                 boolean hasOpenEOLN = false;
       
   452                 while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) {
       
   453                     hasOpenEOLN = isEOLN();
       
   454                     if (hasOpenEOLN) {
       
   455                         break;
       
   456                     }
       
   457                     reader.scanChar();
       
   458                 }
       
   459                 // Error if the open delimiter sequence not is """<Whitespace>*<LineTerminator>.
       
   460                 if (!hasOpenEOLN) {
       
   461                     lexError(reader.bp, Errors.IllegalTextBlockOpen);
       
   462                     return;
       
   463                 }
       
   464                 // Skip line terminator.
       
   465                 int start = reader.bp;
       
   466                 if (isCRLF()) {
       
   467                     reader.scanChar();
       
   468                 }
       
   469                 reader.scanChar();
       
   470                 processLineTerminator(start, reader.bp);
       
   471             } else {
       
   472                 // No text block string methods are present, so reset and treat like string literal.
       
   473                 reader.reset(pos);
       
   474                 openCount = countChar('\"', 1);
       
   475             }
       
   476             break;
       
   477         }
       
   478         // While characters are available.
       
   479         while (reader.bp < reader.buflen) {
       
   480             // If possible close delimiter sequence.
       
   481             if (reader.ch == '\"') {
       
   482                 // Check to see if enough double quotes are present.
       
   483                 int closeCount = countChar('\"', openCount);
       
   484                 if (openCount == closeCount) {
       
   485                     // Good result.
       
   486                     tk = Tokens.TokenKind.STRINGLITERAL;
       
   487                     return;
       
   488                 }
       
   489                 // False alarm, add double quotes to string buffer.
       
   490                 reader.repeat('\"', closeCount);
       
   491             } else if (isEOLN()) {
       
   492                 // Line terminator in string literal is an error.
       
   493                 // Fall out to unclosed string literal error.
       
   494                 if (openCount == 1) {
       
   495                     break;
       
   496                 }
       
   497                  // Add line terminator to string buffer.
       
   498                 int start = reader.bp;
       
   499                 if (isCRLF()) {
       
   500                     reader.scanChar();
       
   501                 }
       
   502                 reader.putChar('\n', true);
       
   503                 processLineTerminator(start, reader.bp);
       
   504                 // Record first line terminator for error recovery.
       
   505                 if (firstEOLN == -1) {
       
   506                     firstEOLN = reader.bp;
       
   507                 }
       
   508             } else if (reader.ch == '\\') {
       
   509                 // Handle escape sequences.
       
   510                 if (hasTextBlockSupport) {
       
   511                     // Indicate that the final string should have escapes translated.
       
   512                     shouldTranslateEscapes = true;
       
   513                     // Validate escape sequence and add to string buffer.
       
   514                     scanLitCharRaw(pos);
       
   515                 } else {
       
   516                     // Translate escape sequence and add result to string buffer.
       
   517                     scanLitChar(pos);
       
   518                 }
       
   519             } else {
       
   520                 // Add character to string buffer.
       
   521                 reader.putChar(true);
       
   522             }
       
   523         }
       
   524         // String ended without close delimiter sequence.
       
   525         lexError(pos, openCount == 1 ? Errors.UnclosedStrLit : Errors.UnclosedTextBlock);
       
   526         if (firstEOLN  != -1) {
       
   527             // Reset recovery position to point after open delimiter sequence.
       
   528             reader.reset(firstEOLN);
   200         }
   529         }
   201     }
   530     }
   202 
   531 
   203     private void scanDigits(int pos, int digitRadix) {
   532     private void scanDigits(int pos, int digitRadix) {
   204         char saveCh;
   533         char saveCh;
   622                     reader.scanChar();
   951                     reader.scanChar();
   623                     if (reader.ch == '\'') {
   952                     if (reader.ch == '\'') {
   624                         lexError(pos, Errors.EmptyCharLit);
   953                         lexError(pos, Errors.EmptyCharLit);
   625                         reader.scanChar();
   954                         reader.scanChar();
   626                     } else {
   955                     } else {
   627                         if (reader.ch == CR || reader.ch == LF)
   956                         if (isEOLN())
   628                             lexError(pos, Errors.IllegalLineEndInCharLit);
   957                             lexError(pos, Errors.IllegalLineEndInCharLit);
   629                         scanLitChar(pos);
   958                         scanLitChar(pos);
   630                         if (reader.ch == '\'') {
   959                         if (reader.ch == '\'') {
   631                             reader.scanChar();
   960                             reader.scanChar();
   632                             tk = TokenKind.CHARLITERAL;
   961                             tk = TokenKind.CHARLITERAL;
   634                             lexError(pos, Errors.UnclosedCharLit);
   963                             lexError(pos, Errors.UnclosedCharLit);
   635                         }
   964                         }
   636                     }
   965                     }
   637                     break loop;
   966                     break loop;
   638                 case '\"':
   967                 case '\"':
   639                     reader.scanChar();
   968                     scanString(pos);
   640                     while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
       
   641                         scanLitChar(pos);
       
   642                     if (reader.ch == '\"') {
       
   643                         tk = TokenKind.STRINGLITERAL;
       
   644                         reader.scanChar();
       
   645                     } else {
       
   646                         lexError(pos, Errors.UnclosedStrLit);
       
   647                     }
       
   648                     break loop;
   969                     break loop;
   649                default:
   970                 default:
   650                     if (isSpecial(reader.ch)) {
   971                     if (isSpecial(reader.ch)) {
   651                         scanOperator();
   972                         scanOperator();
   652                     } else {
   973                     } else {
   653                         boolean isJavaIdentifierStart;
   974                         boolean isJavaIdentifierStart;
   654                         int codePoint = -1;
   975                         int codePoint = -1;
   693             }
  1014             }
   694             endPos = reader.bp;
  1015             endPos = reader.bp;
   695             switch (tk.tag) {
  1016             switch (tk.tag) {
   696                 case DEFAULT: return new Token(tk, pos, endPos, comments);
  1017                 case DEFAULT: return new Token(tk, pos, endPos, comments);
   697                 case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
  1018                 case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
   698                 case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);
  1019                 case STRING: {
       
  1020                     // Get characters from string buffer.
       
  1021                     String string = reader.chars();
       
  1022                     // If a text block.
       
  1023                     if (shouldStripIndent) {
       
  1024                         // Verify that the incidental indentation is consistent.
       
  1025                         if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
       
  1026                             Set<TextBlockSupport.WhitespaceChecks> checks =
       
  1027                                     TextBlockSupport.checkWhitespace(string);
       
  1028                             if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {
       
  1029                                 lexWarning(LintCategory.TEXT_BLOCKS, pos,
       
  1030                                         Warnings.InconsistentWhiteSpaceIndentation);
       
  1031                             }
       
  1032                             if (checks.contains(TextBlockSupport.WhitespaceChecks.TRAILING)) {
       
  1033                                 lexWarning(LintCategory.TEXT_BLOCKS, pos,
       
  1034                                         Warnings.TrailingWhiteSpaceWillBeRemoved);
       
  1035                             }
       
  1036                         }
       
  1037                         // Remove incidental indentation.
       
  1038                         string = TextBlockSupport.stripIndent(string);
       
  1039                     }
       
  1040                     // Translate escape sequences if present.
       
  1041                     if (shouldTranslateEscapes) {
       
  1042                         string = TextBlockSupport.translateEscapes(string);
       
  1043                     }
       
  1044                     // Build string token.
       
  1045                     return new StringToken(tk, pos, endPos, string, comments);
       
  1046                 }
   699                 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
  1047                 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
   700                 default: throw new AssertionError();
  1048                 default: throw new AssertionError();
   701             }
  1049             }
   702         }
  1050         }
   703         finally {
  1051         finally {