nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/Lexer.java
changeset 33414 2e284c36d51f
parent 32444 4c7a40aab132
child 33890 2e8c1be40a52
equal deleted inserted replaced
33373:4a0312f2894b 33414:2e284c36d51f
    44 import static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY;
    44 import static jdk.nashorn.internal.parser.TokenType.OCTAL_LEGACY;
    45 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
    45 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
    46 import static jdk.nashorn.internal.parser.TokenType.REGEX;
    46 import static jdk.nashorn.internal.parser.TokenType.REGEX;
    47 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
    47 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
    48 import static jdk.nashorn.internal.parser.TokenType.STRING;
    48 import static jdk.nashorn.internal.parser.TokenType.STRING;
       
    49 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE;
       
    50 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD;
       
    51 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE;
       
    52 import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL;
    49 import static jdk.nashorn.internal.parser.TokenType.XML;
    53 import static jdk.nashorn.internal.parser.TokenType.XML;
    50 
    54 
    51 import java.io.Serializable;
    55 import java.io.Serializable;
    52 
    56 
    53 import jdk.nashorn.internal.runtime.ECMAErrors;
    57 import jdk.nashorn.internal.runtime.ECMAErrors;
    93     /** Type of last token added. */
    97     /** Type of last token added. */
    94     private TokenType last;
    98     private TokenType last;
    95 
    99 
    96     private final boolean pauseOnFunctionBody;
   100     private final boolean pauseOnFunctionBody;
    97     private boolean pauseOnNextLeftBrace;
   101     private boolean pauseOnNextLeftBrace;
       
   102 
       
   103     private int templateExpressionOpenBraces;
    98 
   104 
    99     private static final String SPACETAB = " \t";  // ASCII space and tab
   105     private static final String SPACETAB = " \t";  // ASCII space and tab
   100     private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)
   106     private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)
   101 
   107 
   102     private static final String JAVASCRIPT_WHITESPACE_EOL =
   108     private static final String JAVASCRIPT_WHITESPACE_EOL =
   390     public static boolean isJSEOL(final char ch) {
   396     public static boolean isJSEOL(final char ch) {
   391         return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
   397         return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
   392     }
   398     }
   393 
   399 
   394     /**
   400     /**
   395      * Test if char is a string delimiter, e.g. '\' or '"'.  Also scans exec
   401      * Test if char is a string delimiter, i.e. a single quote (') or a double quote (").
   396      * strings ('`') in scripting mode.
       
   397      * @param ch a char
   402      * @param ch a char
   398      * @return true if string delimiter
   403      * @return true if string delimiter
   399      */
   404      */
   400     protected boolean isStringDelimiter(final char ch) {
   405     protected boolean isStringDelimiter(final char ch) {
   401         return ch == '\'' || ch == '"' || (scripting && ch == '`');
   406         return ch == '\'' || ch == '"';
       
   407     }
       
   408 
       
   409     /**
       
   410      * Test if char is a template literal delimiter; returns true only for the backtick ('`').
       
   411      */
       
   412     private static boolean isTemplateDelimiter(char ch) {
       
   413         return ch == '`';
   402     }
   414     }
   403 
   415 
   404     /**
   416     /**
   405      * Test whether a char is valid JavaScript whitespace
   417      * Test whether a char is valid JavaScript whitespace
   406      * @param ch a char
   418      * @param ch a char
   941                 // All other characters.
   953                 // All other characters.
   942                 default:
   954                 default:
   943                     sb.append(next);
   955                     sb.append(next);
   944                     break;
   956                     break;
   945                 }
   957                 }
       
   958             } else if (ch0 == '\r') {
       
   959                 // Convert CR-LF or CR to LF line terminator.
       
   960                 sb.append('\n');
       
   961                 skip(ch1 == '\n' ? 2 : 1);
   946             } else {
   962             } else {
   947                 // Add regular character.
   963                 // Add regular character.
   948                 sb.append(ch0);
   964                 sb.append(ch0);
   949                 skip(1);
   965                 skip(1);
   950             }
   966             }
   956         return sb.toString();
   972         return sb.toString();
   957     }
   973     }
   958 
   974 
   959     /**
   975     /**
   960      * Scan over a string literal.
   976      * Scan over a string literal.
   961      * @param add true if we nare not just scanning but should actually modify the token stream
   977      * @param add true if we are not just scanning but should actually modify the token stream
   962      */
   978      */
   963     protected void scanString(final boolean add) {
   979     protected void scanString(final boolean add) {
   964         // Type of string.
   980         // Type of string.
   965         TokenType type = STRING;
   981         TokenType type = STRING;
   966         // Record starting quote.
   982         // Record starting quote.
  1029             } else {
  1045             } else {
  1030                 /// Add string token without editing.
  1046                 /// Add string token without editing.
  1031                 add(type, stringState.position, stringState.limit);
  1047                 add(type, stringState.position, stringState.limit);
  1032             }
  1048             }
  1033         }
  1049         }
       
  1050     }
       
  1051 
       
  1052     /**
       
  1053      * Scan over a template string literal, adding a single TEMPLATE token when it has no ${...} substitution, otherwise TEMPLATE_HEAD, TEMPLATE_MIDDLE and TEMPLATE_TAIL tokens for the text around each substitution.
       
  1054      */
       
  1055     private void scanTemplate() {
       
  1056         assert ch0 == '`';
       
  1057         TokenType type = TEMPLATE;
       
  1058 
       
  1059         // Skip over the opening backtick and record beginning of string content.
       
  1060         skip(1);
       
  1061         State stringState = saveState();
       
  1062 
       
  1063         // Scan until the closing backtick.
       
  1064         while (!atEOF()) {
       
  1065             // A backtick closes the template literal.
       
  1066             if (ch0 == '`') {
       
  1067                 skip(1);
       
  1068                 // Record end of string, excluding the closing backtick just skipped.
       
  1069                 stringState.setLimit(position - 1);
       
  1070                 add(type == TEMPLATE ? type : TEMPLATE_TAIL, stringState.position, stringState.limit);
       
  1071                 return;
       
  1072             } else if (ch0 == '$' && ch1 == '{') {
       
  1073                 skip(2);
       
  1074                 stringState.setLimit(position - 2);
       
  1075                 add(type == TEMPLATE ? TEMPLATE_HEAD : type, stringState.position, stringState.limit);
       
  1076 
       
  1077                 // Scan the substitution expression with a nested lexer, which is expected to stop at the closing RBRACE.
       
  1078                 Lexer expressionLexer = new Lexer(this, saveState());
       
  1079                 expressionLexer.templateExpressionOpenBraces = 1;
       
  1080                 expressionLexer.lexify();
       
  1081                 restoreState(expressionLexer.saveState());
       
  1082 
       
  1083                 // scan next middle or tail of the template literal
       
  1084                 assert ch0 == '}';
       
  1085                 type = TEMPLATE_MIDDLE;
       
  1086 
       
  1087                 // Skip over rbrace and record beginning of string content.
       
  1088                 skip(1);
       
  1089                 stringState = saveState();
       
  1090 
       
  1091                 continue;
       
  1092             } else if (ch0 == '\\') {
       
  1093                 skip(1);
       
  1094                 // EscapeSequence: report an error if the character after the backslash is not a valid escape character.
       
  1095                 if (!isEscapeCharacter(ch0)) {
       
  1096                     error(Lexer.message("invalid.escape.char"), TEMPLATE, position, limit);
       
  1097                 }
       
  1098                 if (isEOL(ch0)) {
       
  1099                     // LineContinuation
       
  1100                     skipEOL(false);
       
  1101                     continue;
       
  1102                 }
       
  1103             }  else if (isEOL(ch0)) {
       
  1104                 // LineTerminatorSequence
       
  1105                 skipEOL(false);
       
  1106                 continue;
       
  1107             }
       
  1108 
       
  1109             // Skip literal character (or the character that follows a backslash).
       
  1110             skip(1);
       
  1111         }
       
  1112 
       
  1113         error(Lexer.message("missing.close.quote"), TEMPLATE, position, limit);
  1034     }
  1114     }
  1035 
  1115 
  1036     /**
  1116     /**
  1037      * Is the given character a valid escape char after "\" ?
  1117      * Is the given character a valid escape char after "\" ?
  1038      *
  1118      *
  1619             if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
  1699             if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
  1620                 // '.' followed by digit.
  1700                 // '.' followed by digit.
  1621                 // Scan and add a number.
  1701                 // Scan and add a number.
  1622                 scanNumber();
  1702                 scanNumber();
  1623             } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
  1703             } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
       
  1704                 if (templateExpressionOpenBraces > 0) {
       
  1705                     if (type == LBRACE) {
       
  1706                         templateExpressionOpenBraces++;
       
  1707                     } else if (type == RBRACE) {
       
  1708                         if (--templateExpressionOpenBraces == 0) {
       
  1709                             break;
       
  1710                         }
       
  1711                     }
       
  1712                 }
       
  1713 
  1624                 // Get the number of characters in the token.
  1714                 // Get the number of characters in the token.
  1625                 final int typeLength = type.getLength();
  1715                 final int typeLength = type.getLength();
  1626                 // Skip that many characters.
  1716                 // Skip that many characters.
  1627                 skip(typeLength);
  1717                 skip(typeLength);
  1628                 // Add operator token.
  1718                 // Add operator token.
  1642                 // Scan and add a string.
  1732                 // Scan and add a string.
  1643                 scanString(true);
  1733                 scanString(true);
  1644             } else if (Character.isDigit(ch0)) {
  1734             } else if (Character.isDigit(ch0)) {
  1645                 // Scan and add a number.
  1735                 // Scan and add a number.
  1646                 scanNumber();
  1736                 scanNumber();
       
  1737             } else if (isTemplateDelimiter(ch0) && es6) {
       
  1738                 // Scan and add template in ES6 mode.
       
  1739                 scanTemplate();
       
  1740             } else if (isTemplateDelimiter(ch0) && scripting) {
       
  1741                 // Scan and add an exec string ('`') in scripting mode.
       
  1742                 scanString(true);
  1647             } else {
  1743             } else {
  1648                 // Don't recognize this character.
  1744                 // Don't recognize this character.
  1649                 skip(1);
  1745                 skip(1);
  1650                 add(ERROR, position - 1);
  1746                 add(ERROR, position - 1);
  1651             }
  1747             }
  1697             return valueOfString(start, len, strict); // String
  1793             return valueOfString(start, len, strict); // String
  1698         case IDENT:
  1794         case IDENT:
  1699             return valueOfIdent(start, len); // String
  1795             return valueOfIdent(start, len); // String
  1700         case REGEX:
  1796         case REGEX:
  1701             return valueOfPattern(start, len); // RegexToken::LexerToken
  1797             return valueOfPattern(start, len); // RegexToken::LexerToken
       
  1798         case TEMPLATE:
       
  1799         case TEMPLATE_HEAD:
       
  1800         case TEMPLATE_MIDDLE:
       
  1801         case TEMPLATE_TAIL:
       
  1802             return valueOfString(start, len, true); // String
  1702         case XML:
  1803         case XML:
  1703             return valueOfXML(start, len); // XMLToken::LexerToken
  1804             return valueOfXML(start, len); // XMLToken::LexerToken
  1704         case DIRECTIVE_COMMENT:
  1805         case DIRECTIVE_COMMENT:
  1705             return source.getString(start, len);
  1806             return source.getString(start, len);
  1706         default:
  1807         default:
  1707             break;
  1808             break;
  1708         }
  1809         }
  1709 
  1810 
  1710         return null;
  1811         return null;
       
  1812     }
       
  1813 
       
  1814     /**
       
  1815      * Get the raw string value of a template literal string part, i.e. its source text with no escape processing.
       
  1816      * Only line terminators are normalized: CR and CR-LF are each converted to LF.
       
  1817      * @param token template string token
       
  1818      * @return raw string
       
  1819      */
       
  1820     public String valueOfRawString(final long token) {
       
  1821         final int start  = Token.descPosition(token);
       
  1822         final int length = Token.descLength(token);
       
  1823 
       
  1824         // Save the current position so it can be restored after rescanning the token.
       
  1825         final int savePosition = position;
       
  1826         // Calculate the end position.
       
  1827         final int end = start + length;
       
  1828         // Reset to beginning of string.
       
  1829         reset(start);
       
  1830 
       
  1831         // Buffer for recording characters.
       
  1832         final StringBuilder sb = new StringBuilder(length);
       
  1833 
       
  1834         // Scan until end of string.
       
  1835         while (position < end) {
       
  1836             if (ch0 == '\r') {
       
  1837                 // Convert CR-LF or CR to LF line terminator.
       
  1838                 sb.append('\n');
       
  1839                 skip(ch1 == '\n' ? 2 : 1);
       
  1840             } else {
       
  1841                 // Add regular character unchanged (a backslash is copied as-is).
       
  1842                 sb.append(ch0);
       
  1843                 skip(1);
       
  1844             }
       
  1845         }
       
  1846 
       
  1847         // Restore position.
       
  1848         reset(savePosition);
       
  1849 
       
  1850         return sb.toString();
  1711     }
  1851     }
  1712 
  1852 
  1713     /**
  1853     /**
  1714      * Get the correctly localized error message for a given message id format arguments
  1854      * Get the correctly localized error message for a given message id format arguments
  1715      * @param msgId message id
  1855      * @param msgId message id