8134941: Implement ES6 template literal support
authormhaupt
Wed, 28 Oct 2015 10:54:05 +0100
changeset 33414 2e284c36d51f
parent 33373 4a0312f2894b
child 33415 d4277cc1cb17
8134941: Implement ES6 template literal support Reviewed-by: attila, hannesw Contributed-by: andreas.woess@oracle.com
nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/ir/RuntimeNode.java
nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/Lexer.java
nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/Parser.java
nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/Token.java
nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/TokenType.java
nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/ScriptRuntime.java
nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/resources/Messages.properties
--- a/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/ir/RuntimeNode.java	Wed Jul 05 20:56:54 2017 +0200
+++ b/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/ir/RuntimeNode.java	Wed Oct 28 10:54:05 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -83,7 +83,9 @@
         /** is undefined */
         IS_UNDEFINED(TokenType.EQ_STRICT, Type.BOOLEAN, 2),
         /** is not undefined */
-        IS_NOT_UNDEFINED(TokenType.NE_STRICT, Type.BOOLEAN, 2);
+        IS_NOT_UNDEFINED(TokenType.NE_STRICT, Type.BOOLEAN, 2),
+        /** Get template object from raw and cooked string arrays. */
+        GET_TEMPLATE_OBJECT(TokenType.TEMPLATE, Type.SCRIPT_OBJECT, 2);
 
         /** token type */
         private final TokenType tokenType;
--- a/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/Lexer.java	Wed Jul 05 20:56:54 2017 +0200
+++ b/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/Lexer.java	Wed Oct 28 10:54:05 2015 +0100
@@ -46,6 +46,10 @@
 import static jdk.nashorn.internal.parser.TokenType.REGEX;
 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
 import static jdk.nashorn.internal.parser.TokenType.STRING;
+import static jdk.nashorn.internal.parser.TokenType.TEMPLATE;
+import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD;
+import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE;
+import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL;
 import static jdk.nashorn.internal.parser.TokenType.XML;
 
 import java.io.Serializable;
@@ -96,6 +100,8 @@
     private final boolean pauseOnFunctionBody;
     private boolean pauseOnNextLeftBrace;
 
+    private int templateExpressionOpenBraces;
+
     private static final String SPACETAB = " \t";  // ASCII space and tab
     private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)
 
@@ -392,13 +398,19 @@
     }
 
     /**
-     * Test if char is a string delimiter, e.g. '\' or '"'.  Also scans exec
-     * strings ('`') in scripting mode.
+     * Test if char is a string delimiter, e.g. '\' or '"'.
      * @param ch a char
      * @return true if string delimiter
      */
     protected boolean isStringDelimiter(final char ch) {
-        return ch == '\'' || ch == '"' || (scripting && ch == '`');
+        return ch == '\'' || ch == '"';
+    }
+
+    /**
+     * Test if char is a template literal delimiter ('`').
+     */
+    private static boolean isTemplateDelimiter(char ch) {
+        return ch == '`';
     }
 
     /**
@@ -943,6 +955,10 @@
                     sb.append(next);
                     break;
                 }
+            } else if (ch0 == '\r') {
+                // Convert CR-LF or CR to LF line terminator.
+                sb.append('\n');
+                skip(ch1 == '\n' ? 2 : 1);
             } else {
                 // Add regular character.
                 sb.append(ch0);
@@ -958,7 +974,7 @@
 
     /**
      * Scan over a string literal.
-     * @param add true if we nare not just scanning but should actually modify the token stream
+     * @param add true if we are not just scanning but should actually modify the token stream
      */
     protected void scanString(final boolean add) {
         // Type of string.
@@ -1034,6 +1050,70 @@
     }
 
     /**
+     * Scan over a template string literal.
+     */
+    private void scanTemplate() {
+        assert ch0 == '`';
+        TokenType type = TEMPLATE;
+
+        // Skip over quote and record beginning of string content.
+        skip(1);
+        State stringState = saveState();
+
+        // Scan until close quote
+        while (!atEOF()) {
+            // Skip over escaped character.
+            if (ch0 == '`') {
+                skip(1);
+                // Record end of string.
+                stringState.setLimit(position - 1);
+                add(type == TEMPLATE ? type : TEMPLATE_TAIL, stringState.position, stringState.limit);
+                return;
+            } else if (ch0 == '$' && ch1 == '{') {
+                skip(2);
+                stringState.setLimit(position - 2);
+                add(type == TEMPLATE ? TEMPLATE_HEAD : type, stringState.position, stringState.limit);
+
+                // scan to RBRACE
+                Lexer expressionLexer = new Lexer(this, saveState());
+                expressionLexer.templateExpressionOpenBraces = 1;
+                expressionLexer.lexify();
+                restoreState(expressionLexer.saveState());
+
+                // scan next middle or tail of the template literal
+                assert ch0 == '}';
+                type = TEMPLATE_MIDDLE;
+
+                // Skip over rbrace and record beginning of string content.
+                skip(1);
+                stringState = saveState();
+
+                continue;
+            } else if (ch0 == '\\') {
+                skip(1);
+                // EscapeSequence
+                if (!isEscapeCharacter(ch0)) {
+                    error(Lexer.message("invalid.escape.char"), TEMPLATE, position, limit);
+                }
+                if (isEOL(ch0)) {
+                    // LineContinuation
+                    skipEOL(false);
+                    continue;
+                }
+            }  else if (isEOL(ch0)) {
+                // LineTerminatorSequence
+                skipEOL(false);
+                continue;
+            }
+
+            // Skip literal character.
+            skip(1);
+        }
+
+        error(Lexer.message("missing.close.quote"), TEMPLATE, position, limit);
+    }
+
+    /**
      * Is the given character a valid escape char after "\" ?
      *
      * @param ch character to be checked
@@ -1621,6 +1701,16 @@
                 // Scan and add a number.
                 scanNumber();
             } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
+                if (templateExpressionOpenBraces > 0) {
+                    if (type == LBRACE) {
+                        templateExpressionOpenBraces++;
+                    } else if (type == RBRACE) {
+                        if (--templateExpressionOpenBraces == 0) {
+                            break;
+                        }
+                    }
+                }
+
                 // Get the number of characters in the token.
                 final int typeLength = type.getLength();
                 // Skip that many characters.
@@ -1644,6 +1734,12 @@
             } else if (Character.isDigit(ch0)) {
                 // Scan and add a number.
                 scanNumber();
+            } else if (isTemplateDelimiter(ch0) && es6) {
+                // Scan and add template in ES6 mode.
+                scanTemplate();
+            } else if (isTemplateDelimiter(ch0) && scripting) {
+                // Scan and add an exec string ('`') in scripting mode.
+                scanString(true);
             } else {
                 // Don't recognize this character.
                 skip(1);
@@ -1699,6 +1795,11 @@
             return valueOfIdent(start, len); // String
         case REGEX:
             return valueOfPattern(start, len); // RegexToken::LexerToken
+        case TEMPLATE:
+        case TEMPLATE_HEAD:
+        case TEMPLATE_MIDDLE:
+        case TEMPLATE_TAIL:
+            return valueOfString(start, len, true); // String
         case XML:
             return valueOfXML(start, len); // XMLToken::LexerToken
         case DIRECTIVE_COMMENT:
@@ -1711,6 +1812,45 @@
     }
 
     /**
+     * Get the raw string value of a template literal string part.
+     *
+     * @param token template string token
+     * @return raw string
+     */
+    public String valueOfRawString(final long token) {
+        final int start  = Token.descPosition(token);
+        final int length = Token.descLength(token);
+
+        // Save the current position.
+        final int savePosition = position;
+        // Calculate the end position.
+        final int end = start + length;
+        // Reset to beginning of string.
+        reset(start);
+
+        // Buffer for recording characters.
+        final StringBuilder sb = new StringBuilder(length);
+
+        // Scan until end of string.
+        while (position < end) {
+            if (ch0 == '\r') {
+                // Convert CR-LF or CR to LF line terminator.
+                sb.append('\n');
+                skip(ch1 == '\n' ? 2 : 1);
+            } else {
+                // Add regular character.
+                sb.append(ch0);
+                skip(1);
+            }
+        }
+
+        // Restore position.
+        reset(savePosition);
+
+        return sb.toString();
+    }
+
+    /**
      * Get the correctly localized error message for a given message id format arguments
      * @param msgId message id
      * @param args  format arguments
--- a/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/Parser.java	Wed Jul 05 20:56:54 2017 +0200
+++ b/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/Parser.java	Wed Oct 28 10:54:05 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,6 +51,10 @@
 import static jdk.nashorn.internal.parser.TokenType.RBRACKET;
 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
 import static jdk.nashorn.internal.parser.TokenType.SEMICOLON;
+import static jdk.nashorn.internal.parser.TokenType.TEMPLATE;
+import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_HEAD;
+import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_MIDDLE;
+import static jdk.nashorn.internal.parser.TokenType.TEMPLATE_TAIL;
 import static jdk.nashorn.internal.parser.TokenType.TERNARY;
 import static jdk.nashorn.internal.parser.TokenType.WHILE;
 
@@ -64,6 +68,7 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+
 import jdk.internal.dynalink.support.NameCodec;
 import jdk.nashorn.internal.codegen.CompilerConstants;
 import jdk.nashorn.internal.codegen.Namespace;
@@ -1926,10 +1931,10 @@
      *      Literal
      *      ArrayLiteral
      *      ObjectLiteral
+     *      RegularExpressionLiteral
+     *      TemplateLiteral
      *      ( Expression )
      *
-     *  See 11.1
-     *
      * Parse primary expression.
      * @return Expression node.
      */
@@ -1989,6 +1994,9 @@
             expect(RPAREN);
 
             return expression;
+        case TEMPLATE:
+        case TEMPLATE_HEAD:
+            return templateLiteral();
 
         default:
             // In this context some operator tokens mark the start of a literal.
@@ -2387,6 +2395,8 @@
     }
 
     /**
+     * Parse left hand side expression.
+     *
      * LeftHandSideExpression :
      *      NewExpression
      *      CallExpression
@@ -2396,10 +2406,8 @@
      *      CallExpression Arguments
      *      CallExpression [ Expression ]
      *      CallExpression . IdentifierName
+     *      CallExpression TemplateLiteral
      *
-     * See 11.2
-     *
-     * Parse left hand side expression.
      * @return Expression node.
      */
     private Expression leftHandSideExpression() {
@@ -2426,7 +2434,7 @@
             callToken = token;
 
             switch (type) {
-            case LPAREN:
+            case LPAREN: {
                 // Get NEW or FUNCTION arguments.
                 final List<Expression> arguments = optimizeList(argumentList());
 
@@ -2434,8 +2442,8 @@
                 lhs = new CallNode(callLine, callToken, finish, lhs, arguments, false);
 
                 break;
-
-            case LBRACKET:
+            }
+            case LBRACKET: {
                 next();
 
                 // Get array index.
@@ -2447,8 +2455,8 @@
                 lhs = new IndexNode(callToken, finish, lhs, rhs);
 
                 break;
-
-            case PERIOD:
+            }
+            case PERIOD: {
                 next();
 
                 final IdentNode property = getIdentifierName();
@@ -2457,7 +2465,16 @@
                 lhs = new AccessNode(callToken, finish, lhs, property.getName());
 
                 break;
-
+            }
+            case TEMPLATE:
+            case TEMPLATE_HEAD: {
+                // tagged template literal
+                final List<Expression> arguments = templateLiteralArgumentList();
+
+                lhs = new CallNode(callLine, callToken, finish, lhs, arguments, false);
+
+                break;
+            }
             default:
                 break loop;
             }
@@ -2516,16 +2533,16 @@
     }
 
     /**
+     * Parse member expression.
+     *
      * MemberExpression :
      *      PrimaryExpression
      *      FunctionExpression
      *      MemberExpression [ Expression ]
      *      MemberExpression . IdentifierName
+     *      MemberExpression TemplateLiteral
      *      new MemberExpression Arguments
      *
-     * See 11.2
-     *
-     * Parse member expression.
      * @return Expression node.
      */
     private Expression memberExpression() {
@@ -2582,6 +2599,16 @@
 
                 break;
             }
+            case TEMPLATE:
+            case TEMPLATE_HEAD: {
+                // tagged template literal
+                final int callLine = line;
+                final List<Expression> arguments = templateLiteralArgumentList();
+
+                lhs = new CallNode(callLine, callToken, finish, lhs, arguments, false);
+
+                break;
+            }
             default:
                 break loop;
             }
@@ -3035,6 +3062,20 @@
         final ParserState parserState = (ParserState)data.getEndParserState();
         assert parserState != null;
 
+        if (k < stream.last() && start < parserState.position && parserState.position <= Token.descPosition(stream.get(stream.last()))) {
+            // RBRACE is already in the token stream, so fast forward to it
+            for (; k < stream.last(); k++) {
+                long nextToken = stream.get(k + 1);
+                if (Token.descPosition(nextToken) == parserState.position && Token.descType(nextToken) == RBRACE) {
+                    token = stream.get(k);
+                    type = Token.descType(token);
+                    next();
+                    assert type == RBRACE && start == parserState.position;
+                    return true;
+                }
+            }
+        }
+
         stream.reset();
         lexer = parserState.createLexer(source, lexer, stream, scripting && !env._no_syntax_extensions, env._es6);
         line = parserState.line;
@@ -3425,6 +3466,79 @@
         }
     }
 
+    /**
+     * Parse untagged template literal as string concatenation.
+     */
+    private Expression templateLiteral() {
+        assert type == TEMPLATE || type == TEMPLATE_HEAD;
+        final boolean noSubstitutionTemplate = type == TEMPLATE;
+        long lastLiteralToken = token;
+        LiteralNode<?> literal = getLiteral();
+        if (noSubstitutionTemplate) {
+            return literal;
+        }
+
+        Expression concat = literal;
+        TokenType lastLiteralType;
+        do {
+            Expression expression = expression();
+            if (type != TEMPLATE_MIDDLE && type != TEMPLATE_TAIL) {
+                throw error(AbstractParser.message("unterminated.template.expression"), token);
+            }
+            concat = new BinaryNode(Token.recast(lastLiteralToken, TokenType.ADD), concat, expression);
+            lastLiteralType = type;
+            lastLiteralToken = token;
+            literal = getLiteral();
+            concat = new BinaryNode(Token.recast(lastLiteralToken, TokenType.ADD), concat, literal);
+        } while (lastLiteralType == TEMPLATE_MIDDLE);
+        return concat;
+    }
+
+    /**
+     * Parse tagged template literal as argument list.
+     * @return argument list for a tag function call (template object, ...substitutions)
+     */
+    private List<Expression> templateLiteralArgumentList() {
+        assert type == TEMPLATE || type == TEMPLATE_HEAD;
+        final ArrayList<Expression> argumentList = new ArrayList<>();
+        final ArrayList<Expression> rawStrings = new ArrayList<>();
+        final ArrayList<Expression> cookedStrings = new ArrayList<>();
+        argumentList.add(null); // filled at the end
+
+        final long templateToken = token;
+        final boolean hasSubstitutions = type == TEMPLATE_HEAD;
+        addTemplateLiteralString(rawStrings, cookedStrings);
+
+        if (hasSubstitutions) {
+            TokenType lastLiteralType;
+            do {
+                Expression expression = expression();
+                if (type != TEMPLATE_MIDDLE && type != TEMPLATE_TAIL) {
+                    throw error(AbstractParser.message("unterminated.template.expression"), token);
+                }
+                argumentList.add(expression);
+
+                lastLiteralType = type;
+                addTemplateLiteralString(rawStrings, cookedStrings);
+            } while (lastLiteralType == TEMPLATE_MIDDLE);
+        }
+
+        final LiteralNode<Expression[]> rawStringArray = LiteralNode.newInstance(templateToken, finish, rawStrings);
+        final LiteralNode<Expression[]> cookedStringArray = LiteralNode.newInstance(templateToken, finish, cookedStrings);
+        final RuntimeNode templateObject = new RuntimeNode(templateToken, finish, RuntimeNode.Request.GET_TEMPLATE_OBJECT, rawStringArray, cookedStringArray);
+        argumentList.set(0, templateObject);
+        return optimizeList(argumentList);
+    }
+
+    private void addTemplateLiteralString(final ArrayList<Expression> rawStrings, final ArrayList<Expression> cookedStrings) {
+        final long stringToken = token;
+        final String rawString = lexer.valueOfRawString(stringToken);
+        final String cookedString = (String) getValue();
+        next();
+        rawStrings.add(LiteralNode.newInstance(stringToken, finish, rawString));
+        cookedStrings.add(LiteralNode.newInstance(stringToken, finish, cookedString));
+    }
+
     @Override
     public String toString() {
         return "'JavaScript Parsing'";
--- a/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/Token.java	Wed Jul 05 20:56:54 2017 +0200
+++ b/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/Token.java	Wed Oct 28 10:54:05 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -71,11 +71,21 @@
     public static long withDelimiter(final long token) {
         final TokenType tokenType = Token.descType(token);
         switch(tokenType) {
-            case STRING: case ESCSTRING: case EXECSTRING: {
+            case STRING:
+            case ESCSTRING:
+            case EXECSTRING:
+            case TEMPLATE:
+            case TEMPLATE_TAIL: {
                 final int start = Token.descPosition(token) - 1;
                 final int len = Token.descLength(token) + 2;
                 return toDesc(tokenType, start, len);
             }
+            case TEMPLATE_HEAD:
+            case TEMPLATE_MIDDLE: {
+                final int start = Token.descPosition(token) - 1;
+                final int len = Token.descLength(token) + 3;
+                return toDesc(tokenType, start, len);
+            }
             default: {
                 return token;
             }
--- a/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/TokenType.java	Wed Jul 05 20:56:54 2017 +0200
+++ b/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/parser/TokenType.java	Wed Oct 28 10:54:05 2015 +0100
@@ -183,6 +183,10 @@
     XML            (LITERAL,  null),
     OBJECT         (LITERAL,  null),
     ARRAY          (LITERAL,  null),
+    TEMPLATE       (LITERAL,  null),
+    TEMPLATE_HEAD  (LITERAL,  null),
+    TEMPLATE_MIDDLE(LITERAL,  null),
+    TEMPLATE_TAIL  (LITERAL,  null),
 
     COMMALEFT      (IR,       null),
     DECPOSTFIX     (IR,       null),
--- a/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/ScriptRuntime.java	Wed Jul 05 20:56:54 2017 +0200
+++ b/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/ScriptRuntime.java	Wed Oct 28 10:54:05 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -45,6 +45,7 @@
 import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.Objects;
+
 import jdk.internal.dynalink.beans.StaticClass;
 import jdk.nashorn.api.scripting.JSObject;
 import jdk.nashorn.api.scripting.ScriptObjectMirror;
@@ -1050,4 +1051,20 @@
         context.getLogger(ApplySpecialization.class).info("Overwrote special name '" + name +"' - invalidating switchpoint");
         SwitchPoint.invalidateAll(new SwitchPoint[] { sp });
     }
+
+    /**
+     * ES6 12.2.9.3 Runtime Semantics: GetTemplateObject(templateLiteral).
+     *
+     * @param rawStrings array of template raw values
+     * @param cookedStrings array of template values
+     * @return template object
+     */
+    public static ScriptObject GET_TEMPLATE_OBJECT(final Object rawStrings, final Object cookedStrings) {
+        final ScriptObject template = (ScriptObject)cookedStrings;
+        final ScriptObject rawObj = (ScriptObject)rawStrings;
+        assert rawObj.getArray().length() == template.getArray().length();
+        template.addOwnProperty("raw", Property.NOT_WRITABLE | Property.NOT_ENUMERABLE | Property.NOT_CONFIGURABLE, rawObj.freeze());
+        template.freeze();
+        return template;
+    }
 }
--- a/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/resources/Messages.properties	Wed Jul 05 20:56:54 2017 +0200
+++ b/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/resources/Messages.properties	Wed Oct 28 10:54:05 2015 +0100
@@ -60,6 +60,7 @@
 parser.error.regex.syntax={0}
 parser.error.trailing.comma.in.json=Trailing comma is not allowed in JSON
 parser.error.missing.const.assignment=Missing assignment to constant "{0}"
+parser.error.unterminated.template.expression=Expected } after expression in template literal
 
 # strict mode error messages
 parser.error.strict.no.with="with" statement cannot be used in strict mode