8206981: Compiler support for Raw String Literals
authorjlaskey
Wed, 12 Sep 2018 14:19:36 -0300
changeset 51713 d424675a9743
parent 51712 f0f5d23449d3
child 51714 975d3636a2f9
8206981: Compiler support for Raw String Literals Reviewed-by: mcimadamore, briangoetz, abuckley, jjg, vromero, jlahoda
src/jdk.compiler/share/classes/com/sun/tools/javac/code/Preview.java
src/jdk.compiler/share/classes/com/sun/tools/javac/code/Source.java
src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java
src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java
src/jdk.compiler/share/classes/com/sun/tools/javac/resources/compiler.properties
test/langtools/tools/javac/RawStringLiteralLang.java
test/langtools/tools/javac/RawStringLiteralLangAPI.java
test/langtools/tools/javac/diags/examples/IllegalChar.java
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/code/Preview.java	Wed Sep 12 08:46:25 2018 -0700
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/code/Preview.java	Wed Sep 12 14:19:36 2018 -0300
@@ -167,7 +167,8 @@
     public boolean isPreview(Feature feature) {
         if (feature == Feature.SWITCH_EXPRESSION ||
             feature == Feature.SWITCH_MULTIPLE_CASE_LABELS ||
-            feature == Feature.SWITCH_RULE)
+            feature == Feature.SWITCH_RULE ||
+            feature == Feature.RAW_STRING_LITERALS)
             return true;
         //Note: this is a backdoor which allows to optionally treat all features as 'preview' (for testing).
         //When real preview features will be added, this method can be implemented to return 'true'
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/code/Source.java	Wed Sep 12 08:46:25 2018 -0700
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/code/Source.java	Wed Sep 12 14:19:36 2018 -0300
@@ -183,7 +183,8 @@
         IMPORT_ON_DEMAND_OBSERVABLE_PACKAGES(JDK1_2, JDK8),
         SWITCH_MULTIPLE_CASE_LABELS(JDK12, Fragments.FeatureMultipleCaseLabels, DiagKind.PLURAL),
         SWITCH_RULE(JDK12, Fragments.FeatureSwitchRules, DiagKind.PLURAL),
-        SWITCH_EXPRESSION(JDK12, Fragments.FeatureSwitchExpressions, DiagKind.PLURAL);
+        SWITCH_EXPRESSION(JDK12, Fragments.FeatureSwitchExpressions, DiagKind.PLURAL),
+        RAW_STRING_LITERALS(JDK12, Fragments.FeatureRawStringLiterals, DiagKind.PLURAL);
 
         enum DiagKind {
             NORMAL,
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java	Wed Sep 12 08:46:25 2018 -0700
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java	Wed Sep 12 14:19:36 2018 -0300
@@ -646,6 +646,59 @@
                         lexError(pos, Errors.UnclosedStrLit);
                     }
                     break loop;
+                case '`':
+                    checkSourceLevel(pos, Feature.RAW_STRING_LITERALS);
+                    // Ensure that the backtick was not a Unicode escape sequence
+                    if (reader.peekBack() != '`') {
+                        reader.scanChar();
+                        lexError(pos, Errors.UnicodeBacktick);
+                        break loop;
+                    }
+                    // Turn off unicode processsing and save previous state
+                    boolean oldState = reader.setUnicodeConversion(false);
+                    // Count the number of backticks in the open quote sequence
+                    int openCount = reader.skipRepeats();
+                    // Skip last backtick
+                    reader.scanChar();
+                    while (reader.bp < reader.buflen) {
+                        // If potential close quote sequence
+                        if (reader.ch == '`') {
+                            // Count number of backticks in sequence
+                            int closeCount = reader.skipRepeats();
+                            // If the counts match we can exit the raw string literal
+                            if (openCount == closeCount) {
+                                break;
+                            }
+                            // Emit non-close backtick sequence
+                            for (int i = 0; i <= closeCount; i++) {
+                                reader.putChar('`', false);
+                            }
+                            // Skip last backtick
+                            reader.scanChar();
+                        } else if (reader.ch == LF) {
+                            reader.putChar(true);
+                            processLineTerminator(pos, reader.bp);
+                        } else if (reader.ch == CR) {
+                            if (reader.peekChar() == LF) {
+                                reader.scanChar();
+                            }
+                            // Translate CR and CRLF sequences to LF
+                            reader.putChar('\n', true);
+                            processLineTerminator(pos, reader.bp);
+                        } else {
+                            reader.putChar(true);
+                        }
+                    }
+                    // Restore unicode processsing
+                    reader.setUnicodeConversion(oldState);
+                    // Ensure the close quote was encountered
+                    if (reader.bp == reader.buflen) {
+                        lexError(pos, Errors.UnclosedStrLit);
+                    } else {
+                        tk = TokenKind.STRINGLITERAL;
+                        reader.scanChar();
+                    }
+                    break loop;
                 default:
                     if (isSpecial(reader.ch)) {
                         scanOperator();
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java	Wed Sep 12 08:46:25 2018 -0700
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java	Wed Sep 12 14:19:36 2018 -0300
@@ -64,6 +64,10 @@
      */
     protected int unicodeConversionBp = -1;
 
+    /** Control conversion of unicode characters
+     */
+    protected boolean unicodeConversion = true;
+
     protected Log log;
     protected Names names;
 
@@ -154,11 +158,17 @@
         return new String(sbuf, 0, sp);
     }
 
+    protected boolean setUnicodeConversion(boolean newState) {
+        boolean oldState = unicodeConversion;
+        unicodeConversion = newState;
+        return oldState;
+    }
+
     /** Convert unicode escape; bp points to initial '\' character
      *  (Spec 3.3).
      */
     protected void convertUnicode() {
-        if (ch == '\\' && unicodeConversionBp != bp) {
+        if (ch == '\\' && unicodeConversion && unicodeConversionBp != bp ) {
             bp++; ch = buf[bp];
             if (ch == 'u') {
                 do {
@@ -254,6 +264,24 @@
         return buf[bp + 1];
     }
 
+    protected char peekBack() {
+        return buf[bp];
+    }
+
+    /**
+     * Skips consecutive occurrences of the current character, leaving bp positioned
+     * at the last occurrence. Returns the occurrence count.
+     */
+    protected int skipRepeats() {
+        int start = bp;
+        while (bp < buflen) {
+            if (buf[bp] != buf[bp + 1])
+                break;
+            bp++;
+        }
+        return bp - start;
+    }
+
     /**
      * Returns a copy of the input buffer, up to its inputLength.
      * Unicode escape sequences are not translated.
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/resources/compiler.properties	Wed Sep 12 08:46:25 2018 -0700
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/resources/compiler.properties	Wed Sep 12 14:19:36 2018 -0300
@@ -1232,6 +1232,9 @@
 compiler.err.unclosed.str.lit=\
     unclosed string literal
 
+compiler.err.unicode.backtick=\
+    attempt to use \\u0060 as a raw string literal delimiter
+
 # 0: string
 compiler.err.unsupported.encoding=\
     unsupported encoding: {0}
@@ -2826,6 +2829,9 @@
 compiler.misc.feature.switch.expressions=\
     switch expressions
 
+compiler.misc.feature.raw.string.literals=\
+    raw string literals
+
 compiler.warn.underscore.as.identifier=\
     as of release 9, ''_'' is a keyword, and may not be used as an identifier
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/langtools/tools/javac/RawStringLiteralLang.java	Wed Sep 12 14:19:36 2018 -0300
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Unit tests for Raw String Literal language changes
+ * @compile --enable-preview -source 12 -encoding utf8 RawStringLiteralLang.java
+ * @run main/othervm --enable-preview RawStringLiteralLang
+ */
+
+public class RawStringLiteralLang {
+    public static void main(String... args) {
+        test1();
+        test2();
+    }
+
+    /*
+     * Test raw string functionality.
+     */
+    static void test1() {
+        EQ(`abc`, "abc");
+        EQ(`can't`, "can\'t");
+        EQ(``can`t``, "can`t");
+        EQ(`can\\'t`, "can\\\\'t");
+        EQ(``can\\`t``, "can\\\\`t");
+        EQ(`\t`, "\\t");
+        EQ(`•`, "\u2022");
+
+        LENGTH("abc``def", 8);
+        EQ("abc`\u0020`def", "abc` `def");
+    }
+
+    /*
+     * Test multi-line string functionality.
+     */
+    static void test2() {
+        EQ(`abc
+def
+ghi`, "abc\ndef\nghi");
+        EQ(`abc
+def
+ghi
+`, "abc\ndef\nghi\n");
+        EQ(`
+abc
+def
+ghi`, "\nabc\ndef\nghi");
+        EQ(`
+abc
+def
+ghi
+`, "\nabc\ndef\nghi\n");
+    }
+
+    /*
+     * Raise an exception if the string is not the expected length.
+     */
+    static void LENGTH(String rawString, int length) {
+        if (rawString == null || rawString.length() != length) {
+            System.err.println("Failed LENGTH");
+            System.err.println(rawString + " " + length);
+            throw new RuntimeException("Failed LENGTH");
+        }
+    }
+
+    /*
+     * Raise an exception if the two input strings are not equal.
+     */
+    static void EQ(String input, String expected) {
+        if (input == null || expected == null || !expected.equals(input)) {
+            System.err.println("Failed EQ");
+            System.err.println();
+            System.err.println("Input:");
+            System.err.println(input.replaceAll(" ", "."));
+            System.err.println();
+            System.err.println("Expected:");
+            System.err.println(expected.replaceAll(" ", "."));
+            throw new RuntimeException();
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/langtools/tools/javac/RawStringLiteralLangAPI.java	Wed Sep 12 14:19:36 2018 -0300
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Unit tests for Raw String Literal language changes
+ * @library /tools/lib
+ * @modules jdk.compiler/com.sun.tools.javac.api
+ *          jdk.compiler/com.sun.tools.javac.main
+ * @build toolbox.ToolBox toolbox.JavacTask
+ * @run main RawStringLiteralLangAPI
+ */
+
+import toolbox.JavacTask;
+import toolbox.JavaTask;
+import toolbox.Task;
+import toolbox.ToolBox;
+
+public class RawStringLiteralLangAPI {
+    private static ToolBox TOOLBOX = new ToolBox();
+
+    public static void main(String... args) {
+        test1();
+        test2();
+        test3();
+        test4();
+    }
+
+    /*
+     * Check that correct/incorrect syntax is properly detected
+     */
+    static void test1() {
+        int[] n = new int[] { 1, 2, 3, 4, 5, 10, 16, 32, 1000, 10000 };
+        String[] s = new String[] { "a", "ab", "abc", "\u2022", "*".repeat(1000), "*".repeat(10000) };
+        for (int i : n) {
+        for (int j : n) {
+        for (int k : n) {
+        for (String a : s)  {
+        for (String b : s) {
+            String code =
+                    "public class RawStringLiteralTest {\n" +
+                            "    public static void main(String... args) {\n" +
+                            "        String xxx = " +
+                            "`".repeat(i) + a + "`".repeat(j) + b + "`".repeat(k) + ";\n" +
+                            "    }\n" +
+                            "}\n";
+            if (i == k && j != i) {
+                compPass(code);
+            } else {
+                compFail(code);
+            }
+        }}}}}
+    }
+
+    /*
+     * Check that misuse of \u0060 is properly detected
+     */
+    static void test2() {
+        compFail("public class BadDelimiter {\n" +
+                "    public static void main(String... args) {\n" +
+                "        String xxx = \\u0060`abc`;\n" +
+                "    }\n" +
+                "}\n");
+    }
+
+    /*
+     * Check edge cases of raw string literal as last token
+     */
+    static void test3() {
+        compFail("public class RawStringLiteralTest {\n" +
+                "    public static void main(String... args) {\n" +
+                "        String xxx = `abc`");
+        compFail("public class RawStringLiteralTest {\n" +
+                "    public static void main(String... args) {\n" +
+                "        String xxx = `abc");
+        compFail("public class RawStringLiteralTest {\n" +
+                "    public static void main(String... args) {\n" +
+                "        String xxx = `abc\u0000");
+    }
+
+
+    /*
+     * Check line terminator translation
+     */
+    static void test4() {
+        String[] terminators = new String[] { "\n", "\r\n", "\r" };
+        for (String terminator : terminators) {
+            String code = "public class LineTerminatorTest {" + terminator +
+                          "    public static void main(String... args) {" + terminator +
+                          "        String s =" + terminator +
+                          "`" + terminator +
+                          "abc" + terminator +
+                          "`;" + terminator +
+                          "        System.out.println(s.equals(\"\\nabc\\n\"));" + terminator +
+                          "    }" + terminator +
+                          "}" + terminator;
+            new JavacTask(TOOLBOX)
+                    .sources(code)
+                    .classpath(".")
+                    .options("--enable-preview", "-source", "12")
+                    .run();
+            String output = new JavaTask(TOOLBOX)
+                    .vmOptions("--enable-preview")
+                    .classpath(".")
+                    .classArgs("LineTerminatorTest")
+                    .run()
+                    .writeAll()
+                    .getOutput(Task.OutputKind.STDOUT);
+
+            if (!output.contains("true")) {
+                throw new RuntimeException("Error detected");
+            }
+        }
+    }
+
+    /*
+     * Test source for successful compile.
+     */
+    static void compPass(String source) {
+        String output = new JavacTask(TOOLBOX)
+                .sources(source)
+                .classpath(".")
+                .options("--enable-preview", "-source", "12", "-encoding", "utf8")
+                .run()
+                .writeAll()
+                .getOutput(Task.OutputKind.DIRECT);
+
+        if (output.contains("compiler.err")) {
+            throw new RuntimeException("Error detected");
+        }
+    }
+
+    /*
+     * Test source for unsuccessful compile and specific error.
+     */
+    static void compFail(String source)  {
+        String errors = new JavacTask(TOOLBOX)
+                .sources(source)
+                .classpath(".")
+                .options("-XDrawDiagnostics", "--enable-preview", "-source", "12", "-encoding", "utf8")
+                .run(Task.Expect.FAIL)
+                .writeAll()
+                .getOutput(Task.OutputKind.DIRECT);
+
+        if (!errors.contains("compiler.err")) {
+            throw new RuntimeException("No error detected");
+        }
+    }
+}
--- a/test/langtools/tools/javac/diags/examples/IllegalChar.java	Wed Sep 12 08:46:25 2018 -0700
+++ b/test/langtools/tools/javac/diags/examples/IllegalChar.java	Wed Sep 12 14:19:36 2018 -0300
@@ -24,5 +24,5 @@
 // key: compiler.err.illegal.char
 
 class IllegalChar {
-    int i = `;
+    int i = #;
 }