8141285: NameCode should pass tests from BytecodeNameTest.java
authorsundar
Tue, 03 Nov 2015 21:08:53 +0530
changeset 33534 28065d9f5521
parent 33533 43400f0f2b47
child 33535 e844f2155d72
8141285: NameCode should pass tests from BytecodeNameTest.java Reviewed-by: attila, mhaupt
nashorn/samples/find_underscores.js
nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/linker/NameCodec.java
nashorn/test/src/jdk/nashorn/internal/runtime/linker/test/NameCodecTest.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/samples/find_underscores.js	Tue Nov 03 21:08:53 2015 +0530
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   - Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of Oracle nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Usage: jjs find_underscores.js -- <directory>
+// Prints the position of every variable named "_" in the .java files under <directory>.
+if (arguments.length == 0) {
+    print("Usage: jjs find_underscores.js -- <directory>");
+    exit(1);
+}
+
+// Java types used, resolved through Java.type
+var File = Java.type("java.io.File");
+var Files = Java.type("java.nio.file.Files");
+var StringArray = Java.type("java.lang.String[]");
+var ToolProvider = Java.type("javax.tools.ToolProvider");
+var Tree = Java.type("com.sun.source.tree.Tree"); // NOTE(review): not referenced elsewhere in this script
+var Trees = Java.type("com.sun.source.util.Trees");
+var TreeScanner = Java.type("com.sun.source.util.TreeScanner");
+
+function findUnderscores() {
+    // Parses the files passed as this function's arguments and prints each "_" variable.
+    var compiler = ToolProvider.systemJavaCompiler;
+    var fileMgr = compiler.getStandardFileManager(null, null, null);
+    try {
+        // Java.to converts this function's own "arguments" (not the script's) to a Java String[]
+        var compUnits = fileMgr.getJavaFileObjects(Java.to(arguments, StringArray));
+        var task = compiler.getTask(null, fileMgr, null, null, null, compUnits);
+        var sourcePositions = Trees.instance(task).sourcePositions;
+        // subclass TreeScanner - to find underscore variable names
+        var UnderscoreFinder = Java.extend(TreeScanner);
+
+        var visitor = new UnderscoreFinder() {
+            // override to capture information on current compilation unit
+            visitCompilationUnit: function(compUnit, p) {
+                this.compUnit = compUnit;
+                this.lineMap = compUnit.lineMap;
+                this.fileName = compUnit.sourceFile.name;
+                return Java.super(visitor).visitCompilationUnit(compUnit, p);
+            },
+
+            // override to check variable name
+            visitVariable: function(node, p) {
+                if (node.name.toString() == "_") {
+                    var pos = sourcePositions.getStartPosition(this.compUnit, node);
+                    var line = this.lineMap.getLineNumber(pos);
+                    var col = this.lineMap.getColumnNumber(pos);
+                    print(node + " @ " + this.fileName + ":" + line + ":" + col);
+                }
+                return Java.super(visitor).visitVariable(node, p);
+            }
+        };
+
+        for each (var cu in task.parse()) {
+            cu.accept(visitor, null);
+        }
+    } finally {
+        fileMgr.close(); // release the file manager created for this call
+    }
+}
+
+// Runs findUnderscores on each ".java" file in directory (recursively).
+function main(dir) {
+    var paths = Files.walk(dir.toPath()); // lazily populated stream; must be closed
+    try {
+        paths.forEach(function(p) {
+            if (p.toFile().absolutePath.endsWith(".java")) {
+                findUnderscores(p);
+            }
+        });
+    } finally { paths.close(); }
+}
+
+main(new File(arguments[0]));
--- a/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/linker/NameCodec.java	Mon Nov 02 18:26:53 2015 +0100
+++ b/nashorn/src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/linker/NameCodec.java	Tue Nov 03 21:08:53 2015 +0530
@@ -26,44 +26,256 @@
 package jdk.nashorn.internal.runtime.linker;
 
 /**
+ * <p>
  * Implements the name mangling and demangling as specified by John Rose's
  * <a href="https://blogs.oracle.com/jrose/entry/symbolic_freedom_in_the_vm"
  * target="_blank">"Symbolic Freedom in the VM"</a> article. Normally, you would
  * mangle the names in the call sites as you're generating bytecode, and then
  * demangle them when you receive them in bootstrap methods.
+ * </p>
+ * <p>
+ * This code is derived from sun.invoke.util.BytecodeName. Only a subset of that class
+ * is needed here, and we don't want the nashorn module to depend on a non-exported
+ * package of java.base.
+ * </p>
+ *
+ * <h3>Comment from BytecodeName class reproduced here:</h3>
+ *
+ * Includes universal mangling rules for the JVM.
+ *
+ * <h3>Avoiding Dangerous Characters </h3>
+ *
+ * <p>
+ * The JVM defines a very small set of characters which are illegal
+ * in name spellings.  We will slightly extend and regularize this set
+ * into a group of <cite>dangerous characters</cite>.
+ * These characters will then be replaced, in mangled names, by escape sequences.
+ * In addition, accidental escape sequences must be further escaped.
+ * Finally, a special prefix will be applied if and only if
+ * the mangling would otherwise fail to begin with the escape character.
+ * This happens to cover the corner case of the null string,
+ * and also clearly marks symbols which need demangling.
+ * </p>
+ * <p>
+ * Dangerous characters are the union of all characters forbidden
+ * or otherwise restricted by the JVM specification,
+ * plus their mates, if they are brackets
+ * (<code><big><b>[</b></big></code> and <code><big><b>]</b></big></code>,
+ * <code><big><b>&lt;</b></big></code> and <code><big><b>&gt;</b></big></code>),
+ * plus, arbitrarily, the colon character <code><big><b>:</b></big></code>.
+ * There is no distinction between type, method, and field names.
+ * This makes it easier to convert between mangled names of different
+ * types, since they do not need to be decoded (demangled).
+ * </p>
+ * <p>
+ * The escape character is backslash <code><big><b>\</b></big></code>
+ * (also known as reverse solidus).
+ * This character is, until now, unheard of in bytecode names,
+ * but traditional in the proposed role.
+ *
+ * </p>
+ * <h3> Replacement Characters </h3>
+ *
+ *
+ * <p>
+ * Every escape sequence is two characters
+ * (in fact, two UTF8 bytes) beginning with
+ * the escape character and followed by a
+ * <cite>replacement character</cite>.
+ * (Since the replacement character is never a backslash,
+ * iterated manglings do not double in size.)
+ * </p>
+ * <p>
+ * Each dangerous character has some rough visual similarity
+ * to its corresponding replacement character.
+ * This makes mangled symbols easier to recognize by sight.
+ * </p>
+ * <p>
+ * The dangerous characters are
+ * <code><big><b>/</b></big></code> (forward slash, used to delimit package components),
+ * <code><big><b>.</b></big></code> (dot, also a package delimiter),
+ * <code><big><b>;</b></big></code> (semicolon, used in signatures),
+ * <code><big><b>$</b></big></code> (dollar, used in inner classes and synthetic members),
+ * <code><big><b>&lt;</b></big></code> (left angle),
+ * <code><big><b>&gt;</b></big></code> (right angle),
+ * <code><big><b>[</b></big></code> (left square bracket, used in array types),
+ * <code><big><b>]</b></big></code> (right square bracket, reserved in this scheme for language use),
+ * and <code><big><b>:</b></big></code> (colon, reserved in this scheme for language use).
+ * Their replacements are, respectively,
+ * <code><big><b>|</b></big></code> (vertical bar),
+ * <code><big><b>,</b></big></code> (comma),
+ * <code><big><b>?</b></big></code> (question mark),
+ * <code><big><b>%</b></big></code> (percent),
+ * <code><big><b>^</b></big></code> (caret),
+ * <code><big><b>_</b></big></code> (underscore),
+ * <code><big><b>{</b></big></code> (left curly bracket),
+ * <code><big><b>}</b></big></code> (right curly bracket), and
+ * <code><big><b>!</b></big></code> (exclamation mark).
+ * In addition, the replacement character for the escape character itself is
+ * <code><big><b>-</b></big></code> (hyphen),
+ * and the replacement character for the null prefix is
+ * <code><big><b>=</b></big></code> (equal sign).
+ * </p>
+ * <p>
+ * An escape character <code><big><b>\</b></big></code>
+ * followed by any of these replacement characters
+ * is an escape sequence, and there are no other escape sequences.
+ * An equal sign is only part of an escape sequence
+ * if it is the second character in the whole string, following a backslash.
+ * Two consecutive backslashes do <em>not</em> form an escape sequence.
+ * </p>
+ * <p>
+ * Each escape sequence replaces a so-called <cite>original character</cite>
+ * which is either one of the dangerous characters or the escape character.
+ * A null prefix replaces an initial null string, not a character.
+ * </p>
+ * <p>
+ * All this implies that escape sequences cannot overlap and may be
+ * determined all at once for a whole string.  Note that a spelling
+ * string can contain <cite>accidental escapes</cite>, apparent escape
+ * sequences which must not be interpreted as manglings.
+ * These are disabled by replacing their leading backslash with an
+ * escape sequence (<code><big><b>\-</b></big></code>).  To mangle a string, three logical steps
+ * are required, though they may be carried out in one pass:
+ * </p>
+ * <ol>
+ *   <li>In each accidental escape, replace the backslash with an escape sequence
+ * (<code><big><b>\-</b></big></code>).</li>
+ *   <li>Replace each dangerous character with an escape sequence
+ * (<code><big><b>\|</b></big></code> for <code><big><b>/</b></big></code>, etc.).</li>
+ *   <li>If the first two steps introduced any change, <em>and</em>
+ * if the string does not already begin with a backslash, prepend a null prefix (<code><big><b>\=</b></big></code>).</li>
+ * </ol>
+ *
+ * To demangle a mangled string that begins with an escape,
+ * remove any null prefix, and then replace (in parallel)
+ * each escape sequence by its original character.
+ * <p>Spelling strings which contain accidental
+ * escapes <em>must</em> have them replaced, even if those
+ * strings do not contain dangerous characters.
+ * This restriction means that mangling a string always
+ * requires a scan of the string for escapes.
+ * But then, a scan would be required anyway,
+ * to check for dangerous characters.
+ *
+ * </p>
+ * <h3> Nice Properties </h3>
+ *
+ * <p>
+ * If a bytecode name does not contain any escape sequence,
+ * demangling is a no-op:  The string demangles to itself.
+ * Such a string is called <cite>self-mangling</cite>.
+ * Almost all strings are self-mangling.
+ * In practice, to demangle almost any name &ldquo;found in nature&rdquo;,
+ * simply verify that it does not begin with a backslash.
+ * </p>
+ * <p>
+ * Mangling is a one-to-one function, while demangling
+ * is a many-to-one function.
+ * A mangled string is defined as <cite>validly mangled</cite> if
+ * it is in fact the unique mangling of its spelling string.
+ * Three examples of invalidly mangled strings are <code><big><b>\=foo</b></big></code>,
+ * <code><big><b>\-bar</b></big></code>, and <code><big><b>baz\!</b></big></code>, which demangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and
+ * <code><big><b>baz\!</b></big></code>, but then remangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and <code><big><b>\=baz\-!</b></big></code>.
+ * If a language back-end or runtime is using mangled names,
+ * it should never present an invalidly mangled bytecode
+ * name to the JVM.  If the runtime encounters one,
+ * it should also report an error, since such an occurrence
+ * probably indicates a bug in name encoding which
+ * will lead to errors in linkage.
+ * However, this note does not propose that the JVM verifier
+ * detect invalidly mangled names.
+ * </p>
+ * <p>
+ * As a result of these rules, it is a simple matter to
+ * compute validly mangled substrings and concatenations
+ * of validly mangled strings, and (with a little care)
+ * these correspond to corresponding operations on their
+ * spelling strings.
+ * </p>
+ * <ul>
+ *   <li>Any prefix of a validly mangled string is also validly mangled,
+ * although a null prefix may need to be removed.</li>
+ *   <li>Any suffix of a validly mangled string is also validly mangled,
+ * although a null prefix may need to be added.</li>
+ *   <li>Two validly mangled strings, when concatenated,
+ * are also validly mangled, although any null prefix
+ * must be removed from the second string,
+ * and a trailing backslash on the first string may need escaping,
+ * if it would participate in an accidental escape when followed
+ * by the first character of the second string.</li>
+ * </ul>
+ * <p>If languages that include non-Java symbol spellings use this
+ * mangling convention, they will enjoy the following advantages:
+ * </p>
+ * <ul>
+ *   <li>They can interoperate via symbols they share in common.</li>
+ *   <li>Low-level tools, such as backtrace printers, will have readable displays.</li>
+ *   <li>Future JVM and language extensions can safely use the dangerous characters
+ * for structuring symbols, but will never interfere with valid spellings.</li>
+ *   <li>Runtimes and compilers can use standard libraries for mangling and demangling.</li>
+ *   <li>Occasional transliterations and name composition will be simple and regular,
+ * for classes, methods, and fields.</li>
+ *   <li>Bytecode names will continue to be compact.
+ * When mangled, spellings will at most double in length, either in
+ * UTF8 or UTF16 format, and most will not change at all.</li>
+ * </ul>
+ *
+ *
+ * <h3> Suggestions for Human Readable Presentations </h3>
+ *
+ *
+ * <p>
+ * For human readable displays of symbols,
+ * it will be better to present a string-like quoted
+ * representation of the spelling, because JVM users
+ * are generally familiar with such tokens.
+ * We suggest using single or double quotes before and after
+ * mangled symbols which are not valid Java identifiers,
+ * with quotes, backslashes, and non-printing characters
+ * escaped as if for literals in the Java language.
+ * </p>
+ * <p>
+ * For example, an HTML-like spelling
+ * <code><big><b>&lt;pre&gt;</b></big></code> mangles to
+ * <code><big><b>\^pre\_</b></big></code> and could
+ * display more cleanly as
+ * <code><big><b>'&lt;pre&gt;'</b></big></code>,
+ * with the quotes included.
+ * Such string-like conventions are <em>not</em> suitable
+ * for mangled bytecode names, in part because
+ * dangerous characters must be eliminated, rather
+ * than just quoted.  Otherwise internally structured
+ * strings like package prefixes and method signatures
+ * could not be reliably parsed.
+ * </p>
+ * <p>
+ * In such human-readable displays, invalidly mangled
+ * names should <em>not</em> be demangled and quoted,
+ * for this would be misleading.  Likewise, JVM symbols
+ * which contain dangerous characters (like dots in field
+ * names or brackets in method names) should not be
+ * simply quoted.  The bytecode names
+ * <code><big><b>\=phase\,1</b></big></code> and
+ * <code><big><b>phase.1</b></big></code> are distinct,
+ * and in demangled displays they should be presented as
+ * <code><big><b>'phase.1'</b></big></code> and something like
+ * <code><big><b>'phase'.1</b></big></code>, respectively.
+ * </p>
  */
 public final class NameCodec {
-    private static final char ESCAPE_CHAR = '\\';
-    private static final char EMPTY_ESCAPE = '=';
+    private NameCodec() {  // static utility class, not meant to be instantiated
+    }
+
+    private static final char ESCAPE_C = '\\';  // backslash introduces every escape sequence
+    // empty escape sequence to avoid a null name or illegal prefix
+    private static final char NULL_ESCAPE_C = '=';
+    private static final String NULL_ESCAPE = ESCAPE_C+""+NULL_ESCAPE_C;  // the null prefix "\="
+
     /**
      * Canonical encoding for the empty name.
      */
-    public static final String EMPTY_NAME = new String(new char[] { ESCAPE_CHAR, EMPTY_ESCAPE });
-    private static final char EMPTY_CHAR = 0xFEFF;
-
-    private static final int MIN_ENCODING = '$';
-    private static final int MAX_ENCODING = ']';
-    private static final char[] ENCODING = new char[MAX_ENCODING - MIN_ENCODING + 1];
-    private static final int MIN_DECODING = '!';
-    private static final int MAX_DECODING = '}';
-    private static final char[] DECODING = new char[MAX_DECODING - MIN_DECODING + 1];
-
-    static {
-        addEncoding('/', '|');
-        addEncoding('.', ',');
-        addEncoding(';', '?');
-        addEncoding('$', '%');
-        addEncoding('<', '^');
-        addEncoding('>', '_');
-        addEncoding('[', '{');
-        addEncoding(']', '}');
-        addEncoding(':', '!');
-        addEncoding('\\', '-');
-        DECODING[EMPTY_ESCAPE - MIN_DECODING] = EMPTY_CHAR;
-    }
-
-    private NameCodec() {
-    }
+    public static final String EMPTY_NAME = NULL_ESCAPE;  // same string mangle("") returns
 
     /**
      * Encodes ("mangles") an unencoded symbolic name.
@@ -71,37 +283,10 @@
      * @return the mangled form of the symbolic name.
      */
     public static String encode(final String name) {
-        final int l = name.length();
-        if(l == 0) {
-            return EMPTY_NAME;
-        }
-        StringBuilder b = null;
-        int lastEscape = -1;
-        for(int i = 0; i < l; ++i) {
-            final int encodeIndex = name.charAt(i) - MIN_ENCODING;
-            if(encodeIndex >= 0 && encodeIndex < ENCODING.length) {
-                final char e = ENCODING[encodeIndex];
-                if(e != 0) {
-                    if(b == null) {
-                        b = new StringBuilder(name.length() + 3);
-                        if(name.charAt(0) != ESCAPE_CHAR && i > 0) {
-                            b.append(EMPTY_NAME);
-                        }
-                        b.append(name, 0, i);
-                    } else {
-                        b.append(name, lastEscape + 1, i);
-                    }
-                    b.append(ESCAPE_CHAR).append(e);
-                    lastEscape = i;
-                }
-            }
-        }
-        if(b == null) {
-            return name;
-        }
-        assert lastEscape != -1;
-        b.append(name, lastEscape + 1, l);
-        return b.toString();
+        final String mangled = mangle(name);  // same instance as name when nothing changed
+        assert((Object)mangled == name || looksMangled(mangled)) : mangled;
+        assert(name.equals(decode(mangled))) : name;  // must round-trip
+        return mangled;
     }
 
     /**
@@ -110,42 +295,138 @@
      * @return the demangled form of the symbolic name.
      */
     public static String decode(final String name) {
-        if(name.isEmpty() || name.charAt(0) != ESCAPE_CHAR) {
-            return name;
-        }
-        final int l = name.length();
-        if(l == 2 && name.charAt(1) == EMPTY_CHAR) {
-            return "";
+        if (name.isEmpty() || !looksMangled(name)) {
+            return name;  // nothing to demangle
         }
-        final StringBuilder b = new StringBuilder(name.length());
-        int lastEscape = -2;
-        int lastBackslash = -1;
-        for(;;) {
-            final int nextBackslash = name.indexOf(ESCAPE_CHAR, lastBackslash + 1);
-            if(nextBackslash == -1 || nextBackslash == l - 1) {
-                break;
+        final String source = demangle(name);
+        assert(name.equals(mangle(source))) : name+" => "+source+" => "+mangle(source);
+        return source;
+    }
+
+    private static boolean looksMangled(String s) {
+        return !s.isEmpty() && s.charAt(0) == ESCAPE_C;  // empty input has no leading escape
+    }
+    /** Returns the mangled form of {@code s}, or {@code s} itself when nothing needs escaping. */
+    private static String mangle(String s) {
+        if (s.length() == 0)
+            return NULL_ESCAPE;  // the empty name mangles to the bare null prefix
+
+        // build this lazily, when we first need an escape:
+        StringBuilder sb = null;
+
+        for (int i = 0, slen = s.length(); i < slen; i++) {
+            char c = s.charAt(i);
+
+            boolean needEscape = false;
+            if (c == ESCAPE_C) {
+                if (i+1 < slen) {  // a trailing backslash is left as is
+                    char c1 = s.charAt(i+1);
+                    if ((i == 0 && c1 == NULL_ESCAPE_C)  // leading "\=" would read as a null prefix
+                        || c1 != originalOfReplacement(c1)) {  // c1 is a replacement character
+                        // an accidental escape
+                        needEscape = true;
+                    }
+                }
+            } else {
+                needEscape = isDangerous(c);
+            }
+
+            if (!needEscape) {
+                if (sb != null)  sb.append(c);  // copy only once a builder exists
+                continue;
             }
-            final int decodeIndex = name.charAt(nextBackslash + 1) - MIN_DECODING;
-            if(decodeIndex >= 0 && decodeIndex < DECODING.length) {
-                final char d = DECODING[decodeIndex];
-                if(d == EMPTY_CHAR) {
-                    // "\=" is only valid at the beginning of a mangled string
-                    if(nextBackslash == 0) {
-                        lastEscape = 0;
+
+            // build sb if this is the first escape
+            if (sb == null) {
+                sb = new StringBuilder(s.length()+10);
+                // mangled names must begin with a backslash:
+                if (s.charAt(0) != ESCAPE_C && i > 0)
+                    sb.append(NULL_ESCAPE);
+                // append the string so far, which is unremarkable:
+                sb.append(s, 0, i);
+            }
+
+            // rewrite \ to \-, / to \|, etc.
+            sb.append(ESCAPE_C);
+            sb.append(replacementOf(c));
+        }
+
+        if (sb != null)   return sb.toString();
+
+        return s;  // no escape was needed: return the same instance
+    }
+    /** Drops a leading null prefix and replaces each escape sequence in {@code s} by its original character. */
+    private static String demangle(String s) {
+        // build this lazily, when we first meet an escape:
+        StringBuilder sb = null;
+
+        int stringStart = 0;
+        if (s.startsWith(NULL_ESCAPE))
+            stringStart = 2;  // skip the two-character null prefix
+
+        for (int i = stringStart, slen = s.length(); i < slen; i++) {
+            char c = s.charAt(i);
+
+            if (c == ESCAPE_C && i+1 < slen) {
+                // might be an escape sequence
+                char rc = s.charAt(i+1);
+                char oc = originalOfReplacement(rc);
+                if (oc != rc) {  // rc is a replacement character, so this is a real escape
+                    // build sb if this is the first escape
+                    if (sb == null) {
+                        sb = new StringBuilder(s.length());
+                        // append the string so far, which is unremarkable:
+                        sb.append(s, stringStart, i);
                     }
-                } else if(d != 0) {
-                    b.append(name, lastEscape + 2, nextBackslash).append(d);
-                    lastEscape = nextBackslash;
+                    ++i;  // skip both characters
+                    c = oc;
                 }
             }
-            lastBackslash = nextBackslash;
+
+            if (sb != null)
+                sb.append(c);
         }
-        b.append(name, lastEscape + 2, l);
-        return b.toString();
+
+        if (sb != null)   return sb.toString();
+
+        return s.substring(stringStart);  // no escape sequence found after the prefix
+    }
+
+    private static final String DANGEROUS_CHARS   = "\\/.;:$[]<>"; // \\ must be first
+    private static final String REPLACEMENT_CHARS =  "-|,?!%{}^_";
+    private static final int DANGEROUS_CHAR_FIRST_INDEX = 1; // index after \\
+
+    private static final long[] SPECIAL_BITMAP = new long[2];  // 128 bits
+    static {
+        // Flag every dangerous and every replacement character in the bitmap.
+        for (final char special : (DANGEROUS_CHARS + REPLACEMENT_CHARS).toCharArray()) {
+            SPECIAL_BITMAP[special >>> 6] |= 1L << special;
+        }
     }
 
-    private static void addEncoding(final char from, final char to) {
-        ENCODING[from - MIN_ENCODING] = to;
-        DECODING[to - MIN_DECODING] = from;
+    private static boolean isSpecial(char c) {
+        // Each bitmap word holds 64 flags; characters beyond the bitmap are never special.
+        final int word = c >>> 6;
+        return word < SPECIAL_BITMAP.length
+            && ((SPECIAL_BITMAP[word] >> c) & 1) != 0;
+    }
+
+    private static char replacementOf(char c) {
+        // A character listed in DANGEROUS_CHARS maps to the replacement at the
+        // same index; every other character maps to itself.
+        final int index = isSpecial(c) ? DANGEROUS_CHARS.indexOf(c) : -1;
+        return (index < 0) ? c : REPLACEMENT_CHARS.charAt(index);
+    }
+
+    private static char originalOfReplacement(char c) {
+        // A character listed in REPLACEMENT_CHARS maps back to the original at
+        // the same index; every other character maps to itself.
+        final int index = isSpecial(c) ? REPLACEMENT_CHARS.indexOf(c) : -1;
+        return (index < 0) ? c : DANGEROUS_CHARS.charAt(index);
+    }
+
+    private static boolean isDangerous(char c) {
+        // The escape character sits at index 0 and is deliberately excluded.
+        return isSpecial(c) && DANGEROUS_CHARS.indexOf(c) >= DANGEROUS_CHAR_FIRST_INDEX;
     }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nashorn/test/src/jdk/nashorn/internal/runtime/linker/test/NameCodecTest.java	Tue Nov 03 21:08:53 2015 +0530
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.nashorn.internal.runtime.linker.test;
+
+import static org.testng.Assert.assertEquals;
+
+import jdk.nashorn.internal.runtime.linker.NameCodec;
+import org.testng.annotations.Test;
+
+/**
+ * Test for jdk.nashorn.internal.runtime.linker.NameCodec.java. This test is
+ * derived from BytecodeNameTest.java from (older) mlvm code @
+ * http://hg.openjdk.java.net/mlvm/mlvm/file/tip/netbeans/meth/test/sun/invoke/util/BytecodeNameTest.java
+ *
+ * @bug 8141285: NameCode should pass tests from BytecodeNameTest.java
+ */
+public class NameCodecTest {
+
+    static final String[][] SAMPLES = {
+        // mangled, source
+        {"foo", "foo"},
+        {"ba\\r", "ba\\r"},
+        {"\\=ba\\-%z", "ba\\%z"},
+        {"\\=ba\\--z", "ba\\-z"},
+        {"=\\=", "=\\="},
+        {"\\==\\|\\=", "=/\\="},
+        {"\\|\\=", "/\\="},
+        {"\\=ba\\!", "ba:"},
+        {"\\|", "/"},
+        {"\\", "\\"},
+        {"\\\\%", "\\$"},
+        {"\\\\", "\\\\"},
+        {"\\=", ""}
+
+    };
+
+    static final String DANGEROUS_CHARS = "\\/.;:$[]<>";
+    static final String REPLACEMENT_CHARS = "-|,?!%{}^_";
+
+    static String[][] canonicalSamples() {
+        int ndc = DANGEROUS_CHARS.length();
+        String[][] res = new String[2 * ndc][];
+        for (int i = 0; i < ndc; i++) {
+            char dc = DANGEROUS_CHARS.charAt(i);
+            char rc = REPLACEMENT_CHARS.charAt(i);
+            if (dc == '\\') {
+                res[2 * i + 0] = new String[]{"\\-%", "\\%"};
+            } else {
+                res[2 * i + 0] = new String[]{"\\" + rc, "" + dc};
+            }
+            res[2 * i + 1] = new String[]{"" + rc, "" + rc};
+        }
+        return res;
+    }
+
+    @Test
+    public void testEncode() {
+        System.out.println("testEncode");
+        testEncode(SAMPLES);
+        testEncode(canonicalSamples());
+    }
+
+    // Checks that encoding each sample's source form yields its mangled form.
+    private void testEncode(String[][] samples) {
+        for (String[] sample : samples) {
+            String s = sample[1];
+            String expResult = sample[0];
+            String result = NameCodec.encode(s);
+            // TestNG's assertEquals takes (actual, expected, message).
+            assertEquals(result, expResult, "encode(" + s + ")");
+        }
+    }
+
+    @Test
+    public void testDecode() {
+        System.out.println("testDecode");
+        testDecode(SAMPLES);
+        testDecode(canonicalSamples());
+    }
+
+    // Checks that decoding each sample's mangled form yields its source form.
+    private void testDecode(String[][] samples) {
+        for (String[] sample : samples) {
+            String s = sample[0];
+            String expResult = sample[1];
+            String result = NameCodec.decode(s);
+            assertEquals(result, expResult, "decode(" + s + ")");
+        }
+    }
+}