src/jdk.scripting.nashorn/share/classes/jdk/nashorn/internal/runtime/linker/NameCodec.java
changeset 49145 2854589fd853
parent 47216 71c04702a3d5
child 54547 53aecb049e0a
equal deleted inserted replaced
49144:71bc133f25ea 49145:2854589fd853
    58  * </p>
    58  * </p>
    59  * <p>
    59  * <p>
    60  * Dangerous characters are the union of all characters forbidden
    60  * Dangerous characters are the union of all characters forbidden
    61  * or otherwise restricted by the JVM specification,
    61  * or otherwise restricted by the JVM specification,
    62  * plus their mates, if they are brackets
    62  * plus their mates, if they are brackets
    63  * (<code><big><b>[</b></big></code> and <code><big><b>]</b></big></code>,
    63  * (<code><b>[</b></code> and <code><b>]</b></code>,
    64  * <code><big><b>&lt;</b></big></code> and <code><big><b>&gt;</b></big></code>),
    64  * <code><b>&lt;</b></code> and <code><b>&gt;</b></code>),
    65  * plus, arbitrarily, the colon character <code><big><b>:</b></big></code>.
    65  * plus, arbitrarily, the colon character <code><b>:</b></code>.
    66  * There is no distinction between type, method, and field names.
    66  * There is no distinction between type, method, and field names.
    67  * This makes it easier to convert between mangled names of different
    67  * This makes it easier to convert between mangled names of different
    68  * types, since they do not need to be decoded (demangled).
    68  * types, since they do not need to be decoded (demangled).
    69  * </p>
    69  * </p>
    70  * <p>
    70  * <p>
    71  * The escape character is backslash <code><big><b>\</b></big></code>
    71  * The escape character is backslash <code><b>\</b></code>
    72  * (also known as reverse solidus).
    72  * (also known as reverse solidus).
    73  * This character is, until now, unheard of in bytecode names,
    73  * This character is, until now, unheard of in bytecode names,
    74  * but traditional in the proposed role.
    74  * but traditional in the proposed role.
    75  *
    75  *
    76  * </p>
    76  * </p>
    90  * to its corresponding replacement character.
    90  * to its corresponding replacement character.
    91  * This makes mangled symbols easier to recognize by sight.
    91  * This makes mangled symbols easier to recognize by sight.
    92  * </p>
    92  * </p>
    93  * <p>
    93  * <p>
    94  * The dangerous characters are
    94  * The dangerous characters are
    95  * <code><big><b>/</b></big></code> (forward slash, used to delimit package components),
    95  * <code><b>/</b></code> (forward slash, used to delimit package components),
    96  * <code><big><b>.</b></big></code> (dot, also a package delimiter),
    96  * <code><b>.</b></code> (dot, also a package delimiter),
    97  * <code><big><b>;</b></big></code> (semicolon, used in signatures),
    97  * <code><b>;</b></code> (semicolon, used in signatures),
    98  * <code><big><b>$</b></big></code> (dollar, used in inner classes and synthetic members),
    98  * <code><b>$</b></code> (dollar, used in inner classes and synthetic members),
    99  * <code><big><b>&lt;</b></big></code> (left angle),
    99  * <code><b>&lt;</b></code> (left angle),
   100  * <code><big><b>&gt;</b></big></code> (right angle),
   100  * <code><b>&gt;</b></code> (right angle),
   101  * <code><big><b>[</b></big></code> (left square bracket, used in array types),
   101  * <code><b>[</b></code> (left square bracket, used in array types),
   102  * <code><big><b>]</b></big></code> (right square bracket, reserved in this scheme for language use),
   102  * <code><b>]</b></code> (right square bracket, reserved in this scheme for language use),
   103  * and <code><big><b>:</b></big></code> (colon, reserved in this scheme for language use).
   103  * and <code><b>:</b></code> (colon, reserved in this scheme for language use).
   104  * Their replacements are, respectively,
   104  * Their replacements are, respectively,
   105  * <code><big><b>|</b></big></code> (vertical bar),
   105  * <code><b>|</b></code> (vertical bar),
   106  * <code><big><b>,</b></big></code> (comma),
   106  * <code><b>,</b></code> (comma),
   107  * <code><big><b>?</b></big></code> (question mark),
   107  * <code><b>?</b></code> (question mark),
   108  * <code><big><b>%</b></big></code> (percent),
   108  * <code><b>%</b></code> (percent),
   109  * <code><big><b>^</b></big></code> (caret),
   109  * <code><b>^</b></code> (caret),
   110  * <code><big><b>_</b></big></code> (underscore),
   110  * <code><b>_</b></code> (underscore),
   111  * <code><big><b>{</b></big></code> (left curly bracket),
   111  * <code><b>{</b></code> (left curly bracket),
   112  * <code><big><b>}</b></big></code> (right curly bracket), and
   112  * <code><b>}</b></code> (right curly bracket), and
   113  * <code><big><b>!</b></big></code> (exclamation mark).
   113  * <code><b>!</b></code> (exclamation mark).
   114  * In addition, the replacement character for the escape character itself is
   114  * In addition, the replacement character for the escape character itself is
   115  * <code><big><b>-</b></big></code> (hyphen),
   115  * <code><b>-</b></code> (hyphen),
   116  * and the replacement character for the null prefix is
   116  * and the replacement character for the null prefix is
   117  * <code><big><b>=</b></big></code> (equal sign).
   117  * <code><b>=</b></code> (equal sign).
   118  * </p>
   118  * </p>
   119  * <p>
   119  * <p>
   120  * An escape character <code><big><b>\</b></big></code>
   120  * An escape character <code><b>\</b></code>
   121  * followed by any of these replacement characters
   121  * followed by any of these replacement characters
   122  * is an escape sequence, and there are no other escape sequences.
   122  * is an escape sequence, and there are no other escape sequences.
   123  * An equal sign is only part of an escape sequence
   123  * An equal sign is only part of an escape sequence
   124  * if it is the second character in the whole string, following a backslash.
   124  * if it is the second character in the whole string, following a backslash.
   125  * Two consecutive backslashes do <em>not</em> form an escape sequence.
   125  * Two consecutive backslashes do <em>not</em> form an escape sequence.
   133  * All this implies that escape sequences cannot overlap and may be
   133  * All this implies that escape sequences cannot overlap and may be
   134  * determined all at once for a whole string.  Note that a spelling
   134  * determined all at once for a whole string.  Note that a spelling
   135  * string can contain <cite>accidental escapes</cite>, apparent escape
   135  * string can contain <cite>accidental escapes</cite>, apparent escape
   136  * sequences which must not be interpreted as manglings.
   136  * sequences which must not be interpreted as manglings.
   137  * These are disabled by replacing their leading backslash with an
   137  * These are disabled by replacing their leading backslash with an
   138  * escape sequence (<code><big><b>\-</b></big></code>).  To mangle a string, three logical steps
   138  * escape sequence (<code><b>\-</b></code>).  To mangle a string, three logical steps
   139  * are required, though they may be carried out in one pass:
   139  * are required, though they may be carried out in one pass:
   140  * </p>
   140  * </p>
   141  * <ol>
   141  * <ol>
   142  *   <li>In each accidental escape, replace the backslash with an escape sequence
   142  *   <li>In each accidental escape, replace the backslash with an escape sequence
   143  * (<code><big><b>\-</b></big></code>).</li>
   143  * (<code><b>\-</b></code>).</li>
   144  *   <li>Replace each dangerous character with an escape sequence
   144  *   <li>Replace each dangerous character with an escape sequence
   145  * (<code><big><b>\|</b></big></code> for <code><big><b>/</b></big></code>, etc.).</li>
   145  * (<code><b>\|</b></code> for <code><b>/</b></code>, etc.).</li>
   146  *   <li>If the first two steps introduced any change, <em>and</em>
   146  *   <li>If the first two steps introduced any change, <em>and</em>
   147  * if the string does not already begin with a backslash, prepend a null prefix (<code><big><b>\=</b></big></code>).</li>
   147  * if the string does not already begin with a backslash, prepend a null prefix (<code><b>\=</b></code>).</li>
   148  * </ol>
   148  * </ol>
   149  *
   149  *
   150  * To demangle a mangled string that begins with an escape,
   150  * To demangle a mangled string that begins with an escape,
   151  * remove any null prefix, and then replace (in parallel)
   151  * remove any null prefix, and then replace (in parallel)
   152  * each escape sequence by its original character.
   152  * each escape sequence by its original character.
   172  * <p>
   172  * <p>
   173  * Mangling is a one-to-one function, while demangling
   173  * Mangling is a one-to-one function, while demangling
   174  * is a many-to-one function.
   174  * is a many-to-one function.
   175  * A mangled string is defined as <cite>validly mangled</cite> if
   175  * A mangled string is defined as <cite>validly mangled</cite> if
   176  * it is in fact the unique mangling of its spelling string.
   176  * it is in fact the unique mangling of its spelling string.
   177  * Three examples of invalidly mangled strings are <code><big><b>\=foo</b></big></code>,
   177  * Three examples of invalidly mangled strings are <code><b>\=foo</b></code>,
   178  * <code><big><b>\-bar</b></big></code>, and <code><big><b>baz\!</b></big></code>, which demangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and
   178  * <code><b>\-bar</b></code>, and <code><b>baz\!</b></code>, which demangle to <code><b>foo</b></code>, <code><b>\bar</b></code>, and
   179  * <code><big><b>baz\!</b></big></code>, but then remangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and <code><big><b>\=baz\-!</b></big></code>.
   179  * <code><b>baz\!</b></code>, but then remangle to <code><b>foo</b></code>, <code><b>\bar</b></code>, and <code><b>\=baz\-!</b></code>.
   180  * If a language back-end or runtime is using mangled names,
   180  * If a language back-end or runtime is using mangled names,
   181  * it should never present an invalidly mangled bytecode
   181  * it should never present an invalidly mangled bytecode
   182  * name to the JVM.  If the runtime encounters one,
   182  * name to the JVM.  If the runtime encounters one,
   183  * it should also report an error, since such an occurrence
   183  * it should also report an error, since such an occurrence
   184  * probably indicates a bug in name encoding which
   184  * probably indicates a bug in name encoding which
   235  * with quotes, backslashes, and non-printing characters
   235  * with quotes, backslashes, and non-printing characters
   236  * escaped as if for literals in the Java language.
   236  * escaped as if for literals in the Java language.
   237  * </p>
   237  * </p>
   238  * <p>
   238  * <p>
   239  * For example, an HTML-like spelling
   239  * For example, an HTML-like spelling
   240  * <code><big><b>&lt;pre&gt;</b></big></code> mangles to
   240  * <code><b>&lt;pre&gt;</b></code> mangles to
   241  * <code><big><b>\^pre\_</b></big></code> and could
   241  * <code><b>\^pre\_</b></code> and could
   242  * display more cleanly as
   242  * display more cleanly as
   243  * <code><big><b>'&lt;pre&gt;'</b></big></code>,
   243  * <code><b>'&lt;pre&gt;'</b></code>,
   244  * with the quotes included.
   244  * with the quotes included.
   245  * Such string-like conventions are <em>not</em> suitable
   245  * Such string-like conventions are <em>not</em> suitable
   246  * for mangled bytecode names, in part because
   246  * for mangled bytecode names, in part because
   247  * dangerous characters must be eliminated, rather
   247  * dangerous characters must be eliminated, rather
   248  * than just quoted.  Otherwise internally structured
   248  * than just quoted.  Otherwise internally structured
   254  * names should <em>not</em> be demangled and quoted,
   254  * names should <em>not</em> be demangled and quoted,
   255  * for this would be misleading.  Likewise, JVM symbols
   255  * for this would be misleading.  Likewise, JVM symbols
   256  * which contain dangerous characters (like dots in field
   256  * which contain dangerous characters (like dots in field
   257  * names or brackets in method names) should not be
   257  * names or brackets in method names) should not be
   258  * simply quoted.  The bytecode names
   258  * simply quoted.  The bytecode names
   259  * <code><big><b>\=phase\,1</b></big></code> and
   259  * <code><b>\=phase\,1</b></code> and
   260  * <code><big><b>phase.1</b></big></code> are distinct,
   260  * <code><b>phase.1</b></code> are distinct,
   261  * and in demangled displays they should be presented as
   261  * and in demangled displays they should be presented as
   262  * <code><big><b>'phase.1'</b></big></code> and something like
   262  * <code><b>'phase.1'</b></code> and something like
   263  * <code><big><b>'phase'.1</b></big></code>, respectively.
   263  * <code><b>'phase'.1</b></code>, respectively.
   264  * </p>
   264  * </p>
   265  */
   265  */
   266 public final class NameCodec {
   266 public final class NameCodec {
   267     private NameCodec() {
   267     private NameCodec() {
   268     }
   268     }