jdk/src/share/classes/sun/invoke/util/BytecodeName.java
changeset 8822 8145ab9f5f86
parent 7668 d4a77089c587
equal deleted inserted replaced
8821:2836ee97ee27 8822:8145ab9f5f86
       
     1 /*
       
     2  * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package sun.invoke.util;
       
    27 
       
    28 /**
       
    29  * Utility routines for dealing with bytecode-level names.
       
    30  * Includes universal mangling rules for the JVM.
       
    31  *
       
    32  * <h3>Avoiding Dangerous Characters </h3>
       
    33  *
       
    34  * <p>
       
    35  * The JVM defines a very small set of characters which are illegal
       
    36  * in name spellings.  We will slightly extend and regularize this set
       
    37  * into a group of <cite>dangerous characters</cite>.
       
    38  * These characters will then be replaced, in mangled names, by escape sequences.
       
    39  * In addition, accidental escape sequences must be further escaped.
       
    40  * Finally, a special prefix will be applied if and only if
       
    41  * the mangling would otherwise fail to begin with the escape character.
       
    42  * This happens to cover the corner case of the null string,
       
    43  * and also clearly marks symbols which need demangling.
       
    44  * </p>
       
    45  * <p>
       
    46  * Dangerous characters are the union of all characters forbidden
       
    47  * or otherwise restricted by the JVM specification,
       
    48  * plus their mates, if they are brackets
       
    49  * (<code><big><b>[</b></big></code> and <code><big><b>]</b></big></code>,
       
    50  * <code><big><b>&lt;</b></big></code> and <code><big><b>&gt;</b></big></code>),
       
    51  * plus, arbitrarily, the colon character <code><big><b>:</b></big></code>.
       
    52  * There is no distinction between type, method, and field names.
       
    53  * This makes it easier to convert between mangled names of different
       
    54  * types, since they do not need to be decoded (demangled).
       
    55  * </p>
       
    56  * <p>
       
    57  * The escape character is backslash <code><big><b>\</b></big></code>
       
    58  * (also known as reverse solidus).
       
    59  * This character is, until now, unheard of in bytecode names,
       
    60  * but traditional in the proposed role.
       
    61  *
       
    62  * </p>
       
    63  * <h3> Replacement Characters </h3>
       
    64  *
       
    65  *
       
    66  * <p>
       
    67  * Every escape sequence is two characters
       
    68  * (in fact, two UTF8 bytes) beginning with
       
    69  * the escape character and followed by a
       
    70  * <cite>replacement character</cite>.
       
    71  * (Since the replacement character is never a backslash,
       
    72  * iterated manglings do not double in size.)
       
    73  * </p>
       
    74  * <p>
       
    75  * Each dangerous character has some rough visual similarity
       
    76  * to its corresponding replacement character.
       
    77  * This makes mangled symbols easier to recognize by sight.
       
    78  * </p>
       
    79  * <p>
       
    80  * The dangerous characters are
       
    81  * <code><big><b>/</b></big></code> (forward slash, used to delimit package components),
       
    82  * <code><big><b>.</b></big></code> (dot, also a package delimiter),
       
    83  * <code><big><b>;</b></big></code> (semicolon, used in signatures),
       
    84  * <code><big><b>$</b></big></code> (dollar, used in inner classes and synthetic members),
       
    85  * <code><big><b>&lt;</b></big></code> (left angle),
       
    86  * <code><big><b>&gt;</b></big></code> (right angle),
       
    87  * <code><big><b>[</b></big></code> (left square bracket, used in array types),
       
    88  * <code><big><b>]</b></big></code> (right square bracket, reserved in this scheme for language use),
       
    89  * and <code><big><b>:</b></big></code> (colon, reserved in this scheme for language use).
       
    90  * Their replacements are, respectively,
       
    91  * <code><big><b>|</b></big></code> (vertical bar),
       
    92  * <code><big><b>,</b></big></code> (comma),
       
    93  * <code><big><b>?</b></big></code> (question mark),
       
    94  * <code><big><b>%</b></big></code> (percent),
       
    95  * <code><big><b>^</b></big></code> (caret),
       
     96  * <code><big><b>_</b></big></code> (underscore),
       
     97  * <code><big><b>{</b></big></code> (left curly bracket),
       
     98  * <code><big><b>}</b></big></code> (right curly bracket), and
       
     99  * <code><big><b>!</b></big></code> (exclamation mark).
       
   100  * In addition, the replacement character for the escape character itself is
       
   101  * <code><big><b>-</b></big></code> (hyphen),
       
   102  * and the replacement character for the null prefix is
       
   103  * <code><big><b>=</b></big></code> (equal sign).
       
   104  * </p>
       
   105  * <p>
       
   106  * An escape character <code><big><b>\</b></big></code>
       
   107  * followed by any of these replacement characters
       
   108  * is an escape sequence, and there are no other escape sequences.
       
   109  * An equal sign is only part of an escape sequence
       
   110  * if it is the second character in the whole string, following a backslash.
       
   111  * Two consecutive backslashes do <em>not</em> form an escape sequence.
       
   112  * </p>
       
   113  * <p>
       
   114  * Each escape sequence replaces a so-called <cite>original character</cite>
       
   115  * which is either one of the dangerous characters or the escape character.
       
   116  * A null prefix replaces an initial null string, not a character.
       
   117  * </p>
       
   118  * <p>
       
   119  * All this implies that escape sequences cannot overlap and may be
       
   120  * determined all at once for a whole string.  Note that a spelling
       
   121  * string can contain <cite>accidental escapes</cite>, apparent escape
       
   122  * sequences which must not be interpreted as manglings.
       
   123  * These are disabled by replacing their leading backslash with an
       
   124  * escape sequence (<code><big><b>\-</b></big></code>).  To mangle a string, three logical steps
       
   125  * are required, though they may be carried out in one pass:
       
   126  * </p>
       
   127  * <ol>
       
   128  *   <li>In each accidental escape, replace the backslash with an escape sequence
       
   129  * (<code><big><b>\-</b></big></code>).</li>
       
   130  *   <li>Replace each dangerous character with an escape sequence
       
   131  * (<code><big><b>\|</b></big></code> for <code><big><b>/</b></big></code>, etc.).</li>
       
   132  *   <li>If the first two steps introduced any change, <em>and</em>
       
   133  * if the string does not already begin with a backslash, prepend a null prefix (<code><big><b>\=</b></big></code>).</li>
       
   134  * </ol>
       
   135  *
       
   136  * To demangle a mangled string that begins with an escape,
       
   137  * remove any null prefix, and then replace (in parallel)
       
   138  * each escape sequence by its original character.
       
   139  * <p>Spelling strings which contain accidental
       
   140  * escapes <em>must</em> have them replaced, even if those
       
   141  * strings do not contain dangerous characters.
       
   142  * This restriction means that mangling a string always
       
   143  * requires a scan of the string for escapes.
       
   144  * But then, a scan would be required anyway,
       
   145  * to check for dangerous characters.
       
   146  *
       
   147  * </p>
       
   148  * <h3> Nice Properties </h3>
       
   149  *
       
   150  * <p>
       
   151  * If a bytecode name does not contain any escape sequence,
       
   152  * demangling is a no-op:  The string demangles to itself.
       
   153  * Such a string is called <cite>self-mangling</cite>.
       
   154  * Almost all strings are self-mangling.
       
   155  * In practice, to demangle almost any name &ldquo;found in nature&rdquo;,
       
   156  * simply verify that it does not begin with a backslash.
       
   157  * </p>
       
   158  * <p>
       
   159  * Mangling is a one-to-one function, while demangling
       
   160  * is a many-to-one function.
       
   161  * A mangled string is defined as <cite>validly mangled</cite> if
       
   162  * it is in fact the unique mangling of its spelling string.
       
   163  * Three examples of invalidly mangled strings are <code><big><b>\=foo</b></big></code>,
       
   164  * <code><big><b>\-bar</b></big></code>, and <code><big><b>baz\!</b></big></code>, which demangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and
       
   165  * <code><big><b>baz\!</b></big></code>, but then remangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and <code><big><b>\=baz\-!</b></big></code>.
       
   166  * If a language back-end or runtime is using mangled names,
       
   167  * it should never present an invalidly mangled bytecode
       
   168  * name to the JVM.  If the runtime encounters one,
       
   169  * it should also report an error, since such an occurrence
       
   170  * probably indicates a bug in name encoding which
       
   171  * will lead to errors in linkage.
       
   172  * However, this note does not propose that the JVM verifier
       
   173  * detect invalidly mangled names.
       
   174  * </p>
       
   175  * <p>
       
   176  * As a result of these rules, it is a simple matter to
       
   177  * compute validly mangled substrings and concatenations
       
   178  * of validly mangled strings, and (with a little care)
       
   179  * these correspond to corresponding operations on their
       
   180  * spelling strings.
       
   181  * </p>
       
   182  * <ul>
       
   183  *   <li>Any prefix of a validly mangled string is also validly mangled,
       
   184  * although a null prefix may need to be removed.</li>
       
   185  *   <li>Any suffix of a validly mangled string is also validly mangled,
       
   186  * although a null prefix may need to be added.</li>
       
   187  *   <li>Two validly mangled strings, when concatenated,
       
   188  * are also validly mangled, although any null prefix
       
   189  * must be removed from the second string,
       
   190  * and a trailing backslash on the first string may need escaping,
       
   191  * if it would participate in an accidental escape when followed
       
   192  * by the first character of the second string.</li>
       
   193  * </ul>
       
   194  * <p>If languages that include non-Java symbol spellings use this
       
   195  * mangling convention, they will enjoy the following advantages:
       
   196  * </p>
       
   197  * <ul>
       
   198  *   <li>They can interoperate via symbols they share in common.</li>
       
   199  *   <li>Low-level tools, such as backtrace printers, will have readable displays.</li>
       
   200  *   <li>Future JVM and language extensions can safely use the dangerous characters
       
   201  * for structuring symbols, but will never interfere with valid spellings.</li>
       
   202  *   <li>Runtimes and compilers can use standard libraries for mangling and demangling.</li>
       
   203  *   <li>Occasional transliterations and name composition will be simple and regular,
       
   204  * for classes, methods, and fields.</li>
       
   205  *   <li>Bytecode names will continue to be compact.
       
   206  * When mangled, spellings will at most double in length, either in
       
   207  * UTF8 or UTF16 format, and most will not change at all.</li>
       
   208  * </ul>
       
   209  *
       
   210  *
       
   211  * <h3> Suggestions for Human Readable Presentations </h3>
       
   212  *
       
   213  *
       
   214  * <p>
       
   215  * For human readable displays of symbols,
       
   216  * it will be better to present a string-like quoted
       
   217  * representation of the spelling, because JVM users
       
   218  * are generally familiar with such tokens.
       
   219  * We suggest using single or double quotes before and after
       
   220  * mangled symbols which are not valid Java identifiers,
       
   221  * with quotes, backslashes, and non-printing characters
       
   222  * escaped as if for literals in the Java language.
       
   223  * </p>
       
   224  * <p>
       
   225  * For example, an HTML-like spelling
       
   226  * <code><big><b>&lt;pre&gt;</b></big></code> mangles to
       
   227  * <code><big><b>\^pre\_</b></big></code> and could
       
   228  * display more cleanly as
       
   229  * <code><big><b>'&lt;pre&gt;'</b></big></code>,
       
   230  * with the quotes included.
       
   231  * Such string-like conventions are <em>not</em> suitable
       
   232  * for mangled bytecode names, in part because
       
   233  * dangerous characters must be eliminated, rather
       
   234  * than just quoted.  Otherwise internally structured
       
   235  * strings like package prefixes and method signatures
       
   236  * could not be reliably parsed.
       
   237  * </p>
       
   238  * <p>
       
   239  * In such human-readable displays, invalidly mangled
       
   240  * names should <em>not</em> be demangled and quoted,
       
   241  * for this would be misleading.  Likewise, JVM symbols
       
   242  * which contain dangerous characters (like dots in field
       
   243  * names or brackets in method names) should not be
       
   244  * simply quoted.  The bytecode names
       
   245  * <code><big><b>\=phase\,1</b></big></code> and
       
   246  * <code><big><b>phase.1</b></big></code> are distinct,
       
   247  * and in demangled displays they should be presented as
       
   248  * <code><big><b>'phase.1'</b></big></code> and something like
       
   249  * <code><big><b>'phase'.1</b></big></code>, respectively.
       
   250  * </p>
       
   251  *
       
   252  * @author John Rose
       
   253  * @version 1.2, 02/06/2008
       
    254  * @see <a href="http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm">Symbolic Freedom in the VM</a>
       
   255  */
       
   256 public class BytecodeName {
       
   257     private BytecodeName() { }  // static only class
       
   258 
       
   259     /** Given a source name, produce the corresponding bytecode name.
       
   260      * The source name should not be qualified, because any syntactic
       
   261      * markers (dots, slashes, dollar signs, colons, etc.) will be mangled.
       
   262      * @param s the source name
       
   263      * @return a valid bytecode name which represents the source name
       
   264      */
       
   265     public static String toBytecodeName(String s) {
       
   266         String bn = mangle(s);
       
   267         assert((Object)bn == s || looksMangled(bn)) : bn;
       
   268         assert(s.equals(toSourceName(bn))) : s;
       
   269         return bn;
       
   270     }
       
   271 
       
   272     /** Given an unqualified bytecode name, produce the corresponding source name.
       
   273      * The bytecode name must not contain dangerous characters.
       
   274      * In particular, it must not be qualified or segmented by colon {@code ':'}.
       
   275      * @param s the bytecode name
       
   276      * @return the source name, which may possibly have unsafe characters
       
   277      * @throws IllegalArgumentException if the bytecode name is not {@link #isSafeBytecodeName safe}
       
   278      * @see #isSafeBytecodeName(java.lang.String)
       
   279      */
       
   280     public static String toSourceName(String s) {
       
   281         checkSafeBytecodeName(s);
       
   282         String sn = s;
       
   283         if (looksMangled(s)) {
       
   284             sn = demangle(s);
       
   285             assert(s.equals(mangle(sn))) : s+" => "+sn+" => "+mangle(sn);
       
   286         }
       
   287         return sn;
       
   288     }
       
   289 
       
   290     /**
       
   291      * Given a bytecode name from a classfile, separate it into
       
   292      * components delimited by dangerous characters.
       
   293      * Each resulting array element will be either a dangerous character,
       
   294      * or else a safe bytecode name.
       
   295      * (The safe name might possibly be mangled to hide further dangerous characters.)
       
   296      * For example, the qualified class name {@code java/lang/String}
       
   297      * will be parsed into the array {@code {"java", '/', "lang", '/', "String"}}.
       
   298      * The name {@code &lt;init&gt;} will be parsed into { '&lt;', "init", '&gt;'}}
       
   299      * The name {@code foo/bar$:baz} will be parsed into
       
   300      * {@code {"foo", '/', "bar", '$', ':', "baz"}}.
       
   301      * The name {@code ::\=:foo:\=bar\!baz} will be parsed into
       
   302      * {@code {':', ':', "", ':', "foo", ':', "bar:baz"}}.
       
   303      */
       
   304     public static Object[] parseBytecodeName(String s) {
       
   305         int slen = s.length();
       
   306         Object[] res = null;
       
   307         for (int pass = 0; pass <= 1; pass++) {
       
   308             int fillp = 0;
       
   309             int lasti = 0;
       
   310             for (int i = 0; i <= slen; i++) {
       
   311                 int whichDC = -1;
       
   312                 if (i < slen) {
       
   313                     whichDC = DANGEROUS_CHARS.indexOf(s.charAt(i));
       
   314                     if (whichDC < DANGEROUS_CHAR_FIRST_INDEX)  continue;
       
   315                 }
       
   316                 // got to end of string or next dangerous char
       
   317                 if (lasti < i) {
       
   318                     // normal component
       
   319                     if (pass != 0)
       
   320                         res[fillp] = toSourceName(s.substring(lasti, i));
       
   321                     fillp++;
       
   322                     lasti = i+1;
       
   323                 }
       
   324                 if (whichDC >= DANGEROUS_CHAR_FIRST_INDEX) {
       
   325                     if (pass != 0)
       
   326                         res[fillp] = DANGEROUS_CHARS_CA[whichDC];
       
   327                     fillp++;
       
   328                     lasti = i+1;
       
   329                 }
       
   330             }
       
   331             if (pass != 0)  break;
       
   332             // between passes, build the result array
       
   333             res = new Object[fillp];
       
   334             if (fillp <= 1 && lasti == 0) {
       
   335                 if (fillp != 0)  res[0] = toSourceName(s);
       
   336                 break;
       
   337             }
       
   338         }
       
   339         return res;
       
   340     }
       
   341 
       
   342     /**
       
   343      * Given a series of components, create a bytecode name for a classfile.
       
   344      * This is the inverse of {@link #parseBytecodeName(java.lang.String)}.
       
   345      * Each component must either be an interned one-character string of
       
   346      * a dangerous character, or else a safe bytecode name.
       
   347      * @param components a series of name components
       
   348      * @return the concatenation of all components
       
   349      * @throws IllegalArgumentException if any component contains an unsafe
       
   350      *          character, and is not an interned one-character string
       
   351      * @throws NullPointerException if any component is null
       
   352      */
       
   353     public static String unparseBytecodeName(Object[] components) {
       
   354         Object[] components0 = components;
       
   355         for (int i = 0; i < components.length; i++) {
       
   356             Object c = components[i];
       
   357             if (c instanceof String) {
       
   358                 String mc = toBytecodeName((String) c);
       
   359                 if (i == 0 && components.length == 1)
       
   360                     return mc;  // usual case
       
   361                 if ((Object)mc != c) {
       
   362                     if (components == components0)
       
   363                         components = components.clone();
       
   364                     components[i] = c = mc;
       
   365                 }
       
   366             }
       
   367         }
       
   368         return appendAll(components);
       
   369     }
       
   370     private static String appendAll(Object[] components) {
       
   371         if (components.length <= 1) {
       
   372             if (components.length == 1) {
       
   373                 return String.valueOf(components[0]);
       
   374             }
       
   375             return "";
       
   376         }
       
   377         int slen = 0;
       
   378         for (Object c : components) {
       
   379             if (c instanceof String)
       
   380                 slen += String.valueOf(c).length();
       
   381             else
       
   382                 slen += 1;
       
   383         }
       
   384         StringBuilder sb = new StringBuilder(slen);
       
   385         for (Object c : components) {
       
   386             sb.append(c);
       
   387         }
       
   388         return sb.toString();
       
   389     }
       
   390 
       
   391     /**
       
   392      * Given a bytecode name, produce the corresponding display name.
       
   393      * This is the source name, plus quotes if needed.
       
   394      * If the bytecode name contains dangerous characters,
       
   395      * assume that they are being used as punctuation,
       
   396      * and pass them through unchanged.
       
   397      * Non-empty runs of non-dangerous characters are demangled
       
   398      * if necessary, and the resulting names are quoted if
       
   399      * they are not already valid Java identifiers, or if
       
   400      * they contain a dangerous character (i.e., dollar sign "$").
       
   401      * Single quotes are used when quoting.
       
   402      * Within quoted names, embedded single quotes and backslashes
       
   403      * are further escaped by prepended backslashes.
       
   404      *
       
   405      * @param s the original bytecode name (which may be qualified)
       
   406      * @return a human-readable presentation
       
   407      */
       
   408     public static String toDisplayName(String s) {
       
   409         Object[] components = parseBytecodeName(s);
       
   410         for (int i = 0; i < components.length; i++) {
       
   411             if (!(components[i] instanceof String))
       
   412                 continue;
       
   413             String sn = (String) components[i];
       
   414             // note that the name is already demangled!
       
   415             //sn = toSourceName(sn);
       
   416             if (!isJavaIdent(sn) || sn.indexOf('$') >=0 ) {
       
   417                 components[i] = quoteDisplay(sn);
       
   418             }
       
   419         }
       
   420         return appendAll(components);
       
   421     }
       
   422     private static boolean isJavaIdent(String s) {
       
   423         int slen = s.length();
       
   424         if (slen == 0)  return false;
       
   425         if (!Character.isJavaIdentifierStart(s.charAt(0)))
       
   426             return false;
       
   427         for (int i = 1; i < slen; i++) {
       
   428             if (!Character.isJavaIdentifierPart(s.charAt(i)))
       
   429                 return false;
       
   430         }
       
   431         return true;
       
   432     }
       
   433     private static String quoteDisplay(String s) {
       
   434         // TO DO:  Replace wierd characters in s by C-style escapes.
       
   435         return "'"+s.replaceAll("['\\\\]", "\\\\$0")+"'";
       
   436     }
       
   437 
       
   438     private static void checkSafeBytecodeName(String s)
       
   439             throws IllegalArgumentException {
       
   440         if (!isSafeBytecodeName(s)) {
       
   441             throw new IllegalArgumentException(s);
       
   442         }
       
   443     }
       
   444 
       
   445     /**
       
   446      * Report whether a simple name is safe as a bytecode name.
       
   447      * Such names are acceptable in class files as class, method, and field names.
       
   448      * Additionally, they are free of "dangerous" characters, even if those
       
   449      * characters are legal in some (or all) names in class files.
       
   450      * @param s the proposed bytecode name
       
   451      * @return true if the name is non-empty and all of its characters are safe
       
   452      */
       
   453     public static boolean isSafeBytecodeName(String s) {
       
   454         if (s.length() == 0)  return false;
       
   455         // check occurrences of each DANGEROUS char
       
   456         for (char xc : DANGEROUS_CHARS_A) {
       
   457             if (xc == ESCAPE_C)  continue;  // not really that dangerous
       
   458             if (s.indexOf(xc) >= 0)  return false;
       
   459         }
       
   460         return true;
       
   461     }
       
   462 
       
   463     /**
       
   464      * Report whether a character is safe in a bytecode name.
       
   465      * This is true of any unicode character except the following
       
   466      * <em>dangerous characters</em>: {@code ".;:$[]<>/"}.
       
   467      * @param s the proposed character
       
   468      * @return true if the character is safe to use in classfiles
       
   469      */
       
   470     public static boolean isSafeBytecodeChar(char c) {
       
   471         return DANGEROUS_CHARS.indexOf(c) < DANGEROUS_CHAR_FIRST_INDEX;
       
   472     }
       
   473 
       
   474     private static boolean looksMangled(String s) {
       
   475         return s.charAt(0) == ESCAPE_C;
       
   476     }
       
   477 
       
   478     private static String mangle(String s) {
       
   479         if (s.length() == 0)
       
   480             return NULL_ESCAPE;
       
   481 
       
   482         // build this lazily, when we first need an escape:
       
   483         StringBuilder sb = null;
       
   484 
       
   485         for (int i = 0, slen = s.length(); i < slen; i++) {
       
   486             char c = s.charAt(i);
       
   487 
       
   488             boolean needEscape = false;
       
   489             if (c == ESCAPE_C) {
       
   490                 if (i+1 < slen) {
       
   491                     char c1 = s.charAt(i+1);
       
   492                     if ((i == 0 && c1 == NULL_ESCAPE_C)
       
   493                         || c1 != originalOfReplacement(c1)) {
       
   494                         // an accidental escape
       
   495                         needEscape = true;
       
   496                     }
       
   497                 }
       
   498             } else {
       
   499                 needEscape = isDangerous(c);
       
   500             }
       
   501 
       
   502             if (!needEscape) {
       
   503                 if (sb != null)  sb.append(c);
       
   504                 continue;
       
   505             }
       
   506 
       
   507             // build sb if this is the first escape
       
   508             if (sb == null) {
       
   509                 sb = new StringBuilder(s.length()+10);
       
   510                 // mangled names must begin with a backslash:
       
   511                 if (s.charAt(0) != ESCAPE_C && i > 0)
       
   512                     sb.append(NULL_ESCAPE);
       
   513                 // append the string so far, which is unremarkable:
       
   514                 sb.append(s.substring(0, i));
       
   515             }
       
   516 
       
   517             // rewrite \ to \-, / to \|, etc.
       
   518             sb.append(ESCAPE_C);
       
   519             sb.append(replacementOf(c));
       
   520         }
       
   521 
       
   522         if (sb != null)   return sb.toString();
       
   523 
       
   524         return s;
       
   525     }
       
   526 
       
   527     private static String demangle(String s) {
       
   528         // build this lazily, when we first meet an escape:
       
   529         StringBuilder sb = null;
       
   530 
       
   531         int stringStart = 0;
       
   532         if (s.startsWith(NULL_ESCAPE))
       
   533             stringStart = 2;
       
   534 
       
   535         for (int i = stringStart, slen = s.length(); i < slen; i++) {
       
   536             char c = s.charAt(i);
       
   537 
       
   538             if (c == ESCAPE_C && i+1 < slen) {
       
   539                 // might be an escape sequence
       
   540                 char rc = s.charAt(i+1);
       
   541                 char oc = originalOfReplacement(rc);
       
   542                 if (oc != rc) {
       
   543                     // build sb if this is the first escape
       
   544                     if (sb == null) {
       
   545                         sb = new StringBuilder(s.length());
       
   546                         // append the string so far, which is unremarkable:
       
   547                         sb.append(s.substring(stringStart, i));
       
   548                     }
       
   549                     ++i;  // skip both characters
       
   550                     c = oc;
       
   551                 }
       
   552             }
       
   553 
       
   554             if (sb != null)
       
   555                 sb.append(c);
       
   556         }
       
   557 
       
   558         if (sb != null)   return sb.toString();
       
   559 
       
   560         return s.substring(stringStart);
       
   561     }
       
   562 
       
   563     static char ESCAPE_C = '\\';
       
   564     // empty escape sequence to avoid a null name or illegal prefix
       
   565     static char NULL_ESCAPE_C = '=';
       
   566     static String NULL_ESCAPE = ESCAPE_C+""+NULL_ESCAPE_C;
       
   567 
       
   568     static final String DANGEROUS_CHARS   = "\\/.;:$[]<>"; // \\ must be first
       
   569     static final String REPLACEMENT_CHARS =  "-|,?!%{}^_";
       
   570     static final int DANGEROUS_CHAR_FIRST_INDEX = 1; // index after \\
       
   571     static char[] DANGEROUS_CHARS_A   = DANGEROUS_CHARS.toCharArray();
       
   572     static char[] REPLACEMENT_CHARS_A = REPLACEMENT_CHARS.toCharArray();
       
   573     static final Character[] DANGEROUS_CHARS_CA;
       
   574     static {
       
   575         Character[] dcca = new Character[DANGEROUS_CHARS.length()];
       
   576         for (int i = 0; i < dcca.length; i++)
       
   577             dcca[i] = Character.valueOf(DANGEROUS_CHARS.charAt(i));
       
   578         DANGEROUS_CHARS_CA = dcca;
       
   579     }
       
   580 
       
   581     static final long[] SPECIAL_BITMAP = new long[2];  // 128 bits
       
   582     static {
       
   583         String SPECIAL = DANGEROUS_CHARS + REPLACEMENT_CHARS;
       
   584         //System.out.println("SPECIAL = "+SPECIAL);
       
   585         for (char c : SPECIAL.toCharArray()) {
       
   586             SPECIAL_BITMAP[c >>> 6] |= 1L << c;
       
   587         }
       
   588     }
       
   589     static boolean isSpecial(char c) {
       
   590         if ((c >>> 6) < SPECIAL_BITMAP.length)
       
   591             return ((SPECIAL_BITMAP[c >>> 6] >> c) & 1) != 0;
       
   592         else
       
   593             return false;
       
   594     }
       
   595     static char replacementOf(char c) {
       
   596         if (!isSpecial(c))  return c;
       
   597         int i = DANGEROUS_CHARS.indexOf(c);
       
   598         if (i < 0)  return c;
       
   599         return REPLACEMENT_CHARS.charAt(i);
       
   600     }
       
   601     static char originalOfReplacement(char c) {
       
   602         if (!isSpecial(c))  return c;
       
   603         int i = REPLACEMENT_CHARS.indexOf(c);
       
   604         if (i < 0)  return c;
       
   605         return DANGEROUS_CHARS.charAt(i);
       
   606     }
       
   607     static boolean isDangerous(char c) {
       
   608         if (!isSpecial(c))  return false;
       
   609         return (DANGEROUS_CHARS.indexOf(c) >= DANGEROUS_CHAR_FIRST_INDEX);
       
   610     }
       
   611     static int indexOfDangerousChar(String s, int from) {
       
   612         for (int i = from, slen = s.length(); i < slen; i++) {
       
   613             if (isDangerous(s.charAt(i)))
       
   614                 return i;
       
   615         }
       
   616         return -1;
       
   617     }
       
   618     static int lastIndexOfDangerousChar(String s, int from) {
       
   619         for (int i = Math.min(from, s.length()-1); i >= 0; i--) {
       
   620             if (isDangerous(s.charAt(i)))
       
   621                 return i;
       
   622         }
       
   623         return -1;
       
   624     }
       
   625 
       
   626 
       
   627 }