Merge
authorlana
Tue, 24 Jul 2012 11:05:30 -0700 (2012-07-24)
changeset 13268 23117496a054
parent 13267 75ebe516dded (diff)
parent 13265 046a8a4cdccb (current diff)
child 13269 b3258b6c8d5a
Merge
--- a/jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java	Mon Jul 23 12:38:21 2012 -0700
+++ b/jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java	Tue Jul 24 11:05:30 2012 -0700
@@ -602,7 +602,7 @@
         if (reader == null) {
             stream = xmlInputSource.getByteStream();
             if (stream == null) {
-                URL location = new URL(escapeNonUSAscii(expandedSystemId));
+                URL location = new URL(expandedSystemId);
                 URLConnection connect = location.openConnection();
                 if (!(connect instanceof HttpURLConnection)) {
                     stream = connect.getInputStream();
@@ -2586,76 +2586,6 @@
 
     } // fixURI(String):String
 
-    /**
-     * Escape invalid URI characters.
-     *
-     * Passed a URI that contains invalid characters (like spaces, non-ASCII Unicode characters, and the like),
-     * this function percent encodes the invalid characters per the URI specification (i.e., as a sequence of
-     * %-encoded UTF-8 octets).
-     *
-     * N.B. There are two problems. If the URI contains a '%' character, that might be an indication that
-     * the URI has already been escaped by the author, or it might be an invalid '%'. In the former case,
-     * it's important not to escape it, or we'll wind up with invalid, doubly-escaped '%'s. In the latter,
-     * the URI is broken if we don't encode it. Similarly, a '#' character might be the start of a fragment
-     * identifier or it might be an invalid '#'.
-     *
-     * Given that the former is vastly more likely than the latter in each case (most users are familiar with
-     * the magic status of '%' and '#' and they occur relatively infrequently in filenames, and if the user parses
-     * a proper Java File, we will already have %-escaped the URI), we simply assume that %'s and #'s are legit.
-     *
-     * Very rarely, we may be wrong. If so, tell the user to fix the clearly broken URI.
-     */
-    protected static String escapeNonUSAscii(String str) {
-        if (str == null) {
-            return str;
-        }
-        int len = str.length(), i=0, ch;
-        for (; i < len; i++) {
-            ch = str.charAt(i);
-            // if it's not an ASCII 7 character, break here, and use UTF-8 encoding
-            if (ch >= 128)
-                break;
-        }
-
-        // we saw no non-ascii-7 character
-        if (i == len) {
-            return str;
-        }
-
-        // get UTF-8 bytes for the string
-        StringBuffer buffer = new StringBuffer();
-        byte[] bytes = null;
-        byte b;
-        try {
-            bytes = str.getBytes("UTF-8");
-        } catch (java.io.UnsupportedEncodingException e) {
-            // should never happen
-            return str;
-        }
-
-        len = bytes.length;
-
-        // for each byte
-        for (i = 0; i < len; i++) {
-            b = bytes[i];
-            // for non-ascii character: make it positive, then escape
-            if (b < 0) {
-                ch = b + 256;
-                buffer.append('%');
-                buffer.append(gHexChs[ch >> 4]);
-                buffer.append(gHexChs[ch & 0xf]);
-            }
-            else if (b != '%' && b != '#' && gNeedEscaping[b]) {
-                buffer.append('%');
-                buffer.append(gAfterEscaping1[b]);
-                buffer.append(gAfterEscaping2[b]);
-            }
-            else {
-                buffer.append((char)b);
-            }
-        }
-        return buffer.toString();
-    }
 
     //
     // Package visible methods