# HG changeset patch # User lana # Date 1343153130 25200 # Node ID 23117496a0544f66d5d63acc4ee2f737c68104b0 # Parent 75ebe516ddedf5e479d3a0cc4937bf4d88875ac1# Parent 046a8a4cdccb514860cd674d7e039e4e64f8325a Merge diff -r 046a8a4cdccb -r 23117496a054 jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java --- a/jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java Mon Jul 23 12:38:21 2012 -0700 +++ b/jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java Tue Jul 24 11:05:30 2012 -0700 @@ -602,7 +602,7 @@ if (reader == null) { stream = xmlInputSource.getByteStream(); if (stream == null) { - URL location = new URL(escapeNonUSAscii(expandedSystemId)); + URL location = new URL(expandedSystemId); URLConnection connect = location.openConnection(); if (!(connect instanceof HttpURLConnection)) { stream = connect.getInputStream(); @@ -2586,76 +2586,6 @@ } // fixURI(String):String - /** - * Escape invalid URI characters. - * - * Passed a URI that contains invalid characters (like spaces, non-ASCII Unicode characters, and the like), - * this function percent encodes the invalid characters per the URI specification (i.e., as a sequence of - * %-encoded UTF-8 octets). - * - * N.B. There are two problems. If the URI contains a '%' character, that might be an indication that - * the URI has already been escaped by the author, or it might be an invalid '%'. In the former case, - * it's important not to escape it, or we'll wind up with invalid, doubly-escaped '%'s. In the latter, - * the URI is broken if we don't encode it. Similarly, a '#' character might be the start of a fragment - * identifier or it might be an invalid '#'. - * - * Given that the former is vastly more likely than the latter in each case (most users are familiar with - * the magic status of '%' and '#' and they occur relatively infrequently in filenames, and if the user parses - * a proper Java File, we will already have %-escaped the URI), we simply assume that %'s and #'s are legit. - * - * Very rarely, we may be wrong. If so, tell the user to fix the clearly broken URI. - */ - protected static String escapeNonUSAscii(String str) { - if (str == null) { - return str; - } - int len = str.length(), i=0, ch; - for (; i < len; i++) { - ch = str.charAt(i); - // if it's not an ASCII 7 character, break here, and use UTF-8 encoding - if (ch >= 128) - break; - } - - // we saw no non-ascii-7 character - if (i == len) { - return str; - } - - // get UTF-8 bytes for the string - StringBuffer buffer = new StringBuffer(); - byte[] bytes = null; - byte b; - try { - bytes = str.getBytes("UTF-8"); - } catch (java.io.UnsupportedEncodingException e) { - // should never happen - return str; - } - - len = bytes.length; - - // for each byte - for (i = 0; i < len; i++) { - b = bytes[i]; - // for non-ascii character: make it positive, then escape - if (b < 0) { - ch = b + 256; - buffer.append('%'); - buffer.append(gHexChs[ch >> 4]); - buffer.append(gHexChs[ch & 0xf]); - } - else if (b != '%' && b != '#' && gNeedEscaping[b]) { - buffer.append('%'); - buffer.append(gAfterEscaping1[b]); - buffer.append(gAfterEscaping2[b]); - } - else { - buffer.append((char)b); - } - } - return buffer.toString(); - } // // Package visible methods