600 baseSystemId = expandedSystemId; |
600 baseSystemId = expandedSystemId; |
601 } |
601 } |
602 if (reader == null) { |
602 if (reader == null) { |
603 stream = xmlInputSource.getByteStream(); |
603 stream = xmlInputSource.getByteStream(); |
604 if (stream == null) { |
604 if (stream == null) { |
605 URL location = new URL(escapeNonUSAscii(expandedSystemId)); |
605 URL location = new URL(expandedSystemId); |
606 URLConnection connect = location.openConnection(); |
606 URLConnection connect = location.openConnection(); |
607 if (!(connect instanceof HttpURLConnection)) { |
607 if (!(connect instanceof HttpURLConnection)) { |
608 stream = connect.getInputStream(); |
608 stream = connect.getInputStream(); |
609 } |
609 } |
610 else { |
610 else { |
2584 // done |
2584 // done |
2585 return str; |
2585 return str; |
2586 |
2586 |
2587 } // fixURI(String):String |
2587 } // fixURI(String):String |
2588 |
2588 |
2589 /** |
|
2590 * Escape invalid URI characters. |
|
2591 * |
|
2592 * Passed a URI that contains invalid characters (like spaces, non-ASCII Unicode characters, and the like), |
|
2593 * this function percent encodes the invalid characters per the URI specification (i.e., as a sequence of |
|
2594 * %-encoded UTF-8 octets). |
|
2595 * |
|
2596 * N.B. There are two problems. If the URI contains a '%' character, that might be an indication that |
|
2597 * the URI has already been escaped by the author, or it might be an invalid '%'. In the former case, |
|
2598 * it's important not to escape it, or we'll wind up with invalid, doubly-escaped '%'s. In the latter, |
|
2599 * the URI is broken if we don't encode it. Similarly, a '#' character might be the start of a fragment |
|
2600 * identifier or it might be an invalid '#'. |
|
2601 * |
|
2602 * Given that the former is vastly more likely than the latter in each case (most users are familiar with |
|
2603 * the magic status of '%' and '#' and they occur relatively infrequently in filenames, and if the user parses |
|
2604 * a proper Java File, we will already have %-escaped the URI), we simply assume that %'s and #'s are legit. |
|
2605 * |
|
2606 * Very rarely, we may be wrong. If so, tell the user to fix the clearly broken URI. |
|
2607 */ |
|
2608 protected static String escapeNonUSAscii(String str) { |
|
2609 if (str == null) { |
|
2610 return str; |
|
2611 } |
|
2612 int len = str.length(), i=0, ch; |
|
2613 for (; i < len; i++) { |
|
2614 ch = str.charAt(i); |
|
2615 // if it's not an ASCII 7 character, break here, and use UTF-8 encoding |
|
2616 if (ch >= 128) |
|
2617 break; |
|
2618 } |
|
2619 |
|
2620 // we saw no non-ascii-7 character |
|
2621 if (i == len) { |
|
2622 return str; |
|
2623 } |
|
2624 |
|
2625 // get UTF-8 bytes for the string |
|
2626 StringBuffer buffer = new StringBuffer(); |
|
2627 byte[] bytes = null; |
|
2628 byte b; |
|
2629 try { |
|
2630 bytes = str.getBytes("UTF-8"); |
|
2631 } catch (java.io.UnsupportedEncodingException e) { |
|
2632 // should never happen |
|
2633 return str; |
|
2634 } |
|
2635 |
|
2636 len = bytes.length; |
|
2637 |
|
2638 // for each byte |
|
2639 for (i = 0; i < len; i++) { |
|
2640 b = bytes[i]; |
|
2641 // for non-ascii character: make it positive, then escape |
|
2642 if (b < 0) { |
|
2643 ch = b + 256; |
|
2644 buffer.append('%'); |
|
2645 buffer.append(gHexChs[ch >> 4]); |
|
2646 buffer.append(gHexChs[ch & 0xf]); |
|
2647 } |
|
2648 else if (b != '%' && b != '#' && gNeedEscaping[b]) { |
|
2649 buffer.append('%'); |
|
2650 buffer.append(gAfterEscaping1[b]); |
|
2651 buffer.append(gAfterEscaping2[b]); |
|
2652 } |
|
2653 else { |
|
2654 buffer.append((char)b); |
|
2655 } |
|
2656 } |
|
2657 return buffer.toString(); |
|
2658 } |
|
2659 |
2589 |
2660 // |
2590 // |
2661 // Package visible methods |
2591 // Package visible methods |
2662 // |
2592 // |
2663 /** Prints the contents of the buffer. */ |
2593 /** Prints the contents of the buffer. */ |