jdk/src/java.base/share/classes/java/net/URI.java
changeset 47024 5bfe7700a8f7
parent 46152 51d10b05c78e
equal deleted inserted replaced
47023:c4d5e9974766 47024:5bfe7700a8f7
    35 import java.nio.charset.CharsetDecoder;
    35 import java.nio.charset.CharsetDecoder;
    36 import java.nio.charset.CoderResult;
    36 import java.nio.charset.CoderResult;
    37 import java.nio.charset.CodingErrorAction;
    37 import java.nio.charset.CodingErrorAction;
    38 import java.nio.charset.CharacterCodingException;
    38 import java.nio.charset.CharacterCodingException;
    39 import java.text.Normalizer;
    39 import java.text.Normalizer;
    40 import jdk.internal.loader.URLClassPath;
       
    41 import jdk.internal.misc.JavaNetUriAccess;
    40 import jdk.internal.misc.JavaNetUriAccess;
    42 import jdk.internal.misc.SharedSecrets;
    41 import jdk.internal.misc.SharedSecrets;
    43 import sun.nio.cs.ThreadLocalCoders;
    42 import sun.nio.cs.ThreadLocalCoders;
    44 
    43 
    45 import java.lang.Character;             // for javadoc
    44 import java.lang.Character;             // for javadoc
  2526     // This approach is more efficient than sequentially searching arrays of
  2525     // This approach is more efficient than sequentially searching arrays of
  2527     // permitted characters.  It could be made still more efficient by
  2526     // permitted characters.  It could be made still more efficient by
  2528     // precompiling the mask information so that a character's presence in a
  2527     // precompiling the mask information so that a character's presence in a
  2529     // given mask could be determined by a single table lookup.
  2528     // given mask could be determined by a single table lookup.
  2530 
  2529 
       
  2530     // To save startup time, the low-/highMask constants below are precomputed and hard-coded as literals.
       
  2531     // For reference, the following methods were used to calculate the values:
       
  2532 
  2531     // Compute the low-order mask for the characters in the given string
  2533     // Compute the low-order mask for the characters in the given string
  2532     private static long lowMask(String chars) {
  2534     //    private static long lowMask(String chars) {
  2533         int n = chars.length();
  2535     //        int n = chars.length();
  2534         long m = 0;
  2536     //        long m = 0;
  2535         for (int i = 0; i < n; i++) {
  2537     //        for (int i = 0; i < n; i++) {
  2536             char c = chars.charAt(i);
  2538     //            char c = chars.charAt(i);
  2537             if (c < 64)
  2539     //            if (c < 64)
  2538                 m |= (1L << c);
  2540     //                m |= (1L << c);
  2539         }
  2541     //        }
  2540         return m;
  2542     //        return m;
  2541     }
  2543     //    }
  2542 
  2544 
  2543     // Compute the high-order mask for the characters in the given string
  2545     // Compute the high-order mask for the characters in the given string
  2544     private static long highMask(String chars) {
  2546     //    private static long highMask(String chars) {
  2545         int n = chars.length();
  2547     //        int n = chars.length();
  2546         long m = 0;
  2548     //        long m = 0;
  2547         for (int i = 0; i < n; i++) {
  2549     //        for (int i = 0; i < n; i++) {
  2548             char c = chars.charAt(i);
  2550     //            char c = chars.charAt(i);
  2549             if ((c >= 64) && (c < 128))
  2551     //            if ((c >= 64) && (c < 128))
  2550                 m |= (1L << (c - 64));
  2552     //                m |= (1L << (c - 64));
  2551         }
  2553     //        }
  2552         return m;
  2554     //        return m;
  2553     }
  2555     //    }
  2554 
  2556 
  2555     // Compute a low-order mask for the characters
  2557     // Compute a low-order mask for the characters
  2556     // between first and last, inclusive
  2558     // between first and last, inclusive
  2557     private static long lowMask(char first, char last) {
  2559     //    private static long lowMask(char first, char last) {
  2558         long m = 0;
  2560     //        long m = 0;
  2559         int f = Math.max(Math.min(first, 63), 0);
  2561     //        int f = Math.max(Math.min(first, 63), 0);
  2560         int l = Math.max(Math.min(last, 63), 0);
  2562     //        int l = Math.max(Math.min(last, 63), 0);
  2561         for (int i = f; i <= l; i++)
  2563     //        for (int i = f; i <= l; i++)
  2562             m |= 1L << i;
  2564     //            m |= 1L << i;
  2563         return m;
  2565     //        return m;
  2564     }
  2566     //    }
  2565 
  2567 
  2566     // Compute a high-order mask for the characters
  2568     // Compute a high-order mask for the characters
  2567     // between first and last, inclusive
  2569     // between first and last, inclusive
  2568     private static long highMask(char first, char last) {
  2570     //    private static long highMask(char first, char last) {
  2569         long m = 0;
  2571     //        long m = 0;
  2570         int f = Math.max(Math.min(first, 127), 64) - 64;
  2572     //        int f = Math.max(Math.min(first, 127), 64) - 64;
  2571         int l = Math.max(Math.min(last, 127), 64) - 64;
  2573     //        int l = Math.max(Math.min(last, 127), 64) - 64;
  2572         for (int i = f; i <= l; i++)
  2574     //        for (int i = f; i <= l; i++)
  2573             m |= 1L << i;
  2575     //            m |= 1L << i;
  2574         return m;
  2576     //        return m;
  2575     }
  2577     //    }
  2576 
  2578 
  2577     // Tell whether the given character is permitted by the given mask pair
  2579     // Tell whether the given character is permitted by the given mask pair
  2578     private static boolean match(char c, long lowMask, long highMask) {
  2580     private static boolean match(char c, long lowMask, long highMask) {
  2579         if (c == 0) // 0 doesn't have a slot in the mask. So, it never matches.
  2581         if (c == 0) // 0 doesn't have a slot in the mask. So, it never matches.
  2580             return false;
  2582             return false;
  2588     // Character-class masks, in reverse order from RFC2396 because
  2590     // Character-class masks, in reverse order from RFC2396 because
  2589     // initializers for static fields cannot make forward references.
  2591     // initializers for static fields cannot make forward references.
  2590 
  2592 
  2591     // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
  2593     // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
  2592     //            "8" | "9"
  2594     //            "8" | "9"
  2593     private static final long L_DIGIT = lowMask('0', '9');
  2595     private static final long L_DIGIT = 0x3FF000000000000L; // lowMask('0', '9');
  2594     private static final long H_DIGIT = 0L;
  2596     private static final long H_DIGIT = 0L;
  2595 
  2597 
  2596     // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
  2598     // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
  2597     //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
  2599     //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
  2598     //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
  2600     //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
  2599     private static final long L_UPALPHA = 0L;
  2601     private static final long L_UPALPHA = 0L;
  2600     private static final long H_UPALPHA = highMask('A', 'Z');
  2602     private static final long H_UPALPHA = 0x7FFFFFEL; // highMask('A', 'Z');
  2601 
  2603 
  2602     // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
  2604     // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
  2603     //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
  2605     //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
  2604     //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
  2606     //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
  2605     private static final long L_LOWALPHA = 0L;
  2607     private static final long L_LOWALPHA = 0L;
  2606     private static final long H_LOWALPHA = highMask('a', 'z');
  2608     private static final long H_LOWALPHA = 0x7FFFFFE00000000L; // highMask('a', 'z');
  2607 
  2609 
  2608     // alpha         = lowalpha | upalpha
  2610     // alpha         = lowalpha | upalpha
  2609     private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA;
  2611     private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA;
  2610     private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA;
  2612     private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA;
  2611 
  2613 
  2614     private static final long H_ALPHANUM = H_DIGIT | H_ALPHA;
  2616     private static final long H_ALPHANUM = H_DIGIT | H_ALPHA;
  2615 
  2617 
  2616     // hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
  2618     // hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
  2617     //                         "a" | "b" | "c" | "d" | "e" | "f"
  2619     //                         "a" | "b" | "c" | "d" | "e" | "f"
  2618     private static final long L_HEX = L_DIGIT;
  2620     private static final long L_HEX = L_DIGIT;
  2619     private static final long H_HEX = highMask('A', 'F') | highMask('a', 'f');
  2621     private static final long H_HEX = 0x7E0000007EL; // highMask('A', 'F') | highMask('a', 'f');
  2620 
  2622 
  2621     // mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
  2623     // mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
  2622     //                 "(" | ")"
  2624     //                 "(" | ")"
  2623     private static final long L_MARK = lowMask("-_.!~*'()");
  2625     private static final long L_MARK = 0x678200000000L; // lowMask("-_.!~*'()");
  2624     private static final long H_MARK = highMask("-_.!~*'()");
  2626     private static final long H_MARK = 0x4000000080000000L; // highMask("-_.!~*'()");
  2625 
  2627 
  2626     // unreserved    = alphanum | mark
  2628     // unreserved    = alphanum | mark
  2627     private static final long L_UNRESERVED = L_ALPHANUM | L_MARK;
  2629     private static final long L_UNRESERVED = L_ALPHANUM | L_MARK;
  2628     private static final long H_UNRESERVED = H_ALPHANUM | H_MARK;
  2630     private static final long H_UNRESERVED = H_ALPHANUM | H_MARK;
  2629 
  2631 
  2630     // reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
  2632     // reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
  2631     //                 "$" | "," | "[" | "]"
  2633     //                 "$" | "," | "[" | "]"
  2632     // Added per RFC2732: "[", "]"
  2634     // Added per RFC2732: "[", "]"
  2633     private static final long L_RESERVED = lowMask(";/?:@&=+$,[]");
  2635     private static final long L_RESERVED = 0xAC00985000000000L; // lowMask(";/?:@&=+$,[]");
  2634     private static final long H_RESERVED = highMask(";/?:@&=+$,[]");
  2636     private static final long H_RESERVED = 0x28000001L; // highMask(";/?:@&=+$,[]");
  2635 
  2637 
  2636     // The zero'th bit is used to indicate that escape pairs and non-US-ASCII
  2638     // The zero'th bit is used to indicate that escape pairs and non-US-ASCII
  2637     // characters are allowed; this is handled by the scanEscape method below.
  2639     // characters are allowed; this is handled by the scanEscape method below.
  2638     private static final long L_ESCAPED = 1L;
  2640     private static final long L_ESCAPED = 1L;
  2639     private static final long H_ESCAPED = 0L;
  2641     private static final long H_ESCAPED = 0L;
  2643     private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED;
  2645     private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED;
  2644 
  2646 
  2645     // pchar         = unreserved | escaped |
  2647     // pchar         = unreserved | escaped |
  2646     //                 ":" | "@" | "&" | "=" | "+" | "$" | ","
  2648     //                 ":" | "@" | "&" | "=" | "+" | "$" | ","
  2647     private static final long L_PCHAR
  2649     private static final long L_PCHAR
  2648         = L_UNRESERVED | L_ESCAPED | lowMask(":@&=+$,");
  2650         = L_UNRESERVED | L_ESCAPED | 0x2400185000000000L; // lowMask(":@&=+$,");
  2649     private static final long H_PCHAR
  2651     private static final long H_PCHAR
  2650         = H_UNRESERVED | H_ESCAPED | highMask(":@&=+$,");
  2652         = H_UNRESERVED | H_ESCAPED | 0x1L; // highMask(":@&=+$,");
  2651 
  2653 
  2652     // All valid path characters
  2654     // All valid path characters
  2653     private static final long L_PATH = L_PCHAR | lowMask(";/");
  2655     private static final long L_PATH = L_PCHAR | 0x800800000000000L; // lowMask(";/");
  2654     private static final long H_PATH = H_PCHAR | highMask(";/");
  2656     private static final long H_PATH = H_PCHAR; // | highMask(";/") == 0L;
  2655 
  2657 
  2656     // Dash, for use in domainlabel and toplabel
  2658     // Dash, for use in domainlabel and toplabel
  2657     private static final long L_DASH = lowMask("-");
  2659     private static final long L_DASH = 0x200000000000L; // lowMask("-");
  2658     private static final long H_DASH = highMask("-");
  2660     private static final long H_DASH = 0x0L; // highMask("-");
  2659 
  2661 
  2660     // Dot, for use in hostnames
  2662     // Dot, for use in hostnames
  2661     private static final long L_DOT = lowMask(".");
  2663     private static final long L_DOT = 0x400000000000L; // lowMask(".");
  2662     private static final long H_DOT = highMask(".");
  2664     private static final long H_DOT = 0x0L; // highMask(".");
  2663 
  2665 
  2664     // userinfo      = *( unreserved | escaped |
  2666     // userinfo      = *( unreserved | escaped |
  2665     //                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
  2667     //                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
  2666     private static final long L_USERINFO
  2668     private static final long L_USERINFO
  2667         = L_UNRESERVED | L_ESCAPED | lowMask(";:&=+$,");
  2669         = L_UNRESERVED | L_ESCAPED | 0x2C00185000000000L; // lowMask(";:&=+$,");
  2668     private static final long H_USERINFO
  2670     private static final long H_USERINFO
  2669         = H_UNRESERVED | H_ESCAPED | highMask(";:&=+$,");
  2671         = H_UNRESERVED | H_ESCAPED; // | highMask(";:&=+$,") == 0L;
  2670 
  2672 
  2671     // reg_name      = 1*( unreserved | escaped | "$" | "," |
  2673     // reg_name      = 1*( unreserved | escaped | "$" | "," |
  2672     //                     ";" | ":" | "@" | "&" | "=" | "+" )
  2674     //                     ";" | ":" | "@" | "&" | "=" | "+" )
  2673     private static final long L_REG_NAME
  2675     private static final long L_REG_NAME
  2674         = L_UNRESERVED | L_ESCAPED | lowMask("$,;:@&=+");
  2676         = L_UNRESERVED | L_ESCAPED | 0x2C00185000000000L; // lowMask("$,;:@&=+");
  2675     private static final long H_REG_NAME
  2677     private static final long H_REG_NAME
  2676         = H_UNRESERVED | H_ESCAPED | highMask("$,;:@&=+");
  2678         = H_UNRESERVED | H_ESCAPED | 0x1L; // highMask("$,;:@&=+");
  2677 
  2679 
  2678     // All valid characters for server-based authorities
  2680     // All valid characters for server-based authorities
  2679     private static final long L_SERVER
  2681     private static final long L_SERVER
  2680         = L_USERINFO | L_ALPHANUM | L_DASH | lowMask(".:@[]");
  2682         = L_USERINFO | L_ALPHANUM | L_DASH | 0x400400000000000L; // lowMask(".:@[]");
  2681     private static final long H_SERVER
  2683     private static final long H_SERVER
  2682         = H_USERINFO | H_ALPHANUM | H_DASH | highMask(".:@[]");
  2684         = H_USERINFO | H_ALPHANUM | H_DASH | 0x28000001L; // highMask(".:@[]");
  2683 
  2685 
  2684     // Special case of server authority that represents an IPv6 address
  2686     // Special case of server authority that represents an IPv6 address
  2685     // In this case, a % does not signify an escape sequence
  2687     // In this case, a % does not signify an escape sequence
  2686     private static final long L_SERVER_PERCENT
  2688     private static final long L_SERVER_PERCENT
  2687         = L_SERVER | lowMask("%");
  2689         = L_SERVER | 0x2000000000L; // lowMask("%");
  2688     private static final long H_SERVER_PERCENT
  2690     private static final long H_SERVER_PERCENT
  2689         = H_SERVER | highMask("%");
  2691         = H_SERVER; // | highMask("%") == 0L;
  2690     private static final long L_LEFT_BRACKET = lowMask("[");
       
  2691     private static final long H_LEFT_BRACKET = highMask("[");
       
  2692 
  2692 
  2693     // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
  2693     // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
  2694     private static final long L_SCHEME = L_ALPHA | L_DIGIT | lowMask("+-.");
  2694     private static final long L_SCHEME = L_ALPHA | L_DIGIT | 0x680000000000L; // lowMask("+-.");
  2695     private static final long H_SCHEME = H_ALPHA | H_DIGIT | highMask("+-.");
  2695     private static final long H_SCHEME = H_ALPHA | H_DIGIT; // | highMask("+-.") == 0L
  2696 
  2696 
  2697     // scope_id = alpha | digit | "_" | "."
  2697     // scope_id = alpha | digit | "_" | "."
  2698     private static final long L_SCOPE_ID
  2698     private static final long L_SCOPE_ID
  2699         = L_ALPHANUM | lowMask("_.");
  2699         = L_ALPHANUM | 0x400000000000L; // lowMask("_.");
  2700     private static final long H_SCOPE_ID
  2700     private static final long H_SCOPE_ID
  2701         = H_ALPHANUM | highMask("_.");
  2701         = H_ALPHANUM | 0x80000000L; // highMask("_.");
  2702 
  2702 
  2703     // -- Escaping and encoding --
  2703     // -- Escaping and encoding --
  2704 
  2704 
  2705     private static final char[] hexDigits = {
  2705     private static final char[] hexDigits = {
  2706         '0', '1', '2', '3', '4', '5', '6', '7',
  2706         '0', '1', '2', '3', '4', '5', '6', '7',