jdk/src/java.base/share/classes/java/net/URI.java
changeset 34887 fcac26ad0c56
parent 34783 337afb24ec6c
child 34939 4fd867517aec
equal deleted inserted replaced
34886:f3ee5206aa01 34887:fcac26ad0c56
  2057         if (child.isOpaque() || base.isOpaque())
  2057         if (child.isOpaque() || base.isOpaque())
  2058             return child;
  2058             return child;
  2059 
  2059 
  2060         // 5.2 (2): Reference to current document (lone fragment)
  2060         // 5.2 (2): Reference to current document (lone fragment)
  2061         if ((child.scheme == null) && (child.authority == null)
  2061         if ((child.scheme == null) && (child.authority == null)
  2062             && child.path.equals("") && (child.fragment != null)
  2062             && child.path.isEmpty() && (child.fragment != null)
  2063             && (child.query == null)) {
  2063             && (child.query == null)) {
  2064             if ((base.fragment != null)
  2064             if ((base.fragment != null)
  2065                 && child.fragment.equals(base.fragment)) {
  2065                 && child.fragment.equals(base.fragment)) {
  2066                 return base;
  2066                 return base;
  2067             }
  2067             }
  2644     private static final long H_LEFT_BRACKET = highMask("[");
  2644     private static final long H_LEFT_BRACKET = highMask("[");
  2645 
  2645 
  2646     // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
  2646     // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
  2647     private static final long L_SCHEME = L_ALPHA | L_DIGIT | lowMask("+-.");
  2647     private static final long L_SCHEME = L_ALPHA | L_DIGIT | lowMask("+-.");
  2648     private static final long H_SCHEME = H_ALPHA | H_DIGIT | highMask("+-.");
  2648     private static final long H_SCHEME = H_ALPHA | H_DIGIT | highMask("+-.");
  2649 
       
  2650     // uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
       
  2651     //                 "&" | "=" | "+" | "$" | ","
       
  2652     private static final long L_URIC_NO_SLASH
       
  2653         = L_UNRESERVED | L_ESCAPED | lowMask(";?:@&=+$,");
       
  2654     private static final long H_URIC_NO_SLASH
       
  2655         = H_UNRESERVED | H_ESCAPED | highMask(";?:@&=+$,");
       
  2656 
  2649 
  2657     // scope_id = alpha | digit | "_" | "."
  2650     // scope_id = alpha | digit | "_" | "."
  2658     private static final long L_SCOPE_ID
  2651     private static final long L_SCOPE_ID
  2659         = L_ALPHANUM | lowMask("_.");
  2652         = L_ALPHANUM | lowMask("_.");
  2660     private static final long H_SCOPE_ID
  2653     private static final long H_SCOPE_ID
  2882         }
  2875         }
  2883 
  2876 
  2884 
  2877 
  2885         // -- Simple access to the input string --
  2878         // -- Simple access to the input string --
  2886 
  2879 
  2887         // Return a substring of the input string
       
  2888         //
       
  2889         private String substring(int start, int end) {
       
  2890             return input.substring(start, end);
       
  2891         }
       
  2892 
       
  2893         // Return the char at position p,
       
  2894         // assuming that p < input.length()
       
  2895         //
       
  2896         private char charAt(int p) {
       
  2897             return input.charAt(p);
       
  2898         }
       
  2899 
       
  2900         // Tells whether start < end and, if so, whether charAt(start) == c
  2880         // Tells whether start < end and, if so, whether charAt(start) == c
  2901         //
  2881         //
  2902         private boolean at(int start, int end, char c) {
  2882         private boolean at(int start, int end, char c) {
  2903             return (start < end) && (charAt(start) == c);
  2883             return (start < end) && (input.charAt(start) == c);
  2904         }
  2884         }
  2905 
  2885 
  2906         // Tells whether start + s.length() <= end and, if so,
  2886         // Tells whether start + s.length() <= end and, if so,
  2907         // whether the chars at the start position match s exactly
  2887         // whether the chars at the start position match s exactly
  2908         //
  2888         //
  2911             int sn = s.length();
  2891             int sn = s.length();
  2912             if (sn > end - p)
  2892             if (sn > end - p)
  2913                 return false;
  2893                 return false;
  2914             int i = 0;
  2894             int i = 0;
  2915             while (i < sn) {
  2895             while (i < sn) {
  2916                 if (charAt(p++) != s.charAt(i)) {
  2896                 if (input.charAt(p++) != s.charAt(i)) {
  2917                     break;
  2897                     break;
  2918                 }
  2898                 }
  2919                 i++;
  2899                 i++;
  2920             }
  2900             }
  2921             return (i == sn);
  2901             return (i == sn);
  2951         // Scan a specific char: If the char at the given start position is
  2931         // Scan a specific char: If the char at the given start position is
  2952         // equal to c, return the index of the next char; otherwise, return the
  2932         // equal to c, return the index of the next char; otherwise, return the
  2953         // start position.
  2933         // start position.
  2954         //
  2934         //
  2955         private int scan(int start, int end, char c) {
  2935         private int scan(int start, int end, char c) {
  2956             if ((start < end) && (charAt(start) == c))
  2936             if ((start < end) && (input.charAt(start) == c))
  2957                 return start + 1;
  2937                 return start + 1;
  2958             return start;
  2938             return start;
  2959         }
  2939         }
  2960 
  2940 
  2961         // Scan forward from the given start position.  Stop at the first char
  2941         // Scan forward from the given start position.  Stop at the first char
  2966         // nothing matches.
  2946         // nothing matches.
  2967         //
  2947         //
  2968         private int scan(int start, int end, String err, String stop) {
  2948         private int scan(int start, int end, String err, String stop) {
  2969             int p = start;
  2949             int p = start;
  2970             while (p < end) {
  2950             while (p < end) {
  2971                 char c = charAt(p);
  2951                 char c = input.charAt(p);
  2972                 if (err.indexOf(c) >= 0)
  2952                 if (err.indexOf(c) >= 0)
  2973                     return -1;
  2953                     return -1;
       
  2954                 if (stop.indexOf(c) >= 0)
       
  2955                     break;
       
  2956                 p++;
       
  2957             }
       
  2958             return p;
       
  2959         }
       
  2960 
       
  2961         // Scan forward from the given start position.  Stop at the first char
       
  2962         // in the stop string (in which case the index of that char is
       
  2963         // returned), or the end of the input string (in which case the length
       
  2964         // of the input string is returned).  May return the start position if
       
  2965         // nothing matches.
       
  2966         //
       
  2967         private int scan(int start, int end, String stop) {
       
  2968             int p = start;
       
  2969             while (p < end) {
       
  2970                 char c = input.charAt(p);
  2974                 if (stop.indexOf(c) >= 0)
  2971                 if (stop.indexOf(c) >= 0)
  2975                     break;
  2972                     break;
  2976                 p++;
  2973                 p++;
  2977             }
  2974             }
  2978             return p;
  2975             return p;
  2990             int p = start;
  2987             int p = start;
  2991             char c = first;
  2988             char c = first;
  2992             if (c == '%') {
  2989             if (c == '%') {
  2993                 // Process escape pair
  2990                 // Process escape pair
  2994                 if ((p + 3 <= n)
  2991                 if ((p + 3 <= n)
  2995                     && match(charAt(p + 1), L_HEX, H_HEX)
  2992                     && match(input.charAt(p + 1), L_HEX, H_HEX)
  2996                     && match(charAt(p + 2), L_HEX, H_HEX)) {
  2993                     && match(input.charAt(p + 2), L_HEX, H_HEX)) {
  2997                     return p + 3;
  2994                     return p + 3;
  2998                 }
  2995                 }
  2999                 fail("Malformed escape pair", p);
  2996                 fail("Malformed escape pair", p);
  3000             } else if ((c > 128)
  2997             } else if ((c > 128)
  3001                        && !Character.isSpaceChar(c)
  2998                        && !Character.isSpaceChar(c)
  3011         private int scan(int start, int n, long lowMask, long highMask)
  3008         private int scan(int start, int n, long lowMask, long highMask)
  3012             throws URISyntaxException
  3009             throws URISyntaxException
  3013         {
  3010         {
  3014             int p = start;
  3011             int p = start;
  3015             while (p < n) {
  3012             while (p < n) {
  3016                 char c = charAt(p);
  3013                 char c = input.charAt(p);
  3017                 if (match(c, lowMask, highMask)) {
  3014                 if (match(c, lowMask, highMask)) {
  3018                     p++;
  3015                     p++;
  3019                     continue;
  3016                     continue;
  3020                 }
  3017                 }
  3021                 if ((lowMask & L_ESCAPED) != 0) {
  3018                 if ((lowMask & L_ESCAPED) != 0) {
  3065             if ((p >= 0) && at(p, n, ':')) {
  3062             if ((p >= 0) && at(p, n, ':')) {
  3066                 if (p == 0)
  3063                 if (p == 0)
  3067                     failExpecting("scheme name", 0);
  3064                     failExpecting("scheme name", 0);
  3068                 checkChar(0, L_ALPHA, H_ALPHA, "scheme name");
  3065                 checkChar(0, L_ALPHA, H_ALPHA, "scheme name");
  3069                 checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");
  3066                 checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");
  3070                 scheme = substring(0, p);
  3067                 scheme = input.substring(0, p);
  3071                 p++;                    // Skip ':'
  3068                 p++;                    // Skip ':'
  3072                 ssp = p;
  3069                 ssp = p;
  3073                 if (at(p, n, '/')) {
  3070                 if (at(p, n, '/')) {
  3074                     p = parseHierarchical(p, n);
  3071                     p = parseHierarchical(p, n);
  3075                 } else {
  3072                 } else {
  3076                     int q = scan(p, n, "", "#");
  3073                     int q = scan(p, n, "#");
  3077                     if (q <= p)
  3074                     if (q <= p)
  3078                         failExpecting("scheme-specific part", p);
  3075                         failExpecting("scheme-specific part", p);
  3079                     checkChars(p, q, L_URIC, H_URIC, "opaque part");
  3076                     checkChars(p, q, L_URIC, H_URIC, "opaque part");
  3080                     p = q;
  3077                     p = q;
  3081                 }
  3078                 }
  3082             } else {
  3079             } else {
  3083                 ssp = 0;
  3080                 ssp = 0;
  3084                 p = parseHierarchical(0, n);
  3081                 p = parseHierarchical(0, n);
  3085             }
  3082             }
  3086             schemeSpecificPart = substring(ssp, p);
  3083             schemeSpecificPart = input.substring(ssp, p);
  3087             if (at(p, n, '#')) {
  3084             if (at(p, n, '#')) {
  3088                 checkChars(p + 1, n, L_URIC, H_URIC, "fragment");
  3085                 checkChars(p + 1, n, L_URIC, H_URIC, "fragment");
  3089                 fragment = substring(p + 1, n);
  3086                 fragment = input.substring(p + 1, n);
  3090                 p = n;
  3087                 p = n;
  3091             }
  3088             }
  3092             if (p < n)
  3089             if (p < n)
  3093                 fail("end of URI", p);
  3090                 fail("end of URI", p);
  3094         }
  3091         }
  3111             throws URISyntaxException
  3108             throws URISyntaxException
  3112         {
  3109         {
  3113             int p = start;
  3110             int p = start;
  3114             if (at(p, n, '/') && at(p + 1, n, '/')) {
  3111             if (at(p, n, '/') && at(p + 1, n, '/')) {
  3115                 p += 2;
  3112                 p += 2;
  3116                 int q = scan(p, n, "", "/?#");
  3113                 int q = scan(p, n, "/?#");
  3117                 if (q > p) {
  3114                 if (q > p) {
  3118                     p = parseAuthority(p, q);
  3115                     p = parseAuthority(p, q);
  3119                 } else if (q < n) {
  3116                 } else if (q < n) {
  3120                     // DEVIATION: Allow empty authority prior to non-empty
  3117                     // DEVIATION: Allow empty authority prior to non-empty
  3121                     // path, query component or fragment identifier
  3118                     // path, query component or fragment identifier
  3122                 } else
  3119                 } else
  3123                     failExpecting("authority", p);
  3120                     failExpecting("authority", p);
  3124             }
  3121             }
  3125             int q = scan(p, n, "", "?#"); // DEVIATION: May be empty
  3122             int q = scan(p, n, "?#"); // DEVIATION: May be empty
  3126             checkChars(p, q, L_PATH, H_PATH, "path");
  3123             checkChars(p, q, L_PATH, H_PATH, "path");
  3127             path = substring(p, q);
  3124             path = input.substring(p, q);
  3128             p = q;
  3125             p = q;
  3129             if (at(p, n, '?')) {
  3126             if (at(p, n, '?')) {
  3130                 p++;
  3127                 p++;
  3131                 q = scan(p, n, "", "#");
  3128                 q = scan(p, n, "#");
  3132                 checkChars(p, q, L_URIC, H_URIC, "query");
  3129                 checkChars(p, q, L_URIC, H_URIC, "query");
  3133                 query = substring(p, q);
  3130                 query = input.substring(p, q);
  3134                 p = q;
  3131                 p = q;
  3135             }
  3132             }
  3136             return p;
  3133             return p;
  3137         }
  3134         }
  3138 
  3135 
  3152             URISyntaxException ex = null;
  3149             URISyntaxException ex = null;
  3153 
  3150 
  3154             boolean serverChars;
  3151             boolean serverChars;
  3155             boolean regChars;
  3152             boolean regChars;
  3156 
  3153 
  3157             if (scan(p, n, "", "]") > p) {
  3154             if (scan(p, n, "]") > p) {
  3158                 // contains a literal IPv6 address, therefore % is allowed
  3155                 // contains a literal IPv6 address, therefore % is allowed
  3159                 serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n);
  3156                 serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n);
  3160             } else {
  3157             } else {
  3161                 serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);
  3158                 serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);
  3162             }
  3159             }
  3163             regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n);
  3160             regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n);
  3164 
  3161 
  3165             if (regChars && !serverChars) {
  3162             if (regChars && !serverChars) {
  3166                 // Must be a registry-based authority
  3163                 // Must be a registry-based authority
  3167                 authority = substring(p, n);
  3164                 authority = input.substring(p, n);
  3168                 return n;
  3165                 return n;
  3169             }
  3166             }
  3170 
  3167 
  3171             if (serverChars) {
  3168             if (serverChars) {
  3172                 // Might be (probably is) a server-based authority, so attempt
  3169                 // Might be (probably is) a server-based authority, so attempt
  3174                 // as a registry-based authority.
  3171                 // as a registry-based authority.
  3175                 try {
  3172                 try {
  3176                     q = parseServer(p, n);
  3173                     q = parseServer(p, n);
  3177                     if (q < n)
  3174                     if (q < n)
  3178                         failExpecting("end of authority", q);
  3175                         failExpecting("end of authority", q);
  3179                     authority = substring(p, n);
  3176                     authority = input.substring(p, n);
  3180                 } catch (URISyntaxException x) {
  3177                 } catch (URISyntaxException x) {
  3181                     // Undo results of failed parse
  3178                     // Undo results of failed parse
  3182                     userInfo = null;
  3179                     userInfo = null;
  3183                     host = null;
  3180                     host = null;
  3184                     port = -1;
  3181                     port = -1;
  3196             }
  3193             }
  3197 
  3194 
  3198             if (q < n) {
  3195             if (q < n) {
  3199                 if (regChars) {
  3196                 if (regChars) {
  3200                     // Registry-based authority
  3197                     // Registry-based authority
  3201                     authority = substring(p, n);
  3198                     authority = input.substring(p, n);
  3202                 } else if (ex != null) {
  3199                 } else if (ex != null) {
  3203                     // Re-throw exception; it was probably due to
  3200                     // Re-throw exception; it was probably due to
  3204                     // a malformed IPv6 address
  3201                     // a malformed IPv6 address
  3205                     throw ex;
  3202                     throw ex;
  3206                 } else {
  3203                 } else {
  3222 
  3219 
  3223             // userinfo
  3220             // userinfo
  3224             q = scan(p, n, "/?#", "@");
  3221             q = scan(p, n, "/?#", "@");
  3225             if ((q >= p) && at(q, n, '@')) {
  3222             if ((q >= p) && at(q, n, '@')) {
  3226                 checkChars(p, q, L_USERINFO, H_USERINFO, "user info");
  3223                 checkChars(p, q, L_USERINFO, H_USERINFO, "user info");
  3227                 userInfo = substring(p, q);
  3224                 userInfo = input.substring(p, q);
  3228                 p = q + 1;              // Skip '@'
  3225                 p = q + 1;              // Skip '@'
  3229             }
  3226             }
  3230 
  3227 
  3231             // hostname, IPv4 address, or IPv6 address
  3228             // hostname, IPv4 address, or IPv6 address
  3232             if (at(p, n, '[')) {
  3229             if (at(p, n, '[')) {
  3233                 // DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732
  3230                 // DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732
  3234                 p++;
  3231                 p++;
  3235                 q = scan(p, n, "/?#", "]");
  3232                 q = scan(p, n, "/?#", "]");
  3236                 if ((q > p) && at(q, n, ']')) {
  3233                 if ((q > p) && at(q, n, ']')) {
  3237                     // look for a "%" scope id
  3234                     // look for a "%" scope id
  3238                     int r = scan (p, q, "", "%");
  3235                     int r = scan (p, q, "%");
  3239                     if (r > p) {
  3236                     if (r > p) {
  3240                         parseIPv6Reference(p, r);
  3237                         parseIPv6Reference(p, r);
  3241                         if (r+1 == q) {
  3238                         if (r+1 == q) {
  3242                             fail ("scope id expected");
  3239                             fail ("scope id expected");
  3243                         }
  3240                         }
  3244                         checkChars (r+1, q, L_SCOPE_ID, H_SCOPE_ID,
  3241                         checkChars (r+1, q, L_SCOPE_ID, H_SCOPE_ID,
  3245                                                 "scope id");
  3242                                                 "scope id");
  3246                     } else {
  3243                     } else {
  3247                         parseIPv6Reference(p, q);
  3244                         parseIPv6Reference(p, q);
  3248                     }
  3245                     }
  3249                     host = substring(p-1, q+1);
  3246                     host = input.substring(p-1, q+1);
  3250                     p = q + 1;
  3247                     p = q + 1;
  3251                 } else {
  3248                 } else {
  3252                     failExpecting("closing bracket for IPv6 address", q);
  3249                     failExpecting("closing bracket for IPv6 address", q);
  3253                 }
  3250                 }
  3254             } else {
  3251             } else {
  3259             }
  3256             }
  3260 
  3257 
  3261             // port
  3258             // port
  3262             if (at(p, n, ':')) {
  3259             if (at(p, n, ':')) {
  3263                 p++;
  3260                 p++;
  3264                 q = scan(p, n, "", "/");
  3261                 q = scan(p, n, "/");
  3265                 if (q > p) {
  3262                 if (q > p) {
  3266                     checkChars(p, q, L_DIGIT, H_DIGIT, "port number");
  3263                     checkChars(p, q, L_DIGIT, H_DIGIT, "port number");
  3267                     try {
  3264                     try {
  3268                         port = Integer.parseInt(input, p, q, 10);
  3265                         port = Integer.parseInt(input, p, q, 10);
  3269                     } catch (NumberFormatException x) {
  3266                     } catch (NumberFormatException x) {
  3359 
  3356 
  3360             if (p > start && p < n) {
  3357             if (p > start && p < n) {
  3361                 // IPv4 address is followed by something - check that
  3358                 // IPv4 address is followed by something - check that
  3362                 // it's a ":" as this is the only valid character to
  3359                 // it's a ":" as this is the only valid character to
  3363                 // follow an address.
  3360                 // follow an address.
  3364                 if (charAt(p) != ':') {
  3361                 if (input.charAt(p) != ':') {
  3365                     p = -1;
  3362                     p = -1;
  3366                 }
  3363                 }
  3367             }
  3364             }
  3368 
  3365 
  3369             if (p > start)
  3366             if (p > start)
  3370                 host = substring(start, p);
  3367                 host = input.substring(start, p);
  3371 
  3368 
  3372             return p;
  3369             return p;
  3373         }
  3370         }
  3374 
  3371 
  3375         // hostname      = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ]
  3372         // hostname      = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ]
  3391                 l = p;
  3388                 l = p;
  3392                 if (q > p) {
  3389                 if (q > p) {
  3393                     p = q;
  3390                     p = q;
  3394                     q = scan(p, n, L_ALPHANUM | L_DASH, H_ALPHANUM | H_DASH);
  3391                     q = scan(p, n, L_ALPHANUM | L_DASH, H_ALPHANUM | H_DASH);
  3395                     if (q > p) {
  3392                     if (q > p) {
  3396                         if (charAt(q - 1) == '-')
  3393                         if (input.charAt(q - 1) == '-')
  3397                             fail("Illegal character in hostname", q - 1);
  3394                             fail("Illegal character in hostname", q - 1);
  3398                         p = q;
  3395                         p = q;
  3399                     }
  3396                     }
  3400                 }
  3397                 }
  3401                 q = scan(p, n, '.');
  3398                 q = scan(p, n, '.');
  3410             if (l < 0)
  3407             if (l < 0)
  3411                 failExpecting("hostname", start);
  3408                 failExpecting("hostname", start);
  3412 
  3409 
  3413             // for a fully qualified hostname check that the rightmost
  3410             // for a fully qualified hostname check that the rightmost
  3414             // label starts with an alpha character.
  3411             // label starts with an alpha character.
  3415             if (l > start && !match(charAt(l), L_ALPHA, H_ALPHA)) {
  3412             if (l > start && !match(input.charAt(l), L_ALPHA, H_ALPHA)) {
  3416                 fail("Illegal character in hostname", l);
  3413                 fail("Illegal character in hostname", l);
  3417             }
  3414             }
  3418 
  3415 
  3419             host = substring(start, p);
  3416             host = input.substring(start, p);
  3420             return p;
  3417             return p;
  3421         }
  3418         }
  3422 
  3419 
  3423 
  3420 
  3424         // IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture
  3421         // IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture