2057 if (child.isOpaque() || base.isOpaque()) |
2057 if (child.isOpaque() || base.isOpaque()) |
2058 return child; |
2058 return child; |
2059 |
2059 |
2060 // 5.2 (2): Reference to current document (lone fragment) |
2060 // 5.2 (2): Reference to current document (lone fragment) |
2061 if ((child.scheme == null) && (child.authority == null) |
2061 if ((child.scheme == null) && (child.authority == null) |
2062 && child.path.equals("") && (child.fragment != null) |
2062 && child.path.isEmpty() && (child.fragment != null) |
2063 && (child.query == null)) { |
2063 && (child.query == null)) { |
2064 if ((base.fragment != null) |
2064 if ((base.fragment != null) |
2065 && child.fragment.equals(base.fragment)) { |
2065 && child.fragment.equals(base.fragment)) { |
2066 return base; |
2066 return base; |
2067 } |
2067 } |
2644 private static final long H_LEFT_BRACKET = highMask("["); |
2644 private static final long H_LEFT_BRACKET = highMask("["); |
2645 |
2645 |
2646 // scheme = alpha *( alpha | digit | "+" | "-" | "." ) |
2646 // scheme = alpha *( alpha | digit | "+" | "-" | "." ) |
2647 private static final long L_SCHEME = L_ALPHA | L_DIGIT | lowMask("+-."); |
2647 private static final long L_SCHEME = L_ALPHA | L_DIGIT | lowMask("+-."); |
2648 private static final long H_SCHEME = H_ALPHA | H_DIGIT | highMask("+-."); |
2648 private static final long H_SCHEME = H_ALPHA | H_DIGIT | highMask("+-."); |
2649 |
|
2650 // uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" | |
|
2651 // "&" | "=" | "+" | "$" | "," |
|
2652 private static final long L_URIC_NO_SLASH |
|
2653 = L_UNRESERVED | L_ESCAPED | lowMask(";?:@&=+$,"); |
|
2654 private static final long H_URIC_NO_SLASH |
|
2655 = H_UNRESERVED | H_ESCAPED | highMask(";?:@&=+$,"); |
|
2656 |
2649 |
2657 // scope_id = alpha | digit | "_" | "." |
2650 // scope_id = alpha | digit | "_" | "." |
2658 private static final long L_SCOPE_ID |
2651 private static final long L_SCOPE_ID |
2659 = L_ALPHANUM | lowMask("_."); |
2652 = L_ALPHANUM | lowMask("_."); |
2660 private static final long H_SCOPE_ID |
2653 private static final long H_SCOPE_ID |
2882 } |
2875 } |
2883 |
2876 |
2884 |
2877 |
2885 // -- Simple access to the input string -- |
2878 // -- Simple access to the input string -- |
2886 |
2879 |
2887 // Return a substring of the input string |
|
2888 // |
|
2889 private String substring(int start, int end) { |
|
2890 return input.substring(start, end); |
|
2891 } |
|
2892 |
|
2893 // Return the char at position p, |
|
2894 // assuming that p < input.length() |
|
2895 // |
|
2896 private char charAt(int p) { |
|
2897 return input.charAt(p); |
|
2898 } |
|
2899 |
|
2900 // Tells whether start < end and, if so, whether charAt(start) == c |
2880 // Tells whether start < end and, if so, whether charAt(start) == c |
2901 // |
2881 // |
2902 private boolean at(int start, int end, char c) { |
2882 private boolean at(int start, int end, char c) { |
2903 return (start < end) && (charAt(start) == c); |
2883 return (start < end) && (input.charAt(start) == c); |
2904 } |
2884 } |
2905 |
2885 |
2906 // Tells whether start + s.length() < end and, if so, |
2886 // Tells whether start + s.length() < end and, if so, |
2907 // whether the chars at the start position match s exactly |
2887 // whether the chars at the start position match s exactly |
2908 // |
2888 // |
2951 // Scan a specific char: If the char at the given start position is |
2931 // Scan a specific char: If the char at the given start position is |
2952 // equal to c, return the index of the next char; otherwise, return the |
2932 // equal to c, return the index of the next char; otherwise, return the |
2953 // start position. |
2933 // start position. |
2954 // |
2934 // |
2955 private int scan(int start, int end, char c) { |
2935 private int scan(int start, int end, char c) { |
2956 if ((start < end) && (charAt(start) == c)) |
2936 if ((start < end) && (input.charAt(start) == c)) |
2957 return start + 1; |
2937 return start + 1; |
2958 return start; |
2938 return start; |
2959 } |
2939 } |
2960 |
2940 |
2961 // Scan forward from the given start position. Stop at the first char |
2941 // Scan forward from the given start position. Stop at the first char |
2966 // nothing matches. |
2946 // nothing matches. |
2967 // |
2947 // |
2968 private int scan(int start, int end, String err, String stop) { |
2948 private int scan(int start, int end, String err, String stop) { |
2969 int p = start; |
2949 int p = start; |
2970 while (p < end) { |
2950 while (p < end) { |
2971 char c = charAt(p); |
2951 char c = input.charAt(p); |
2972 if (err.indexOf(c) >= 0) |
2952 if (err.indexOf(c) >= 0) |
2973 return -1; |
2953 return -1; |
|
2954 if (stop.indexOf(c) >= 0) |
|
2955 break; |
|
2956 p++; |
|
2957 } |
|
2958 return p; |
|
2959 } |
|
2960 |
|
2961 // Scan forward from the given start position. Stop at the first char |
|
2962 // in the stop string (in which case the index of the preceding char is |
|
2963 // returned), or the end of the input string (in which case the length |
|
2964 // of the input string is returned). May return the start position if |
|
2965 // nothing matches. |
|
2966 // |
|
2967 private int scan(int start, int end, String stop) { |
|
2968 int p = start; |
|
2969 while (p < end) { |
|
2970 char c = input.charAt(p); |
2974 if (stop.indexOf(c) >= 0) |
2971 if (stop.indexOf(c) >= 0) |
2975 break; |
2972 break; |
2976 p++; |
2973 p++; |
2977 } |
2974 } |
2978 return p; |
2975 return p; |
2990 int p = start; |
2987 int p = start; |
2991 char c = first; |
2988 char c = first; |
2992 if (c == '%') { |
2989 if (c == '%') { |
2993 // Process escape pair |
2990 // Process escape pair |
2994 if ((p + 3 <= n) |
2991 if ((p + 3 <= n) |
2995 && match(charAt(p + 1), L_HEX, H_HEX) |
2992 && match(input.charAt(p + 1), L_HEX, H_HEX) |
2996 && match(charAt(p + 2), L_HEX, H_HEX)) { |
2993 && match(input.charAt(p + 2), L_HEX, H_HEX)) { |
2997 return p + 3; |
2994 return p + 3; |
2998 } |
2995 } |
2999 fail("Malformed escape pair", p); |
2996 fail("Malformed escape pair", p); |
3000 } else if ((c > 128) |
2997 } else if ((c > 128) |
3001 && !Character.isSpaceChar(c) |
2998 && !Character.isSpaceChar(c) |
3011 private int scan(int start, int n, long lowMask, long highMask) |
3008 private int scan(int start, int n, long lowMask, long highMask) |
3012 throws URISyntaxException |
3009 throws URISyntaxException |
3013 { |
3010 { |
3014 int p = start; |
3011 int p = start; |
3015 while (p < n) { |
3012 while (p < n) { |
3016 char c = charAt(p); |
3013 char c = input.charAt(p); |
3017 if (match(c, lowMask, highMask)) { |
3014 if (match(c, lowMask, highMask)) { |
3018 p++; |
3015 p++; |
3019 continue; |
3016 continue; |
3020 } |
3017 } |
3021 if ((lowMask & L_ESCAPED) != 0) { |
3018 if ((lowMask & L_ESCAPED) != 0) { |
3065 if ((p >= 0) && at(p, n, ':')) { |
3062 if ((p >= 0) && at(p, n, ':')) { |
3066 if (p == 0) |
3063 if (p == 0) |
3067 failExpecting("scheme name", 0); |
3064 failExpecting("scheme name", 0); |
3068 checkChar(0, L_ALPHA, H_ALPHA, "scheme name"); |
3065 checkChar(0, L_ALPHA, H_ALPHA, "scheme name"); |
3069 checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name"); |
3066 checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name"); |
3070 scheme = substring(0, p); |
3067 scheme = input.substring(0, p); |
3071 p++; // Skip ':' |
3068 p++; // Skip ':' |
3072 ssp = p; |
3069 ssp = p; |
3073 if (at(p, n, '/')) { |
3070 if (at(p, n, '/')) { |
3074 p = parseHierarchical(p, n); |
3071 p = parseHierarchical(p, n); |
3075 } else { |
3072 } else { |
3076 int q = scan(p, n, "", "#"); |
3073 int q = scan(p, n, "#"); |
3077 if (q <= p) |
3074 if (q <= p) |
3078 failExpecting("scheme-specific part", p); |
3075 failExpecting("scheme-specific part", p); |
3079 checkChars(p, q, L_URIC, H_URIC, "opaque part"); |
3076 checkChars(p, q, L_URIC, H_URIC, "opaque part"); |
3080 p = q; |
3077 p = q; |
3081 } |
3078 } |
3082 } else { |
3079 } else { |
3083 ssp = 0; |
3080 ssp = 0; |
3084 p = parseHierarchical(0, n); |
3081 p = parseHierarchical(0, n); |
3085 } |
3082 } |
3086 schemeSpecificPart = substring(ssp, p); |
3083 schemeSpecificPart = input.substring(ssp, p); |
3087 if (at(p, n, '#')) { |
3084 if (at(p, n, '#')) { |
3088 checkChars(p + 1, n, L_URIC, H_URIC, "fragment"); |
3085 checkChars(p + 1, n, L_URIC, H_URIC, "fragment"); |
3089 fragment = substring(p + 1, n); |
3086 fragment = input.substring(p + 1, n); |
3090 p = n; |
3087 p = n; |
3091 } |
3088 } |
3092 if (p < n) |
3089 if (p < n) |
3093 fail("end of URI", p); |
3090 fail("end of URI", p); |
3094 } |
3091 } |
3111 throws URISyntaxException |
3108 throws URISyntaxException |
3112 { |
3109 { |
3113 int p = start; |
3110 int p = start; |
3114 if (at(p, n, '/') && at(p + 1, n, '/')) { |
3111 if (at(p, n, '/') && at(p + 1, n, '/')) { |
3115 p += 2; |
3112 p += 2; |
3116 int q = scan(p, n, "", "/?#"); |
3113 int q = scan(p, n, "/?#"); |
3117 if (q > p) { |
3114 if (q > p) { |
3118 p = parseAuthority(p, q); |
3115 p = parseAuthority(p, q); |
3119 } else if (q < n) { |
3116 } else if (q < n) { |
3120 // DEVIATION: Allow empty authority prior to non-empty |
3117 // DEVIATION: Allow empty authority prior to non-empty |
3121 // path, query component or fragment identifier |
3118 // path, query component or fragment identifier |
3122 } else |
3119 } else |
3123 failExpecting("authority", p); |
3120 failExpecting("authority", p); |
3124 } |
3121 } |
3125 int q = scan(p, n, "", "?#"); // DEVIATION: May be empty |
3122 int q = scan(p, n, "?#"); // DEVIATION: May be empty |
3126 checkChars(p, q, L_PATH, H_PATH, "path"); |
3123 checkChars(p, q, L_PATH, H_PATH, "path"); |
3127 path = substring(p, q); |
3124 path = input.substring(p, q); |
3128 p = q; |
3125 p = q; |
3129 if (at(p, n, '?')) { |
3126 if (at(p, n, '?')) { |
3130 p++; |
3127 p++; |
3131 q = scan(p, n, "", "#"); |
3128 q = scan(p, n, "#"); |
3132 checkChars(p, q, L_URIC, H_URIC, "query"); |
3129 checkChars(p, q, L_URIC, H_URIC, "query"); |
3133 query = substring(p, q); |
3130 query = input.substring(p, q); |
3134 p = q; |
3131 p = q; |
3135 } |
3132 } |
3136 return p; |
3133 return p; |
3137 } |
3134 } |
3138 |
3135 |
3152 URISyntaxException ex = null; |
3149 URISyntaxException ex = null; |
3153 |
3150 |
3154 boolean serverChars; |
3151 boolean serverChars; |
3155 boolean regChars; |
3152 boolean regChars; |
3156 |
3153 |
3157 if (scan(p, n, "", "]") > p) { |
3154 if (scan(p, n, "]") > p) { |
3158 // contains a literal IPv6 address, therefore % is allowed |
3155 // contains a literal IPv6 address, therefore % is allowed |
3159 serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n); |
3156 serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n); |
3160 } else { |
3157 } else { |
3161 serverChars = (scan(p, n, L_SERVER, H_SERVER) == n); |
3158 serverChars = (scan(p, n, L_SERVER, H_SERVER) == n); |
3162 } |
3159 } |
3163 regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n); |
3160 regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n); |
3164 |
3161 |
3165 if (regChars && !serverChars) { |
3162 if (regChars && !serverChars) { |
3166 // Must be a registry-based authority |
3163 // Must be a registry-based authority |
3167 authority = substring(p, n); |
3164 authority = input.substring(p, n); |
3168 return n; |
3165 return n; |
3169 } |
3166 } |
3170 |
3167 |
3171 if (serverChars) { |
3168 if (serverChars) { |
3172 // Might be (probably is) a server-based authority, so attempt |
3169 // Might be (probably is) a server-based authority, so attempt |
3174 // as a registry-based authority. |
3171 // as a registry-based authority. |
3175 try { |
3172 try { |
3176 q = parseServer(p, n); |
3173 q = parseServer(p, n); |
3177 if (q < n) |
3174 if (q < n) |
3178 failExpecting("end of authority", q); |
3175 failExpecting("end of authority", q); |
3179 authority = substring(p, n); |
3176 authority = input.substring(p, n); |
3180 } catch (URISyntaxException x) { |
3177 } catch (URISyntaxException x) { |
3181 // Undo results of failed parse |
3178 // Undo results of failed parse |
3182 userInfo = null; |
3179 userInfo = null; |
3183 host = null; |
3180 host = null; |
3184 port = -1; |
3181 port = -1; |
3222 |
3219 |
3223 // userinfo |
3220 // userinfo |
3224 q = scan(p, n, "/?#", "@"); |
3221 q = scan(p, n, "/?#", "@"); |
3225 if ((q >= p) && at(q, n, '@')) { |
3222 if ((q >= p) && at(q, n, '@')) { |
3226 checkChars(p, q, L_USERINFO, H_USERINFO, "user info"); |
3223 checkChars(p, q, L_USERINFO, H_USERINFO, "user info"); |
3227 userInfo = substring(p, q); |
3224 userInfo = input.substring(p, q); |
3228 p = q + 1; // Skip '@' |
3225 p = q + 1; // Skip '@' |
3229 } |
3226 } |
3230 |
3227 |
3231 // hostname, IPv4 address, or IPv6 address |
3228 // hostname, IPv4 address, or IPv6 address |
3232 if (at(p, n, '[')) { |
3229 if (at(p, n, '[')) { |
3233 // DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732 |
3230 // DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732 |
3234 p++; |
3231 p++; |
3235 q = scan(p, n, "/?#", "]"); |
3232 q = scan(p, n, "/?#", "]"); |
3236 if ((q > p) && at(q, n, ']')) { |
3233 if ((q > p) && at(q, n, ']')) { |
3237 // look for a "%" scope id |
3234 // look for a "%" scope id |
3238 int r = scan (p, q, "", "%"); |
3235 int r = scan (p, q, "%"); |
3239 if (r > p) { |
3236 if (r > p) { |
3240 parseIPv6Reference(p, r); |
3237 parseIPv6Reference(p, r); |
3241 if (r+1 == q) { |
3238 if (r+1 == q) { |
3242 fail ("scope id expected"); |
3239 fail ("scope id expected"); |
3243 } |
3240 } |
3244 checkChars (r+1, q, L_SCOPE_ID, H_SCOPE_ID, |
3241 checkChars (r+1, q, L_SCOPE_ID, H_SCOPE_ID, |
3245 "scope id"); |
3242 "scope id"); |
3246 } else { |
3243 } else { |
3247 parseIPv6Reference(p, q); |
3244 parseIPv6Reference(p, q); |
3248 } |
3245 } |
3249 host = substring(p-1, q+1); |
3246 host = input.substring(p-1, q+1); |
3250 p = q + 1; |
3247 p = q + 1; |
3251 } else { |
3248 } else { |
3252 failExpecting("closing bracket for IPv6 address", q); |
3249 failExpecting("closing bracket for IPv6 address", q); |
3253 } |
3250 } |
3254 } else { |
3251 } else { |
3259 } |
3256 } |
3260 |
3257 |
3261 // port |
3258 // port |
3262 if (at(p, n, ':')) { |
3259 if (at(p, n, ':')) { |
3263 p++; |
3260 p++; |
3264 q = scan(p, n, "", "/"); |
3261 q = scan(p, n, "/"); |
3265 if (q > p) { |
3262 if (q > p) { |
3266 checkChars(p, q, L_DIGIT, H_DIGIT, "port number"); |
3263 checkChars(p, q, L_DIGIT, H_DIGIT, "port number"); |
3267 try { |
3264 try { |
3268 port = Integer.parseInt(input, p, q, 10); |
3265 port = Integer.parseInt(input, p, q, 10); |
3269 } catch (NumberFormatException x) { |
3266 } catch (NumberFormatException x) { |
3359 |
3356 |
3360 if (p > start && p < n) { |
3357 if (p > start && p < n) { |
3361 // IPv4 address is followed by something - check that |
3358 // IPv4 address is followed by something - check that |
3362 // it's a ":" as this is the only valid character to |
3359 // it's a ":" as this is the only valid character to |
3363 // follow an address. |
3360 // follow an address. |
3364 if (charAt(p) != ':') { |
3361 if (input.charAt(p) != ':') { |
3365 p = -1; |
3362 p = -1; |
3366 } |
3363 } |
3367 } |
3364 } |
3368 |
3365 |
3369 if (p > start) |
3366 if (p > start) |
3370 host = substring(start, p); |
3367 host = input.substring(start, p); |
3371 |
3368 |
3372 return p; |
3369 return p; |
3373 } |
3370 } |
3374 |
3371 |
3375 // hostname = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ] |
3372 // hostname = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ] |
3391 l = p; |
3388 l = p; |
3392 if (q > p) { |
3389 if (q > p) { |
3393 p = q; |
3390 p = q; |
3394 q = scan(p, n, L_ALPHANUM | L_DASH, H_ALPHANUM | H_DASH); |
3391 q = scan(p, n, L_ALPHANUM | L_DASH, H_ALPHANUM | H_DASH); |
3395 if (q > p) { |
3392 if (q > p) { |
3396 if (charAt(q - 1) == '-') |
3393 if (input.charAt(q - 1) == '-') |
3397 fail("Illegal character in hostname", q - 1); |
3394 fail("Illegal character in hostname", q - 1); |
3398 p = q; |
3395 p = q; |
3399 } |
3396 } |
3400 } |
3397 } |
3401 q = scan(p, n, '.'); |
3398 q = scan(p, n, '.'); |
3410 if (l < 0) |
3407 if (l < 0) |
3411 failExpecting("hostname", start); |
3408 failExpecting("hostname", start); |
3412 |
3409 |
3413 // for a fully qualified hostname check that the rightmost |
3410 // for a fully qualified hostname check that the rightmost |
3414 // label starts with an alpha character. |
3411 // label starts with an alpha character. |
3415 if (l > start && !match(charAt(l), L_ALPHA, H_ALPHA)) { |
3412 if (l > start && !match(input.charAt(l), L_ALPHA, H_ALPHA)) { |
3416 fail("Illegal character in hostname", l); |
3413 fail("Illegal character in hostname", l); |
3417 } |
3414 } |
3418 |
3415 |
3419 host = substring(start, p); |
3416 host = input.substring(start, p); |
3420 return p; |
3417 return p; |
3421 } |
3418 } |
3422 |
3419 |
3423 |
3420 |
3424 // IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture |
3421 // IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture |