2526 // This approach is more efficient than sequentially searching arrays of |
2525 // This approach is more efficient than sequentially searching arrays of |
2527 // permitted characters. It could be made still more efficient by |
2526 // permitted characters. It could be made still more efficient by |
2528 // precompiling the mask information so that a character's presence in a |
2527 // precompiling the mask information so that a character's presence in a |
2529 // given mask could be determined by a single table lookup. |
2528 // given mask could be determined by a single table lookup. |
2530 |
2529 |
|
2530 // To save startup time, we manually calculate the low-/highMask constants. |
|
2531 // For reference, the following methods were used to calculate the values: |
|
2532 |
2531 // Compute the low-order mask for the characters in the given string |
2533 // Compute the low-order mask for the characters in the given string |
2532 private static long lowMask(String chars) { |
2534 // private static long lowMask(String chars) { |
2533 int n = chars.length(); |
2535 // int n = chars.length(); |
2534 long m = 0; |
2536 // long m = 0; |
2535 for (int i = 0; i < n; i++) { |
2537 // for (int i = 0; i < n; i++) { |
2536 char c = chars.charAt(i); |
2538 // char c = chars.charAt(i); |
2537 if (c < 64) |
2539 // if (c < 64) |
2538 m |= (1L << c); |
2540 // m |= (1L << c); |
2539 } |
2541 // } |
2540 return m; |
2542 // return m; |
2541 } |
2543 // } |
2542 |
2544 |
2543 // Compute the high-order mask for the characters in the given string |
2545 // Compute the high-order mask for the characters in the given string |
2544 private static long highMask(String chars) { |
2546 // private static long highMask(String chars) { |
2545 int n = chars.length(); |
2547 // int n = chars.length(); |
2546 long m = 0; |
2548 // long m = 0; |
2547 for (int i = 0; i < n; i++) { |
2549 // for (int i = 0; i < n; i++) { |
2548 char c = chars.charAt(i); |
2550 // char c = chars.charAt(i); |
2549 if ((c >= 64) && (c < 128)) |
2551 // if ((c >= 64) && (c < 128)) |
2550 m |= (1L << (c - 64)); |
2552 // m |= (1L << (c - 64)); |
2551 } |
2553 // } |
2552 return m; |
2554 // return m; |
2553 } |
2555 // } |
2554 |
2556 |
2555 // Compute a low-order mask for the characters |
2557 // Compute a low-order mask for the characters |
2556 // between first and last, inclusive |
2558 // between first and last, inclusive |
2557 private static long lowMask(char first, char last) { |
2559 // private static long lowMask(char first, char last) { |
2558 long m = 0; |
2560 // long m = 0; |
2559 int f = Math.max(Math.min(first, 63), 0); |
2561 // int f = Math.max(Math.min(first, 63), 0); |
2560 int l = Math.max(Math.min(last, 63), 0); |
2562 // int l = Math.max(Math.min(last, 63), 0); |
2561 for (int i = f; i <= l; i++) |
2563 // for (int i = f; i <= l; i++) |
2562 m |= 1L << i; |
2564 // m |= 1L << i; |
2563 return m; |
2565 // return m; |
2564 } |
2566 // } |
2565 |
2567 |
2566 // Compute a high-order mask for the characters |
2568 // Compute a high-order mask for the characters |
2567 // between first and last, inclusive |
2569 // between first and last, inclusive |
2568 private static long highMask(char first, char last) { |
2570 // private static long highMask(char first, char last) { |
2569 long m = 0; |
2571 // long m = 0; |
2570 int f = Math.max(Math.min(first, 127), 64) - 64; |
2572 // int f = Math.max(Math.min(first, 127), 64) - 64; |
2571 int l = Math.max(Math.min(last, 127), 64) - 64; |
2573 // int l = Math.max(Math.min(last, 127), 64) - 64; |
2572 for (int i = f; i <= l; i++) |
2574 // for (int i = f; i <= l; i++) |
2573 m |= 1L << i; |
2575 // m |= 1L << i; |
2574 return m; |
2576 // return m; |
2575 } |
2577 // } |
2576 |
2578 |
2577 // Tell whether the given character is permitted by the given mask pair |
2579 // Tell whether the given character is permitted by the given mask pair |
2578 private static boolean match(char c, long lowMask, long highMask) { |
2580 private static boolean match(char c, long lowMask, long highMask) { |
2579 if (c == 0) // 0 doesn't have a slot in the mask. So, it never matches. |
2581 if (c == 0) // 0 doesn't have a slot in the mask. So, it never matches. |
2580 return false; |
2582 return false; |
2588 // Character-class masks, in reverse order from RFC2396 because |
2590 // Character-class masks, in reverse order from RFC2396 because |
2589 // initializers for static fields cannot make forward references. |
2591 // initializers for static fields cannot make forward references. |
2590 |
2592 |
2591 // digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | |
2593 // digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | |
2592 // "8" | "9" |
2594 // "8" | "9" |
2593 private static final long L_DIGIT = lowMask('0', '9'); |
2595 private static final long L_DIGIT = 0x3FF000000000000L; // lowMask('0', '9'); |
2594 private static final long H_DIGIT = 0L; |
2596 private static final long H_DIGIT = 0L; |
2595 |
2597 |
2596 // upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | |
2598 // upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | |
2597 // "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | |
2599 // "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | |
2598 // "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" |
2600 // "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" |
2599 private static final long L_UPALPHA = 0L; |
2601 private static final long L_UPALPHA = 0L; |
2600 private static final long H_UPALPHA = highMask('A', 'Z'); |
2602 private static final long H_UPALPHA = 0x7FFFFFEL; // highMask('A', 'Z'); |
2601 |
2603 |
2602 // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | |
2604 // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | |
2603 // "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | |
2605 // "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | |
2604 // "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" |
2606 // "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" |
2605 private static final long L_LOWALPHA = 0L; |
2607 private static final long L_LOWALPHA = 0L; |
2606 private static final long H_LOWALPHA = highMask('a', 'z'); |
2608 private static final long H_LOWALPHA = 0x7FFFFFE00000000L; // highMask('a', 'z'); |
2607 |
2609 |
2608 // alpha = lowalpha | upalpha |
2610 // alpha = lowalpha | upalpha |
2609 private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA; |
2611 private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA; |
2610 private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA; |
2612 private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA; |
2611 |
2613 |
2614 private static final long H_ALPHANUM = H_DIGIT | H_ALPHA; |
2616 private static final long H_ALPHANUM = H_DIGIT | H_ALPHA; |
2615 |
2617 |
2616 // hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | |
2618 // hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | |
2617 // "a" | "b" | "c" | "d" | "e" | "f" |
2619 // "a" | "b" | "c" | "d" | "e" | "f" |
2618 private static final long L_HEX = L_DIGIT; |
2620 private static final long L_HEX = L_DIGIT; |
2619 private static final long H_HEX = highMask('A', 'F') | highMask('a', 'f'); |
2621 private static final long H_HEX = 0x7E0000007EL; // highMask('A', 'F') | highMask('a', 'f'); |
2620 |
2622 |
2621 // mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | |
2623 // mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | |
2622 // "(" | ")" |
2624 // "(" | ")" |
2623 private static final long L_MARK = lowMask("-_.!~*'()"); |
2625 private static final long L_MARK = 0x678200000000L; // lowMask("-_.!~*'()"); |
2624 private static final long H_MARK = highMask("-_.!~*'()"); |
2626 private static final long H_MARK = 0x4000000080000000L; // highMask("-_.!~*'()"); |
2625 |
2627 |
2626 // unreserved = alphanum | mark |
2628 // unreserved = alphanum | mark |
2627 private static final long L_UNRESERVED = L_ALPHANUM | L_MARK; |
2629 private static final long L_UNRESERVED = L_ALPHANUM | L_MARK; |
2628 private static final long H_UNRESERVED = H_ALPHANUM | H_MARK; |
2630 private static final long H_UNRESERVED = H_ALPHANUM | H_MARK; |
2629 |
2631 |
2630 // reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | |
2632 // reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | |
2631 // "$" | "," | "[" | "]" |
2633 // "$" | "," | "[" | "]" |
2632 // Added per RFC2732: "[", "]" |
2634 // Added per RFC2732: "[", "]" |
2633 private static final long L_RESERVED = lowMask(";/?:@&=+$,[]"); |
2635 private static final long L_RESERVED = 0xAC00985000000000L; // lowMask(";/?:@&=+$,[]"); |
2634 private static final long H_RESERVED = highMask(";/?:@&=+$,[]"); |
2636 private static final long H_RESERVED = 0x28000001L; // highMask(";/?:@&=+$,[]"); |
2635 |
2637 |
2636 // The zero'th bit is used to indicate that escape pairs and non-US-ASCII |
2638 // The zero'th bit is used to indicate that escape pairs and non-US-ASCII |
2637 // characters are allowed; this is handled by the scanEscape method below. |
2639 // characters are allowed; this is handled by the scanEscape method below. |
2638 private static final long L_ESCAPED = 1L; |
2640 private static final long L_ESCAPED = 1L; |
2639 private static final long H_ESCAPED = 0L; |
2641 private static final long H_ESCAPED = 0L; |
2643 private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED; |
2645 private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED; |
2644 |
2646 |
2645 // pchar = unreserved | escaped | |
2647 // pchar = unreserved | escaped | |
2646 // ":" | "@" | "&" | "=" | "+" | "$" | "," |
2648 // ":" | "@" | "&" | "=" | "+" | "$" | "," |
2647 private static final long L_PCHAR |
2649 private static final long L_PCHAR |
2648 = L_UNRESERVED | L_ESCAPED | lowMask(":@&=+$,"); |
2650 = L_UNRESERVED | L_ESCAPED | 0x2400185000000000L; // lowMask(":@&=+$,"); |
2649 private static final long H_PCHAR |
2651 private static final long H_PCHAR |
2650 = H_UNRESERVED | H_ESCAPED | highMask(":@&=+$,"); |
2652 = H_UNRESERVED | H_ESCAPED | 0x1L; // highMask(":@&=+$,"); |
2651 |
2653 |
2652 // All valid path characters |
2654 // All valid path characters |
2653 private static final long L_PATH = L_PCHAR | lowMask(";/"); |
2655 private static final long L_PATH = L_PCHAR | 0x800800000000000L; // lowMask(";/"); |
2654 private static final long H_PATH = H_PCHAR | highMask(";/"); |
2656 private static final long H_PATH = H_PCHAR; // highMask(";/") == 0x0L; |
2655 |
2657 |
2656 // Dash, for use in domainlabel and toplabel |
2658 // Dash, for use in domainlabel and toplabel |
2657 private static final long L_DASH = lowMask("-"); |
2659 private static final long L_DASH = 0x200000000000L; // lowMask("-"); |
2658 private static final long H_DASH = highMask("-"); |
2660 private static final long H_DASH = 0x0L; // highMask("-"); |
2659 |
2661 |
2660 // Dot, for use in hostnames |
2662 // Dot, for use in hostnames |
2661 private static final long L_DOT = lowMask("."); |
2663 private static final long L_DOT = 0x400000000000L; // lowMask("."); |
2662 private static final long H_DOT = highMask("."); |
2664 private static final long H_DOT = 0x0L; // highMask("."); |
2663 |
2665 |
2664 // userinfo = *( unreserved | escaped | |
2666 // userinfo = *( unreserved | escaped | |
2665 // ";" | ":" | "&" | "=" | "+" | "$" | "," ) |
2667 // ";" | ":" | "&" | "=" | "+" | "$" | "," ) |
2666 private static final long L_USERINFO |
2668 private static final long L_USERINFO |
2667 = L_UNRESERVED | L_ESCAPED | lowMask(";:&=+$,"); |
2669 = L_UNRESERVED | L_ESCAPED | 0x2C00185000000000L; // lowMask(";:&=+$,"); |
2668 private static final long H_USERINFO |
2670 private static final long H_USERINFO |
2669 = H_UNRESERVED | H_ESCAPED | highMask(";:&=+$,"); |
2671 = H_UNRESERVED | H_ESCAPED; // | highMask(";:&=+$,") == 0L; |
2670 |
2672 |
2671 // reg_name = 1*( unreserved | escaped | "$" | "," | |
2673 // reg_name = 1*( unreserved | escaped | "$" | "," | |
2672 // ";" | ":" | "@" | "&" | "=" | "+" ) |
2674 // ";" | ":" | "@" | "&" | "=" | "+" ) |
2673 private static final long L_REG_NAME |
2675 private static final long L_REG_NAME |
2674 = L_UNRESERVED | L_ESCAPED | lowMask("$,;:@&=+"); |
2676 = L_UNRESERVED | L_ESCAPED | 0x2C00185000000000L; // lowMask("$,;:@&=+"); |
2675 private static final long H_REG_NAME |
2677 private static final long H_REG_NAME |
2676 = H_UNRESERVED | H_ESCAPED | highMask("$,;:@&=+"); |
2678 = H_UNRESERVED | H_ESCAPED | 0x1L; // highMask("$,;:@&=+"); |
2677 |
2679 |
2678 // All valid characters for server-based authorities |
2680 // All valid characters for server-based authorities |
2679 private static final long L_SERVER |
2681 private static final long L_SERVER |
2680 = L_USERINFO | L_ALPHANUM | L_DASH | lowMask(".:@[]"); |
2682 = L_USERINFO | L_ALPHANUM | L_DASH | 0x400400000000000L; // lowMask(".:@[]"); |
2681 private static final long H_SERVER |
2683 private static final long H_SERVER |
2682 = H_USERINFO | H_ALPHANUM | H_DASH | highMask(".:@[]"); |
2684 = H_USERINFO | H_ALPHANUM | H_DASH | 0x28000001L; // highMask(".:@[]"); |
2683 |
2685 |
2684 // Special case of server authority that represents an IPv6 address |
2686 // Special case of server authority that represents an IPv6 address |
2685 // In this case, a % does not signify an escape sequence |
2687 // In this case, a % does not signify an escape sequence |
2686 private static final long L_SERVER_PERCENT |
2688 private static final long L_SERVER_PERCENT |
2687 = L_SERVER | lowMask("%"); |
2689 = L_SERVER | 0x2000000000L; // lowMask("%"); |
2688 private static final long H_SERVER_PERCENT |
2690 private static final long H_SERVER_PERCENT |
2689 = H_SERVER | highMask("%"); |
2691 = H_SERVER; // | highMask("%") == 0L; |
2690 private static final long L_LEFT_BRACKET = lowMask("["); |
|
2691 private static final long H_LEFT_BRACKET = highMask("["); |
|
2692 |
2692 |
2693 // scheme = alpha *( alpha | digit | "+" | "-" | "." ) |
2693 // scheme = alpha *( alpha | digit | "+" | "-" | "." ) |
2694 private static final long L_SCHEME = L_ALPHA | L_DIGIT | lowMask("+-."); |
2694 private static final long L_SCHEME = L_ALPHA | L_DIGIT | 0x680000000000L; // lowMask("+-."); |
2695 private static final long H_SCHEME = H_ALPHA | H_DIGIT | highMask("+-."); |
2695 private static final long H_SCHEME = H_ALPHA | H_DIGIT; // | highMask("+-.") == 0L |
2696 |
2696 |
2697 // scope_id = alpha | digit | "_" | "." |
2697 // scope_id = alpha | digit | "_" | "." |
2698 private static final long L_SCOPE_ID |
2698 private static final long L_SCOPE_ID |
2699 = L_ALPHANUM | lowMask("_."); |
2699 = L_ALPHANUM | 0x400000000000L; // lowMask("_."); |
2700 private static final long H_SCOPE_ID |
2700 private static final long H_SCOPE_ID |
2701 = H_ALPHANUM | highMask("_."); |
2701 = H_ALPHANUM | 0x80000000L; // highMask("_."); |
2702 |
2702 |
2703 // -- Escaping and encoding -- |
2703 // -- Escaping and encoding -- |
2704 |
2704 |
2705 private static final char[] hexDigits = { |
2705 private static final char[] hexDigits = { |
2706 '0', '1', '2', '3', '4', '5', '6', '7', |
2706 '0', '1', '2', '3', '4', '5', '6', '7', |