author | redestad |
Thu, 13 Dec 2018 15:31:05 +0100 | |
changeset 53018 | 8bf9268df0e2 |
parent 48897 | 3f19b5965355 |
child 54688 | 96ad739cfc39 |
permissions | -rw-r--r-- |
2 | 1 |
/* |
5506 | 2 |
* Copyright (c) 1998, 2007, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
||
26 |
package sun.net.www; |
|
27 |
||
28 |
import java.io.File; |
|
29 |
import java.net.URL; |
|
30 |
import java.net.MalformedURLException; |
|
31 |
import java.net.URI; |
|
32 |
import java.net.URISyntaxException; |
|
33 |
import java.nio.ByteBuffer; |
|
34 |
import java.nio.CharBuffer; |
|
35 |
import java.nio.charset.CharacterCodingException; |
|
47024 | 36 |
|
2 | 37 |
import sun.nio.cs.ThreadLocalCoders; |
38 |
import java.nio.charset.CharsetDecoder; |
|
39 |
import java.nio.charset.CoderResult; |
|
40 |
import java.nio.charset.CodingErrorAction; |
|
41 |
||
42 |
/** |
|
43 |
* A class that contains useful routines common to sun.net.www |
|
44 |
* @author Mike McCloskey |
|
45 |
*/ |
|
46 |
||
48897
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
47 |
public final class ParseUtil { |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
48 |
|
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
49 |
// Static utility holder: the private constructor prevents instantiation.
private ParseUtil() {
}
2 | 50 |
|
51 |
/** |
|
52 |
* Constructs an encoded version of the specified path string suitable |
|
53 |
* for use in the construction of a URL. |
|
54 |
* |
|
55 |
* A path separator is replaced by a forward slash. The string is UTF8 |
|
56 |
* encoded. The % escape sequence is used for characters that are above |
|
57 |
* 0x7F or those defined in RFC2396 as reserved or excluded in the path |
|
58 |
* component of a URL. |
|
59 |
*/ |
|
60 |
public static String encodePath(String path) { |
|
61 |
return encodePath(path, true); |
|
62 |
} |
|
63 |
/* |
|
64 |
* flag indicates whether path uses platform dependent |
|
65 |
* File.separatorChar or not. True indicates path uses platform |
|
66 |
* dependent File.separatorChar. |
|
67 |
*/ |
|
68 |
public static String encodePath(String path, boolean flag) { |
|
42992
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
69 |
if (flag && File.separatorChar != '/') { |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
70 |
return encodePath(path, 0, File.separatorChar); |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
71 |
} else { |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
72 |
int index = firstEncodeIndex(path); |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
73 |
if (index > -1) { |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
74 |
return encodePath(path, index, '/'); |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
75 |
} else { |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
76 |
return path; |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
77 |
} |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
78 |
} |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
79 |
} |
2 | 80 |
|
42992
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
81 |
private static int firstEncodeIndex(String path) { |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
82 |
int len = path.length(); |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
83 |
for (int i = 0; i < len; i++) { |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
84 |
char c = path.charAt(i); |
48897
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
85 |
// Ordering in the following test is performance sensitive, |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
86 |
// and typically paths have most chars in the a-z range, then |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
87 |
// in the symbol range '&'-':' (includes '.', '/' and '0'-'9') |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
88 |
// and more rarely in the A-Z range. |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
89 |
if (c >= 'a' && c <= 'z' || |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
90 |
c >= '&' && c <= ':' || |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
91 |
c >= 'A' && c <= 'Z') { |
42992
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
92 |
continue; |
47024 | 93 |
} else if (c > 0x007F || match(c, L_ENCODED, H_ENCODED)) { |
42992
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
94 |
return i; |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
95 |
} |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
96 |
} |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
97 |
return -1; |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
98 |
} |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
99 |
|
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
100 |
private static String encodePath(String path, int index, char sep) { |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
101 |
char[] pathCC = path.toCharArray(); |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
102 |
char[] retCC = new char[pathCC.length * 2 + 16 - index]; |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
103 |
if (index > 0) { |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
104 |
System.arraycopy(pathCC, 0, retCC, 0, index); |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
105 |
} |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
106 |
int retLen = index; |
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
107 |
|
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
108 |
for (int i = index; i < pathCC.length; i++) { |
2 | 109 |
char c = pathCC[i]; |
42992
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
110 |
if (c == sep) |
2 | 111 |
retCC[retLen++] = '/'; |
112 |
else { |
|
113 |
if (c <= 0x007F) { |
|
114 |
if (c >= 'a' && c <= 'z' || |
|
115 |
c >= 'A' && c <= 'Z' || |
|
116 |
c >= '0' && c <= '9') { |
|
117 |
retCC[retLen++] = c; |
|
47024 | 118 |
} else if (match(c, L_ENCODED, H_ENCODED)) { |
2 | 119 |
retLen = escape(retCC, c, retLen); |
42992
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
120 |
} else { |
2 | 121 |
retCC[retLen++] = c; |
42992
c692f1d73e14
8170785: Excessive allocation in ParseUtil.encodePath
redestad
parents:
32649
diff
changeset
|
122 |
} |
2 | 123 |
} else if (c > 0x07FF) { |
124 |
retLen = escape(retCC, (char)(0xE0 | ((c >> 12) & 0x0F)), retLen); |
|
125 |
retLen = escape(retCC, (char)(0x80 | ((c >> 6) & 0x3F)), retLen); |
|
126 |
retLen = escape(retCC, (char)(0x80 | ((c >> 0) & 0x3F)), retLen); |
|
127 |
} else { |
|
128 |
retLen = escape(retCC, (char)(0xC0 | ((c >> 6) & 0x1F)), retLen); |
|
129 |
retLen = escape(retCC, (char)(0x80 | ((c >> 0) & 0x3F)), retLen); |
|
130 |
} |
|
131 |
} |
|
132 |
//worst case scenario for character [0x7ff-] every single |
|
133 |
//character will be encoded into 9 characters. |
|
134 |
if (retLen + 9 > retCC.length) { |
|
135 |
int newLen = retCC.length * 2 + 16; |
|
136 |
if (newLen < 0) { |
|
137 |
newLen = Integer.MAX_VALUE; |
|
138 |
} |
|
139 |
char[] buf = new char[newLen]; |
|
140 |
System.arraycopy(retCC, 0, buf, 0, retLen); |
|
141 |
retCC = buf; |
|
142 |
} |
|
143 |
} |
|
144 |
return new String(retCC, 0, retLen); |
|
145 |
} |
|
146 |
||
147 |
/** |
|
148 |
* Appends the URL escape sequence for the specified char to the |
|
149 |
* specified StringBuffer. |
|
150 |
*/ |
|
151 |
private static int escape(char[] cc, char c, int index) { |
|
152 |
cc[index++] = '%'; |
|
153 |
cc[index++] = Character.forDigit((c >> 4) & 0xF, 16); |
|
154 |
cc[index++] = Character.forDigit(c & 0xF, 16); |
|
155 |
return index; |
|
156 |
} |
|
157 |
||
158 |
/** |
|
159 |
* Un-escape and return the character at position i in string s. |
|
160 |
*/ |
|
161 |
private static byte unescape(String s, int i) { |
|
26720
6b160d97c51d
8055032: Improve numerical parsing in java.net and sun.net
redestad
parents:
25859
diff
changeset
|
162 |
return (byte) Integer.parseInt(s, i + 1, i + 3, 16); |
2 | 163 |
} |
164 |
||
165 |
||
166 |
/** |
|
167 |
* Returns a new String constructed from the specified String by replacing |
|
168 |
* the URL escape sequences and UTF8 encoding with the characters they |
|
169 |
* represent. |
|
170 |
*/ |
|
171 |
public static String decode(String s) { |
|
172 |
int n = s.length(); |
|
173 |
if ((n == 0) || (s.indexOf('%') < 0)) |
|
174 |
return s; |
|
175 |
||
176 |
StringBuilder sb = new StringBuilder(n); |
|
177 |
ByteBuffer bb = ByteBuffer.allocate(n); |
|
178 |
CharBuffer cb = CharBuffer.allocate(n); |
|
179 |
CharsetDecoder dec = ThreadLocalCoders.decoderFor("UTF-8") |
|
180 |
.onMalformedInput(CodingErrorAction.REPORT) |
|
181 |
.onUnmappableCharacter(CodingErrorAction.REPORT); |
|
182 |
||
183 |
char c = s.charAt(0); |
|
184 |
for (int i = 0; i < n;) { |
|
185 |
assert c == s.charAt(i); |
|
186 |
if (c != '%') { |
|
187 |
sb.append(c); |
|
188 |
if (++i >= n) |
|
189 |
break; |
|
190 |
c = s.charAt(i); |
|
191 |
continue; |
|
192 |
} |
|
193 |
bb.clear(); |
|
194 |
int ui = i; |
|
195 |
for (;;) { |
|
196 |
assert (n - i >= 2); |
|
197 |
try { |
|
198 |
bb.put(unescape(s, i)); |
|
199 |
} catch (NumberFormatException e) { |
|
200 |
throw new IllegalArgumentException(); |
|
201 |
} |
|
202 |
i += 3; |
|
203 |
if (i >= n) |
|
204 |
break; |
|
205 |
c = s.charAt(i); |
|
206 |
if (c != '%') |
|
207 |
break; |
|
208 |
} |
|
209 |
bb.flip(); |
|
210 |
cb.clear(); |
|
211 |
dec.reset(); |
|
212 |
CoderResult cr = dec.decode(bb, cb, true); |
|
213 |
if (cr.isError()) |
|
214 |
throw new IllegalArgumentException("Error decoding percent encoded characters"); |
|
215 |
cr = dec.flush(cb); |
|
216 |
if (cr.isError()) |
|
217 |
throw new IllegalArgumentException("Error decoding percent encoded characters"); |
|
218 |
sb.append(cb.flip().toString()); |
|
219 |
} |
|
220 |
||
221 |
return sb.toString(); |
|
222 |
} |
|
223 |
||
224 |
/** |
|
225 |
* Returns a canonical version of the specified string. |
|
226 |
*/ |
|
48897
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
227 |
public static String canonizeString(String file) { |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
228 |
int len = file.length(); |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
229 |
if (len == 0 || (file.indexOf("./") == -1 && file.charAt(len - 1) != '.')) { |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
230 |
return file; |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
231 |
} else { |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
232 |
return doCanonize(file); |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
233 |
} |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
234 |
} |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
235 |
|
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
236 |
private static String doCanonize(String file) { |
3f19b5965355
8197849: Misc improvements to jar resource handling
redestad
parents:
47216
diff
changeset
|
237 |
int i, lim; |
2 | 238 |
|
239 |
// Remove embedded /../ |
|
240 |
while ((i = file.indexOf("/../")) >= 0) { |
|
241 |
if ((lim = file.lastIndexOf('/', i - 1)) >= 0) { |
|
242 |
file = file.substring(0, lim) + file.substring(i + 3); |
|
243 |
} else { |
|
244 |
file = file.substring(i + 3); |
|
245 |
} |
|
246 |
} |
|
247 |
// Remove embedded /./ |
|
248 |
while ((i = file.indexOf("/./")) >= 0) { |
|
249 |
file = file.substring(0, i) + file.substring(i + 2); |
|
250 |
} |
|
251 |
// Remove trailing .. |
|
252 |
while (file.endsWith("/..")) { |
|
253 |
i = file.indexOf("/.."); |
|
254 |
if ((lim = file.lastIndexOf('/', i - 1)) >= 0) { |
|
255 |
file = file.substring(0, lim+1); |
|
256 |
} else { |
|
257 |
file = file.substring(0, i); |
|
258 |
} |
|
259 |
} |
|
260 |
// Remove trailing . |
|
261 |
if (file.endsWith("/.")) |
|
262 |
file = file.substring(0, file.length() -1); |
|
263 |
||
264 |
return file; |
|
265 |
} |
|
266 |
||
267 |
public static URL fileToEncodedURL(File file) |
|
268 |
throws MalformedURLException |
|
269 |
{ |
|
270 |
String path = file.getAbsolutePath(); |
|
271 |
path = ParseUtil.encodePath(path); |
|
272 |
if (!path.startsWith("/")) { |
|
273 |
path = "/" + path; |
|
274 |
} |
|
275 |
if (!path.endsWith("/") && file.isDirectory()) { |
|
276 |
path = path + "/"; |
|
277 |
} |
|
278 |
return new URL("file", "", path); |
|
279 |
} |
|
280 |
||
281 |
public static java.net.URI toURI(URL url) { |
|
282 |
String protocol = url.getProtocol(); |
|
283 |
String auth = url.getAuthority(); |
|
284 |
String path = url.getPath(); |
|
285 |
String query = url.getQuery(); |
|
286 |
String ref = url.getRef(); |
|
287 |
if (path != null && !(path.startsWith("/"))) |
|
288 |
path = "/" + path; |
|
289 |
||
290 |
// |
|
291 |
// In java.net.URI class, a port number of -1 implies the default |
|
292 |
// port number. So get it stripped off before creating URI instance. |
|
293 |
// |
|
294 |
if (auth != null && auth.endsWith(":-1")) |
|
295 |
auth = auth.substring(0, auth.length() - 3); |
|
296 |
||
297 |
java.net.URI uri; |
|
298 |
try { |
|
299 |
uri = createURI(protocol, auth, path, query, ref); |
|
300 |
} catch (java.net.URISyntaxException e) { |
|
301 |
uri = null; |
|
302 |
} |
|
303 |
return uri; |
|
304 |
} |
|
305 |
||
306 |
// |
|
307 |
// createURI() and its auxiliary code are cloned from java.net.URI. |
|
308 |
// Most of the code are just copy and paste, except that quote() |
|
309 |
// has been modified to avoid double-escape. |
|
310 |
// |
|
311 |
// Usually it is unacceptable, but we're forced to do it because |
|
312 |
// otherwise we need to change public API, namely java.net.URI's |
|
313 |
// multi-argument constructors. It turns out that the changes cause |
|
314 |
// incompatibilities so can't be done. |
|
315 |
// |
|
316 |
private static URI createURI(String scheme, |
|
317 |
String authority, |
|
318 |
String path, |
|
319 |
String query, |
|
320 |
String fragment) throws URISyntaxException |
|
321 |
{ |
|
322 |
String s = toString(scheme, null, |
|
323 |
authority, null, null, -1, |
|
324 |
path, query, fragment); |
|
325 |
checkPath(s, scheme, path); |
|
326 |
return new URI(s); |
|
327 |
} |
|
328 |
||
329 |
private static String toString(String scheme, |
|
330 |
String opaquePart, |
|
331 |
String authority, |
|
332 |
String userInfo, |
|
333 |
String host, |
|
334 |
int port, |
|
335 |
String path, |
|
336 |
String query, |
|
337 |
String fragment) |
|
338 |
{ |
|
339 |
StringBuffer sb = new StringBuffer(); |
|
340 |
if (scheme != null) { |
|
341 |
sb.append(scheme); |
|
342 |
sb.append(':'); |
|
343 |
} |
|
344 |
appendSchemeSpecificPart(sb, opaquePart, |
|
345 |
authority, userInfo, host, port, |
|
346 |
path, query); |
|
347 |
appendFragment(sb, fragment); |
|
348 |
return sb.toString(); |
|
349 |
} |
|
350 |
||
351 |
private static void appendSchemeSpecificPart(StringBuffer sb, |
|
352 |
String opaquePart, |
|
353 |
String authority, |
|
354 |
String userInfo, |
|
355 |
String host, |
|
356 |
int port, |
|
357 |
String path, |
|
358 |
String query) |
|
359 |
{ |
|
360 |
if (opaquePart != null) { |
|
361 |
/* check if SSP begins with an IPv6 address |
|
362 |
* because we must not quote a literal IPv6 address |
|
363 |
*/ |
|
364 |
if (opaquePart.startsWith("//[")) { |
|
24685
215fa91e1b4c
8044461: Cleanup new Boolean and single character strings
rriggs
parents:
5506
diff
changeset
|
365 |
int end = opaquePart.indexOf(']'); |
215fa91e1b4c
8044461: Cleanup new Boolean and single character strings
rriggs
parents:
5506
diff
changeset
|
366 |
if (end != -1 && opaquePart.indexOf(':')!=-1) { |
2 | 367 |
String doquote, dontquote; |
368 |
if (end == opaquePart.length()) { |
|
369 |
dontquote = opaquePart; |
|
370 |
doquote = ""; |
|
371 |
} else { |
|
372 |
dontquote = opaquePart.substring(0,end+1); |
|
373 |
doquote = opaquePart.substring(end+1); |
|
374 |
} |
|
375 |
sb.append (dontquote); |
|
376 |
sb.append(quote(doquote, L_URIC, H_URIC)); |
|
377 |
} |
|
378 |
} else { |
|
379 |
sb.append(quote(opaquePart, L_URIC, H_URIC)); |
|
380 |
} |
|
381 |
} else { |
|
382 |
appendAuthority(sb, authority, userInfo, host, port); |
|
383 |
if (path != null) |
|
384 |
sb.append(quote(path, L_PATH, H_PATH)); |
|
385 |
if (query != null) { |
|
386 |
sb.append('?'); |
|
387 |
sb.append(quote(query, L_URIC, H_URIC)); |
|
388 |
} |
|
389 |
} |
|
390 |
} |
|
391 |
||
392 |
private static void appendAuthority(StringBuffer sb, |
|
393 |
String authority, |
|
394 |
String userInfo, |
|
395 |
String host, |
|
396 |
int port) |
|
397 |
{ |
|
398 |
if (host != null) { |
|
399 |
sb.append("//"); |
|
400 |
if (userInfo != null) { |
|
401 |
sb.append(quote(userInfo, L_USERINFO, H_USERINFO)); |
|
402 |
sb.append('@'); |
|
403 |
} |
|
404 |
boolean needBrackets = ((host.indexOf(':') >= 0) |
|
405 |
&& !host.startsWith("[") |
|
406 |
&& !host.endsWith("]")); |
|
407 |
if (needBrackets) sb.append('['); |
|
408 |
sb.append(host); |
|
409 |
if (needBrackets) sb.append(']'); |
|
410 |
if (port != -1) { |
|
411 |
sb.append(':'); |
|
412 |
sb.append(port); |
|
413 |
} |
|
414 |
} else if (authority != null) { |
|
415 |
sb.append("//"); |
|
416 |
if (authority.startsWith("[")) { |
|
24685
215fa91e1b4c
8044461: Cleanup new Boolean and single character strings
rriggs
parents:
5506
diff
changeset
|
417 |
int end = authority.indexOf(']'); |
215fa91e1b4c
8044461: Cleanup new Boolean and single character strings
rriggs
parents:
5506
diff
changeset
|
418 |
if (end != -1 && authority.indexOf(':')!=-1) { |
2 | 419 |
String doquote, dontquote; |
420 |
if (end == authority.length()) { |
|
421 |
dontquote = authority; |
|
422 |
doquote = ""; |
|
423 |
} else { |
|
424 |
dontquote = authority.substring(0,end+1); |
|
425 |
doquote = authority.substring(end+1); |
|
426 |
} |
|
427 |
sb.append (dontquote); |
|
428 |
sb.append(quote(doquote, |
|
429 |
L_REG_NAME | L_SERVER, |
|
430 |
H_REG_NAME | H_SERVER)); |
|
431 |
} |
|
432 |
} else { |
|
433 |
sb.append(quote(authority, |
|
434 |
L_REG_NAME | L_SERVER, |
|
435 |
H_REG_NAME | H_SERVER)); |
|
436 |
} |
|
437 |
} |
|
438 |
} |
|
439 |
||
440 |
private static void appendFragment(StringBuffer sb, String fragment) { |
|
441 |
if (fragment != null) { |
|
442 |
sb.append('#'); |
|
443 |
sb.append(quote(fragment, L_URIC, H_URIC)); |
|
444 |
} |
|
445 |
} |
|
446 |
||
447 |
// Quote any characters in s that are not permitted |
|
448 |
// by the given mask pair |
|
449 |
// |
|
450 |
private static String quote(String s, long lowMask, long highMask) { |
|
451 |
int n = s.length(); |
|
452 |
StringBuffer sb = null; |
|
453 |
boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0); |
|
454 |
for (int i = 0; i < s.length(); i++) { |
|
455 |
char c = s.charAt(i); |
|
456 |
if (c < '\u0080') { |
|
457 |
if (!match(c, lowMask, highMask) && !isEscaped(s, i)) { |
|
458 |
if (sb == null) { |
|
459 |
sb = new StringBuffer(); |
|
31471
ae27c6f1d8bf
8077242: (str) Optimize AbstractStringBuilder.append(CharSequence, int, int) for String argument
igerasim
parents:
26720
diff
changeset
|
460 |
sb.append(s, 0, i); |
2 | 461 |
} |
462 |
appendEscape(sb, (byte)c); |
|
463 |
} else { |
|
464 |
if (sb != null) |
|
465 |
sb.append(c); |
|
466 |
} |
|
467 |
} else if (allowNonASCII |
|
468 |
&& (Character.isSpaceChar(c) |
|
469 |
|| Character.isISOControl(c))) { |
|
470 |
if (sb == null) { |
|
471 |
sb = new StringBuffer(); |
|
31471
ae27c6f1d8bf
8077242: (str) Optimize AbstractStringBuilder.append(CharSequence, int, int) for String argument
igerasim
parents:
26720
diff
changeset
|
472 |
sb.append(s, 0, i); |
2 | 473 |
} |
474 |
appendEncoded(sb, c); |
|
475 |
} else { |
|
476 |
if (sb != null) |
|
477 |
sb.append(c); |
|
478 |
} |
|
479 |
} |
|
480 |
return (sb == null) ? s : sb.toString(); |
|
481 |
} |
|
482 |
||
483 |
// |
|
484 |
// To check if the given string has an escaped triplet |
|
485 |
// at the given position |
|
486 |
// |
|
487 |
private static boolean isEscaped(String s, int pos) { |
|
488 |
if (s == null || (s.length() <= (pos + 2))) |
|
489 |
return false; |
|
490 |
||
491 |
return s.charAt(pos) == '%' |
|
492 |
&& match(s.charAt(pos + 1), L_HEX, H_HEX) |
|
493 |
&& match(s.charAt(pos + 2), L_HEX, H_HEX); |
|
494 |
} |
|
495 |
||
496 |
private static void appendEncoded(StringBuffer sb, char c) { |
|
497 |
ByteBuffer bb = null; |
|
498 |
try { |
|
499 |
bb = ThreadLocalCoders.encoderFor("UTF-8") |
|
500 |
.encode(CharBuffer.wrap("" + c)); |
|
501 |
} catch (CharacterCodingException x) { |
|
502 |
assert false; |
|
503 |
} |
|
504 |
while (bb.hasRemaining()) { |
|
505 |
int b = bb.get() & 0xff; |
|
506 |
if (b >= 0x80) |
|
507 |
appendEscape(sb, (byte)b); |
|
508 |
else |
|
509 |
sb.append((char)b); |
|
510 |
} |
|
511 |
} |
|
512 |
||
32649
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
31471
diff
changeset
|
513 |
private static final char[] hexDigits = { |
2 | 514 |
'0', '1', '2', '3', '4', '5', '6', '7', |
515 |
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F' |
|
516 |
}; |
|
517 |
||
518 |
private static void appendEscape(StringBuffer sb, byte b) { |
|
519 |
sb.append('%'); |
|
520 |
sb.append(hexDigits[(b >> 4) & 0x0f]); |
|
521 |
sb.append(hexDigits[(b >> 0) & 0x0f]); |
|
522 |
} |
|
523 |
||
524 |
// Tell whether the given character is permitted by the given mask pair |
|
525 |
private static boolean match(char c, long lowMask, long highMask) { |
|
526 |
if (c < 64) |
|
527 |
return ((1L << c) & lowMask) != 0; |
|
528 |
if (c < 128) |
|
529 |
return ((1L << (c - 64)) & highMask) != 0; |
|
530 |
return false; |
|
531 |
} |
|
532 |
||
533 |
// If a scheme is given then the path, if given, must be absolute |
|
534 |
// |
|
535 |
private static void checkPath(String s, String scheme, String path) |
|
536 |
throws URISyntaxException |
|
537 |
{ |
|
538 |
if (scheme != null) { |
|
53018
8bf9268df0e2
8215281: Use String.isEmpty() when applicable in java.base
redestad
parents:
48897
diff
changeset
|
539 |
if (path != null && !path.isEmpty() && path.charAt(0) != '/') |
2 | 540 |
throw new URISyntaxException(s, |
541 |
"Relative path in absolute URI"); |
|
542 |
} |
|
543 |
} |
|
544 |
||
545 |
||
546 |
// -- Character classes for parsing -- |
|
547 |
||
47024 | 548 |
// To save startup time, we manually calculate the low-/highMask constants. |
549 |
// For reference, the following methods were used to calculate the values: |
|
550 |
||
2 | 551 |
// Compute a low-order mask for the characters |
552 |
// between first and last, inclusive |
|
47024 | 553 |
// private static long lowMask(char first, char last) { |
554 |
// long m = 0; |
|
555 |
// int f = Math.max(Math.min(first, 63), 0); |
|
556 |
// int l = Math.max(Math.min(last, 63), 0); |
|
557 |
// for (int i = f; i <= l; i++) |
|
558 |
// m |= 1L << i; |
|
559 |
// return m; |
|
560 |
// } |
|
2 | 561 |
|
562 |
// Compute the low-order mask for the characters in the given string |
|
47024 | 563 |
// private static long lowMask(String chars) { |
564 |
// int n = chars.length(); |
|
565 |
// long m = 0; |
|
566 |
// for (int i = 0; i < n; i++) { |
|
567 |
// char c = chars.charAt(i); |
|
568 |
// if (c < 64) |
|
569 |
// m |= (1L << c); |
|
570 |
// } |
|
571 |
// return m; |
|
572 |
// } |
|
2 | 573 |
|
574 |
// Compute a high-order mask for the characters |
|
575 |
// between first and last, inclusive |
|
47024 | 576 |
// private static long highMask(char first, char last) { |
577 |
// long m = 0; |
|
578 |
// int f = Math.max(Math.min(first, 127), 64) - 64; |
|
579 |
// int l = Math.max(Math.min(last, 127), 64) - 64; |
|
580 |
// for (int i = f; i <= l; i++) |
|
581 |
// m |= 1L << i; |
|
582 |
// return m; |
|
583 |
// } |
|
2 | 584 |
|
585 |
// Compute the high-order mask for the characters in the given string |
|
47024 | 586 |
// private static long highMask(String chars) { |
587 |
// int n = chars.length(); |
|
588 |
// long m = 0; |
|
589 |
// for (int i = 0; i < n; i++) { |
|
590 |
// char c = chars.charAt(i); |
|
591 |
// if ((c >= 64) && (c < 128)) |
|
592 |
// m |= (1L << (c - 64)); |
|
593 |
// } |
|
594 |
// return m; |
|
595 |
// } |
|
2 | 596 |
|
597 |
||
598 |
// Character-class masks |
|
599 |
||
600 |
// digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | |
|
601 |
// "8" | "9" |
|
47024 | 602 |
private static final long L_DIGIT = 0x3FF000000000000L; // lowMask('0', '9'); |
2 | 603 |
private static final long H_DIGIT = 0L; |
604 |
||
605 |
// hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | |
|
606 |
// "a" | "b" | "c" | "d" | "e" | "f" |
|
607 |
private static final long L_HEX = L_DIGIT; |
|
47024 | 608 |
private static final long H_HEX = 0x7E0000007EL; // highMask('A', 'F') | highMask('a', 'f'); |
2 | 609 |
|
610 |
// upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | |
|
611 |
// "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | |
|
612 |
// "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" |
|
613 |
private static final long L_UPALPHA = 0L; |
|
47024 | 614 |
private static final long H_UPALPHA = 0x7FFFFFEL; // highMask('A', 'Z'); |
2 | 615 |
|
616 |
// lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | |
|
617 |
// "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | |
|
618 |
// "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" |
|
619 |
private static final long L_LOWALPHA = 0L; |
|
47024 | 620 |
private static final long H_LOWALPHA = 0x7FFFFFE00000000L; // highMask('a', 'z'); |
2 | 621 |
|
622 |
// alpha = lowalpha | upalpha |
|
623 |
private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA; |
|
624 |
private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA; |
|
625 |
||
626 |
// alphanum = alpha | digit |
|
627 |
private static final long L_ALPHANUM = L_DIGIT | L_ALPHA; |
|
628 |
private static final long H_ALPHANUM = H_DIGIT | H_ALPHA; |
|
629 |
||
630 |
// mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | |
|
631 |
// "(" | ")" |
|
47024 | 632 |
private static final long L_MARK = 0x678200000000L; // lowMask("-_.!~*'()"); |
633 |
private static final long H_MARK = 0x4000000080000000L; // highMask("-_.!~*'()"); |
|
2 | 634 |
|
635 |
// unreserved = alphanum | mark |
|
636 |
private static final long L_UNRESERVED = L_ALPHANUM | L_MARK; |
|
637 |
private static final long H_UNRESERVED = H_ALPHANUM | H_MARK; |
|
638 |
||
639 |
// reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | |
|
640 |
// "$" | "," | "[" | "]" |
|
641 |
// Added per RFC2732: "[", "]" |
|
47024 | 642 |
private static final long L_RESERVED = 0xAC00985000000000L; // lowMask(";/?:@&=+$,[]"); |
643 |
private static final long H_RESERVED = 0x28000001L; // highMask(";/?:@&=+$,[]"); |
|
2 | 644 |
|
645 |
// The zero'th bit is used to indicate that escape pairs and non-US-ASCII
// characters are allowed; in this class the bit is tested by quote().
|
647 |
private static final long L_ESCAPED = 1L; |
|
648 |
private static final long H_ESCAPED = 0L; |
|
649 |
||
650 |
// uric = reserved | unreserved | escaped |
|
651 |
private static final long L_URIC = L_RESERVED | L_UNRESERVED | L_ESCAPED; |
|
652 |
private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED; |
|
653 |
||
654 |
// pchar = unreserved | escaped | |
|
655 |
// ":" | "@" | "&" | "=" | "+" | "$" | "," |
|
656 |
private static final long L_PCHAR |
|
47024 | 657 |
= L_UNRESERVED | L_ESCAPED | 0x2400185000000000L; // lowMask(":@&=+$,"); |
2 | 658 |
private static final long H_PCHAR |
47024 | 659 |
= H_UNRESERVED | H_ESCAPED | 0x1L; // highMask(":@&=+$,"); |
2 | 660 |
|
661 |
// All valid path characters |
|
47024 | 662 |
private static final long L_PATH = L_PCHAR | 0x800800000000000L; // lowMask(";/"); |
663 |
private static final long H_PATH = H_PCHAR; // highMask(";/") == 0x0L; |
|
664 |
||
665 |
// Dash, for use in domainlabel and toplabel |
|
666 |
private static final long L_DASH = 0x200000000000L; // lowMask("-"); |
|
667 |
private static final long H_DASH = 0x0L; // highMask("-"); |
|
2 | 668 |
|
669 |
// userinfo = *( unreserved | escaped | |
|
670 |
// ";" | ":" | "&" | "=" | "+" | "$" | "," ) |
|
671 |
private static final long L_USERINFO |
|
47024 | 672 |
= L_UNRESERVED | L_ESCAPED | 0x2C00185000000000L; // lowMask(";:&=+$,"); |
2 | 673 |
private static final long H_USERINFO |
47024 | 674 |
= H_UNRESERVED | H_ESCAPED; // | highMask(";:&=+$,") == 0L; |
2 | 675 |
|
676 |
// reg_name = 1*( unreserved | escaped | "$" | "," | |
|
677 |
// ";" | ":" | "@" | "&" | "=" | "+" ) |
|
678 |
private static final long L_REG_NAME |
|
47024 | 679 |
= L_UNRESERVED | L_ESCAPED | 0x2C00185000000000L; // lowMask("$,;:@&=+"); |
2 | 680 |
private static final long H_REG_NAME |
47024 | 681 |
= H_UNRESERVED | H_ESCAPED | 0x1L; // highMask("$,;:@&=+"); |
2 | 682 |
|
683 |
// All valid characters for server-based authorities |
|
684 |
private static final long L_SERVER |
|
47024 | 685 |
= L_USERINFO | L_ALPHANUM | L_DASH | 0x400400000000000L; // lowMask(".:@[]"); |
2 | 686 |
private static final long H_SERVER |
47024 | 687 |
= H_USERINFO | H_ALPHANUM | H_DASH | 0x28000001L; // highMask(".:@[]"); |
688 |
||
689 |
// Characters that are encoded in the path component of a URI. |
|
690 |
// |
|
691 |
// These characters are reserved in the path segment as described in |
|
692 |
// RFC2396 section 3.3: |
|
693 |
// "=" | ";" | "?" | "/" |
|
694 |
// |
|
695 |
// These characters are defined as excluded in RFC2396 section 2.4.3 |
|
696 |
// and must be escaped if they occur in the data part of a URI: |
|
697 |
// "#" | " " | "<" | ">" | "%" | "\"" | "{" | "}" | "|" | "\\" | "^" | |
|
698 |
// "[" | "]" | "`" |
|
699 |
// |
|
700 |
// Also US ASCII control characters 00-1F and 7F. |
|
701 |
||
702 |
// lowMask((char)0, (char)31) | lowMask("=;?/# <>%\"{}|\\^[]`"); |
|
703 |
private static final long L_ENCODED = 0xF800802DFFFFFFFFL; |
|
704 |
||
705 |
// highMask((char)0x7F, (char)0x7F) | highMask("=;?/# <>%\"{}|\\^[]`"); |
|
706 |
private static final long H_ENCODED = 0xB800000178000000L; |
|
707 |
||
2 | 708 |
} |