|
/*
 * Copyright 1998-2006 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
|
25 |
|
26 package java.net; |
|
27 |
|
28 import java.io.*; |
|
29 |
|
30 /** |
|
31 * Utility class for HTML form decoding. This class contains static methods |
|
32 * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE> |
|
33 * MIME format. |
|
34 * <p> |
|
35 * The conversion process is the reverse of that used by the URLEncoder class. It is assumed |
|
36 * that all characters in the encoded string are one of the following: |
|
37 * "<code>a</code>" through "<code>z</code>", |
|
38 * "<code>A</code>" through "<code>Z</code>", |
|
39 * "<code>0</code>" through "<code>9</code>", and |
|
40 * "<code>-</code>", "<code>_</code>", |
|
41 * "<code>.</code>", and "<code>*</code>". The |
|
42 * character "<code>%</code>" is allowed but is interpreted |
|
43 * as the start of a special escaped sequence. |
|
44 * <p> |
|
45 * The following rules are applied in the conversion: |
|
46 * <p> |
|
47 * <ul> |
|
48 * <li>The alphanumeric characters "<code>a</code>" through |
|
49 * "<code>z</code>", "<code>A</code>" through |
|
50 * "<code>Z</code>" and "<code>0</code>" |
|
51 * through "<code>9</code>" remain the same. |
|
52 * <li>The special characters "<code>.</code>", |
|
53 * "<code>-</code>", "<code>*</code>", and |
|
54 * "<code>_</code>" remain the same. |
|
55 * <li>The plus sign "<code>+</code>" is converted into a |
|
56 * space character "<code> </code>" . |
|
57 * <li>A sequence of the form "<code>%<i>xy</i></code>" will be |
|
58 * treated as representing a byte where <i>xy</i> is the two-digit |
|
59 * hexadecimal representation of the 8 bits. Then, all substrings |
|
60 * that contain one or more of these byte sequences consecutively |
|
61 * will be replaced by the character(s) whose encoding would result |
|
62 * in those consecutive bytes. |
|
63 * The encoding scheme used to decode these characters may be specified, |
|
64 * or if unspecified, the default encoding of the platform will be used. |
|
65 * </ul> |
|
66 * <p> |
|
67 * There are two possible ways in which this decoder could deal with |
|
68 * illegal strings. It could either leave illegal characters alone or |
|
69 * it could throw an <tt>{@link java.lang.IllegalArgumentException}</tt>. |
|
70 * Which approach the decoder takes is left to the |
|
71 * implementation. |
|
72 * |
|
73 * @author Mark Chamness |
|
74 * @author Michael McCloskey |
|
75 * @since 1.2 |
|
76 */ |
|
77 |
|
78 public class URLDecoder { |
|
79 |
|
80 // The platform default encoding |
|
81 static String dfltEncName = URLEncoder.dfltEncName; |
|
82 |
|
83 /** |
|
84 * Decodes a <code>x-www-form-urlencoded</code> string. |
|
85 * The platform's default encoding is used to determine what characters |
|
86 * are represented by any consecutive sequences of the form |
|
87 * "<code>%<i>xy</i></code>". |
|
88 * @param s the <code>String</code> to decode |
|
89 * @deprecated The resulting string may vary depending on the platform's |
|
90 * default encoding. Instead, use the decode(String,String) method |
|
91 * to specify the encoding. |
|
92 * @return the newly decoded <code>String</code> |
|
93 */ |
|
94 @Deprecated |
|
95 public static String decode(String s) { |
|
96 |
|
97 String str = null; |
|
98 |
|
99 try { |
|
100 str = decode(s, dfltEncName); |
|
101 } catch (UnsupportedEncodingException e) { |
|
102 // The system should always have the platform default |
|
103 } |
|
104 |
|
105 return str; |
|
106 } |
|
107 |
|
108 /** |
|
109 * Decodes a <code>application/x-www-form-urlencoded</code> string using a specific |
|
110 * encoding scheme. |
|
111 * The supplied encoding is used to determine |
|
112 * what characters are represented by any consecutive sequences of the |
|
113 * form "<code>%<i>xy</i></code>". |
|
114 * <p> |
|
115 * <em><strong>Note:</strong> The <a href= |
|
116 * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars"> |
|
117 * World Wide Web Consortium Recommendation</a> states that |
|
118 * UTF-8 should be used. Not doing so may introduce |
|
119 * incompatibilites.</em> |
|
120 * |
|
121 * @param s the <code>String</code> to decode |
|
122 * @param enc The name of a supported |
|
123 * <a href="../lang/package-summary.html#charenc">character |
|
124 * encoding</a>. |
|
125 * @return the newly decoded <code>String</code> |
|
126 * @exception UnsupportedEncodingException |
|
127 * If character encoding needs to be consulted, but |
|
128 * named character encoding is not supported |
|
129 * @see URLEncoder#encode(java.lang.String, java.lang.String) |
|
130 * @since 1.4 |
|
131 */ |
|
132 public static String decode(String s, String enc) |
|
133 throws UnsupportedEncodingException{ |
|
134 |
|
135 boolean needToChange = false; |
|
136 int numChars = s.length(); |
|
137 StringBuffer sb = new StringBuffer(numChars > 500 ? numChars / 2 : numChars); |
|
138 int i = 0; |
|
139 |
|
140 if (enc.length() == 0) { |
|
141 throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter"); |
|
142 } |
|
143 |
|
144 char c; |
|
145 byte[] bytes = null; |
|
146 while (i < numChars) { |
|
147 c = s.charAt(i); |
|
148 switch (c) { |
|
149 case '+': |
|
150 sb.append(' '); |
|
151 i++; |
|
152 needToChange = true; |
|
153 break; |
|
154 case '%': |
|
155 /* |
|
156 * Starting with this instance of %, process all |
|
157 * consecutive substrings of the form %xy. Each |
|
158 * substring %xy will yield a byte. Convert all |
|
159 * consecutive bytes obtained this way to whatever |
|
160 * character(s) they represent in the provided |
|
161 * encoding. |
|
162 */ |
|
163 |
|
164 try { |
|
165 |
|
166 // (numChars-i)/3 is an upper bound for the number |
|
167 // of remaining bytes |
|
168 if (bytes == null) |
|
169 bytes = new byte[(numChars-i)/3]; |
|
170 int pos = 0; |
|
171 |
|
172 while ( ((i+2) < numChars) && |
|
173 (c=='%')) { |
|
174 int v = Integer.parseInt(s.substring(i+1,i+3),16); |
|
175 if (v < 0) |
|
176 throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value"); |
|
177 bytes[pos++] = (byte) v; |
|
178 i+= 3; |
|
179 if (i < numChars) |
|
180 c = s.charAt(i); |
|
181 } |
|
182 |
|
183 // A trailing, incomplete byte encoding such as |
|
184 // "%x" will cause an exception to be thrown |
|
185 |
|
186 if ((i < numChars) && (c=='%')) |
|
187 throw new IllegalArgumentException( |
|
188 "URLDecoder: Incomplete trailing escape (%) pattern"); |
|
189 |
|
190 sb.append(new String(bytes, 0, pos, enc)); |
|
191 } catch (NumberFormatException e) { |
|
192 throw new IllegalArgumentException( |
|
193 "URLDecoder: Illegal hex characters in escape (%) pattern - " |
|
194 + e.getMessage()); |
|
195 } |
|
196 needToChange = true; |
|
197 break; |
|
198 default: |
|
199 sb.append(c); |
|
200 i++; |
|
201 break; |
|
202 } |
|
203 } |
|
204 |
|
205 return (needToChange? sb.toString() : s); |
|
206 } |
|
207 } |