jdk/src/share/classes/java/net/URLDecoder.java
author never
Mon, 12 Jul 2010 22:27:18 -0700
changeset 5926 a36f90d986b6
parent 5506 202f599c92aa
child 19069 1d9cb0d080e3
permissions -rw-r--r--
6968385: malformed xml in sweeper logging Reviewed-by: kvn
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     2
 * Copyright (c) 1998, 2006, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
package java.net;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
import java.io.*;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
 * Utility class for HTML form decoding. This class contains static methods
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
 * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
 * MIME format.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
 * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
 * that all characters in the encoded string are one of the following:
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
 * &quot;<code>a</code>&quot; through &quot;<code>z</code>&quot;,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
 * &quot;<code>A</code>&quot; through &quot;<code>Z</code>&quot;,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
 * &quot;<code>0</code>&quot; through &quot;<code>9</code>&quot;, and
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
 * &quot;<code>-</code>&quot;, &quot;<code>_</code>&quot;,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
 * &quot;<code>.</code>&quot;, and &quot;<code>*</code>&quot;. The
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
 * character &quot;<code>%</code>&quot; is allowed but is interpreted
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
 * as the start of a special escaped sequence.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
 * The following rules are applied in the conversion:
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
 * <ul>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
 * <li>The alphanumeric characters &quot;<code>a</code>&quot; through
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
 *     &quot;<code>z</code>&quot;, &quot;<code>A</code>&quot; through
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
 *     &quot;<code>Z</code>&quot; and &quot;<code>0</code>&quot;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
 *     through &quot;<code>9</code>&quot; remain the same.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
 * <li>The special characters &quot;<code>.</code>&quot;,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
 *     &quot;<code>-</code>&quot;, &quot;<code>*</code>&quot;, and
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
 *     &quot;<code>_</code>&quot; remain the same.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
 * <li>The plus sign &quot;<code>+</code>&quot; is converted into a
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
 *     space character &quot;<code>&nbsp;</code>&quot;.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
 * <li>A sequence of the form "<code>%<i>xy</i></code>" will be
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 *     treated as representing a byte where <i>xy</i> is the two-digit
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 *     hexadecimal representation of the 8 bits. Then, all substrings
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
 *     that contain one or more of these byte sequences consecutively
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
 *     will be replaced by the character(s) whose encoding would result
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
 *     in those consecutive bytes.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
 *     The encoding scheme used to decode these characters may be specified,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
 *     or if unspecified, the default encoding of the platform will be used.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
 * </ul>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
 * There are two possible ways in which this decoder could deal with
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
 * illegal strings.  It could either leave illegal characters alone or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
 * it could throw an <tt>{@link java.lang.IllegalArgumentException}</tt>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
 * Which approach the decoder takes is left to the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
 * implementation.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
 * @author  Mark Chamness
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
 * @author  Michael McCloskey
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
 * @since   1.2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
public class URLDecoder {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
    // The platform default encoding
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
    static String dfltEncName = URLEncoder.dfltEncName;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
     * Decodes a <code>x-www-form-urlencoded</code> string.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
     * The platform's default encoding is used to determine what characters
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
     * are represented by any consecutive sequences of the form
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
     * "<code>%<i>xy</i></code>".
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
     * @param s the <code>String</code> to decode
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
     * @deprecated The resulting string may vary depending on the platform's
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
     *          default encoding. Instead, use the decode(String,String) method
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
     *          to specify the encoding.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
     * @return the newly decoded <code>String</code>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
    @Deprecated
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
    public static String decode(String s) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
        String str = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
        try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
            str = decode(s, dfltEncName);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
        } catch (UnsupportedEncodingException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
            // The system should always have the platform default
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
        return str;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
     * Decodes a <code>application/x-www-form-urlencoded</code> string using a specific
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
     * encoding scheme.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
     * The supplied encoding is used to determine
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
     * what characters are represented by any consecutive sequences of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
     * form "<code>%<i>xy</i></code>".
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
     * <em><strong>Note:</strong> The <a href=
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
     * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
     * World Wide Web Consortium Recommendation</a> states that
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
     * UTF-8 should be used. Not doing so may introduce
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
     * incompatibilites.</em>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
     * @param s the <code>String</code> to decode
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
     * @param enc   The name of a supported
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
     *    <a href="../lang/package-summary.html#charenc">character
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
     *    encoding</a>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
     * @return the newly decoded <code>String</code>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
     * @exception  UnsupportedEncodingException
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
     *             If character encoding needs to be consulted, but
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
     *             named character encoding is not supported
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
     * @see URLEncoder#encode(java.lang.String, java.lang.String)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
     * @since 1.4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
    public static String decode(String s, String enc)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
        throws UnsupportedEncodingException{
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
        boolean needToChange = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
        int numChars = s.length();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
        StringBuffer sb = new StringBuffer(numChars > 500 ? numChars / 2 : numChars);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
        int i = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
        if (enc.length() == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
            throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
        char c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
        byte[] bytes = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
        while (i < numChars) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
            c = s.charAt(i);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
            switch (c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
            case '+':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
                sb.append(' ');
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
                i++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
                needToChange = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
            case '%':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
                /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
                 * Starting with this instance of %, process all
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
                 * consecutive substrings of the form %xy. Each
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
                 * substring %xy will yield a byte. Convert all
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
                 * consecutive  bytes obtained this way to whatever
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
                 * character(s) they represent in the provided
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
                 * encoding.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
                 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
                try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
                    // (numChars-i)/3 is an upper bound for the number
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
                    // of remaining bytes
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
                    if (bytes == null)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
                        bytes = new byte[(numChars-i)/3];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
                    int pos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
                    while ( ((i+2) < numChars) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
                            (c=='%')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
                        int v = Integer.parseInt(s.substring(i+1,i+3),16);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
                        if (v < 0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
                            throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
                        bytes[pos++] = (byte) v;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
                        i+= 3;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
                        if (i < numChars)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
                            c = s.charAt(i);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
                    // A trailing, incomplete byte encoding such as
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
                    // "%x" will cause an exception to be thrown
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
                    if ((i < numChars) && (c=='%'))
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
                        throw new IllegalArgumentException(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
                         "URLDecoder: Incomplete trailing escape (%) pattern");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
                    sb.append(new String(bytes, 0, pos, enc));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
                } catch (NumberFormatException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
                    throw new IllegalArgumentException(
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
                    "URLDecoder: Illegal hex characters in escape (%) pattern - "
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
                    + e.getMessage());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
                needToChange = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
            default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
                sb.append(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
                i++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   202
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   203
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
        return (needToChange? sb.toString() : s);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
}