src/java.base/share/classes/java/net/URLDecoder.java
changeset 47216 71c04702a3d5
parent 37880 60ec48925dc6
child 48252 77b88d8f8380
equal deleted inserted replaced
47215:4ebc2e2fb97c 47216:71c04702a3d5
       
     1 /*
       
     2  * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package java.net;
       
    27 
       
    28 import java.io.*;
       
    29 
       
    30 /**
       
    31  * Utility class for HTML form decoding. This class contains static methods
       
    32  * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
       
    33  * MIME format.
       
    34  * <p>
       
    35  * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
       
    36  * that all characters in the encoded string are one of the following:
       
    37  * &quot;{@code a}&quot; through &quot;{@code z}&quot;,
       
    38  * &quot;{@code A}&quot; through &quot;{@code Z}&quot;,
       
    39  * &quot;{@code 0}&quot; through &quot;{@code 9}&quot;, and
       
    40  * &quot;{@code -}&quot;, &quot;{@code _}&quot;,
       
    41  * &quot;{@code .}&quot;, and &quot;{@code *}&quot;. The
       
    42  * character &quot;{@code %}&quot; is allowed but is interpreted
       
    43  * as the start of a special escaped sequence.
       
    44  * <p>
       
    45  * The following rules are applied in the conversion:
       
    46  *
       
    47  * <ul>
       
    48  * <li>The alphanumeric characters &quot;{@code a}&quot; through
       
    49  *     &quot;{@code z}&quot;, &quot;{@code A}&quot; through
       
    50  *     &quot;{@code Z}&quot; and &quot;{@code 0}&quot;
       
    51  *     through &quot;{@code 9}&quot; remain the same.
       
    52  * <li>The special characters &quot;{@code .}&quot;,
       
    53  *     &quot;{@code -}&quot;, &quot;{@code *}&quot;, and
       
    54  *     &quot;{@code _}&quot; remain the same.
       
    55  * <li>The plus sign &quot;{@code +}&quot; is converted into a
       
    56  *     space character &quot; &nbsp; &quot; .
       
    57  * <li>A sequence of the form "<i>{@code %xy}</i>" will be
       
    58  *     treated as representing a byte where <i>xy</i> is the two-digit
       
    59  *     hexadecimal representation of the 8 bits. Then, all substrings
       
    60  *     that contain one or more of these byte sequences consecutively
       
    61  *     will be replaced by the character(s) whose encoding would result
       
    62  *     in those consecutive bytes.
       
    63  *     The encoding scheme used to decode these characters may be specified,
       
    64  *     or if unspecified, the default encoding of the platform will be used.
       
    65  * </ul>
       
    66  * <p>
       
    67  * There are two possible ways in which this decoder could deal with
       
    68  * illegal strings.  It could either leave illegal characters alone or
       
    69  * it could throw an {@link java.lang.IllegalArgumentException}.
       
    70  * Which approach the decoder takes is left to the
       
    71  * implementation.
       
    72  *
       
    73  * @author  Mark Chamness
       
    74  * @author  Michael McCloskey
       
    75  * @since   1.2
       
    76  */
       
    77 
       
    78 public class URLDecoder {
       
    79 
       
    80     // The platform default encoding
       
    81     static String dfltEncName = URLEncoder.dfltEncName;
       
    82 
       
    83     /**
       
    84      * Decodes a {@code x-www-form-urlencoded} string.
       
    85      * The platform's default encoding is used to determine what characters
       
    86      * are represented by any consecutive sequences of the form
       
    87      * "<i>{@code %xy}</i>".
       
    88      * @param s the {@code String} to decode
       
    89      * @deprecated The resulting string may vary depending on the platform's
       
    90      *          default encoding. Instead, use the decode(String,String) method
       
    91      *          to specify the encoding.
       
    92      * @return the newly decoded {@code String}
       
    93      */
       
    94     @Deprecated
       
    95     public static String decode(String s) {
       
    96 
       
    97         String str = null;
       
    98 
       
    99         try {
       
   100             str = decode(s, dfltEncName);
       
   101         } catch (UnsupportedEncodingException e) {
       
   102             // The system should always have the platform default
       
   103         }
       
   104 
       
   105         return str;
       
   106     }
       
   107 
       
   108     /**
       
   109      * Decodes an {@code application/x-www-form-urlencoded} string using
       
   110      * a specific encoding scheme.
       
   111      * The supplied encoding is used to determine
       
   112      * what characters are represented by any consecutive sequences of the
       
   113      * form "<i>{@code %xy}</i>".
       
   114      * <p>
       
   115      * <em><strong>Note:</strong> The <a href=
       
   116      * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
       
   117      * World Wide Web Consortium Recommendation</a> states that
       
   118      * UTF-8 should be used. Not doing so may introduce
       
   119      * incompatibilities.</em>
       
   120      *
       
   121      * @param s the {@code String} to decode
       
   122      * @param enc   The name of a supported
       
   123      *    <a href="../lang/package-summary.html#charenc">character
       
   124      *    encoding</a>.
       
   125      * @return the newly decoded {@code String}
       
   126      * @exception  UnsupportedEncodingException
       
   127      *             If character encoding needs to be consulted, but
       
   128      *             named character encoding is not supported
       
   129      * @see URLEncoder#encode(java.lang.String, java.lang.String)
       
   130      * @since 1.4
       
   131      */
       
   132     public static String decode(String s, String enc)
       
   133         throws UnsupportedEncodingException{
       
   134 
       
   135         boolean needToChange = false;
       
   136         int numChars = s.length();
       
   137         StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
       
   138         int i = 0;
       
   139 
       
   140         if (enc.length() == 0) {
       
   141             throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
       
   142         }
       
   143 
       
   144         char c;
       
   145         byte[] bytes = null;
       
   146         while (i < numChars) {
       
   147             c = s.charAt(i);
       
   148             switch (c) {
       
   149             case '+':
       
   150                 sb.append(' ');
       
   151                 i++;
       
   152                 needToChange = true;
       
   153                 break;
       
   154             case '%':
       
   155                 /*
       
   156                  * Starting with this instance of %, process all
       
   157                  * consecutive substrings of the form %xy. Each
       
   158                  * substring %xy will yield a byte. Convert all
       
   159                  * consecutive  bytes obtained this way to whatever
       
   160                  * character(s) they represent in the provided
       
   161                  * encoding.
       
   162                  */
       
   163 
       
   164                 try {
       
   165 
       
   166                     // (numChars-i)/3 is an upper bound for the number
       
   167                     // of remaining bytes
       
   168                     if (bytes == null)
       
   169                         bytes = new byte[(numChars-i)/3];
       
   170                     int pos = 0;
       
   171 
       
   172                     while ( ((i+2) < numChars) &&
       
   173                             (c=='%')) {
       
   174                         int v = Integer.parseInt(s, i + 1, i + 3, 16);
       
   175                         if (v < 0)
       
   176                             throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value");
       
   177                         bytes[pos++] = (byte) v;
       
   178                         i+= 3;
       
   179                         if (i < numChars)
       
   180                             c = s.charAt(i);
       
   181                     }
       
   182 
       
   183                     // A trailing, incomplete byte encoding such as
       
   184                     // "%x" will cause an exception to be thrown
       
   185 
       
   186                     if ((i < numChars) && (c=='%'))
       
   187                         throw new IllegalArgumentException(
       
   188                          "URLDecoder: Incomplete trailing escape (%) pattern");
       
   189 
       
   190                     sb.append(new String(bytes, 0, pos, enc));
       
   191                 } catch (NumberFormatException e) {
       
   192                     throw new IllegalArgumentException(
       
   193                     "URLDecoder: Illegal hex characters in escape (%) pattern - "
       
   194                     + e.getMessage());
       
   195                 }
       
   196                 needToChange = true;
       
   197                 break;
       
   198             default:
       
   199                 sb.append(c);
       
   200                 i++;
       
   201                 break;
       
   202             }
       
   203         }
       
   204 
       
   205         return (needToChange? sb.toString() : s);
       
   206     }
       
   207 }