jdk/src/share/classes/java/net/URLDecoder.java
changeset 2 90ce3da70b43
child 5506 202f599c92aa
equal deleted inserted replaced
0:fd16c54261b3 2:90ce3da70b43
       
     1 /*
       
     2  * Copyright 1998-2006 Sun Microsystems, Inc.  All Rights Reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Sun designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Sun in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
       
    22  * CA 95054 USA or visit www.sun.com if you need additional information or
       
    23  * have any questions.
       
    24  */
       
    25 
       
    26 package java.net;
       
    27 
       
    28 import java.io.*;
       
    29 
       
    30 /**
       
    31  * Utility class for HTML form decoding. This class contains static methods
       
    32  * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
       
    33  * MIME format.
       
    34  * <p>
       
    35  * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
       
    36  * that all characters in the encoded string are one of the following:
       
    37  * &quot;<code>a</code>&quot; through &quot;<code>z</code>&quot;,
       
    38  * &quot;<code>A</code>&quot; through &quot;<code>Z</code>&quot;,
       
    39  * &quot;<code>0</code>&quot; through &quot;<code>9</code>&quot;, and
       
    40  * &quot;<code>-</code>&quot;, &quot;<code>_</code>&quot;,
       
    41  * &quot;<code>.</code>&quot;, and &quot;<code>*</code>&quot;. The
       
    42  * character &quot;<code>%</code>&quot; is allowed but is interpreted
       
    43  * as the start of a special escaped sequence.
       
    44  * <p>
       
    45  * The following rules are applied in the conversion:
       
    46  * <p>
       
    47  * <ul>
       
    48  * <li>The alphanumeric characters &quot;<code>a</code>&quot; through
       
    49  *     &quot;<code>z</code>&quot;, &quot;<code>A</code>&quot; through
       
    50  *     &quot;<code>Z</code>&quot; and &quot;<code>0</code>&quot;
       
    51  *     through &quot;<code>9</code>&quot; remain the same.
       
    52  * <li>The special characters &quot;<code>.</code>&quot;,
       
    53  *     &quot;<code>-</code>&quot;, &quot;<code>*</code>&quot;, and
       
    54  *     &quot;<code>_</code>&quot; remain the same.
       
    55  * <li>The plus sign &quot;<code>+</code>&quot; is converted into a
       
    56  *     space character &quot;<code>&nbsp;</code>&quot; .
       
    57  * <li>A sequence of the form "<code>%<i>xy</i></code>" will be
       
    58  *     treated as representing a byte where <i>xy</i> is the two-digit
       
    59  *     hexadecimal representation of the 8 bits. Then, all substrings
       
    60  *     that contain one or more of these byte sequences consecutively
       
    61  *     will be replaced by the character(s) whose encoding would result
       
    62  *     in those consecutive bytes.
       
    63  *     The encoding scheme used to decode these characters may be specified,
       
    64  *     or if unspecified, the default encoding of the platform will be used.
       
    65  * </ul>
       
    66  * <p>
       
    67  * There are two possible ways in which this decoder could deal with
       
    68  * illegal strings.  It could either leave illegal characters alone or
       
    69  * it could throw an <tt>{@link java.lang.IllegalArgumentException}</tt>.
       
    70  * Which approach the decoder takes is left to the
       
    71  * implementation.
       
    72  *
       
    73  * @author  Mark Chamness
       
    74  * @author  Michael McCloskey
       
    75  * @since   1.2
       
    76  */
       
    77 
       
    78 public class URLDecoder {
       
    79 
       
    80     // The platform default encoding
       
    81     static String dfltEncName = URLEncoder.dfltEncName;
       
    82 
       
    83     /**
       
    84      * Decodes a <code>x-www-form-urlencoded</code> string.
       
    85      * The platform's default encoding is used to determine what characters
       
    86      * are represented by any consecutive sequences of the form
       
    87      * "<code>%<i>xy</i></code>".
       
    88      * @param s the <code>String</code> to decode
       
    89      * @deprecated The resulting string may vary depending on the platform's
       
    90      *          default encoding. Instead, use the decode(String,String) method
       
    91      *          to specify the encoding.
       
    92      * @return the newly decoded <code>String</code>
       
    93      */
       
    94     @Deprecated
       
    95     public static String decode(String s) {
       
    96 
       
    97         String str = null;
       
    98 
       
    99         try {
       
   100             str = decode(s, dfltEncName);
       
   101         } catch (UnsupportedEncodingException e) {
       
   102             // The system should always have the platform default
       
   103         }
       
   104 
       
   105         return str;
       
   106     }
       
   107 
       
   108     /**
       
   109      * Decodes a <code>application/x-www-form-urlencoded</code> string using a specific
       
   110      * encoding scheme.
       
   111      * The supplied encoding is used to determine
       
   112      * what characters are represented by any consecutive sequences of the
       
   113      * form "<code>%<i>xy</i></code>".
       
   114      * <p>
       
   115      * <em><strong>Note:</strong> The <a href=
       
   116      * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
       
   117      * World Wide Web Consortium Recommendation</a> states that
       
   118      * UTF-8 should be used. Not doing so may introduce
       
   119      * incompatibilites.</em>
       
   120      *
       
   121      * @param s the <code>String</code> to decode
       
   122      * @param enc   The name of a supported
       
   123      *    <a href="../lang/package-summary.html#charenc">character
       
   124      *    encoding</a>.
       
   125      * @return the newly decoded <code>String</code>
       
   126      * @exception  UnsupportedEncodingException
       
   127      *             If character encoding needs to be consulted, but
       
   128      *             named character encoding is not supported
       
   129      * @see URLEncoder#encode(java.lang.String, java.lang.String)
       
   130      * @since 1.4
       
   131      */
       
   132     public static String decode(String s, String enc)
       
   133         throws UnsupportedEncodingException{
       
   134 
       
   135         boolean needToChange = false;
       
   136         int numChars = s.length();
       
   137         StringBuffer sb = new StringBuffer(numChars > 500 ? numChars / 2 : numChars);
       
   138         int i = 0;
       
   139 
       
   140         if (enc.length() == 0) {
       
   141             throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
       
   142         }
       
   143 
       
   144         char c;
       
   145         byte[] bytes = null;
       
   146         while (i < numChars) {
       
   147             c = s.charAt(i);
       
   148             switch (c) {
       
   149             case '+':
       
   150                 sb.append(' ');
       
   151                 i++;
       
   152                 needToChange = true;
       
   153                 break;
       
   154             case '%':
       
   155                 /*
       
   156                  * Starting with this instance of %, process all
       
   157                  * consecutive substrings of the form %xy. Each
       
   158                  * substring %xy will yield a byte. Convert all
       
   159                  * consecutive  bytes obtained this way to whatever
       
   160                  * character(s) they represent in the provided
       
   161                  * encoding.
       
   162                  */
       
   163 
       
   164                 try {
       
   165 
       
   166                     // (numChars-i)/3 is an upper bound for the number
       
   167                     // of remaining bytes
       
   168                     if (bytes == null)
       
   169                         bytes = new byte[(numChars-i)/3];
       
   170                     int pos = 0;
       
   171 
       
   172                     while ( ((i+2) < numChars) &&
       
   173                             (c=='%')) {
       
   174                         int v = Integer.parseInt(s.substring(i+1,i+3),16);
       
   175                         if (v < 0)
       
   176                             throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value");
       
   177                         bytes[pos++] = (byte) v;
       
   178                         i+= 3;
       
   179                         if (i < numChars)
       
   180                             c = s.charAt(i);
       
   181                     }
       
   182 
       
   183                     // A trailing, incomplete byte encoding such as
       
   184                     // "%x" will cause an exception to be thrown
       
   185 
       
   186                     if ((i < numChars) && (c=='%'))
       
   187                         throw new IllegalArgumentException(
       
   188                          "URLDecoder: Incomplete trailing escape (%) pattern");
       
   189 
       
   190                     sb.append(new String(bytes, 0, pos, enc));
       
   191                 } catch (NumberFormatException e) {
       
   192                     throw new IllegalArgumentException(
       
   193                     "URLDecoder: Illegal hex characters in escape (%) pattern - "
       
   194                     + e.getMessage());
       
   195                 }
       
   196                 needToChange = true;
       
   197                 break;
       
   198             default:
       
   199                 sb.append(c);
       
   200                 i++;
       
   201                 break;
       
   202             }
       
   203         }
       
   204 
       
   205         return (needToChange? sb.toString() : s);
       
   206     }
       
   207 }