jdk/src/share/classes/sun/net/URLCanonicalizer.java
author juh
Wed, 10 Jul 2013 18:01:22 -0700 (2013-07-11)
changeset 18800 e7fa560afcfb
parent 5506 202f599c92aa
permissions -rw-r--r--
8020318: Fix doclint issues in java.net Reviewed-by: darcy, khazra
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     2
 * Copyright (c) 1996, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
package sun.net;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
/**
 * Helper class to map URL "abbreviations" to real URLs.
 * The default implementation applies the first matching rule below:
 * <ul>
 * <li>{@code ftp.mumble.bar/...} becomes {@code ftp://ftp.mumble.bar/...}</li>
 * <li>{@code gopher.mumble.bar/...} becomes
 *     {@code gopher://gopher.mumble.bar/...}</li>
 * <li>{@code /foo/...} becomes {@code file:/foo/...}</li>
 * <li>a string that already starts with a protocol name is returned
 *     unchanged</li>
 * <li>{@code name} (a single name made of letters, digits and hyphens)
 *     becomes {@code http://www.name.com}</li>
 * <li>{@code other.name.dom/...} becomes {@code http://other.name.dom/...}</li>
 * </ul>
 *
 * Subclassers can override or extend this behavior to support different
 * or additional canonicalization policies.
 *
 * @author      Steve Byrne
 */

public class URLCanonicalizer {
    /**
     * Creates the default canonicalizer instance.
     */
    public URLCanonicalizer() { }

    /**
     * Given a possibly abbreviated URL (missing a protocol name, typically),
     * this method's job is to transform that URL into a canonical form,
     * by including a protocol name and additional syntax, if necessary.
     *
     * For a correctly formed URL, this method should just return its argument.
     *
     * @param simpleURL the possibly abbreviated URL; must not be null
     * @return the canonical form of {@code simpleURL}
     * @throws NullPointerException if {@code simpleURL} is null
     */
    public String canonicalize(String simpleURL) {
        // The prefix shortcuts are tested before protocol detection, so a
        // string starting with "ftp." or "gopher." is always prefixed.
        if (simpleURL.startsWith("ftp.")) {
            return "ftp://" + simpleURL;
        }
        if (simpleURL.startsWith("gopher.")) {
            return "gopher://" + simpleURL;
        }
        if (simpleURL.startsWith("/")) {
            return "file:" + simpleURL;
        }
        if (hasProtocolName(simpleURL)) {
            return simpleURL;
        }

        // No protocol: assume http, expanding a bare name "foo" to
        // "www.foo.com" first.
        String host = simpleURL;
        if (isSimpleHostName(simpleURL)) {
            host = "www." + simpleURL + ".com";
        }
        return "http://" + host;
    }

    /**
     * Given a possibly abbreviated URL, this predicate function returns
     * true if it appears that the URL contains a protocol name, that is,
     * if the text before the first {@code ':'} is non-empty and consists
     * only of ASCII letters and hyphens.
     *
     * @param url the possibly abbreviated URL to inspect
     * @return true if {@code url} appears to start with a protocol name
     */
    public boolean hasProtocolName(String url) {
        int colon = url.indexOf(':');
        if (colon <= 0) {       // treat ":foo" as not having a protocol spec
            return false;
        }

        // REMIND: the accepted character set is a guess at what is legal in
        // a protocol name -- it has not been verified against a specification.
        for (int i = 0; i < colon; i++) {
            char c = url.charAt(i);
            if (!isAsciiLetter(c) && c != '-') {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if the URL is just a single name, no periods or
     * slashes, false otherwise. An empty string is not a name.
     *
     * @param url the possibly abbreviated URL to inspect
     * @return true if {@code url} is non-empty and consists only of ASCII
     *         letters, digits and hyphens
     */
    protected boolean isSimpleHostName(String url) {
        // An empty string must not be expanded to "www..com" by canonicalize.
        if (url.length() == 0) {
            return false;
        }

        // REMIND: the accepted character set is a guess at what is legal in
        // a host name -- it has not been verified against a specification.
        for (int i = 0; i < url.length(); i++) {
            char c = url.charAt(i);
            if (!isAsciiLetter(c) && !isAsciiDigit(c) && c != '-') {
                return false;
            }
        }
        return true;
    }

    /** Returns true if {@code c} is in the range A-Z or a-z. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    /** Returns true if {@code c} is in the range 0-9. */
    private static boolean isAsciiDigit(char c) {
        return c >= '0' && c <= '9';
    }
}