src/java.base/share/classes/java/net/IDN.java
author prappo
Tue, 13 Nov 2018 12:24:34 +0000
changeset 52499 768b1c612100
parent 47216 71c04702a3d5
permissions -rw-r--r--
8213490: Networking area typos and inconsistencies cleanup Reviewed-by: alanb, chegar, dfuchs
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
19069
1d9cb0d080e3 8021833: javadoc cleanup in java.net
juh
parents: 5506
diff changeset
     2
 * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 2
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
package java.net;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
import java.io.InputStream;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
import java.io.IOException;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
import java.security.AccessController;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
import java.security.PrivilegedAction;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
import sun.net.idn.StringPrep;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
import sun.net.idn.Punycode;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
import sun.text.normalizer.UCharacterIterator;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
 * Provides methods to convert internationalized domain names (IDNs) between
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
 * a normal Unicode representation and an ASCII Compatible Encoding (ACE) representation.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
 * Internationalized domain names can use characters from the entire range of
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
 * Unicode, while traditional domain names are restricted to ASCII characters.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
 * ACE is an encoding of Unicode strings that uses only ASCII characters and
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
 * can be used with software (such as the Domain Name System) that only
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
 * understands traditional domain names.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
 * <p>Internationalized domain names are defined in <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
 * RFC 3490 defines two operations: ToASCII and ToUnicode. These 2 operations employ
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
 * <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a> algorithm, which is a
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
 * profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a>, and
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
 * <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a> algorithm to convert
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
 * domain name string back and forth.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
 * <p>The behavior of the aforementioned conversion process can be adjusted by various flags:
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
 *   <ul>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
 *     <li>If the ALLOW_UNASSIGNED flag is used, the domain name string to be converted
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
 *         can contain code points that are unassigned in Unicode 3.2, which is the
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
 *         Unicode version on which IDN conversion is based. If the flag is not used,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
 *         the presence of such unassigned code points is treated as an error.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 *     <li>If the USE_STD3_ASCII_RULES flag is used, ASCII strings are checked against <a href="http://www.ietf.org/rfc/rfc1122.txt">RFC 1122</a> and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC 1123</a>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 *         It is an error if they don't meet the requirements.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
 *   </ul>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
 * These flags can be logically OR'ed together.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
 * <p>The security consideration is important with respect to internationalized
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
 * domain name support. For example, English domain names may be <i>homographed</i>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
 * - maliciously misspelled by substitution of non-Latin letters.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
 * <a href="http://www.unicode.org/reports/tr36/">Unicode Technical Report #36</a>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
 * discusses security issues of IDN support as well as possible solutions.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
 * Applications are responsible for taking adequate security measures when using
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
 * international domain names.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
 * @author Edward Wang
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
 * @since 1.6
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
public final class IDN {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
     * Flag to allow processing of unassigned code points
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
    public static final int ALLOW_UNASSIGNED = 0x01;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
     * Flag to turn on the check against STD-3 ASCII rules
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
    public static final int USE_STD3_ASCII_RULES = 0x02;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
     * Translates a string from Unicode to ASCII Compatible Encoding (ACE),
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
     * as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
     * <p>ToASCII operation can fail. ToASCII fails if any step of it fails.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
     * If ToASCII operation fails, an IllegalArgumentException will be thrown.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
     * In this case, the input string should not be used in an internationalized domain name.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
     * <p> A label is an individual part of a domain name. The original ToASCII operation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
     * as defined in RFC 3490, only operates on a single label. This method can handle
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
     * both label and entire domain name, by assuming that labels in a domain name are
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
     * always separated by dots. The following characters are recognized as dots:
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
     * &#0092;u002E (full stop), &#0092;u3002 (ideographic full stop), &#0092;uFF0E (fullwidth full stop),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
     * and &#0092;uFF61 (halfwidth ideographic full stop). If dots are
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
     * used as label separators, this method also changes all of them to &#0092;u002E (full stop)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
     * in output translated string.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
     * @param input     the string to be processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
     * @param flag      process flag; can be 0 or any logical OR of possible flags
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
     *
19069
1d9cb0d080e3 8021833: javadoc cleanup in java.net
juh
parents: 5506
diff changeset
   107
     * @return          the translated {@code String}
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
     * @throws IllegalArgumentException   if the input string doesn't conform to RFC 3490 specification
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
    public static String toASCII(String input, int flag)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
    {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
        int p = 0, q = 0;
24969
afa6934dd8e8 8041679: Replace uses of StringBuffer with StringBuilder within core library classes
psandoz
parents: 19790
diff changeset
   114
        StringBuilder out = new StringBuilder();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
19440
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   116
        if (isRootLabel(input)) {
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   117
            return ".";
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   118
        }
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   119
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
        while (p < input.length()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
            q = searchDots(input, p);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
            out.append(toASCIIInternal(input.substring(p, q),  flag));
19440
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   123
            if (q != (input.length())) {
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   124
               // has more labels, or keep the trailing dot as at present
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   125
               out.append('.');
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   126
            }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
            p = q + 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
        return out.toString();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
     * Translates a string from Unicode to ASCII Compatible Encoding (ACE),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
     * as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
     * <p> This convenience method works as if by invoking the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
     * two-argument counterpart as follows:
19069
1d9cb0d080e3 8021833: javadoc cleanup in java.net
juh
parents: 5506
diff changeset
   140
     * <blockquote>
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
     * {@link #toASCII(String, int) toASCII}(input,&nbsp;0);
19069
1d9cb0d080e3 8021833: javadoc cleanup in java.net
juh
parents: 5506
diff changeset
   142
     * </blockquote>
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
     * @param input     the string to be processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
     *
19069
1d9cb0d080e3 8021833: javadoc cleanup in java.net
juh
parents: 5506
diff changeset
   146
     * @return          the translated {@code String}
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
     * @throws IllegalArgumentException   if the input string doesn't conform to RFC 3490 specification
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
    public static String toASCII(String input) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
        return toASCII(input, 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
     * Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
     * as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
     * <p>ToUnicode never fails. In case of any error, the input string is returned unmodified.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
     * <p> A label is an individual part of a domain name. The original ToUnicode operation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
     * as defined in RFC 3490, only operates on a single label. This method can handle
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
     * both label and entire domain name, by assuming that labels in a domain name are
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
     * always separated by dots. The following characters are recognized as dots:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
     * &#0092;u002E (full stop), &#0092;u3002 (ideographic full stop), &#0092;uFF0E (fullwidth full stop),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
     * and &#0092;uFF61 (halfwidth ideographic full stop).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
     * @param input     the string to be processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
     * @param flag      process flag; can be 0 or any logical OR of possible flags
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
     *
19069
1d9cb0d080e3 8021833: javadoc cleanup in java.net
juh
parents: 5506
diff changeset
   171
     * @return          the translated {@code String}
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
    public static String toUnicode(String input, int flag) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
        int p = 0, q = 0;
24969
afa6934dd8e8 8041679: Replace uses of StringBuffer with StringBuilder within core library classes
psandoz
parents: 19790
diff changeset
   175
        StringBuilder out = new StringBuilder();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
19440
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   177
        if (isRootLabel(input)) {
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   178
            return ".";
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   179
        }
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   180
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
        while (p < input.length()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
            q = searchDots(input, p);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
            out.append(toUnicodeInternal(input.substring(p, q),  flag));
19440
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   184
            if (q != (input.length())) {
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   185
               // has more labels, or keep the trailing dot as at present
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   186
               out.append('.');
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   187
            }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
            p = q + 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
        return out.toString();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
     * Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
     * as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
     * <p> This convenience method works as if by invoking the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
     * two-argument counterpart as follows:
19069
1d9cb0d080e3 8021833: javadoc cleanup in java.net
juh
parents: 5506
diff changeset
   201
     * <blockquote>
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   202
     * {@link #toUnicode(String, int) toUnicode}(input,&nbsp;0);
19069
1d9cb0d080e3 8021833: javadoc cleanup in java.net
juh
parents: 5506
diff changeset
   203
     * </blockquote>
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
     * @param input     the string to be processed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
     *
19069
1d9cb0d080e3 8021833: javadoc cleanup in java.net
juh
parents: 5506
diff changeset
   207
     * @return          the translated {@code String}
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
    public static String toUnicode(String input) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
        return toUnicode(input, 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
    /* ---------------- Private members -------------- */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
    // ACE Prefix is "xn--"
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
    private static final String ACE_PREFIX = "xn--";
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
    private static final int ACE_PREFIX_LENGTH = ACE_PREFIX.length();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
    private static final int MAX_LABEL_LENGTH   = 63;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
    // single instance of nameprep
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
    private static StringPrep namePrep = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
    // Loads the nameprep profile resource once, at class initialization, and
    // builds the shared StringPrep instance from it.
    static {
        final String IDN_PROFILE = "uidna.spp";

        // With a security manager installed the resource lookup is performed
        // inside a privileged action; otherwise it is performed directly.
        InputStream profile;
        if (System.getSecurityManager() != null) {
            profile = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
                public InputStream run() {
                    return StringPrep.class.getResourceAsStream(IDN_PROFILE);
                }
            });
        } else {
            profile = StringPrep.class.getResourceAsStream(IDN_PROFILE);
        }

        // try-with-resources closes the stream on every path, including when
        // the StringPrep constructor throws; the previous explicit close()
        // call was skipped in that case and the stream leaked. A null
        // resource is permitted here and is simply not closed.
        try (InputStream stream = profile) {
            namePrep = new StringPrep(stream);
        } catch (IOException e) {
            // should never reach here
            assert false;
        }
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
    /* ---------------- Private operations -------------- */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
    // to suppress the default zero-argument constructor
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
    private IDN() {
        // Intentionally empty: declared private only so that the compiler
        // does not generate a public zero-argument constructor.
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
    // toASCII operation; should only apply to a single label
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
    private static String toASCIIInternal(String label, int flag)
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
    {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
        // step 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
        // Check if the string contains code points outside the ASCII range 0..0x7f.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
        boolean isASCII  = isAllASCII(label);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
        StringBuffer dest;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
        // step 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
        // perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
        if (!isASCII) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
            UCharacterIterator iter = UCharacterIterator.getInstance(label);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
            try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
                dest = namePrep.prepare(iter, flag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
            } catch (java.text.ParseException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
                throw new IllegalArgumentException(e);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
            dest = new StringBuffer(label);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
19440
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   280
        // step 8, move forward to check the smallest number of the code points
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   281
        // the length must be inside 1..63
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   282
        if (dest.length() == 0) {
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   283
            throw new IllegalArgumentException(
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   284
                        "Empty label is not a legal name");
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   285
        }
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   286
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
        // step 3
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
        // Verify the absence of non-LDH ASCII code points
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
        //   0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
        // Verify the absence of leading and trailing hyphen
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
        boolean useSTD3ASCIIRules = ((flag & USE_STD3_ASCII_RULES) != 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
        if (useSTD3ASCIIRules) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
            for (int i = 0; i < dest.length(); i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
                int c = dest.charAt(i);
19790
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   295
                if (isNonLDHAsciiCodePoint(c)) {
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   296
                    throw new IllegalArgumentException(
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   297
                        "Contains non-LDH ASCII characters");
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
19790
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   301
            if (dest.charAt(0) == '-' ||
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   302
                dest.charAt(dest.length() - 1) == '-') {
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   303
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   304
                throw new IllegalArgumentException(
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   305
                        "Has leading or trailing hyphen");
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
        if (!isASCII) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
            // step 4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
            // If all code points are inside 0..0x7f, skip to step 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
            if (!isAllASCII(dest.toString())) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
                // step 5
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
                // verify the sequence does not begin with ACE prefix
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
                if(!startsWithACEPrefix(dest)){
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
                    // step 6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   318
                    // encode the sequence with punycode
90ce3da70b43 Initial load
duke
parents:
diff changeset
   319
                    try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
                        dest = Punycode.encode(dest, null);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
                    } catch (java.text.ParseException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
                        throw new IllegalArgumentException(e);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
                    dest = toASCIILower(dest);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
                    // step 7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
                    // prepend the ACE prefix
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
                    dest.insert(0, ACE_PREFIX);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
                    throw new IllegalArgumentException("The input starts with the ACE Prefix");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
        // step 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
        // the length must be inside 1..63
19440
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   339
        if (dest.length() > MAX_LABEL_LENGTH) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
            throw new IllegalArgumentException("The label in the input is too long");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
        return dest.toString();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
    // toUnicode operation; should only apply to a single label
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
    private static String toUnicodeInternal(String label, int flag) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
        boolean[] caseFlags = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
        StringBuffer dest;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
        // step 1
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
        // find out if all the codepoints in input are ASCII
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
        boolean isASCII = isAllASCII(label);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
        if(!isASCII){
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
            // step 2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
            // perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
            try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
                UCharacterIterator iter = UCharacterIterator.getInstance(label);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
                dest = namePrep.prepare(iter, flag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
            } catch (Exception e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
                // toUnicode never fails; if any step fails, return the input string
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
                return label;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   368
            dest = new StringBuffer(label);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   369
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
        // step 3
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
        // verify ACE Prefix
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
        if(startsWithACEPrefix(dest)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
            // step 4
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
            // Remove the ACE Prefix
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
            String temp = dest.substring(ACE_PREFIX_LENGTH, dest.length());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
            try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   380
                // step 5
90ce3da70b43 Initial load
duke
parents:
diff changeset
   381
                // Decode using punycode
90ce3da70b43 Initial load
duke
parents:
diff changeset
   382
                StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), null);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
                // step 6
90ce3da70b43 Initial load
duke
parents:
diff changeset
   385
                // Apply toASCII
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
                String toASCIIOut = toASCII(decodeOut.toString(), flag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   387
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
                // step 7
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
                // verify
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
                if (toASCIIOut.equalsIgnoreCase(dest.toString())) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
                    // step 8
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
                    // return output of step 5
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
                    return decodeOut.toString();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
            } catch (Exception ignored) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   396
                // no-op
90ce3da70b43 Initial load
duke
parents:
diff changeset
   397
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   398
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   399
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
        // just return the input
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
        return label;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   406
    // LDH stands for "letter/digit/hyphen", with characters restricted to the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   407
    // 26-letter Latin alphabet <A-Z a-z>, the digits <0-9>, and the hyphen
19790
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   408
    // <->.
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   409
    // Non LDH refers to characters in the ASCII range, but which are not
52499
768b1c612100 8213490: Networking area typos and inconsistencies cleanup
prappo
parents: 47216
diff changeset
   410
    // letters, digits or the hyphen.
19790
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   411
    //
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   412
    // non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7F
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
    //
19790
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   414
    private static boolean isNonLDHAsciiCodePoint(int ch){
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   415
        return (0x0000 <= ch && ch <= 0x002C) ||
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   416
               (0x002E <= ch && ch <= 0x002F) ||
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   417
               (0x003A <= ch && ch <= 0x0040) ||
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   418
               (0x005B <= ch && ch <= 0x0060) ||
d97d46e9bddf 8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
xuelei
parents: 19440
diff changeset
   419
               (0x007B <= ch && ch <= 0x007F);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   420
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   421
90ce3da70b43 Initial load
duke
parents:
diff changeset
   422
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   423
    // search dots in a string and return the index of that character;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   424
    // or if there are no dots, return the length of the input string
90ce3da70b43 Initial load
duke
parents:
diff changeset
   425
    // dots might be: \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop),
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
    // and \uFF61 (halfwidth ideographic full stop).
90ce3da70b43 Initial load
duke
parents:
diff changeset
   427
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
    private static int searchDots(String s, int start) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
        int i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
        for (i = start; i < s.length(); i++) {
19440
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   431
            if (isLabelSeparator(s.charAt(i))) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
        return i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
19440
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   439
    //
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   440
    // to check if a string is a root label, ".".
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   441
    //
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   442
    private static boolean isRootLabel(String s) {
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   443
        return (s.length() == 1 && isLabelSeparator(s.charAt(0)));
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   444
    }
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   445
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   446
    //
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   447
    // to check if a character is a label separator, i.e. a dot character.
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   448
    //
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   449
    private static boolean isLabelSeparator(char c) {
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   450
        return (c == '.' || c == '\u3002' || c == '\uFF0E' || c == '\uFF61');
c4414bc88602 8020842: IDN do not throw IAE when hostname ends with a trailing dot
xuelei
parents: 19069
diff changeset
   451
    }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   454
    // to check if a string only contains US-ASCII code points
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
    private static boolean isAllASCII(String input) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
        boolean isASCII = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
        for (int i = 0; i < input.length(); i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
            int c = input.charAt(i);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
            if (c > 0x7F) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
                isASCII = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   462
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   464
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
        return isASCII;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
    // to check if a string starts with ACE-prefix
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
    private static boolean startsWithACEPrefix(StringBuffer input){
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
        boolean startsWithPrefix = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
        if(input.length() < ACE_PREFIX_LENGTH){
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
            return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
        for(int i = 0; i < ACE_PREFIX_LENGTH; i++){
90ce3da70b43 Initial load
duke
parents:
diff changeset
   478
            if(toASCIILower(input.charAt(i)) != ACE_PREFIX.charAt(i)){
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
                startsWithPrefix = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   482
        return startsWithPrefix;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
90ce3da70b43 Initial load
duke
parents:
diff changeset
   485
    private static char toASCIILower(char ch){
90ce3da70b43 Initial load
duke
parents:
diff changeset
   486
        if('A' <= ch && ch <= 'Z'){
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
            return (char)(ch + 'a' - 'A');
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
        return ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
    private static StringBuffer toASCIILower(StringBuffer input){
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
        StringBuffer dest = new StringBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   494
        for(int i = 0; i < input.length();i++){
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
            dest.append(toASCIILower(input.charAt(i)));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
        return dest;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   498
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   499
}