8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
author xuelei
Thu, 29 Aug 2013 18:58:18 -0700
changeset 19790 d97d46e9bddf
parent 19611 0c8d4fbae4f2
child 19791 d0bcc2086175
8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII Reviewed-by: michaelm
jdk/src/share/classes/java/net/IDN.java
jdk/test/java/net/IDN/UseSTD3ASCIIRules.java
--- a/jdk/src/share/classes/java/net/IDN.java	Thu Aug 29 10:43:46 2013 -0700
+++ b/jdk/src/share/classes/java/net/IDN.java	Thu Aug 29 18:58:18 2013 -0700
@@ -292,13 +292,17 @@
         if (useSTD3ASCIIRules) {
             for (int i = 0; i < dest.length(); i++) {
                 int c = dest.charAt(i);
-                if (!isLDHChar(c)) {
-                    throw new IllegalArgumentException("Contains non-LDH characters");
+                if (isNonLDHAsciiCodePoint(c)) {
+                    throw new IllegalArgumentException(
+                        "Contains non-LDH ASCII characters");
                 }
             }
 
-            if (dest.charAt(0) == '-' || dest.charAt(dest.length() - 1) == '-') {
-                throw new IllegalArgumentException("Has leading or trailing hyphen");
+            if (dest.charAt(0) == '-' ||
+                dest.charAt(dest.length() - 1) == '-') {
+
+                throw new IllegalArgumentException(
+                        "Has leading or trailing hyphen");
             }
         }
 
@@ -401,26 +405,20 @@
     //
     // LDH stands for "letter/digit/hyphen", with characters restricted to the
     // 26-letter Latin alphabet <A-Z a-z>, the digits <0-9>, and the hyphen
-    // <->
-    // non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x56..0x60, 0x7B..0x7F
+    // <->.
+    // Non-LDH refers to characters in the ASCII range that are not
+    // letters, digits or the hyphen.
+    //
+    // non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7F
     //
-    private static boolean isLDHChar(int ch){
-        // high runner case
-        if(ch > 0x007A){
-            return false;
-        }
-        //['-' '0'..'9' 'A'..'Z' 'a'..'z']
-        if((ch == 0x002D) ||
-           (0x0030 <= ch && ch <= 0x0039) ||
-           (0x0041 <= ch && ch <= 0x005A) ||
-           (0x0061 <= ch && ch <= 0x007A)
-          ){
-            return true;
-        }
-        return false;
+    private static boolean isNonLDHAsciiCodePoint(int ch){
+        return (0x0000 <= ch && ch <= 0x002C) ||
+               (0x002E <= ch && ch <= 0x002F) ||
+               (0x003A <= ch && ch <= 0x0040) ||
+               (0x005B <= ch && ch <= 0x0060) ||
+               (0x007B <= ch && ch <= 0x007F);
     }
 
-
     //
     // search dots in a string and return the index of that character;
     // or if there is no dots, return the length of input string
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/net/IDN/UseSTD3ASCIIRules.java	Thu Aug 29 18:58:18 2013 -0700
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8023881
+ * @summary IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode
+ *          in IDN.toASCII
+ */
+
+import java.net.*;
+
+public class UseSTD3ASCIIRules {
+
+    public static void main(String[] args) throws Exception {
+        // Per Section 4.1, RFC 3490, if the UseSTD3ASCIIRules flag is set,
+        // then perform these checks:
+        //
+        // (a) Verify the absence of non-LDH ASCII code points; that is, the
+        //     absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
+        //
+        // (b) Verify the absence of leading and trailing hyphen-minus; that
+        //     is, the absence of U+002D at the beginning and end of the
+        //     sequence.
+        String[] illegalNames = {
+                "www.example.com-",
+                "-www.example.com",
+                "-www.example.com-",
+                "www.ex\u002Cmple.com",
+                "www.ex\u007Bmple.com",
+                "www.ex\u007Fmple.com"
+            };
+
+        String[] legalNames = {
+                "www.ex-ample.com",
+                "www.ex\u002Dmple.com",         // www.ex-mple.com
+                "www.ex\u007Ample.com",         // www.exzmple.com
+                "www.ex\u3042mple.com",         // www.xn--exmple-j43e.com
+                "www.\u3042\u3044\u3046.com",   // www.xn--l8jeg.com
+                "www.\u793A\u4F8B.com"          // www.xn--fsq092h.com
+            };
+
+        for (String name : illegalNames) {
+            try {
+                System.out.println("Convering illegal IDN: " + name);
+                IDN.toASCII(name, IDN.USE_STD3_ASCII_RULES);
+                throw new Exception(
+                    "Expected to get IllegalArgumentException for " + name);
+            } catch (IllegalArgumentException iae) {
+                // That's the right behavior.
+            }
+        }
+
+        for (String name : legalNames) {
+            System.out.println("Convering legal IDN: " + name);
+            System.out.println("\tThe ACE form is: " +
+                        IDN.toASCII(name, IDN.USE_STD3_ASCII_RULES));
+        }
+    }
+}