8023881: IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode in IDN.toASCII
Reviewed-by: michaelm
--- a/jdk/src/share/classes/java/net/IDN.java Thu Aug 29 10:43:46 2013 -0700
+++ b/jdk/src/share/classes/java/net/IDN.java Thu Aug 29 18:58:18 2013 -0700
@@ -292,13 +292,17 @@
if (useSTD3ASCIIRules) {
for (int i = 0; i < dest.length(); i++) {
int c = dest.charAt(i);
- if (!isLDHChar(c)) {
- throw new IllegalArgumentException("Contains non-LDH characters");
+ if (isNonLDHAsciiCodePoint(c)) {
+ throw new IllegalArgumentException(
+ "Contains non-LDH ASCII characters");
}
}
- if (dest.charAt(0) == '-' || dest.charAt(dest.length() - 1) == '-') {
- throw new IllegalArgumentException("Has leading or trailing hyphen");
+ if (dest.charAt(0) == '-' ||
+ dest.charAt(dest.length() - 1) == '-') {
+
+ throw new IllegalArgumentException(
+ "Has leading or trailing hyphen");
}
}
@@ -401,26 +405,20 @@
//
// LDH stands for "letter/digit/hyphen", with characters restricted to the
// 26-letter Latin alphabet <A-Z a-z>, the digits <0-9>, and the hyphen
- // <->
- // non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x56..0x60, 0x7B..0x7F
+ // <->.
+ // Non-LDH refers to characters in the ASCII range that are not
+ // letters, digits or the hyphen.
+ //
+ // non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7F
//
- private static boolean isLDHChar(int ch){
- // high runner case
- if(ch > 0x007A){
- return false;
- }
- //['-' '0'..'9' 'A'..'Z' 'a'..'z']
- if((ch == 0x002D) ||
- (0x0030 <= ch && ch <= 0x0039) ||
- (0x0041 <= ch && ch <= 0x005A) ||
- (0x0061 <= ch && ch <= 0x007A)
- ){
- return true;
- }
- return false;
+ private static boolean isNonLDHAsciiCodePoint(int ch){
+ return (0x0000 <= ch && ch <= 0x002C) ||
+ (0x002E <= ch && ch <= 0x002F) ||
+ (0x003A <= ch && ch <= 0x0040) ||
+ (0x005B <= ch && ch <= 0x0060) ||
+ (0x007B <= ch && ch <= 0x007F);
}
-
//
// search dots in a string and return the index of that character;
// or if there is no dots, return the length of input string
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/net/IDN/UseSTD3ASCIIRules.java Thu Aug 29 18:58:18 2013 -0700
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8023881
+ * @summary IDN.USE_STD3_ASCII_RULES option is too strict to use Unicode
+ * in IDN.toASCII
+ */
+
+import java.net.*;
+
+public class UseSTD3ASCIIRules {
+
+ public static void main(String[] args) throws Exception {
+ // Per Section 4.1, RFC 3490, if the UseSTD3ASCIIRules flag is set,
+ // then perform these checks:
+ //
+ // (a) Verify the absence of non-LDH ASCII code points; that is, the
+ // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
+ //
+ // (b) Verify the absence of leading and trailing hyphen-minus; that
+ // is, the absence of U+002D at the beginning and end of the
+ // sequence.
+ String[] illegalNames = {
+ "www.example.com-",
+ "-www.example.com",
+ "-www.example.com-",
+ "www.ex\u002Cmple.com",
+ "www.ex\u007Bmple.com",
+ "www.ex\u007Fmple.com"
+ };
+
+ String[] legalNames = {
+ "www.ex-ample.com",
+ "www.ex\u002Dmple.com", // www.ex-mple.com
+ "www.ex\u007Ample.com", // www.exzmple.com
+ "www.ex\u3042mple.com", // www.xn--exmple-j43e.com
+ "www.\u3042\u3044\u3046.com", // www.xn--l8jeg.com
+ "www.\u793A\u4F8B.com" // www.xn--fsq092h.com
+ };
+
+ for (String name : illegalNames) {
+ try {
+ System.out.println("Convering illegal IDN: " + name);
+ IDN.toASCII(name, IDN.USE_STD3_ASCII_RULES);
+ throw new Exception(
+ "Expected to get IllegalArgumentException for " + name);
+ } catch (IllegalArgumentException iae) {
+ // That's the right behavior.
+ }
+ }
+
+ for (String name : legalNames) {
+ System.out.println("Convering legal IDN: " + name);
+ System.out.println("\tThe ACE form is: " +
+ IDN.toASCII(name, IDN.USE_STD3_ASCII_RULES));
+ }
+ }
+}