2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
Reviewed-by: rupashka
--- a/jdk/src/share/classes/javax/swing/text/html/parser/Parser.java Wed Oct 31 09:25:20 2012 -0700
+++ b/jdk/src/share/classes/javax/swing/text/html/parser/Parser.java Fri Nov 02 15:57:20 2012 +0400
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -952,7 +952,7 @@
ch = readCh();
break;
}
- char data[] = {mapNumericReference((char) n)};
+ char data[] = mapNumericReference(n);
return data;
}
addString('#');
@@ -1021,7 +1021,7 @@
}
/**
- * Converts numeric character reference to Unicode character.
+ * Converts numeric character reference to char array.
*
* Normally the code in a reference should be always converted
* to the Unicode character with the same code, but due to
@@ -1030,13 +1030,21 @@
* to displayable characters with other codes.
*
* @param c the code of numeric character reference.
- * @return the character corresponding to the reference code.
+ * @return a char array corresponding to the reference code.
*/
- private char mapNumericReference(char c) {
- if (c < 130 || c > 159) {
- return c;
+ private char[] mapNumericReference(int c) {
+ if (!Character.isValidCodePoint(c)) {
+ // Negative or above U+10FFFF: not a character, so contribute no chars.
+ return new char[0];
+ }
+ if (Character.isSupplementaryCodePoint(c)) {
+ // Above U+FFFF: a single char cannot hold it, emit the surrogate pair.
+ return Character.toChars(c);
+ }
+ if (c < 130 || c > 159) { // only codes 130..159 go through cp1252Map
+ return new char[] {(char) c};
}
- return cp1252Map[c - 130];
+ return new char[] {cp1252Map[c - 130]};
}
/**
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/javax/swing/text/html/parser/Parser/6836089/bug6836089.java Fri Nov 02 15:57:20 2012 +0400
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 6836089
+ * @summary Tests correct parsing of characters outside Base Multilingual Plane
+ * @author Vladislav Karnaukhov
+ */
+
+import javax.swing.*;
+import javax.swing.text.html.*;
+
+public class bug6836089 {
+
+ public static void main(String[] args) throws Exception {
+ SwingUtilities.invokeAndWait(new Runnable() {
+ @Override
+ public void run() {
+ JTextPane htmlPane = new JTextPane();
+ htmlPane.setEditorKit(new HTMLEditorKit());
+ // &#131072; is U+20000 (outside the BMP); it must stay a numeric reference so the parser's reference decoding is what gets tested.
+ htmlPane.setText("<html><head></head><body>&#131072;</body></html>");
+ String str = htmlPane.getText();
+ if (str.contains("&#0;")) { // 131072 narrowed to char is 0; presumably serialized back as &#0; when the reference was truncated
+ throw new RuntimeException("Test failed");
+ }
+ }
+ });
+ }
+}