8008386: (cs) Unmappable leading should be decoded to replacement.
author sherman
Mon, 21 Oct 2013 18:22:39 -0700
changeset 21309 5adf83468a1d
parent 21308 638d0533f230
child 21310 4595a9405051
8008386: (cs) Unmappable leading should be decoded to replacement.
Summary: updated the unmappable/malformed detecting handling for db charsets
Reviewed-by: naoto
jdk/src/share/classes/sun/nio/cs/ext/DoubleByte.java
jdk/test/sun/nio/cs/TestIBMBugs.java
jdk/test/sun/nio/cs/TestUnmappable.java
--- a/jdk/src/share/classes/sun/nio/cs/ext/DoubleByte.java	Tue Oct 22 06:13:01 2013 +0900
+++ b/jdk/src/share/classes/sun/nio/cs/ext/DoubleByte.java	Mon Oct 21 18:22:39 2013 -0700
@@ -111,7 +111,6 @@
     public static class Decoder extends CharsetDecoder
                                 implements DelegatableDecoder, ArrayDecoder
     {
-
         final char[][] b2c;
         final char[] b2cSB;
         final int b2Min;
@@ -122,7 +121,12 @@
             return CoderResult.UNDERFLOW;
         }
 
-        protected CoderResult crMalformedOrUnmappable(int b) {
+        protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
+            if (b2c[b1] == B2C_UNMAPPABLE ||                // isNotLeadingByte(b1)
+                b2c[b2] != B2C_UNMAPPABLE ||                // isLeadingByte(b2)
+                decodeSingle(b2) != UNMAPPABLE_DECODING) {  // isSingle(b2)
+                return CoderResult.malformedForLength(1);
+            }
             return CoderResult.unmappableForLength(2);
         }
 
@@ -161,7 +165,7 @@
                         int b2 = sa[sp + 1] & 0xff;
                         if (b2 < b2Min || b2 > b2Max ||
                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
-                            return crMalformedOrUnmappable(b1);
+                            return crMalformedOrUnmappable(b1, b2);
                         }
                         inSize++;
                     }
@@ -190,7 +194,7 @@
                         int b2 = src.get() & 0xff;
                         if (b2 < b2Min || b2 > b2Max ||
                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING)
-                            return crMalformedOrUnmappable(b1);
+                            return crMalformedOrUnmappable(b1, b2);
                         inSize++;
                     }
                     dst.put(c);
@@ -221,8 +225,13 @@
                 if (c == UNMAPPABLE_DECODING) {
                     if (sp < sl) {
                         int b2 = src[sp++] & 0xff;
-                        if (b2 >= b2Min && b2 <= b2Max) {
-                            c = b2c[b1][b2 - b2Min];
+                        if (b2 < b2Min || b2 > b2Max ||
+                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
+                            if (b2c[b1] == B2C_UNMAPPABLE ||  // isNotLeadingByte
+                                b2c[b2] != B2C_UNMAPPABLE ||  // isLeadingByte
+                                decodeSingle(b2) != UNMAPPABLE_DECODING) {
+                                sp--;
+                            }
                         }
                     }
                     if (c == UNMAPPABLE_DECODING) {
@@ -466,8 +475,8 @@
             return CoderResult.UNDERFLOW;
         }
 
-        protected CoderResult crMalformedOrUnmappable(int b) {
-            if (b == SS2 || b == SS3 )
+        protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
+            if (b1 == SS2 || b1 == SS3 )
                 return CoderResult.malformedForLength(1);
             return CoderResult.unmappableForLength(2);
         }
--- a/jdk/test/sun/nio/cs/TestIBMBugs.java	Tue Oct 22 06:13:01 2013 +0900
+++ b/jdk/test/sun/nio/cs/TestIBMBugs.java	Mon Oct 21 18:22:39 2013 -0700
@@ -147,16 +147,17 @@
     }
 
     private static void bug6569191 () throws Exception {
-        byte[] bs = new byte[] { (byte)0x81, (byte)0xad,
-                                 (byte)0x81, (byte)0xae,
-                                 (byte)0x81, (byte)0xaf,
-                                 (byte)0x81, (byte)0xb0,
-                                 (byte)0x85, (byte)0x81,
-                                 (byte)0x85, (byte)0x87,
-                                 (byte)0x85, (byte)0xe0,
-                                 (byte)0x85, (byte)0xf0 };
+        byte[] bs = new byte[] { (byte)0x81, (byte)0xad,  // fffd ff6d
+                                 (byte)0x81, (byte)0xae,  // fffd ff6e
+                                 (byte)0x81, (byte)0xaf,  // fffd ff6f
+                                 (byte)0x81, (byte)0xb0,  // fffd ff70
+                                 (byte)0x85, (byte)0x81,  // fffd ->
+                                 (byte)0x85, (byte)0x87,  // 2266 ->
+                                 (byte)0x85, (byte)0xe0,  // 32a4 ->
+                                 (byte)0x85, (byte)0xf0 };// 7165 fffd
         String s = new String(bs, "Cp943");
-        if (!"\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd"
+        // see DoubleByte for how the unmappables are handled
+        if (!"\ufffd\uff6d\ufffd\uff6e\ufffd\uff6f\ufffd\uff70\ufffd\u2266\u32a4\u7165\ufffd"
             .equals(s))
             throw new Exception("Cp943 failed");
     }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/nio/cs/TestUnmappable.java	Mon Oct 21 18:22:39 2013 -0700
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8008386
+ * @summary (cs) Unmappable leading should be decoded to replacement.
+ *          Tests for Shift_JIS and MS932 decoding
+ * @run main TestUnmappable
+ */
+
+import java.nio.*;
+import java.nio.charset.*;
+
+public class TestUnmappable {
+    // Decodes inputBytes[i] with charsets[i], re-encodes it, and compares the hex dump.
+    public static void main(String[] args) throws Exception {
+        // illegal leading character test
+        byte[][] inputBytes = {
+                               // Shift_JIS
+                               {(byte)0xce, (byte)0xa0, (byte)0xce, (byte)0x7a},
+                               // MS932
+                               {(byte)0x3c, (byte)0x21, (byte)0x2d, (byte)0x2d,
+                                (byte)0xe5, (byte)0xaf, (byte)0xbe, (byte)0xe5,
+                                (byte)0xbf, (byte)0x9c, (byte)0x2d, (byte)0x2d,
+                                (byte)0x3e, (byte)0xd,  (byte)0xa },
+                               {(byte)0x81, (byte)0xad},
+                               // PCK
+                               {(byte)0xef, (byte)0x90},
+                               {(byte)0x91, (byte)0xfd}
+                              };
+        // parallel to inputBytes/expectedStrings: one charset name per row
+        String[] charsets = { "Shift_JIS", "MS932", "MS932", "PCK", "PCK" };
+        String[] expectedStrings = {
+                                    // Shift_JIS
+                                    "0xce 0x3f 0xce 0x7a ",
+                                    // MS932
+                                    "0x3c 0x21 0x2d 0x2d 0xe5 0xaf 0xbe 0xe5 0xbf " +
+                                    "0x3f 0x2d 0x2d 0x3e 0xd 0xa ",
+                                    "0x3f 0xad ",
+                                    // PCK
+                                    "0x3f 0x3f ",
+                                    "0x3f "};
+
+        for (int i = 0; i < inputBytes.length; i++) {
+            String ret = new String(inputBytes[i], charsets[i]);
+            String bString = getByteString(ret.getBytes(Charset.forName(charsets[i])));
+            if (!expectedStrings[i].equals(bString)) {
+                throw new Exception("ByteToChar for " + charsets[i]
+                    + " does not work correctly.\n" +
+                    "Expected: " + expectedStrings[i] + "\n" +
+                    "Received: " + bString);
+            }
+        }
+    }
+
+    // Renders each byte as "0x<hex> " (lowercase hex, no zero padding, trailing space).
+    private static String getByteString(byte[] bytes) {
+        StringBuilder sb = new StringBuilder();
+        for (byte b : bytes) {
+            sb.append("0x").append(Integer.toHexString(b & 0xFF)).append(' ');
+        }
+        return sb.toString();
+    }
+}