--- a/jdk/src/share/classes/sun/nio/cs/UTF_8.java Sat Apr 12 20:21:09 2014 +0100
+++ b/jdk/src/share/classes/sun/nio/cs/UTF_8.java Sat Apr 12 14:38:50 2014 -0700
@@ -111,12 +111,18 @@
(b4 & 0xc0) != 0x80;
}
- // only used when there is less than 4 bytes left in src buffer
+ // only used when there are fewer than 4 bytes left in src buffer.
+ // both b1 and b2 should be masked with "& 0xff" before being passed in.
private static boolean isMalformed4_2(int b1, int b2) {
- return (b1 == 0xf0 && b2 == 0x90) ||
+ return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
+ (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
(b2 & 0xc0) != 0x80;
}
+ // tests if b3 is malformed as the third byte of a 4-byte utf-8
+ // byte sequence, i.e. it is not a continuation byte (10xxxxxx).
+ // only used when there are fewer than 4 bytes left in src buffer,
+ // after isMalformed4_2 has been invoked on the first 2 bytes.
private static boolean isMalformed4_3(int b3) {
return (b3 & 0xc0) != 0x80;
}
@@ -280,7 +286,9 @@
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
int srcRemaining = sl - sp;
if (srcRemaining < 4 || dl - dp < 2) {
- if (srcRemaining > 1 && isMalformed4_2(b1, sa[sp + 1]))
+ b1 &= 0xff;
+ if (b1 > 0xf4 ||
+ srcRemaining > 1 && isMalformed4_2(b1, sa[sp + 1] & 0xff))
return malformedForLength(src, sp, dst, dp, 1);
if (srcRemaining > 2 && isMalformed4_3(sa[sp + 2]))
return malformedForLength(src, sp, dst, dp, 2);
@@ -363,7 +371,9 @@
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
int srcRemaining = limit - mark;
if (srcRemaining < 4 || dst.remaining() < 2) {
- if (srcRemaining > 1 && isMalformed4_2(b1, src.get()))
+ b1 &= 0xff;
+ if (b1 > 0xf4 ||
+ srcRemaining > 1 && isMalformed4_2(b1, src.get() & 0xff))
return malformedForLength(src, mark, 1);
if (srcRemaining > 2 && isMalformed4_3(src.get()))
return malformedForLength(src, mark, 2);
@@ -518,8 +528,9 @@
}
if (malformedInputAction() != CodingErrorAction.REPLACE)
return -1;
-
- if (sp < sl && isMalformed4_2(b1, sa[sp])) {
+ b1 &= 0xff;
+ if (b1 > 0xf4 ||
+ sp < sl && isMalformed4_2(b1, sa[sp] & 0xff)) {
da[dp++] = replacement().charAt(0);
continue;
}
--- a/jdk/test/sun/nio/cs/TestUTF8.java Sat Apr 12 20:21:09 2014 +0100
+++ b/jdk/test/sun/nio/cs/TestUTF8.java Sat Apr 12 14:38:50 2014 -0700
@@ -23,7 +23,7 @@
/*
* @test
- * @bug 4486841 7040220 7096080
+ * @bug 4486841 7040220 7096080 8039751
* @summary Test UTF-8 charset
*/
@@ -291,14 +291,18 @@
{1, (byte)0xE0, (byte)0xC0, (byte)0xBF }, // invalid second byte
{2, (byte)0xE0, (byte)0xA0, (byte)0x7F }, // invalid third byte
{2, (byte)0xE0, (byte)0xA0, (byte)0xC0 }, // invalid third byte
+ {2, (byte)0xE1, (byte)0x80, (byte)0x42}, // invalid third byte
+
{1, (byte)0xFF, (byte)0xFF, (byte)0xFF }, // all ones
{1, (byte)0xE0, (byte)0xC0, (byte)0x80 }, // invalid second byte
{1, (byte)0xE0, (byte)0x80, (byte)0xC0 }, // invalid first byte
{1, (byte)0xE0, (byte)0x41,}, // invalid second byte & 2 bytes
+ {1, (byte)0xE1, (byte)0x40,}, // invalid second byte & 2 bytes
{3, (byte)0xED, (byte)0xAE, (byte)0x80 }, // 3 bytes surrogate
{3, (byte)0xED, (byte)0xB0, (byte)0x80 }, // 3 bytes surrogate
+
// Four-byte sequences
{1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
{1, (byte)0xF0, (byte)0x80, (byte)0x81, (byte)0xBF }, // U+007F zero-padded
@@ -323,6 +327,32 @@
{1, (byte)0xF4, (byte)0xC0, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
{1, (byte)0xF5, (byte)0x80, (byte)0x80, (byte)0xC0 }, // out-range 4-byte
+ // #8039751
+ {1, (byte)0xF6, (byte)0x80, (byte)0x80, (byte)0x80 }, // out-range 1st byte
+ {1, (byte)0xF6, (byte)0x80, (byte)0x80, },
+ {1, (byte)0xF6, (byte)0x80, },
+ {1, (byte)0xF6, },
+ {1, (byte)0xF5, (byte)0x80, (byte)0x80, (byte)0x80 }, // out-range 1st byte
+ {1, (byte)0xF5, (byte)0x80, (byte)0x80, },
+ {1, (byte)0xF5, (byte)0x80, },
+ {1, (byte)0xF5 },
+
+ {1, (byte)0xF4, (byte)0x90, (byte)0x80, (byte)0x80 }, // out-range 2nd byte
+ {1, (byte)0xF4, (byte)0x90, (byte)0x80 },
+ {1, (byte)0xF4, (byte)0x90 },
+
+ {1, (byte)0xF4, (byte)0x7f, (byte)0x80, (byte)0x80 }, // out-range/ascii 2nd byte
+ {1, (byte)0xF4, (byte)0x7f, (byte)0x80 },
+ {1, (byte)0xF4, (byte)0x7f },
+
+ {1, (byte)0xF0, (byte)0x80, (byte)0x80, (byte)0x80 }, // out-range 2nd byte
+ {1, (byte)0xF0, (byte)0x80, (byte)0x80 },
+ {1, (byte)0xF0, (byte)0x80 },
+
+ {1, (byte)0xF0, (byte)0xc0, (byte)0x80, (byte)0x80 }, // out-range 2nd byte
+ {1, (byte)0xF0, (byte)0xc0, (byte)0x80 },
+ {1, (byte)0xF0, (byte)0xc0 },
+
// Five-byte sequences
{1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80}, // invalid first byte
{1, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80 }, // U+0000 zero-padded
@@ -553,7 +583,6 @@
check4ByteSurrs("UTF-8");
checkMalformed("UTF-8", malformed);
checkUnderOverflow("UTF-8");
-
checkRoundtrip("CESU-8");
check6ByteSurrs("CESU-8");
checkMalformed("CESU-8", malformed_cesu8);