8006295: Base64.Decoder.wrap(java.io.InputStream) returns InputStream which throws unspecified IOException on attempt to decode invalid Base64 byte stream
8006315: Base64.Decoder decoding methods are not consistent in treating non-padded data
8006530: Base64.getMimeDecoder().decode() throws exception for non-base64 character after adding =
Summary: updated the spec to describe the expected behavior explicitly and the implementation to follow
Reviewed-by: alanb, chegar, lancea
--- a/jdk/src/share/classes/java/util/Base64.java Mon Feb 04 17:20:26 2013 +0000
+++ b/jdk/src/share/classes/java/util/Base64.java Mon Feb 04 11:58:43 2013 -0800
@@ -64,7 +64,8 @@
* RFC 2045 for encoding and decoding operation. The encoded output
* must be represented in lines of no more than 76 characters each
* and uses a carriage return {@code '\r'} followed immediately by
- * a linefeed {@code '\n'} as the line separator. All line separators
+ * a linefeed {@code '\n'} as the line separator. No line separator
+ * is added to the end of the encoded output. All line separators
* or other characters not found in the base64 alphabet table are
* ignored in decoding operation.</p></li>
* </ul>
@@ -614,6 +615,13 @@
* This class implements a decoder for decoding byte data using the
* Base64 encoding scheme as specified in RFC 4648 and RFC 2045.
*
+ * <p> The Base64 padding character {@code '='} is accepted and
+ * interpreted as the end of the encoded byte data, but is not
+ * required. So if the final unit of the encoded byte data only has
+ * two or three Base64 characters (without the corresponding padding
+ * character(s) padded), they are decoded as if followed by padding
+ * character(s).
+ *
* <p> Instances of {@link Decoder} class are safe for use by
* multiple concurrent threads.
*
@@ -857,6 +865,9 @@
/**
* Returns an input stream for decoding {@link Base64} encoded byte stream.
*
+ * <p> The {@code read} methods of the returned {@code InputStream} will
+ * throw {@code IOException} when reading bytes that cannot be decoded.
+ *
* <p> Closing the returned input stream will close the underlying
* input stream.
*
@@ -883,13 +894,16 @@
int dl = dst.arrayOffset() + dst.limit();
int dp0 = dp;
int mark = sp;
- boolean padding = false;
try {
while (sp < sl) {
int b = sa[sp++] & 0xff;
if ((b = base64[b]) < 0) {
if (b == -2) { // padding byte
- padding = true;
+ if (shiftto == 6 && (sp == sl || sa[sp++] != '=') ||
+ shiftto == 18) {
+ throw new IllegalArgumentException(
+ "Input byte array has wrong 4-byte ending unit");
+ }
break;
}
if (isMIME) // skip if for rfc2045
@@ -915,24 +929,23 @@
if (shiftto == 6) {
if (dl - dp < 1)
return dp - dp0;
- if (padding && (sp + 1 != sl || sa[sp++] != '='))
- throw new IllegalArgumentException(
- "Input buffer has wrong 4-byte ending unit");
da[dp++] = (byte)(bits >> 16);
- mark = sp;
} else if (shiftto == 0) {
if (dl - dp < 2)
return dp - dp0;
- if (padding && sp != sl)
- throw new IllegalArgumentException(
- "Input buffer has wrong 4-byte ending unit");
da[dp++] = (byte)(bits >> 16);
da[dp++] = (byte)(bits >> 8);
- mark = sp;
- } else if (padding || shiftto != 18) {
+ } else if (shiftto == 12) {
throw new IllegalArgumentException(
"Last unit does not have enough valid bits");
}
+ while (sp < sl) {
+ if (isMIME && base64[sa[sp++]] < 0)
+ continue;
+ throw new IllegalArgumentException(
+ "Input byte array has incorrect ending byte at " + sp);
+ }
+ mark = sp;
return dp - dp0;
} finally {
src.position(mark);
@@ -950,14 +963,16 @@
int dl = dst.limit();
int dp0 = dp;
int mark = sp;
- boolean padding = false;
-
try {
while (sp < sl) {
int b = src.get(sp++) & 0xff;
if ((b = base64[b]) < 0) {
if (b == -2) { // padding byte
- padding = true;
+ if (shiftto == 6 && (sp == sl || src.get(sp++) != '=') ||
+ shiftto == 18) {
+ throw new IllegalArgumentException(
+ "Input byte array has wrong 4-byte ending unit");
+ }
break;
}
if (isMIME) // skip if for rfc2045
@@ -983,24 +998,23 @@
if (shiftto == 6) {
if (dl - dp < 1)
return dp - dp0;
- if (padding && (sp + 1 != sl || src.get(sp++) != '='))
- throw new IllegalArgumentException(
- "Input buffer has wrong 4-byte ending unit");
dst.put(dp++, (byte)(bits >> 16));
- mark = sp;
} else if (shiftto == 0) {
if (dl - dp < 2)
return dp - dp0;
- if (padding && sp != sl)
- throw new IllegalArgumentException(
- "Input buffer has wrong 4-byte ending unit");
dst.put(dp++, (byte)(bits >> 16));
dst.put(dp++, (byte)(bits >> 8));
- mark = sp;
- } else if (padding || shiftto != 18) {
+ } else if (shiftto == 12) {
throw new IllegalArgumentException(
"Last unit does not have enough valid bits");
}
+ while (sp < sl) {
+ if (isMIME && base64[src.get(sp++)] < 0)
+ continue;
+ throw new IllegalArgumentException(
+ "Input byte array has incorrect ending byte at " + sp);
+ }
+ mark = sp;
return dp - dp0;
} finally {
src.position(mark);
@@ -1048,12 +1062,20 @@
int dp = 0;
int bits = 0;
int shiftto = 18; // pos of first byte of 4-byte atom
- boolean padding = false;
while (sp < sl) {
int b = src[sp++] & 0xff;
if ((b = base64[b]) < 0) {
- if (b == -2) { // padding byte
- padding = true;
+ if (b == -2) { // padding byte '='
+ // xx= shiftto==6&&sp==sl missing last =
+ // xx=y shiftto==6 last is not =
+ // = shiftto==18 unnecessary padding
+ // x= shiftto==12 be taken care later
+ // together with single x, invalid anyway
+ if (shiftto == 6 && (sp == sl || src[sp++] != '=') ||
+ shiftto == 18) {
+ throw new IllegalArgumentException(
+ "Input byte array has wrong 4-byte ending unit");
+ }
break;
}
if (isMIME) // skip if for rfc2045
@@ -1073,22 +1095,23 @@
bits = 0;
}
}
- // reach end of byte arry or hit padding '=' characters.
- // if '=' presents, they must be the last one or two.
- if (shiftto == 6) { // xx==
- if (padding && (sp + 1 != sl || src[sp] != '='))
- throw new IllegalArgumentException(
- "Input byte array has wrong 4-byte ending unit");
+ // reached end of byte array or hit padding '=' characters.
+ if (shiftto == 6) {
dst[dp++] = (byte)(bits >> 16);
- } else if (shiftto == 0) { // xxx=
- if (padding && sp != sl)
- throw new IllegalArgumentException(
- "Input byte array has wrong 4-byte ending unit");
+ } else if (shiftto == 0) {
dst[dp++] = (byte)(bits >> 16);
dst[dp++] = (byte)(bits >> 8);
- } else if (padding || shiftto != 18) {
- throw new IllegalArgumentException(
- "last unit does not have enough bytes");
+ } else if (shiftto == 12) {
+ throw new IllegalArgumentException(
+ "Last unit does not have enough valid bits");
+ }
+ // anything left is invalid if not MIME.
+ // if MIME, ignore all non-base64 characters
+ while (sp < sl) {
+ if (isMIME && base64[src[sp++]] < 0)
+ continue;
+ throw new IllegalArgumentException(
+ "Input byte array has incorrect ending byte at " + sp);
}
return dp;
}
@@ -1252,8 +1275,22 @@
int v = is.read();
if (v == -1) {
eof = true;
- if (nextin != 18)
- throw new IOException("Base64 stream has un-decoded dangling byte(s).");
+ if (nextin != 18) {
+ if (nextin == 12)
+ throw new IOException("Base64 stream has one un-decoded dangling byte.");
+ // treat ending xx/xxx without padding character legal.
+ // same logic as v == '=' below
+ b[off++] = (byte)(bits >> (16));
+ len--;
+ if (nextin == 0) { // only one padding byte
+ if (len == 0) { // no enough output space
+ bits >>= 8; // shift to lowest byte
+ nextout = 0;
+ } else {
+ b[off++] = (byte) (bits >> 8);
+ }
+ }
+ }
if (off == oldOff)
return -1;
else
--- a/jdk/test/java/util/Base64/TestBase64.java Mon Feb 04 17:20:26 2013 +0000
+++ b/jdk/test/java/util/Base64/TestBase64.java Mon Feb 04 11:58:43 2013 -0800
@@ -22,7 +22,7 @@
*/
/**
- * @test 4235519 8004212 8005394 8007298
+ * @test 4235519 8004212 8005394 8007298 8006295 8006315 8006530
* @summary tests java.util.Base64
*/
@@ -112,6 +112,12 @@
// test single-non-base64 character for mime decoding
testSingleNonBase64MimeDec();
+
+ // test decoding of unpadded data
+ testDecodeUnpadded();
+
+ // test mime decoding with ignored character after padding
+ testDecodeIgnoredAfterPadding();
}
private static sun.misc.BASE64Encoder sunmisc = new sun.misc.BASE64Encoder();
@@ -359,6 +365,81 @@
} catch (IllegalArgumentException iae) {}
}
+ private static void testDecodeIgnoredAfterPadding() throws Throwable { // a byte appended after MIME-encoded data: MIME decoder must ignore it, basic decoder must throw IAE
+ for (byte nonBase64 : new byte[] {'#', '(', '!', '\\', '-', '_', '\n', '\r'}) { // each byte is appended, one at a time, to every encoded input
+ byte[][] src = new byte[][] { // plain inputs of length 1..5
+ "A".getBytes("ascii"),
+ "AB".getBytes("ascii"),
+ "ABC".getBytes("ascii"),
+ "ABCD".getBytes("ascii"),
+ "ABCDE".getBytes("ascii")
+ };
+ Base64.Encoder encM = Base64.getMimeEncoder();
+ Base64.Decoder decM = Base64.getMimeDecoder();
+ Base64.Encoder enc = Base64.getEncoder(); // NOTE(review): enc is never used in this method
+ Base64.Decoder dec = Base64.getDecoder();
+ for (int i = 0; i < src.length; i++) {
+ // decode(byte[]): MIME-encode, then append the single non-base64 byte
+ byte[] encoded = encM.encode(src[i]);
+ encoded = Arrays.copyOf(encoded, encoded.length + 1); // grow by one slot for the extra byte
+ encoded[encoded.length - 1] = nonBase64;
+ checkEqual(decM.decode(encoded), src[i], "Non-base64 char is not ignored");
+ try {
+ dec.decode(encoded);
+ throw new RuntimeException("No IAE for non-base64 char"); // reached only if decode did not throw
+ } catch (IllegalArgumentException iae) {} // expected: basic decoder rejects the extra byte
+
+ // decode(ByteBuffer, ByteBuffer) with an array-wrapping source and a heap destination
+ ByteBuffer encodedBB = ByteBuffer.wrap(encoded);
+ ByteBuffer decodedBB = ByteBuffer.allocate(100);
+ int ret = decM.decode(encodedBB, decodedBB);
+ byte[] buf = new byte[ret]; // ret is used as the number of decoded bytes to copy out
+ decodedBB.flip();
+ decodedBB.get(buf);
+ checkEqual(buf, src[i], "Non-base64 char is not ignored");
+ try {
+ encodedBB.rewind(); // re-read the same encoded bytes with the basic decoder
+ decodedBB.clear();
+ dec.decode(encodedBB, decodedBB);
+ throw new RuntimeException("No IAE for non-base64 char"); // reached only if decode did not throw
+ } catch (IllegalArgumentException iae) {} // expected
+ // same checks with a direct destination buffer (the source stays the wrapped array)
+ encodedBB.rewind();
+ decodedBB = ByteBuffer.allocateDirect(100);
+ ret = decM.decode(encodedBB, decodedBB);
+ buf = new byte[ret];
+ decodedBB.flip();
+ decodedBB.get(buf);
+ checkEqual(buf, src[i], "Non-base64 char is not ignored");
+ try {
+ encodedBB.rewind();
+ decodedBB.clear(); // decodedBB is the direct buffer at this point
+ dec.decode(encodedBB, decodedBB);
+ throw new RuntimeException("No IAE for non-base64 char"); // reached only if decode did not throw
+ } catch (IllegalArgumentException iae) {} // expected
+ }
+ }
+ }
+
+ private static void testDecodeUnpadded() throws Throwable { // a final unit of 2 or 3 chars without '=' must decode as if it were padded
+ byte[] srcA = new byte[] { 'Q', 'Q' }; // "QQ==" (encoding of "A") with the padding removed
+ byte[] srcAA = new byte[] { 'Q', 'U', 'E'}; // "QUE=" (encoding of "AA") with the padding removed; "QQE" would decode to {0x41, 0x01}
+ Base64.Decoder dec = Base64.getDecoder();
+ byte[] ret = dec.decode(srcA);
+ if (ret.length != 1 || ret[0] != 'A')
+ throw new RuntimeException("Decoding unpadding input A failed");
+ ret = dec.decode(srcAA);
+ if (ret.length != 2 || ret[0] != 'A' || ret[1] != 'A') // '||': fail when any single check is wrong
+ throw new RuntimeException("Decoding unpadding input AA failed");
+ ret = new byte[10]; // scratch buffer for the stream reads below
+ if (dec.wrap(new ByteArrayInputStream(srcA)).read(ret) != 1 ||
+ ret[0] != 'A')
+ throw new RuntimeException("Decoding unpadding input A from stream failed");
+ if (dec.wrap(new ByteArrayInputStream(srcAA)).read(ret) != 2 ||
+ ret[0] != 'A' || ret[1] != 'A')
+ throw new RuntimeException("Decoding unpadding input AA from stream failed");
+ }
+
// single-non-base64-char should be ignored for mime decoding, but
// iae for basic decoding
private static void testSingleNonBase64MimeDec() throws Throwable {