# HG changeset patch # User sherman # Date 1360007923 28800 # Node ID 47318ecea33bed7489c79829d911112215c2b022 # Parent 859facd7058057d33d76bce5396fc35846c3054a 8006295: Base64.Decoder.wrap(java.io.InputStream) returns InputStream which throws unspecified IOException on attempt to decode invalid Base64 byte stream 8006315: Base64.Decoder decoding methods are not consistent in treating non-padded data 8006530: Base64.getMimeDecoder().decode() throws exception for non-base64 character after adding = Summary: updated the spec to describe the expected behave explicitly and the implementation to follow Reviewed-by: alanb, chegar, lancea diff -r 859facd70580 -r 47318ecea33b jdk/src/share/classes/java/util/Base64.java --- a/jdk/src/share/classes/java/util/Base64.java Mon Feb 04 17:20:26 2013 +0000 +++ b/jdk/src/share/classes/java/util/Base64.java Mon Feb 04 11:58:43 2013 -0800 @@ -64,7 +64,8 @@ * RFC 2045 for encoding and decoding operation. The encoded output * must be represented in lines of no more than 76 characters each * and uses a carriage return {@code '\r'} followed immediately by - * a linefeed {@code '\n'} as the line separator. All line separators + * a linefeed {@code '\n'} as the line separator. No line separator + * is added to the end of the encoded output. All line separators * or other characters not found in the base64 alphabet table are * ignored in decoding operation.

* @@ -614,6 +615,13 @@ * This class implements a decoder for decoding byte data using the * Base64 encoding scheme as specified in RFC 4648 and RFC 2045. * + *

The Base64 padding character {@code '='} is accepted and + * interpreted as the end of the encoded byte data, but is not + * required. So if the final unit of the encoded byte data only has + * two or three Base64 characters (without the corresponding padding + * character(s) padded), they are decoded as if followed by padding + * character(s). + * *

Instances of {@link Decoder} class are safe for use by * multiple concurrent threads. * @@ -857,6 +865,9 @@ /** * Returns an input stream for decoding {@link Base64} encoded byte stream. * + *

The {@code read} methods of the returned {@code InputStream} will + * throw {@code IOException} when reading bytes that cannot be decoded. + * *

Closing the returned input stream will close the underlying * input stream. * @@ -883,13 +894,16 @@ int dl = dst.arrayOffset() + dst.limit(); int dp0 = dp; int mark = sp; - boolean padding = false; try { while (sp < sl) { int b = sa[sp++] & 0xff; if ((b = base64[b]) < 0) { if (b == -2) { // padding byte - padding = true; + if (shiftto == 6 && (sp == sl || sa[sp++] != '=') || + shiftto == 18) { + throw new IllegalArgumentException( + "Input byte array has wrong 4-byte ending unit"); + } break; } if (isMIME) // skip if for rfc2045 @@ -915,24 +929,23 @@ if (shiftto == 6) { if (dl - dp < 1) return dp - dp0; - if (padding && (sp + 1 != sl || sa[sp++] != '=')) - throw new IllegalArgumentException( - "Input buffer has wrong 4-byte ending unit"); da[dp++] = (byte)(bits >> 16); - mark = sp; } else if (shiftto == 0) { if (dl - dp < 2) return dp - dp0; - if (padding && sp != sl) - throw new IllegalArgumentException( - "Input buffer has wrong 4-byte ending unit"); da[dp++] = (byte)(bits >> 16); da[dp++] = (byte)(bits >> 8); - mark = sp; - } else if (padding || shiftto != 18) { + } else if (shiftto == 12) { throw new IllegalArgumentException( "Last unit does not have enough valid bits"); } + while (sp < sl) { + if (isMIME && base64[sa[sp++]] < 0) + continue; + throw new IllegalArgumentException( + "Input byte array has incorrect ending byte at " + sp); + } + mark = sp; return dp - dp0; } finally { src.position(mark); @@ -950,14 +963,16 @@ int dl = dst.limit(); int dp0 = dp; int mark = sp; - boolean padding = false; - try { while (sp < sl) { int b = src.get(sp++) & 0xff; if ((b = base64[b]) < 0) { if (b == -2) { // padding byte - padding = true; + if (shiftto == 6 && (sp == sl || src.get(sp++) != '=') || + shiftto == 18) { + throw new IllegalArgumentException( + "Input byte array has wrong 4-byte ending unit"); + } break; } if (isMIME) // skip if for rfc2045 @@ -983,24 +998,23 @@ if (shiftto == 6) { if (dl - dp < 1) return dp - dp0; - if (padding && (sp + 1 != sl || src.get(sp++) != '=')) - throw new IllegalArgumentException( - "Input buffer has wrong 4-byte ending unit"); dst.put(dp++, (byte)(bits >> 16)); - mark = sp; } else if (shiftto == 0) { if (dl - dp < 2) return dp - dp0; - if (padding && sp != sl) - throw new IllegalArgumentException( - "Input buffer has wrong 4-byte ending unit"); dst.put(dp++, (byte)(bits >> 16)); dst.put(dp++, (byte)(bits >> 8)); - mark = sp; - } else if (padding || shiftto != 18) { + } else if (shiftto == 12) { throw new IllegalArgumentException( "Last unit does not have enough valid bits"); } + while (sp < sl) { + if (isMIME && base64[src.get(sp++)] < 0) + continue; + throw new IllegalArgumentException( + "Input byte array has incorrect ending byte at " + sp); + } + mark = sp; return dp - dp0; } finally { src.position(mark); @@ -1048,12 +1062,20 @@ int dp = 0; int bits = 0; int shiftto = 18; // pos of first byte of 4-byte atom - boolean padding = false; while (sp < sl) { int b = src[sp++] & 0xff; if ((b = base64[b]) < 0) { - if (b == -2) { // padding byte - padding = true; + if (b == -2) { // padding byte '=' + // xx= shiftto==6&&sp==sl missing last = + // xx=y shiftto==6 last is not = + // = shiftto==18 unnecessary padding + // x= shiftto==12 be taken care later + // together with single x, invalid anyway + if (shiftto == 6 && (sp == sl || src[sp++] != '=') || + shiftto == 18) { + throw new IllegalArgumentException( + "Input byte array has wrong 4-byte ending unit"); + } break; } if (isMIME) // skip if for rfc2045 @@ -1073,22 +1095,23 @@ bits = 0; } } - // reach end of byte arry or hit padding '=' characters. - // if '=' presents, they must be the last one or two. - if (shiftto == 6) { // xx== - if (padding && (sp + 1 != sl || src[sp] != '=')) - throw new IllegalArgumentException( - "Input byte array has wrong 4-byte ending unit"); + // reached end of byte array or hit padding '=' characters. + if (shiftto == 6) { dst[dp++] = (byte)(bits >> 16); - } else if (shiftto == 0) { // xxx= - if (padding && sp != sl) - throw new IllegalArgumentException( - "Input byte array has wrong 4-byte ending unit"); + } else if (shiftto == 0) { dst[dp++] = (byte)(bits >> 16); dst[dp++] = (byte)(bits >> 8); - } else if (padding || shiftto != 18) { - throw new IllegalArgumentException( - "last unit does not have enough bytes"); + } else if (shiftto == 12) { + throw new IllegalArgumentException( + "Last unit does not have enough valid bits"); + } + // anything left is invalid, if is not MIME. + // if MIME, ignore all non-base64 character + while (sp < sl) { + if (isMIME && base64[src[sp++]] < 0) + continue; + throw new IllegalArgumentException( + "Input byte array has incorrect ending byte at " + sp); } return dp; } @@ -1252,8 +1275,22 @@ int v = is.read(); if (v == -1) { eof = true; - if (nextin != 18) - throw new IOException("Base64 stream has un-decoded dangling byte(s)."); + if (nextin != 18) { + if (nextin == 12) + throw new IOException("Base64 stream has one un-decoded dangling byte."); + // treat ending xx/xxx without padding character legal. + // same logic as v == 'v' below + b[off++] = (byte)(bits >> (16)); + len--; + if (nextin == 0) { // only one padding byte + if (len == 0) { // no enough output space + bits >>= 8; // shift to lowest byte + nextout = 0; + } else { + b[off++] = (byte) (bits >> 8); + } + } + } if (off == oldOff) return -1; else diff -r 859facd70580 -r 47318ecea33b jdk/test/java/util/Base64/TestBase64.java --- a/jdk/test/java/util/Base64/TestBase64.java Mon Feb 04 17:20:26 2013 +0000 +++ b/jdk/test/java/util/Base64/TestBase64.java Mon Feb 04 11:58:43 2013 -0800 @@ -22,7 +22,7 @@ */ /** - * @test 4235519 8004212 8005394 8007298 + * @test 4235519 8004212 8005394 8007298 8006295 8006315 8006530 * @summary tests java.util.Base64 */ @@ -112,6 +112,12 @@ // test single-non-base64 character for mime decoding testSingleNonBase64MimeDec(); + + // test decoding of unpadded data + testDecodeUnpadded(); + + // test mime decoding with ignored character after padding + testDecodeIgnoredAfterPadding(); } private static sun.misc.BASE64Encoder sunmisc = new sun.misc.BASE64Encoder(); @@ -359,6 +365,81 @@ } catch (IllegalArgumentException iae) {} } + private static void testDecodeIgnoredAfterPadding() throws Throwable { + for (byte nonBase64 : new byte[] {'#', '(', '!', '\\', '-', '_', '\n', '\r'}) { + byte[][] src = new byte[][] { + "A".getBytes("ascii"), + "AB".getBytes("ascii"), + "ABC".getBytes("ascii"), + "ABCD".getBytes("ascii"), + "ABCDE".getBytes("ascii") + }; + Base64.Encoder encM = Base64.getMimeEncoder(); + Base64.Decoder decM = Base64.getMimeDecoder(); + Base64.Encoder enc = Base64.getEncoder(); + Base64.Decoder dec = Base64.getDecoder(); + for (int i = 0; i < src.length; i++) { + // decode(byte[]) + byte[] encoded = encM.encode(src[i]); + encoded = Arrays.copyOf(encoded, encoded.length + 1); + encoded[encoded.length - 1] = nonBase64; + checkEqual(decM.decode(encoded), src[i], "Non-base64 char is not ignored"); + try { + dec.decode(encoded); + throw new RuntimeException("No IAE for non-base64 char"); + } catch (IllegalArgumentException iae) {} + + // decode(ByteBuffer[], ByteBuffer[]) + ByteBuffer encodedBB = ByteBuffer.wrap(encoded); + ByteBuffer decodedBB = ByteBuffer.allocate(100); + int ret = decM.decode(encodedBB, decodedBB); + byte[] buf = new byte[ret]; + decodedBB.flip(); + decodedBB.get(buf); + checkEqual(buf, src[i], "Non-base64 char is not ignored"); + try { + encodedBB.rewind(); + decodedBB.clear(); + dec.decode(encodedBB, decodedBB); + throw new RuntimeException("No IAE for non-base64 char"); + } catch (IllegalArgumentException iae) {} + // direct + encodedBB.rewind(); + decodedBB = ByteBuffer.allocateDirect(100); + ret = decM.decode(encodedBB, decodedBB); + buf = new byte[ret]; + decodedBB.flip(); + decodedBB.get(buf); + checkEqual(buf, src[i], "Non-base64 char is not ignored"); + try { + encodedBB.rewind(); + decodedBB.clear(); + dec.decode(encodedBB, decodedBB); + throw new RuntimeException("No IAE for non-base64 char"); + } catch (IllegalArgumentException iae) {} + } + } + } + + private static void testDecodeUnpadded() throws Throwable { + byte[] srcA = new byte[] { 'Q', 'Q' }; + byte[] srcAA = new byte[] { 'Q', 'Q', 'E'}; + Base64.Decoder dec = Base64.getDecoder(); + byte[] ret = dec.decode(srcA); + if (ret[0] != 'A') + throw new RuntimeException("Decoding unpadding input A failed"); + ret = dec.decode(srcAA); + if (ret[0] != 'A' && ret[1] != 'A') + throw new RuntimeException("Decoding unpadding input AA failed"); + ret = new byte[10]; + if (dec.wrap(new ByteArrayInputStream(srcA)).read(ret) != 1 && + ret[0] != 'A') + throw new RuntimeException("Decoding unpadding input A from stream failed"); + if (dec.wrap(new ByteArrayInputStream(srcA)).read(ret) != 2 && + ret[0] != 'A' && ret[1] != 'A') + throw new RuntimeException("Decoding unpadding input AA from stream failed"); + } + // single-non-base64-char should be ignored for mime decoding, but // iae for basic decoding private static void testSingleNonBase64MimeDec() throws Throwable {