8025003: Base64 should be less strict with padding
Summary: updated spec and implementation of mime decoder to be lenient for padding
Reviewed-by: alanb
--- a/jdk/src/share/classes/java/util/Base64.java Thu Oct 24 10:52:15 2013 -0700
+++ b/jdk/src/share/classes/java/util/Base64.java Thu Oct 24 11:12:59 2013 -0700
@@ -127,7 +127,7 @@
* character of "The Base64 Alphabet" as specified in Table 1 of
* RFC 2045.
*/
- public static Encoder getEncoder(int lineLength, byte[] lineSeparator) {
+ public static Encoder getMimeEncoder(int lineLength, byte[] lineSeparator) {
Objects.requireNonNull(lineSeparator);
int[] base64 = Decoder.fromBase64;
for (byte b : lineSeparator) {
@@ -619,11 +619,20 @@
* required. So if the final unit of the encoded byte data only has
* two or three Base64 characters (without the corresponding padding
* character(s) padded), they are decoded as if followed by padding
- * character(s). If there is padding character present in the
- * final unit, the correct number of padding character(s) must be
- * present, otherwise {@code IllegalArgumentException} (
- * {@code IOException} when reading from a Base64 stream) is thrown
- * during decoding.
+ * character(s).
+ * <p>
+ * For decoders that use the <a href="#basic">Basic</a> and
+ * <a href="#url">URL and Filename safe</a> type base64 scheme, and
+ * if there is padding character present in the final unit, the
+ * correct number of padding character(s) must be present, otherwise
+ * {@code IllegalArgumentException} ({@code IOException} when reading
+ * from a Base64 stream) is thrown during decoding.
+ * <p>
+ * Decoders that use the <a href="#mime">MIME</a> type base64 scheme
+ * are more lenient when decoding the padding character(s). If the
+ * padding character(s) is incorrectly encoded, the first padding
+ * character encountered is interpreted as the end of the encoded byte
+ * data, the decoding operation will then end and return normally.
*
* <p> Instances of {@link Decoder} class are safe for use by
* multiple concurrent threads.
@@ -902,8 +911,9 @@
int b = sa[sp++] & 0xff;
if ((b = base64[b]) < 0) {
if (b == -2) { // padding byte
- if (shiftto == 6 && (sp == sl || sa[sp++] != '=') ||
- shiftto == 18) {
+ if (!isMIME &&
+ (shiftto == 6 && (sp == sl || sa[sp++] != '=') ||
+ shiftto == 18)) {
throw new IllegalArgumentException(
"Input byte array has wrong 4-byte ending unit");
}
@@ -942,11 +952,12 @@
throw new IllegalArgumentException(
"Last unit does not have enough valid bits");
}
- while (sp < sl) {
- if (isMIME && base64[sa[sp++]] < 0)
- continue;
- throw new IllegalArgumentException(
- "Input byte array has incorrect ending byte at " + sp);
+ if (sp < sl) {
+ if (isMIME)
+ sp = sl;
+ else
+ throw new IllegalArgumentException(
+ "Input byte array has incorrect ending byte at " + sp);
}
mark = sp;
return dp - dp0;
@@ -971,8 +982,9 @@
int b = src.get(sp++) & 0xff;
if ((b = base64[b]) < 0) {
if (b == -2) { // padding byte
- if (shiftto == 6 && (sp == sl || src.get(sp++) != '=') ||
- shiftto == 18) {
+ if (!isMIME &&
+ (shiftto == 6 && (sp == sl || src.get(sp++) != '=') ||
+ shiftto == 18)) {
throw new IllegalArgumentException(
"Input byte array has wrong 4-byte ending unit");
}
@@ -1011,11 +1023,12 @@
throw new IllegalArgumentException(
"Last unit does not have enough valid bits");
}
- while (sp < sl) {
- if (isMIME && base64[src.get(sp++)] < 0)
- continue;
- throw new IllegalArgumentException(
- "Input byte array has incorrect ending byte at " + sp);
+ if (sp < sl) {
+ if (isMIME)
+ sp = sl;
+ else
+ throw new IllegalArgumentException(
+ "Input byte array has incorrect ending byte at " + sp);
}
mark = sp;
return dp - dp0;
@@ -1071,14 +1084,15 @@
while (sp < sl) {
int b = src[sp++] & 0xff;
if ((b = base64[b]) < 0) {
- if (b == -2) { // padding byte '='
- // xx= shiftto==6&&sp==sl missing last =
- // xx=y shiftto==6 last is not =
- // = shiftto==18 unnecessary padding
- // x= shiftto==12 be taken care later
- // together with single x, invalid anyway
- if (shiftto == 6 && (sp == sl || src[sp++] != '=') ||
- shiftto == 18) {
+ if (b == -2) { // padding byte '='
+ if (!isMIME && // be lenient for rfc2045
+ // = shiftto==18 unnecessary padding
+ // x= shiftto==12 a dangling single x
+ // x to be handled together with non-padding case
+ // xx= shiftto==6&&sp==sl missing last =
+ // xx=y shiftto==6 last is not =
+ (shiftto == 6 && (sp == sl || src[sp++] != '=') ||
+ shiftto == 18)) {
throw new IllegalArgumentException(
"Input byte array has wrong 4-byte ending unit");
}
@@ -1108,14 +1122,14 @@
dst[dp++] = (byte)(bits >> 16);
dst[dp++] = (byte)(bits >> 8);
} else if (shiftto == 12) {
+ // dangling single "x", throw exception even in lenient mode,
+ // it's incorrectly encoded.
throw new IllegalArgumentException(
"Last unit does not have enough valid bits");
}
// anything left is invalid, if is not MIME.
- // if MIME, ignore all non-base64 character
- while (sp < sl) {
- if (isMIME && base64[src[sp++]] < 0)
- continue;
+ // if MIME (lenient), just ignore all leftover
+ if (sp < sl && !isMIME) {
throw new IllegalArgumentException(
"Input byte array has incorrect ending byte at " + sp);
}
@@ -1285,7 +1299,7 @@
if (nextin == 12)
throw new IOException("Base64 stream has one un-decoded dangling byte.");
// treat ending xx/xxx without padding character legal.
- // same logic as v == 'v' below
+ // same logic as v == '=' below
b[off++] = (byte)(bits >> (16));
len--;
if (nextin == 0) { // only one padding byte
@@ -1304,21 +1318,31 @@
}
if (v == '=') { // padding byte(s)
// = shiftto==18 unnecessary padding
- // x= shiftto==12 invalid unit
+ // x= shiftto==12 dangling x, invalid unit
// xx= shiftto==6 && missing last '='
- // xx=y or last is not '='
+ // xx=y or last is not '='
if (nextin == 18 || nextin == 12 ||
nextin == 6 && is.read() != '=') {
- throw new IOException("Illegal base64 ending sequence:" + nextin);
- }
- b[off++] = (byte)(bits >> (16));
- len--;
- if (nextin == 0) { // only one padding byte
- if (len == 0) { // no enough output space
- bits >>= 8; // shift to lowest byte
- nextout = 0;
- } else {
- b[off++] = (byte) (bits >> 8);
+ if (!isMIME || nextin == 12) {
+ throw new IOException("Illegal base64 ending sequence:" + nextin);
+ } else if (nextin != 18) {
+ // lenient mode for mime
+ // (1) handle the "unnecessary padding in "xxxx ="
+ // case as the eof (nextin == 18)
+ // (2) decode "xx=" and "xx=y" normally
+ b[off++] = (byte)(bits >> (16));
+ len--;
+ }
+ } else {
+ b[off++] = (byte)(bits >> (16));
+ len--;
+ if (nextin == 0) { // only one padding byte
+ if (len == 0) { // no enough output space
+ bits >>= 8; // shift to lowest byte
+ nextout = 0;
+ } else {
+ b[off++] = (byte) (bits >> 8);
+ }
}
}
eof = true;
--- a/jdk/test/java/util/Base64/Base64GetEncoderTest.java Thu Oct 24 10:52:15 2013 -0700
+++ b/jdk/test/java/util/Base64/Base64GetEncoderTest.java Thu Oct 24 11:12:59 2013 -0700
@@ -41,7 +41,7 @@
public class Base64GetEncoderTest {
public static void main(String args[]) throws Throwable {
- final Base64.Encoder encoder = Base64.getEncoder(0, "$$$".getBytes(US_ASCII));
+ final Base64.Encoder encoder = Base64.getMimeEncoder(0, "$$$".getBytes(US_ASCII));
testEncodeToString(encoder);
--- a/jdk/test/java/util/Base64/TestBase64.java Thu Oct 24 10:52:15 2013 -0700
+++ b/jdk/test/java/util/Base64/TestBase64.java Thu Oct 24 11:12:59 2013 -0700
@@ -23,7 +23,7 @@
/**
* @test 4235519 8004212 8005394 8007298 8006295 8006315 8006530 8007379 8008925
- * 8014217
+ * 8014217 8025003
* @summary tests java.util.Base64
*/
@@ -60,13 +60,13 @@
byte[] nl_3 = new byte[] {'\n', '\r', '\n'};
for (int i = 0; i < 10; i++) {
int len = rnd.nextInt(200) + 4;
- test(Base64.getEncoder(len, nl_1),
+ test(Base64.getMimeEncoder(len, nl_1),
Base64.getMimeDecoder(),
numRuns, numBytes);
- test(Base64.getEncoder(len, nl_2),
+ test(Base64.getMimeEncoder(len, nl_2),
Base64.getMimeDecoder(),
numRuns, numBytes);
- test(Base64.getEncoder(len, nl_3),
+ test(Base64.getMimeEncoder(len, nl_3),
Base64.getMimeDecoder(),
numRuns, numBytes);
}
@@ -74,16 +74,16 @@
testNull(Base64.getEncoder());
testNull(Base64.getUrlEncoder());
testNull(Base64.getMimeEncoder());
- testNull(Base64.getEncoder(10, new byte[]{'\n'}));
+ testNull(Base64.getMimeEncoder(10, new byte[]{'\n'}));
testNull(Base64.getDecoder());
testNull(Base64.getUrlDecoder());
testNull(Base64.getMimeDecoder());
- checkNull(new Runnable() { public void run() { Base64.getEncoder(10, null); }});
+ checkNull(new Runnable() { public void run() { Base64.getMimeEncoder(10, null); }});
testIOE(Base64.getEncoder());
testIOE(Base64.getUrlEncoder());
testIOE(Base64.getMimeEncoder());
- testIOE(Base64.getEncoder(10, new byte[]{'\n'}));
+ testIOE(Base64.getMimeEncoder(10, new byte[]{'\n'}));
byte[] src = new byte[1024];
new Random().nextBytes(src);
@@ -93,7 +93,7 @@
testIOE(Base64.getUrlDecoder(), Base64.getUrlEncoder().encode(src));
// illegal line separator
- checkIAE(new Runnable() { public void run() { Base64.getEncoder(10, new byte[]{'\r', 'N'}); }});
+ checkIAE(new Runnable() { public void run() { Base64.getMimeEncoder(10, new byte[]{'\r', 'N'}); }});
// illegal base64 character
decoded[2] = (byte)0xe0;
@@ -109,8 +109,6 @@
Base64.getDecoder().decode(ByteBuffer.wrap(decoded), ByteBuffer.allocateDirect(1024)); }});
// illegal ending unit
- checkIAE(new Runnable() { public void run() { Base64.getMimeDecoder().decode("$=#"); }});
-
checkIOE(new Testable() { public void test() throws IOException {
byte[] bytes = "AA=".getBytes("ASCII");
try (InputStream stream =
@@ -129,6 +127,10 @@
testDecodeUnpadded();
// test mime decoding with ignored character after padding
testDecodeIgnoredAfterPadding();
+
+ // lenient mode for ending unit
+ testLenientPadding();
+
}
private static sun.misc.BASE64Encoder sunmisc = new sun.misc.BASE64Encoder();
@@ -436,6 +438,48 @@
}
}
+ private static void testLenientPadding() throws Throwable {
+ String[] data = new String[] {
+ "=", "", // unnecessary padding
+ "QUJD=", "ABC", //"ABC".encode() -> "QUJD"
+
+ "QQ=", "A", // incomplete padding
+ "QQ=N", "A", // incorrect padding
+ "QQ=?", "A",
+ "QUJDQQ=", "ABCA",
+ "QUJDQQ=N", "ABCA",
+ "QUJDQQ=?", "ABCA",
+
+ "QUI=X", "AB", // incorrect padding
+ "QUI=?", "AB", // incorrect padding
+ };
+ Base64.Decoder dec = Base64.getMimeDecoder();
+
+ for (int i = 0; i < data.length; i += 2) {
+ byte[] src = data[i].getBytes("ASCII");
+ byte[] expected = data[i + 1].getBytes("ASCII");
+ // decode(byte[])
+ byte[] ret = dec.decode(src);
+ checkEqual(ret, expected, "lenient padding decoding failed!");
+
+ // decode(String)
+ ret = dec.decode(data[i]);
+ checkEqual(ret, expected, "lenient padding decoding failed!");
+
+ // decode(ByteBuffer)
+ ByteBuffer srcBB = ByteBuffer.wrap(src);
+ ByteBuffer retBB = dec.decode(srcBB);
+ checkEqual(srcBB.remaining(), 0, "lenient padding decoding failed!");
+ checkEqual(Arrays.copyOf(retBB.array(), retBB.remaining()),
+ expected, "lenient padding decoding failed!");
+
+ // wrap.decode(byte[])
+ ret = new byte[10];
+ int n = dec.wrap(new ByteArrayInputStream(src)).read(ret);
+ checkEqual(Arrays.copyOf(ret, n), expected, "lenient padding decoding failed!");
+ }
+ }
+
private static void testDecodeUnpadded() throws Throwable {
byte[] srcA = new byte[] { 'Q', 'Q' };
byte[] srcAA = new byte[] { 'Q', 'Q', 'E'};
--- a/jdk/test/java/util/Base64/TestBase64Golden.java Thu Oct 24 10:52:15 2013 -0700
+++ b/jdk/test/java/util/Base64/TestBase64Golden.java Thu Oct 24 11:12:59 2013 -0700
@@ -230,7 +230,7 @@
46, -97, -35, -44, 127, -60, -39, -4, -112, 34, -57, 47, -14, 67,
40, 18, 90, -59, 68, 112, 23, 121, -91, 94, 35, 49, 104, 17, 30,
-80, -104, -3, -53, 27, 38, -72, -47, 113, -52, 18, 5, -126 };
- Encoder encoder = Base64.getEncoder(49, new byte[] { 0x7e });
+ Encoder encoder = Base64.getMimeEncoder(49, new byte[] { 0x7e });
byte[] encoded = encoder.encode(src);
Decoder decoder = Base64.getMimeDecoder();
byte[] decoded = decoder.decode(encoded);