8025003: Base64 should be less strict with padding
authorsherman
Thu, 24 Oct 2013 11:12:59 -0700
changeset 21363 82e139fdd879
parent 21362 8b2d68e8632a
child 21364 decde35d5139
8025003: Base64 should be less strict with padding Summary: updated spec and implementation of mime decoder to be lenient for padding Reviewed-by: alanb
jdk/src/share/classes/java/util/Base64.java
jdk/test/java/util/Base64/Base64GetEncoderTest.java
jdk/test/java/util/Base64/TestBase64.java
jdk/test/java/util/Base64/TestBase64Golden.java
--- a/jdk/src/share/classes/java/util/Base64.java	Thu Oct 24 10:52:15 2013 -0700
+++ b/jdk/src/share/classes/java/util/Base64.java	Thu Oct 24 11:12:59 2013 -0700
@@ -127,7 +127,7 @@
      *          character of "The Base64 Alphabet" as specified in Table 1 of
      *          RFC 2045.
      */
-    public static Encoder getEncoder(int lineLength, byte[] lineSeparator) {
+    public static Encoder getMimeEncoder(int lineLength, byte[] lineSeparator) {
          Objects.requireNonNull(lineSeparator);
          int[] base64 = Decoder.fromBase64;
          for (byte b : lineSeparator) {
@@ -619,11 +619,20 @@
      * required. So if the final unit of the encoded byte data only has
      * two or three Base64 characters (without the corresponding padding
      * character(s) padded), they are decoded as if followed by padding
-     * character(s). If there is padding character present in the
-     * final unit, the correct number of padding character(s) must be
-     * present, otherwise {@code IllegalArgumentException} (
-     * {@code IOException} when reading from a Base64 stream) is thrown
-     * during decoding.
+     * character(s).
+     * <p>
+     * For decoders that use the <a href="#basic">Basic</a> and
+     * <a href="#url">URL and Filename safe</a> type base64 scheme, and
+     * if there is padding character present in the final unit, the
+     * correct number of padding character(s) must be present, otherwise
+     * {@code IllegalArgumentException} ({@code IOException} when reading
+     * from a Base64 stream) is thrown during decoding.
+     * <p>
+     * Decoders that use the <a href="#mime">MIME</a> type base64 scheme
+     * are more lenient when decoding the padding character(s). If the
+     * padding character(s) is incorrectly encoded, the first padding
+     * character encountered is interpreted as the end of the encoded byte
+     * data, the decoding operation will then end and return normally.
      *
      * <p> Instances of {@link Decoder} class are safe for use by
      * multiple concurrent threads.
@@ -902,8 +911,9 @@
                     int b = sa[sp++] & 0xff;
                     if ((b = base64[b]) < 0) {
                         if (b == -2) {   // padding byte
-                            if (shiftto == 6 && (sp == sl || sa[sp++] != '=') ||
-                                shiftto == 18) {
+                            if (!isMIME &&
+                                (shiftto == 6 && (sp == sl || sa[sp++] != '=') ||
+                                 shiftto == 18)) {
                                 throw new IllegalArgumentException(
                                      "Input byte array has wrong 4-byte ending unit");
                             }
@@ -942,11 +952,12 @@
                     throw new IllegalArgumentException(
                         "Last unit does not have enough valid bits");
                 }
-                while (sp < sl) {
-                    if (isMIME && base64[sa[sp++]] < 0)
-                        continue;
-                    throw new IllegalArgumentException(
-                        "Input byte array has incorrect ending byte at " + sp);
+                if (sp < sl) {
+                    if (isMIME)
+                        sp = sl;
+                    else
+                        throw new IllegalArgumentException(
+                            "Input byte array has incorrect ending byte at " + sp);
                 }
                 mark = sp;
                 return dp - dp0;
@@ -971,8 +982,9 @@
                     int b = src.get(sp++) & 0xff;
                     if ((b = base64[b]) < 0) {
                         if (b == -2) {  // padding byte
-                            if (shiftto == 6 && (sp == sl || src.get(sp++) != '=') ||
-                                shiftto == 18) {
+                            if (!isMIME &&
+                                (shiftto == 6 && (sp == sl || src.get(sp++) != '=') ||
+                                 shiftto == 18)) {
                                 throw new IllegalArgumentException(
                                      "Input byte array has wrong 4-byte ending unit");
                             }
@@ -1011,11 +1023,12 @@
                     throw new IllegalArgumentException(
                         "Last unit does not have enough valid bits");
                 }
-                while (sp < sl) {
-                    if (isMIME && base64[src.get(sp++)] < 0)
-                        continue;
-                    throw new IllegalArgumentException(
-                        "Input byte array has incorrect ending byte at " + sp);
+                if (sp < sl) {
+                    if (isMIME)
+                        sp = sl;
+                    else
+                        throw new IllegalArgumentException(
+                            "Input byte array has incorrect ending byte at " + sp);
                 }
                 mark = sp;
                 return dp - dp0;
@@ -1071,14 +1084,15 @@
             while (sp < sl) {
                 int b = src[sp++] & 0xff;
                 if ((b = base64[b]) < 0) {
-                    if (b == -2) {     // padding byte '='
-                        // xx=   shiftto==6&&sp==sl missing last =
-                        // xx=y  shiftto==6 last is not =
-                        // =     shiftto==18 unnecessary padding
-                        // x=    shiftto==12 be taken care later
-                        //       together with single x, invalid anyway
-                        if (shiftto == 6 && (sp == sl || src[sp++] != '=') ||
-                            shiftto == 18) {
+                    if (b == -2) {         // padding byte '='
+                        if (!isMIME  &&    // be lenient for rfc2045
+                            // =     shiftto==18 unnecessary padding
+                            // x=    shiftto==12 a dangling single x
+                            // x     to be handled together with non-padding case
+                            // xx=   shiftto==6&&sp==sl missing last =
+                            // xx=y  shiftto==6 last is not =
+                            (shiftto == 6 && (sp == sl || src[sp++] != '=') ||
+                            shiftto == 18)) {
                             throw new IllegalArgumentException(
                                 "Input byte array has wrong 4-byte ending unit");
                         }
@@ -1108,14 +1122,14 @@
                 dst[dp++] = (byte)(bits >> 16);
                 dst[dp++] = (byte)(bits >>  8);
             } else if (shiftto == 12) {
+                // dangling single "x", throw exception even in lenient mode,
+                // it's incorrectly encoded.
                 throw new IllegalArgumentException(
                     "Last unit does not have enough valid bits");
             }
             // anything left is invalid, if is not MIME.
-            // if MIME, ignore all non-base64 character
-            while (sp < sl) {
-                if (isMIME && base64[src[sp++]] < 0)
-                    continue;
+            // if MIME (lenient), just ignore all leftover
+            if (sp < sl && !isMIME) {
                 throw new IllegalArgumentException(
                     "Input byte array has incorrect ending byte at " + sp);
             }
@@ -1285,7 +1299,7 @@
                         if (nextin == 12)
                             throw new IOException("Base64 stream has one un-decoded dangling byte.");
                         // treat ending xx/xxx without padding character legal.
-                        // same logic as v == 'v' below
+                        // same logic as v == '=' below
                         b[off++] = (byte)(bits >> (16));
                         len--;
                         if (nextin == 0) {           // only one padding byte
@@ -1304,21 +1318,31 @@
                 }
                 if (v == '=') {                  // padding byte(s)
                     // =     shiftto==18 unnecessary padding
-                    // x=    shiftto==12 invalid unit
+                    // x=    shiftto==12 dangling x, invalid unit
                     // xx=   shiftto==6 && missing last '='
-                    // xx=y                or last is not '='
+                    // xx=y  or last is not '='
                     if (nextin == 18 || nextin == 12 ||
                         nextin == 6 && is.read() != '=') {
-                        throw new IOException("Illegal base64 ending sequence:" + nextin);
-                    }
-                    b[off++] = (byte)(bits >> (16));
-                    len--;
-                    if (nextin == 0) {           // only one padding byte
-                        if (len == 0) {          // no enough output space
-                            bits >>= 8;          // shift to lowest byte
-                            nextout = 0;
-                        } else {
-                            b[off++] = (byte) (bits >>  8);
+                        if (!isMIME || nextin == 12) {
+                            throw new IOException("Illegal base64 ending sequence:" + nextin);
+                        } else if (nextin != 18) {
+                            // lenient mode for mime
+                            // (1) handle the "unnecessary padding in "xxxx ="
+                            //     case as the eof (nextin == 18)
+                            // (2) decode "xx=" and "xx=y" normally
+                            b[off++] = (byte)(bits >> (16));
+                            len--;
+                        }
+                    } else {
+                        b[off++] = (byte)(bits >> (16));
+                        len--;
+                        if (nextin == 0) {           // only one padding byte
+                            if (len == 0) {          // no enough output space
+                                bits >>= 8;          // shift to lowest byte
+                                nextout = 0;
+                            } else {
+                                b[off++] = (byte) (bits >>  8);
+                            }
                         }
                     }
                     eof = true;
--- a/jdk/test/java/util/Base64/Base64GetEncoderTest.java	Thu Oct 24 10:52:15 2013 -0700
+++ b/jdk/test/java/util/Base64/Base64GetEncoderTest.java	Thu Oct 24 11:12:59 2013 -0700
@@ -41,7 +41,7 @@
 public class Base64GetEncoderTest {
 
     public static void main(String args[]) throws Throwable {
-        final Base64.Encoder encoder = Base64.getEncoder(0, "$$$".getBytes(US_ASCII));
+        final Base64.Encoder encoder = Base64.getMimeEncoder(0, "$$$".getBytes(US_ASCII));
 
         testEncodeToString(encoder);
 
--- a/jdk/test/java/util/Base64/TestBase64.java	Thu Oct 24 10:52:15 2013 -0700
+++ b/jdk/test/java/util/Base64/TestBase64.java	Thu Oct 24 11:12:59 2013 -0700
@@ -23,7 +23,7 @@
 
 /**
  * @test 4235519 8004212 8005394 8007298 8006295 8006315 8006530 8007379 8008925
- *       8014217
+ *       8014217 8025003
  * @summary tests java.util.Base64
  */
 
@@ -60,13 +60,13 @@
         byte[] nl_3 = new byte[] {'\n', '\r', '\n'};
         for (int i = 0; i < 10; i++) {
             int len = rnd.nextInt(200) + 4;
-            test(Base64.getEncoder(len, nl_1),
+            test(Base64.getMimeEncoder(len, nl_1),
                  Base64.getMimeDecoder(),
                  numRuns, numBytes);
-            test(Base64.getEncoder(len, nl_2),
+            test(Base64.getMimeEncoder(len, nl_2),
                  Base64.getMimeDecoder(),
                  numRuns, numBytes);
-            test(Base64.getEncoder(len, nl_3),
+            test(Base64.getMimeEncoder(len, nl_3),
                  Base64.getMimeDecoder(),
                  numRuns, numBytes);
         }
@@ -74,16 +74,16 @@
         testNull(Base64.getEncoder());
         testNull(Base64.getUrlEncoder());
         testNull(Base64.getMimeEncoder());
-        testNull(Base64.getEncoder(10, new byte[]{'\n'}));
+        testNull(Base64.getMimeEncoder(10, new byte[]{'\n'}));
         testNull(Base64.getDecoder());
         testNull(Base64.getUrlDecoder());
         testNull(Base64.getMimeDecoder());
-        checkNull(new Runnable() { public void run() { Base64.getEncoder(10, null); }});
+        checkNull(new Runnable() { public void run() { Base64.getMimeEncoder(10, null); }});
 
         testIOE(Base64.getEncoder());
         testIOE(Base64.getUrlEncoder());
         testIOE(Base64.getMimeEncoder());
-        testIOE(Base64.getEncoder(10, new byte[]{'\n'}));
+        testIOE(Base64.getMimeEncoder(10, new byte[]{'\n'}));
 
         byte[] src = new byte[1024];
         new Random().nextBytes(src);
@@ -93,7 +93,7 @@
         testIOE(Base64.getUrlDecoder(), Base64.getUrlEncoder().encode(src));
 
         // illegal line separator
-        checkIAE(new Runnable() { public void run() { Base64.getEncoder(10, new byte[]{'\r', 'N'}); }});
+        checkIAE(new Runnable() { public void run() { Base64.getMimeEncoder(10, new byte[]{'\r', 'N'}); }});
 
         // illegal base64 character
         decoded[2] = (byte)0xe0;
@@ -109,8 +109,6 @@
             Base64.getDecoder().decode(ByteBuffer.wrap(decoded), ByteBuffer.allocateDirect(1024)); }});
 
         // illegal ending unit
-        checkIAE(new Runnable() { public void run() { Base64.getMimeDecoder().decode("$=#"); }});
-
         checkIOE(new Testable() { public void test() throws IOException {
                                      byte[] bytes = "AA=".getBytes("ASCII");
                                      try (InputStream stream =
@@ -129,6 +127,10 @@
         testDecodeUnpadded();
         // test mime decoding with ignored character after padding
         testDecodeIgnoredAfterPadding();
+
+        // lenient mode for ending unit
+        testLenientPadding();
+
     }
 
     private static sun.misc.BASE64Encoder sunmisc = new sun.misc.BASE64Encoder();
@@ -436,6 +438,48 @@
         }
     }
 
+    private static void testLenientPadding() throws Throwable {
+        String[] data = new String[] {
+            "=",         "",        // unnecessary padding
+            "QUJD=",     "ABC",     //"ABC".encode() -> "QUJD"
+
+            "QQ=",       "A",       // incomplete padding
+            "QQ=N",      "A",       // incorrect padding
+            "QQ=?",      "A",
+            "QUJDQQ=",   "ABCA",
+            "QUJDQQ=N",  "ABCA",
+            "QUJDQQ=?",  "ABCA",
+
+            "QUI=X",     "AB",      // incorrect padding
+            "QUI=?",     "AB",      // incorrect padding
+        };
+        Base64.Decoder dec = Base64.getMimeDecoder();
+
+        for (int i = 0; i < data.length; i += 2) {
+            byte[] src = data[i].getBytes("ASCII");
+            byte[] expected = data[i + 1].getBytes("ASCII");
+            // decode(byte[])
+            byte[] ret = dec.decode(src);
+            checkEqual(ret, expected, "lenient padding decoding failed!");
+
+            // decode(String)
+            ret = dec.decode(data[i]);
+            checkEqual(ret, expected, "lenient padding decoding failed!");
+
+            // decode(ByteBuffer)
+            ByteBuffer srcBB = ByteBuffer.wrap(src);
+            ByteBuffer retBB = dec.decode(srcBB);
+            checkEqual(srcBB.remaining(), 0, "lenient padding decoding failed!");
+            checkEqual(Arrays.copyOf(retBB.array(), retBB.remaining()),
+                       expected, "lenient padding decoding failed!");
+
+            // wrap.decode(byte[])
+            ret = new byte[10];
+            int n = dec.wrap(new ByteArrayInputStream(src)).read(ret);
+            checkEqual(Arrays.copyOf(ret, n), expected, "lenient padding decoding failed!");
+        }
+    }
+
     private static void  testDecodeUnpadded() throws Throwable {
         byte[] srcA = new byte[] { 'Q', 'Q' };
         byte[] srcAA = new byte[] { 'Q', 'Q', 'E'};
--- a/jdk/test/java/util/Base64/TestBase64Golden.java	Thu Oct 24 10:52:15 2013 -0700
+++ b/jdk/test/java/util/Base64/TestBase64Golden.java	Thu Oct 24 11:12:59 2013 -0700
@@ -230,7 +230,7 @@
             46, -97, -35, -44, 127, -60, -39, -4, -112, 34, -57, 47, -14, 67,
             40, 18, 90, -59, 68, 112, 23, 121, -91, 94, 35, 49, 104, 17, 30,
             -80, -104, -3, -53, 27, 38, -72, -47, 113, -52, 18, 5, -126 };
-        Encoder encoder = Base64.getEncoder(49, new byte[] { 0x7e });
+        Encoder encoder = Base64.getMimeEncoder(49, new byte[] { 0x7e });
         byte[] encoded = encoder.encode(src);
         Decoder decoder = Base64.getMimeDecoder();
         byte[] decoded = decoder.decode(encoded);