4691425: GZIPInputStream fails to read concatenated .gz files
author sherman
Mon, 24 May 2010 00:39:57 -0400
changeset 5618 d17b52843430
parent 5617 1b0d8c3d6223
child 5619 88a384ff7110
child 5620 ed56433bf5d6
4691425: GZIPInputStream fails to read concatenated .gz files Summary: to support concatenated .gz streams Reviewed-by: martin
jdk/src/share/classes/java/util/zip/GZIPInputStream.java
jdk/test/java/util/zip/GZIP/GZIPInputStreamRead.java
--- a/jdk/src/share/classes/java/util/zip/GZIPInputStream.java	Mon May 24 10:05:04 2010 +0800
+++ b/jdk/src/share/classes/java/util/zip/GZIPInputStream.java	Mon May 24 00:39:57 2010 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright 1996-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1996-2010 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -75,8 +75,7 @@
     public GZIPInputStream(InputStream in, int size) throws IOException {
         super(in, new Inflater(true), size);
         usesDefaultInflater = true;
-        readHeader();
-        crc.reset();
+        readHeader(in);    // parse the first member header; readHeader resets crc before returning
     }
 
     /**
@@ -114,14 +113,16 @@
         if (eos) {
             return -1;
         }
-        len = super.read(buf, off, len);
-        if (len == -1) {
-            readTrailer();
-            eos = true;
+        int n = super.read(buf, off, len);
+        if (n == -1) {
+            if (readTrailer())
+                eos = true;
+            else
+                return this.read(buf, off, len);
         } else {
-            crc.update(buf, off, len);
+            crc.update(buf, off, n);
         }
-        return len;
+        return n;
     }
 
     /**
@@ -152,10 +153,11 @@
     private final static int FCOMMENT   = 16;   // File comment
 
     /*
-     * Reads GZIP member header.
+     * Reads GZIP member header and returns the total number of bytes
+     * in this member header.
      */
-    private void readHeader() throws IOException {
-        CheckedInputStream in = new CheckedInputStream(this.in, crc);
+    private int readHeader(InputStream this_in) throws IOException {
+        CheckedInputStream in = new CheckedInputStream(this_in, crc);
         crc.reset();
         // Check header magic
         if (readUShort(in) != GZIP_MAGIC) {
@@ -169,17 +171,24 @@
         int flg = readUByte(in);
         // Skip MTIME, XFL, and OS fields
         skipBytes(in, 6);
+        int n = 2 + 2 + 6;
         // Skip optional extra field
         if ((flg & FEXTRA) == FEXTRA) {
-            skipBytes(in, readUShort(in));
+            int m = readUShort(in);
+            skipBytes(in, m);
+            n += m + 2;
         }
         // Skip optional file name
         if ((flg & FNAME) == FNAME) {
-            while (readUByte(in) != 0) ;
+            do {
+                n++;
+            } while (readUByte(in) != 0);
         }
         // Skip optional file comment
         if ((flg & FCOMMENT) == FCOMMENT) {
-            while (readUByte(in) != 0) ;
+            do {
+                n++;
+            } while (readUByte(in) != 0);
         }
         // Check optional header CRC
         if ((flg & FHCRC) == FHCRC) {
@@ -187,13 +196,18 @@
             if (readUShort(in) != v) {
                 throw new ZipException("Corrupt GZIP header");
             }
+            n += 2;
         }
+        crc.reset();
+        return n;
     }
 
     /*
-     * Reads GZIP member trailer.
+     * Reads GZIP member trailer and returns true if the end of the
+     * stream has been reached, or false if more data follows (a
+     * concatenated gzip member).
      */
-    private void readTrailer() throws IOException {
+    private boolean readTrailer() throws IOException {
         InputStream in = this.in;
         int n = inf.getRemaining();
         if (n > 0) {
@@ -205,6 +219,24 @@
             // rfc1952; ISIZE is the input size modulo 2^32
             (readUInt(in) != (inf.getBytesWritten() & 0xffffffffL)))
             throw new ZipException("Corrupt GZIP trailer");
+
+        // If there are more bytes available in "in" or
+        // the leftover in the "inf" is > 26 bytes:
+        // this.trailer(8) + next.header.min(10) + next.trailer(8)
+        // try concatenated case
+        if (this.in.available() > 0 || n > 26) {
+            int m = 8;                  // this.trailer
+            try {
+                m += readHeader(in);    // next.header
+            } catch (IOException ze) {
+                return true;  // ignore any malformed, do nothing
+            }
+            inf.reset();
+            if (n > m)
+                inf.setInput(buf, len - n + m, n - m);
+            return false;
+        }
+        return true;
     }
 
     /*
@@ -239,7 +271,6 @@
         return b;
     }
 
-
     private byte[] tmpbuf = new byte[128];
 
     /*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/util/zip/GZIP/GZIPInputStreamRead.java	Mon May 24 00:39:57 2010 -0400
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/* @test
+ * @bug 4691425
+ * @summary Test the read and write of GZIPInput/OutputStream, including
+ *          concatenated .gz inputstream
+ */
+
+import java.io.*;
+import java.util.*;
+import java.util.zip.*;
+
+public class GZIPInputStreamRead {    // round-trips random data through concatenated gzip members
+    public static void main(String[] args) throws Throwable {
+        Random rnd = new Random();    // unseeded: every run uses different data
+        for (int i = 1; i < 100; i++) {    // 99 iterations (i starts at 1)
+            int members = rnd.nextInt(10) + 1;    // 1..10 gzip members in this stream
+
+            ByteArrayOutputStream srcBAOS = new ByteArrayOutputStream();    // uncompressed bytes of all members, in order
+            ByteArrayOutputStream dstBAOS = new ByteArrayOutputStream();    // the gzip members, written back to back
+            for (int j = 0; j < members; j++) {
+                byte[] src = new byte[rnd.nextInt(8192) + 1];    // 1..8192 random bytes per member
+                rnd.nextBytes(src);
+                srcBAOS.write(src);
+
+                GZIPOutputStream gzos = new GZIPOutputStream(dstBAOS);    // new stream per member, all appending to dstBAOS
+                gzos.write(src);
+                gzos.close();
+            }
+            byte[] srcBytes = srcBAOS.toByteArray();
+            byte[] dstBytes = dstBAOS.toByteArray();
+            // Read the stream back with several combinations of caller
+            // read-buffer size and GZIPInputStream buffer size.
+            /* For manual runs only; stale: test() takes four arguments.
+            for (int j = 1; j < 10; j++) {
+                test(srcBytes, dstBytes, j);
+            }
+            */
+            for (int j = 0; j < 10; j++) {
+                int readBufSZ = rnd.nextInt(2048) + 1;    // caller's read buffer: 1..2048 bytes
+                test(srcBytes,
+                     dstBytes,
+                     readBufSZ,
+                     512);    // the default buffer size
+                test(srcBytes,
+                     dstBytes,
+                     readBufSZ,
+                     rnd.nextInt(4096) + 1);    // random GZIPInputStream buffer size: 1..4096
+            }
+        }
+    }
+    // Inflates dst using the given caller/stream buffer sizes and checks that the output equals src.
+    private static void test(byte[] src, byte[] dst,
+                             int readBufSize, int gzisBufSize)
+        throws Throwable
+    {
+        GZIPInputStream gzis = new GZIPInputStream(
+                                   new ByteArrayInputStream(dst),
+                                   gzisBufSize);
+        byte[] result = new byte[src.length + 10];    // slightly oversized so a small over-read shows up in the off check
+        byte[] buf = new byte[readBufSize];
+        int n = 0;
+        int off = 0;
+
+        while ((n = gzis.read(buf, 0, buf.length)) != -1) {
+            System.arraycopy(buf, 0, result, off, n);
+            off += n;
+            // no range check: if result overflows, arraycopy throws and the test fails
+        }
+        if (off != src.length || gzis.available() != 0 ||
+            !Arrays.equals(src, Arrays.copyOf(result, off))) {
+            throw new RuntimeException(
+                "GZIPInputStream reading failed! " +
+                ", src.len=" + src.length +
+                ", read=" + off);
+        }
+        gzis.close();    // not reached when the check above throws
+    }
+}