8219196: DataOutputStream.writeUTF may throw unexpected exceptions
authorbpb
Wed, 27 Mar 2019 07:21:18 -0700
changeset 54309 c4c225b49c5f
parent 54308 1dcacbe612ae
child 54310 646c54d5989b
8219196: DataOutputStream.writeUTF may throw unexpected exceptions Reviewed-by: martin, darcy, rriggs Contributed-by: Martin Buchholz <martinrb@google.com>
src/java.base/share/classes/java/io/DataOutputStream.java
test/jdk/java/io/DataOutputStream/WriteUTF.java
--- a/src/java.base/share/classes/java/io/DataOutputStream.java	Wed Mar 27 09:29:34 2019 -0400
+++ b/src/java.base/share/classes/java/io/DataOutputStream.java	Wed Mar 27 07:21:18 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1994, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -317,7 +317,10 @@
      * thrice the length of <code>str</code>.
      *
      * @param      str   a string to be written.
-     * @exception  IOException  if an I/O error occurs.
+     * @throws     UTFDataFormatException  if the modified UTF-8 encoding of
+     *             {@code str} would exceed 65535 bytes in length
+     * @throws     IOException  if some other I/O error occurs.
+     * @see        #writeChars(String)
      */
     public final void writeUTF(String str) throws IOException {
         writeUTF(str, this);
@@ -341,55 +344,49 @@
      * @param      str   a string to be written.
      * @param      out   destination to write to
      * @return     The number of bytes written out.
-     * @exception  IOException  if an I/O error occurs.
+     * @throws     UTFDataFormatException  if the modified UTF-8 encoding of
+     *             {@code str} would exceed 65535 bytes in length
+     * @throws     IOException  if some other I/O error occurs.
      */
     static int writeUTF(String str, DataOutput out) throws IOException {
-        int strlen = str.length();
-        int utflen = 0;
-        int c, count = 0;
+        final int strlen = str.length();
+        int utflen = strlen; // optimized for ASCII
 
-        /* use charAt instead of copying String to char array */
         for (int i = 0; i < strlen; i++) {
-            c = str.charAt(i);
-            if ((c >= 0x0001) && (c <= 0x007F)) {
-                utflen++;
-            } else if (c > 0x07FF) {
-                utflen += 3;
-            } else {
-                utflen += 2;
-            }
+            int c = str.charAt(i);
+            if (c >= 0x80 || c == 0)
+                utflen += (c >= 0x800) ? 2 : 1;
         }
 
-        if (utflen > 65535)
-            throw new UTFDataFormatException(
-                "encoded string too long: " + utflen + " bytes");
+        if (utflen > 65535 || /* overflow */ utflen < strlen)
+            throw new UTFDataFormatException(tooLongMsg(str, utflen));
 
-        byte[] bytearr = null;
+        final byte[] bytearr;
         if (out instanceof DataOutputStream) {
             DataOutputStream dos = (DataOutputStream)out;
-            if(dos.bytearr == null || (dos.bytearr.length < (utflen+2)))
+            if (dos.bytearr == null || (dos.bytearr.length < (utflen + 2)))
                 dos.bytearr = new byte[(utflen*2) + 2];
             bytearr = dos.bytearr;
         } else {
-            bytearr = new byte[utflen+2];
+            bytearr = new byte[utflen + 2];
         }
 
+        int count = 0;
         bytearr[count++] = (byte) ((utflen >>> 8) & 0xFF);
         bytearr[count++] = (byte) ((utflen >>> 0) & 0xFF);
 
-        int i=0;
-        for (i=0; i<strlen; i++) {
-           c = str.charAt(i);
-           if (!((c >= 0x0001) && (c <= 0x007F))) break;
-           bytearr[count++] = (byte) c;
+        int i = 0;
+        for (i = 0; i < strlen; i++) { // optimized for initial run of ASCII
+            int c = str.charAt(i);
+            if (c >= 0x80 || c == 0) break;
+            bytearr[count++] = (byte) c;
         }
 
-        for (;i < strlen; i++){
-            c = str.charAt(i);
-            if ((c >= 0x0001) && (c <= 0x007F)) {
+        for (; i < strlen; i++) {
+            int c = str.charAt(i);
+            if (c < 0x80 && c != 0) {
                 bytearr[count++] = (byte) c;
-
-            } else if (c > 0x07FF) {
+            } else if (c >= 0x800) {
                 bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
                 bytearr[count++] = (byte) (0x80 | ((c >>  6) & 0x3F));
                 bytearr[count++] = (byte) (0x80 | ((c >>  0) & 0x3F));
@@ -398,10 +395,20 @@
                 bytearr[count++] = (byte) (0x80 | ((c >>  0) & 0x3F));
             }
         }
-        out.write(bytearr, 0, utflen+2);
+        out.write(bytearr, 0, utflen + 2);
         return utflen + 2;
     }
 
+    private static String tooLongMsg(String s, int bits32) {
+        int slen = s.length();
+        String head = s.substring(0, 8);
+        String tail = s.substring(slen - 8, slen);
+        // handle int overflow with max 3x expansion
+        long actualLength = (long)slen + Integer.toUnsignedLong(bits32 - slen);
+        return "encoded string (" + head + "..." + tail + ") too long: "
+            + actualLength + " bytes";
+    }
+
     /**
      * Returns the current value of the counter <code>written</code>,
      * the number of bytes written to this data output stream so far.
--- a/test/jdk/java/io/DataOutputStream/WriteUTF.java	Wed Mar 27 09:29:34 2019 -0400
+++ b/test/jdk/java/io/DataOutputStream/WriteUTF.java	Wed Mar 27 07:21:18 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,14 +22,21 @@
  */
 
 /* @test
-   @bug 4260284
-   @summary Test if DataOutputStream will overcount written field.
-*/
+ * @bug 4260284 8219196
+ * @summary Test if DataOutputStream will overcount written field.
+ * @run testng/othervm -Xmx2g WriteUTF
+ */
 
-import java.io.*;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.UTFDataFormatException;
+
+import org.testng.annotations.Test;
 
 public class WriteUTF {
-    public static void main(String[] args) throws Exception {
+    @Test
+    public static void overcountWrittenField() throws IOException {
         ByteArrayOutputStream baos = new ByteArrayOutputStream();
         DataOutputStream dos = new DataOutputStream(baos);
         dos.writeUTF("Hello, World!");  // 15
@@ -37,4 +44,25 @@
         if  (baos.size() != dos.size())
             throw new RuntimeException("Miscounted bytes in DataOutputStream.");
     }
+
+    private static void writeUTF(int size) throws IOException {
+        // this character gives 3 bytes when encoded using UTF-8
+        String s = "\u0800".repeat(size);
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        DataOutputStream dos = new DataOutputStream(bos);
+        dos.writeUTF(s);
+    }
+
+    @Test(expectedExceptions = UTFDataFormatException.class)
+    public void utfDataFormatException() throws IOException {
+        writeUTF(1 << 16);
+    }
+
+    // Without 8219196 fix, throws ArrayIndexOutOfBoundsException instead of
+    // expected UTFDataFormatException. Requires 2GB of heap (-Xmx2g) to run
+    // without throwing an OutOfMemoryError.
+    @Test(expectedExceptions = UTFDataFormatException.class)
+    public void arrayIndexOutOfBoundsException() throws IOException {
+        writeUTF(Integer.MAX_VALUE / 3 + 1);
+    }
 }