8159638: Improve array caches and renderer stats in Marlin renderer
authorlbourges
Wed, 03 Aug 2016 22:53:38 +0200
changeset 40421 d5ee65e2b0fb
parent 40173 668a4cd8de97
child 40422 e7e8293ac07e
8159638: Improve array caches and renderer stats in Marlin renderer Summary: Byte/Float/Int ArrayCache (Reference) refactoring + improved cache statistics and tuning settings Reviewed-by: flar, prr
jdk/src/java.desktop/share/classes/sun/java2d/marlin/ArrayCache.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/ArrayCacheConst.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/ByteArrayCache.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/Dasher.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/FloatArrayCache.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/IntArrayCache.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinCache.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinConst.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinProperties.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinRenderingEngine.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/Renderer.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/RendererContext.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/RendererStats.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/Stroker.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/Version.java
jdk/test/sun/java2d/marlin/ArrayCacheSizeTest.java
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/ArrayCache.java	Wed Aug 03 14:49:01 2016 +0530
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package sun.java2d.marlin;
-
-import java.util.Arrays;
-import static sun.java2d.marlin.MarlinUtils.logInfo;
-
-public final class ArrayCache implements MarlinConst {
-
-    static final int BUCKETS = 4;
-    static final int MIN_ARRAY_SIZE = 4096;
-    static final int MAX_ARRAY_SIZE;
-    static final int MASK_CLR_1 = ~1;
-    // threshold to grow arrays only by (3/2) instead of 2
-    static final int THRESHOLD_ARRAY_SIZE;
-    static final int[] ARRAY_SIZES = new int[BUCKETS];
-    // dirty byte array sizes
-    static final int MIN_DIRTY_BYTE_ARRAY_SIZE = 32 * 2048; // 32px x 2048px
-    static final int MAX_DIRTY_BYTE_ARRAY_SIZE;
-    static final int[] DIRTY_BYTE_ARRAY_SIZES = new int[BUCKETS];
-    // large array thresholds:
-    static final long THRESHOLD_LARGE_ARRAY_SIZE;
-    static final long THRESHOLD_HUGE_ARRAY_SIZE;
-    // stats
-    private static int resizeInt = 0;
-    private static int resizeDirtyInt = 0;
-    private static int resizeDirtyFloat = 0;
-    private static int resizeDirtyByte = 0;
-    private static int oversize = 0;
-
-    static {
-        // initialize buckets for int/float arrays
-        int arraySize = MIN_ARRAY_SIZE;
-
-        for (int i = 0; i < BUCKETS; i++, arraySize <<= 2) {
-            ARRAY_SIZES[i] = arraySize;
-
-            if (DO_TRACE) {
-                logInfo("arraySize[" + i + "]: " + arraySize);
-            }
-        }
-        MAX_ARRAY_SIZE = arraySize >> 2;
-
-        /* initialize buckets for dirty byte arrays
-         (large AA chunk = 32 x 2048 pixels) */
-        arraySize = MIN_DIRTY_BYTE_ARRAY_SIZE;
-
-        for (int i = 0; i < BUCKETS; i++, arraySize <<= 1) {
-            DIRTY_BYTE_ARRAY_SIZES[i] = arraySize;
-
-            if (DO_TRACE) {
-                logInfo("dirty arraySize[" + i + "]: " + arraySize);
-            }
-        }
-        MAX_DIRTY_BYTE_ARRAY_SIZE = arraySize >> 1;
-
-        // threshold to grow arrays only by (3/2) instead of 2
-        THRESHOLD_ARRAY_SIZE = Math.max(2 * 1024 * 1024, MAX_ARRAY_SIZE); // 2M
-
-        THRESHOLD_LARGE_ARRAY_SIZE = 8L * THRESHOLD_ARRAY_SIZE; // 16M
-        THRESHOLD_HUGE_ARRAY_SIZE  = 8L * THRESHOLD_LARGE_ARRAY_SIZE; // 128M
-
-        if (DO_STATS || DO_MONITORS) {
-            logInfo("ArrayCache.BUCKETS        = " + BUCKETS);
-            logInfo("ArrayCache.MIN_ARRAY_SIZE = " + MIN_ARRAY_SIZE);
-            logInfo("ArrayCache.MAX_ARRAY_SIZE = " + MAX_ARRAY_SIZE);
-            logInfo("ArrayCache.ARRAY_SIZES = "
-                    + Arrays.toString(ARRAY_SIZES));
-            logInfo("ArrayCache.MIN_DIRTY_BYTE_ARRAY_SIZE = "
-                    + MIN_DIRTY_BYTE_ARRAY_SIZE);
-            logInfo("ArrayCache.MAX_DIRTY_BYTE_ARRAY_SIZE = "
-                    + MAX_DIRTY_BYTE_ARRAY_SIZE);
-            logInfo("ArrayCache.ARRAY_SIZES = "
-                    + Arrays.toString(DIRTY_BYTE_ARRAY_SIZES));
-            logInfo("ArrayCache.THRESHOLD_ARRAY_SIZE = "
-                    + THRESHOLD_ARRAY_SIZE);
-            logInfo("ArrayCache.THRESHOLD_LARGE_ARRAY_SIZE = "
-                    + THRESHOLD_LARGE_ARRAY_SIZE);
-            logInfo("ArrayCache.THRESHOLD_HUGE_ARRAY_SIZE = "
-                    + THRESHOLD_HUGE_ARRAY_SIZE);
-        }
-    }
-
-    private ArrayCache() {
-        // Utility class
-    }
-
-    static synchronized void incResizeInt() {
-        resizeInt++;
-    }
-
-    static synchronized void incResizeDirtyInt() {
-        resizeDirtyInt++;
-    }
-
-    static synchronized void incResizeDirtyFloat() {
-        resizeDirtyFloat++;
-    }
-
-    static synchronized void incResizeDirtyByte() {
-        resizeDirtyByte++;
-    }
-
-    static synchronized void incOversize() {
-        oversize++;
-    }
-
-    static void dumpStats() {
-        if (resizeInt != 0 || resizeDirtyInt != 0 || resizeDirtyFloat != 0
-                || resizeDirtyByte != 0 || oversize != 0) {
-            logInfo("ArrayCache: int resize: " + resizeInt
-                    + " - dirty int resize: " + resizeDirtyInt
-                    + " - dirty float resize: " + resizeDirtyFloat
-                    + " - dirty byte resize: " + resizeDirtyByte
-                    + " - oversize: " + oversize);
-        }
-    }
-
-    // small methods used a lot (to be inlined / optimized by hotspot)
-
-    static int getBucket(final int length) {
-        for (int i = 0; i < ARRAY_SIZES.length; i++) {
-            if (length <= ARRAY_SIZES[i]) {
-                return i;
-            }
-        }
-        return -1;
-    }
-
-    static int getBucketDirtyBytes(final int length) {
-        for (int i = 0; i < DIRTY_BYTE_ARRAY_SIZES.length; i++) {
-            if (length <= DIRTY_BYTE_ARRAY_SIZES[i]) {
-                return i;
-            }
-        }
-        return -1;
-    }
-
-    /**
-     * Return the new array size (~ x2)
-     * @param curSize current used size
-     * @param needSize needed size
-     * @return new array size
-     */
-    public static int getNewSize(final int curSize, final int needSize) {
-        // check if needSize is negative or integer overflow:
-        if (needSize < 0) {
-            // hard overflow failure - we can't even accommodate
-            // new items without overflowing
-            throw new ArrayIndexOutOfBoundsException(
-                          "array exceeds maximum capacity !");
-        }
-        assert curSize >= 0;
-        final int initial = (curSize & MASK_CLR_1);
-        int size;
-        if (initial > THRESHOLD_ARRAY_SIZE) {
-            size = initial + (initial >> 1); // x(3/2)
-        } else {
-            size = (initial << 1); // x2
-        }
-        // ensure the new size is >= needed size:
-        if (size < needSize) {
-            // align to 4096 (may overflow):
-            size = ((needSize >> 12) + 1) << 12;
-        }
-        // check integer overflow:
-        if (size < 0) {
-            // resize to maximum capacity:
-            size = Integer.MAX_VALUE;
-        }
-        return size;
-    }
-
-    /**
-     * Return the new array size (~ x2)
-     * @param curSize current used size
-     * @param needSize needed size
-     * @return new array size
-     */
-    public static long getNewLargeSize(final long curSize, final long needSize) {
-        // check if needSize is negative or integer overflow:
-        if ((needSize >> 31L) != 0L) {
-            // hard overflow failure - we can't even accommodate
-            // new items without overflowing
-            throw new ArrayIndexOutOfBoundsException(
-                          "array exceeds maximum capacity !");
-        }
-        assert curSize >= 0L;
-        long size;
-        if (curSize > THRESHOLD_HUGE_ARRAY_SIZE) {
-            size = curSize + (curSize >> 2L); // x(5/4)
-        } else  if (curSize > THRESHOLD_LARGE_ARRAY_SIZE) {
-            size = curSize + (curSize >> 1L); // x(3/2)
-        } else {
-            size = (curSize << 1L); // x2
-        }
-        // ensure the new size is >= needed size:
-        if (size < needSize) {
-            // align to 4096:
-            size = ((needSize >> 12L) + 1L) << 12L;
-        }
-        // check integer overflow:
-        if (size > Integer.MAX_VALUE) {
-            // resize to maximum capacity:
-            size = Integer.MAX_VALUE;
-        }
-        return size;
-    }
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/ArrayCacheConst.java	Wed Aug 03 22:53:38 2016 +0200
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.util.Arrays;
+import static sun.java2d.marlin.MarlinUtils.logInfo;
+
+public final class ArrayCacheConst implements MarlinConst {
+
+    static final int BUCKETS = 8;
+    static final int MIN_ARRAY_SIZE = 4096;
+    // maximum array size
+    static final int MAX_ARRAY_SIZE;
+    // threshold below to grow arrays by 4
+    static final int THRESHOLD_SMALL_ARRAY_SIZE = 4 * 1024 * 1024;
+    // threshold to grow arrays only by (3/2) instead of 2
+    static final int THRESHOLD_ARRAY_SIZE;
+    // threshold to grow arrays only by (5/4) instead of (3/2)
+    static final long THRESHOLD_HUGE_ARRAY_SIZE;
+    static final int[] ARRAY_SIZES = new int[BUCKETS];
+
+    static {
+        // initialize buckets for int/float arrays
+        int arraySize = MIN_ARRAY_SIZE;
+
+        int inc_lg = 2; // x4
+
+        for (int i = 0; i < BUCKETS; i++, arraySize <<= inc_lg) {
+            ARRAY_SIZES[i] = arraySize;
+
+            if (DO_TRACE) {
+                logInfo("arraySize[" + i + "]: " + arraySize);
+            }
+
+            if (arraySize >= THRESHOLD_SMALL_ARRAY_SIZE) {
+                inc_lg = 1; // x2
+            }
+        }
+        MAX_ARRAY_SIZE = arraySize >> inc_lg;
+
+        if (MAX_ARRAY_SIZE <= 0) {
+            throw new IllegalStateException("Invalid max array size !");
+        }
+
+        THRESHOLD_ARRAY_SIZE       =  16  * 1024 * 1024; // >16M
+        THRESHOLD_HUGE_ARRAY_SIZE  =  48L * 1024 * 1024; // >48M
+
+        if (DO_STATS || DO_MONITORS) {
+            logInfo("ArrayCache.BUCKETS        = " + BUCKETS);
+            logInfo("ArrayCache.MIN_ARRAY_SIZE = " + MIN_ARRAY_SIZE);
+            logInfo("ArrayCache.MAX_ARRAY_SIZE = " + MAX_ARRAY_SIZE);
+            logInfo("ArrayCache.ARRAY_SIZES = "
+                    + Arrays.toString(ARRAY_SIZES));
+            logInfo("ArrayCache.THRESHOLD_ARRAY_SIZE = "
+                    + THRESHOLD_ARRAY_SIZE);
+            logInfo("ArrayCache.THRESHOLD_HUGE_ARRAY_SIZE = "
+                    + THRESHOLD_HUGE_ARRAY_SIZE);
+        }
+    }
+
+    private ArrayCacheConst() {
+        // Utility class
+    }
+
+    // small methods used a lot (to be inlined / optimized by hotspot)
+
+    static int getBucket(final int length) {
+        for (int i = 0; i < ARRAY_SIZES.length; i++) {
+            if (length <= ARRAY_SIZES[i]) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Return the new array size (~ x2)
+     * @param curSize current used size
+     * @param needSize needed size
+     * @return new array size
+     */
+    public static int getNewSize(final int curSize, final int needSize) {
+        // check if needSize is negative or integer overflow:
+        if (needSize < 0) {
+            // hard overflow failure - we can't even accommodate
+            // new items without overflowing
+            throw new ArrayIndexOutOfBoundsException(
+                          "array exceeds maximum capacity !");
+        }
+        assert curSize >= 0;
+        final int initial = curSize;
+        int size;
+        if (initial > THRESHOLD_ARRAY_SIZE) {
+            size = initial + (initial >> 1); // x(3/2)
+        } else {
+            size = (initial << 1); // x2
+        }
+        // ensure the new size is >= needed size:
+        if (size < needSize) {
+            // align to 4096 (may overflow):
+            size = ((needSize >> 12) + 1) << 12;
+        }
+        // check integer overflow:
+        if (size < 0) {
+            // resize to maximum capacity:
+            size = Integer.MAX_VALUE;
+        }
+        return size;
+    }
+
+    /**
+     * Return the new array size (~ x2)
+     * @param curSize current used size
+     * @param needSize needed size
+     * @return new array size
+     */
+    public static long getNewLargeSize(final long curSize, final long needSize) {
+        // check if needSize is negative or integer overflow:
+        if ((needSize >> 31L) != 0L) {
+            // hard overflow failure - we can't even accommodate
+            // new items without overflowing
+            throw new ArrayIndexOutOfBoundsException(
+                          "array exceeds maximum capacity !");
+        }
+        assert curSize >= 0L;
+        long size;
+        if (curSize > THRESHOLD_HUGE_ARRAY_SIZE) {
+            size = curSize + (curSize >> 2L); // x(5/4)
+        } else if (curSize > THRESHOLD_ARRAY_SIZE) {
+            size = curSize + (curSize >> 1L); // x(3/2)
+        } else if (curSize > THRESHOLD_SMALL_ARRAY_SIZE) {
+            size = (curSize << 1L); // x2
+        } else {
+            size = (curSize << 2L); // x4
+        }
+        // ensure the new size is >= needed size:
+        if (size < needSize) {
+            // align to 4096:
+            size = ((needSize >> 12L) + 1L) << 12L;
+        }
+        // check integer overflow:
+        if (size > Integer.MAX_VALUE) {
+            // resize to maximum capacity:
+            size = Integer.MAX_VALUE;
+        }
+        return size;
+    }
+
+    static final class CacheStats {
+        final String name;
+        final BucketStats[] bucketStats;
+        int resize = 0;
+        int oversize = 0;
+        long totalInitial = 0L;
+
+        CacheStats(final String name) {
+            this.name = name;
+
+            bucketStats = new BucketStats[BUCKETS];
+            for (int i = 0; i < BUCKETS; i++) {
+                bucketStats[i] = new BucketStats();
+            }
+        }
+
+        void reset() {
+            resize = 0;
+            oversize = 0;
+
+            for (int i = 0; i < BUCKETS; i++) {
+                bucketStats[i].reset();
+            }
+        }
+
+        long dumpStats() {
+            long totalCacheBytes = 0L;
+
+            if (DO_STATS) {
+                for (int i = 0; i < BUCKETS; i++) {
+                    final BucketStats s = bucketStats[i];
+
+                    if (s.maxSize != 0) {
+                        totalCacheBytes += getByteFactor()
+                                           * (s.maxSize * ARRAY_SIZES[i]);
+                    }
+                }
+
+                if (totalInitial != 0L || totalCacheBytes != 0L
+                    || resize != 0 || oversize != 0)
+                {
+                    logInfo(name + ": resize: " + resize
+                            + " - oversize: " + oversize
+                            + " - initial: " + getTotalInitialBytes()
+                            + " bytes (" + totalInitial + " elements)"
+                            + " - cache: " + totalCacheBytes + " bytes"
+                    );
+                }
+
+                if (totalCacheBytes != 0L) {
+                    logInfo(name + ": usage stats:");
+
+                    for (int i = 0; i < BUCKETS; i++) {
+                        final BucketStats s = bucketStats[i];
+
+                        if (s.getOp != 0) {
+                            logInfo("  Bucket[" + ARRAY_SIZES[i] + "]: "
+                                    + "get: " + s.getOp
+                                    + " - put: " + s.returnOp
+                                    + " - create: " + s.createOp
+                                    + " :: max size: " + s.maxSize
+                            );
+                        }
+                    }
+                }
+            }
+            return totalCacheBytes;
+        }
+
+        private int getByteFactor() {
+            int factor = 1;
+            if (name.contains("Int") || name.contains("Float")) {
+                factor = 4;
+            }
+            return factor;
+        }
+
+        long getTotalInitialBytes() {
+            return getByteFactor() * totalInitial;
+        }
+    }
+
+    static final class BucketStats {
+        int getOp = 0;
+        int createOp = 0;
+        int returnOp = 0;
+        int maxSize = 0;
+
+        void reset() {
+            getOp = 0;
+            createOp = 0;
+            returnOp = 0;
+            maxSize = 0;
+        }
+
+        void updateMaxSize(final int size) {
+            if (size > maxSize) {
+                maxSize = size;
+            }
+        }
+    }
+}
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/ByteArrayCache.java	Wed Aug 03 14:49:01 2016 +0530
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/ByteArrayCache.java	Wed Aug 03 22:53:38 2016 +0200
@@ -22,109 +22,224 @@
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
-
 package sun.java2d.marlin;
 
-import java.util.ArrayDeque;
+import static sun.java2d.marlin.ArrayCacheConst.ARRAY_SIZES;
+import static sun.java2d.marlin.ArrayCacheConst.BUCKETS;
+import static sun.java2d.marlin.ArrayCacheConst.MAX_ARRAY_SIZE;
+import static sun.java2d.marlin.MarlinUtils.logInfo;
+import static sun.java2d.marlin.MarlinUtils.logException;
+
+import java.lang.ref.WeakReference;
 import java.util.Arrays;
-import static sun.java2d.marlin.MarlinUtils.logException;
-import static sun.java2d.marlin.MarlinUtils.logInfo;
+
+import sun.java2d.marlin.ArrayCacheConst.BucketStats;
+import sun.java2d.marlin.ArrayCacheConst.CacheStats;
+
+/*
+ * Note that the [BYTE/INT/FLOAT]ArrayCache files are nearly identical except
+ * for a few type and name differences. Typically, the [BYTE]ArrayCache.java file
+ * is edited manually and then [INT]ArrayCache.java and [FLOAT]ArrayCache.java
+ * files are generated with the following command lines:
+ */
+// % sed -e 's/(b\yte)[ ]*//g' -e 's/b\yte/int/g' -e 's/B\yte/Int/g' < B\yteArrayCache.java > IntArrayCache.java
+// % sed -e 's/(b\yte)[ ]*/(float) /g' -e 's/b\yte/float/g' -e 's/B\yte/Float/g' < B\yteArrayCache.java > FloatArrayCache.java
 
 final class ByteArrayCache implements MarlinConst {
 
-    private final int arraySize;
-    private final ArrayDeque<byte[]> byteArrays;
-    // stats
-    private int getOp = 0;
-    private int createOp = 0;
-    private int returnOp = 0;
+    final boolean clean;
+    private final int bucketCapacity;
+    private WeakReference<Bucket[]> refBuckets = null;
+    final CacheStats stats;
+
+    ByteArrayCache(final boolean clean, final int bucketCapacity) {
+        this.clean = clean;
+        this.bucketCapacity = bucketCapacity;
+        this.stats = (DO_STATS) ?
+            new CacheStats(getLogPrefix(clean) + "ByteArrayCache") : null;
+    }
+
+    Bucket getCacheBucket(final int length) {
+        final int bucket = ArrayCacheConst.getBucket(length);
+        return getBuckets()[bucket];
+    }
+
+    private Bucket[] getBuckets() {
+        // resolve reference:
+        Bucket[] buckets = (refBuckets != null) ? refBuckets.get() : null;
+
+        // create a new buckets ?
+        if (buckets == null) {
+            buckets = new Bucket[BUCKETS];
+
+            for (int i = 0; i < BUCKETS; i++) {
+                buckets[i] = new Bucket(clean, ARRAY_SIZES[i], bucketCapacity,
+                        (DO_STATS) ? stats.bucketStats[i] : null);
+            }
+
+            // update weak reference:
+            refBuckets = new WeakReference<Bucket[]>(buckets);
+        }
+        return buckets;
+    }
+
+    Reference createRef(final int initialSize) {
+        return new Reference(this, initialSize);
+    }
+
+    static final class Reference {
+
+        // initial array reference (direct access)
+        final byte[] initial;
+        private final boolean clean;
+        private final ByteArrayCache cache;
+
+        Reference(final ByteArrayCache cache, final int initialSize) {
+            this.cache = cache;
+            this.clean = cache.clean;
+            this.initial = createArray(initialSize, clean);
+            if (DO_STATS) {
+                cache.stats.totalInitial += initialSize;
+            }
+        }
 
-    void dumpStats() {
-        if (getOp > 0) {
-            logInfo("ByteArrayCache[" + arraySize + "]: get: " + getOp
-                    + " created: " + createOp + " - returned: " + returnOp
-                    + " :: cache size: " + byteArrays.size());
+        byte[] getArray(final int length) {
+            if (length <= MAX_ARRAY_SIZE) {
+                return cache.getCacheBucket(length).getArray();
+            }
+            if (DO_STATS) {
+                cache.stats.oversize++;
+            }
+            if (DO_LOG_OVERSIZE) {
+                logInfo(getLogPrefix(clean) + "ByteArrayCache: "
+                        + "getArray[oversize]: length=\t" + length);
+            }
+            return createArray(length, clean);
+        }
+
+        byte[] widenArray(final byte[] array, final int usedSize,
+                          final int needSize)
+        {
+            final int length = array.length;
+            if (DO_CHECKS && length >= needSize) {
+                return array;
+            }
+            if (DO_STATS) {
+                cache.stats.resize++;
+            }
+
+            // maybe change bucket:
+            // ensure getNewSize() > newSize:
+            final byte[] res = getArray(ArrayCacheConst.getNewSize(usedSize, needSize));
+
+            // use wrapper to ensure proper copy:
+            System.arraycopy(array, 0, res, 0, usedSize); // copy only used elements
+
+            // maybe return current array:
+            putArray(array, 0, usedSize); // ensure array is cleared
+
+            if (DO_LOG_WIDEN_ARRAY) {
+                logInfo(getLogPrefix(clean) + "ByteArrayCache: "
+                        + "widenArray[" + res.length
+                        + "]: usedSize=\t" + usedSize + "\tlength=\t" + length
+                        + "\tneeded length=\t" + needSize);
+            }
+            return res;
+        }
+
+        byte[] putArray(final byte[] array)
+        {
+            // dirty array helper:
+            return putArray(array, 0, array.length);
+        }
+
+        byte[] putArray(final byte[] array, final int fromIndex,
+                        final int toIndex)
+        {
+            if (array.length <= MAX_ARRAY_SIZE) {
+                if ((clean || DO_CLEAN_DIRTY) && (toIndex != 0)) {
+                    // clean-up array of dirty part[fromIndex; toIndex[
+                    fill(array, fromIndex, toIndex, (byte)0);
+                }
+                // ensure to never store initial arrays in cache:
+                if (array != initial) {
+                    cache.getCacheBucket(array.length).putArray(array);
+                }
+            }
+            return initial;
         }
     }
 
-    ByteArrayCache(final int arraySize) {
-        this.arraySize = arraySize;
-        // small but enough: almost 1 cache line
-        this.byteArrays = new ArrayDeque<byte[]>(6);
-    }
+    static final class Bucket {
+
+        private int tail = 0;
+        private final int arraySize;
+        private final boolean clean;
+        private final byte[][] arrays;
+        private final BucketStats stats;
 
-    byte[] getArray() {
-        if (DO_STATS) {
-            getOp++;
+        Bucket(final boolean clean, final int arraySize,
+               final int capacity, final BucketStats stats)
+        {
+            this.arraySize = arraySize;
+            this.clean = clean;
+            this.stats = stats;
+            this.arrays = new byte[capacity][];
         }
 
-        // use cache:
-        final byte[] array = byteArrays.pollLast();
-        if (array != null) {
-            return array;
+        byte[] getArray() {
+            if (DO_STATS) {
+                stats.getOp++;
+            }
+            // use cache:
+            if (tail != 0) {
+                final byte[] array = arrays[--tail];
+                arrays[tail] = null;
+                return array;
+            }
+            if (DO_STATS) {
+                stats.createOp++;
+            }
+            return createArray(arraySize, clean);
         }
 
-        if (DO_STATS) {
-            createOp++;
+        void putArray(final byte[] array)
+        {
+            if (DO_CHECKS && (array.length != arraySize)) {
+                logInfo(getLogPrefix(clean) + "ByteArrayCache: "
+                        + "bad length = " + array.length);
+                return;
+            }
+            if (DO_STATS) {
+                stats.returnOp++;
+            }
+            // fill cache:
+            if (arrays.length > tail) {
+                arrays[tail++] = array;
+
+                if (DO_STATS) {
+                    stats.updateMaxSize(tail);
+                }
+            } else if (DO_CHECKS) {
+                logInfo(getLogPrefix(clean) + "ByteArrayCache: "
+                        + "array capacity exceeded !");
+            }
         }
-
-        return new byte[arraySize];
     }
 
-    void putDirtyArray(final byte[] array, final int length) {
-        if (length != arraySize) {
-            if (DO_CHECKS) {
-                MarlinUtils.logInfo("ArrayCache: bad length = " + length);
-            }
-            return;
-        }
-        if (DO_STATS) {
-            returnOp++;
-        }
-
-        // NO clean-up of array data = DIRTY ARRAY
-
-        if (DO_CLEAN_DIRTY) {
-            // Force zero-fill dirty arrays:
-            Arrays.fill(array, 0, array.length, BYTE_0);
+    static byte[] createArray(final int length, final boolean clean) {
+        if (clean) {
+            return new byte[length];
         }
-
-        // fill cache:
-        byteArrays.addLast(array);
-    }
-
-    void putArray(final byte[] array, final int length,
-                  final int fromIndex, final int toIndex)
-    {
-        if (length != arraySize) {
-            if (DO_CHECKS) {
-                MarlinUtils.logInfo("ArrayCache: bad length = " + length);
-            }
-            return;
-        }
-        if (DO_STATS) {
-            returnOp++;
-        }
-
-        // clean-up array of dirty part[fromIndex; toIndex[
-        fill(array, fromIndex, toIndex, BYTE_0);
-
-        // fill cache:
-        byteArrays.addLast(array);
+       // use JDK9 Unsafe.allocateUninitializedArray(class, length):
+       return (byte[]) OffHeapArray.UNSAFE.allocateUninitializedArray(byte.class, length);
     }
 
     static void fill(final byte[] array, final int fromIndex,
                      final int toIndex, final byte value)
     {
         // clear array data:
-        /*
-         * Arrays.fill is faster than System.arraycopy(empty array)
-         * or Unsafe.setMemory(byte 0)
-         */
-        if (toIndex != 0) {
-            Arrays.fill(array, fromIndex, toIndex, value);
-        }
-
+        Arrays.fill(array, fromIndex, toIndex, value);
         if (DO_CHECKS) {
             check(array, fromIndex, toIndex, value);
         }
@@ -149,4 +264,8 @@
             }
         }
     }
+
+    static String getLogPrefix(final boolean clean) {
+        return (clean) ? "Clean" : "Dirty";
+    }
 }
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Dasher.java	Wed Aug 03 14:49:01 2016 +0530
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Dasher.java	Wed Aug 03 22:53:38 2016 +0200
@@ -68,15 +68,13 @@
     // per-thread renderer context
     final RendererContext rdrCtx;
 
-    // dashes array (dirty)
-    final float[] dashes_initial = new float[INITIAL_ARRAY];
-
     // flag to recycle dash array copy
     boolean recycleDashes;
 
-    // per-thread initial arrays (large enough to satisfy most usages
-    // +1 to avoid recycling in Helpers.widenArray()
-    private final float[] firstSegmentsBuffer_initial = new float[INITIAL_ARRAY + 1];
+    // dashes ref (dirty)
+    final FloatArrayCache.Reference dashes_ref;
+    // firstSegmentsBuffer ref (dirty)
+    final FloatArrayCache.Reference firstSegmentsBuffer_ref;
 
     /**
      * Constructs a <code>Dasher</code>.
@@ -85,7 +83,10 @@
     Dasher(final RendererContext rdrCtx) {
         this.rdrCtx = rdrCtx;
 
-        firstSegmentsBuffer = firstSegmentsBuffer_initial;
+        dashes_ref = rdrCtx.newDirtyFloatArrayRef(INITIAL_ARRAY); // 1K
+
+        firstSegmentsBuffer_ref = rdrCtx.newDirtyFloatArrayRef(INITIAL_ARRAY); // 1K
+        firstSegmentsBuffer     = firstSegmentsBuffer_ref.initial;
 
         // we need curCurvepts to be able to contain 2 curves because when
         // dashing curves, we need to subdivide it
@@ -142,18 +143,12 @@
         if (DO_CLEAN_DIRTY) {
             // Force zero-fill dirty arrays:
             Arrays.fill(curCurvepts, 0f);
-            Arrays.fill(firstSegmentsBuffer, 0f);
         }
         // Return arrays:
-        if (recycleDashes && dash != dashes_initial) {
-            rdrCtx.putDirtyFloatArray(dash);
-            dash = null;
+        if (recycleDashes) {
+            dash = dashes_ref.putArray(dash);
         }
-
-        if (firstSegmentsBuffer != firstSegmentsBuffer_initial) {
-            rdrCtx.putDirtyFloatArray(firstSegmentsBuffer);
-            firstSegmentsBuffer = firstSegmentsBuffer_initial;
-        }
+        firstSegmentsBuffer = firstSegmentsBuffer_ref.putArray(firstSegmentsBuffer);
     }
 
     @Override
@@ -222,7 +217,8 @@
                             .add(segIdx + len);
                     }
                     firstSegmentsBuffer = buf
-                        = rdrCtx.widenDirtyFloatArray(buf, segIdx, segIdx + len);
+                        = firstSegmentsBuffer_ref.widenArray(buf, segIdx,
+                                                             segIdx + len);
                 }
                 buf[segIdx++] = type;
                 len--;
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/FloatArrayCache.java	Wed Aug 03 14:49:01 2016 +0530
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/FloatArrayCache.java	Wed Aug 03 22:53:38 2016 +0200
@@ -22,110 +22,224 @@
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
-
 package sun.java2d.marlin;
 
-import java.util.ArrayDeque;
+import static sun.java2d.marlin.ArrayCacheConst.ARRAY_SIZES;
+import static sun.java2d.marlin.ArrayCacheConst.BUCKETS;
+import static sun.java2d.marlin.ArrayCacheConst.MAX_ARRAY_SIZE;
+import static sun.java2d.marlin.MarlinUtils.logInfo;
+import static sun.java2d.marlin.MarlinUtils.logException;
+
+import java.lang.ref.WeakReference;
 import java.util.Arrays;
-import static sun.java2d.marlin.MarlinUtils.logException;
-import static sun.java2d.marlin.MarlinUtils.logInfo;
+
+import sun.java2d.marlin.ArrayCacheConst.BucketStats;
+import sun.java2d.marlin.ArrayCacheConst.CacheStats;
+
+/*
+ * Note that the [BYTE/INT/FLOAT]ArrayCache files are nearly identical except
+ * for a few type and name differences. Typically, the [BYTE]ArrayCache.java file
+ * is edited manually and then [INT]ArrayCache.java and [FLOAT]ArrayCache.java
+ * files are generated with the following command lines:
+ */
+// % sed -e 's/(b\yte)[ ]*//g' -e 's/b\yte/int/g' -e 's/B\yte/Int/g' < B\yteArrayCache.java > IntArrayCache.java
+// % sed -e 's/(b\yte)[ ]*/(float) /g' -e 's/b\yte/float/g' -e 's/B\yte/Float/g' < B\yteArrayCache.java > FloatArrayCache.java
 
 final class FloatArrayCache implements MarlinConst {
 
-    private final int arraySize;
-    private final ArrayDeque<float[]> floatArrays;
-    // stats
-    private int getOp = 0;
-    private int createOp = 0;
-    private int returnOp = 0;
+    final boolean clean;
+    private final int bucketCapacity;
+    private WeakReference<Bucket[]> refBuckets = null;
+    final CacheStats stats;
+
+    FloatArrayCache(final boolean clean, final int bucketCapacity) {
+        this.clean = clean;
+        this.bucketCapacity = bucketCapacity;
+        this.stats = (DO_STATS) ?
+            new CacheStats(getLogPrefix(clean) + "FloatArrayCache") : null;
+    }
+
+    Bucket getCacheBucket(final int length) {
+        final int bucket = ArrayCacheConst.getBucket(length);
+        return getBuckets()[bucket];
+    }
+
+    private Bucket[] getBuckets() {
+        // resolve reference:
+        Bucket[] buckets = (refBuckets != null) ? refBuckets.get() : null;
+
+        // create a new buckets ?
+        if (buckets == null) {
+            buckets = new Bucket[BUCKETS];
+
+            for (int i = 0; i < BUCKETS; i++) {
+                buckets[i] = new Bucket(clean, ARRAY_SIZES[i], bucketCapacity,
+                        (DO_STATS) ? stats.bucketStats[i] : null);
+            }
+
+            // update weak reference:
+            refBuckets = new WeakReference<Bucket[]>(buckets);
+        }
+        return buckets;
+    }
+
+    Reference createRef(final int initialSize) {
+        return new Reference(this, initialSize);
+    }
+
+    static final class Reference {
+
+        // initial array reference (direct access)
+        final float[] initial;
+        private final boolean clean;
+        private final FloatArrayCache cache;
+
+        Reference(final FloatArrayCache cache, final int initialSize) {
+            this.cache = cache;
+            this.clean = cache.clean;
+            this.initial = createArray(initialSize, clean);
+            if (DO_STATS) {
+                cache.stats.totalInitial += initialSize;
+            }
+        }
 
-    void dumpStats() {
-        if (getOp > 0) {
-            logInfo("FloatArrayCache[" + arraySize + "]: get: " + getOp
-                    + " created: " + createOp + " - returned: " + returnOp
-                    + " :: cache size: " + floatArrays.size());
+        float[] getArray(final int length) {
+            if (length <= MAX_ARRAY_SIZE) {
+                return cache.getCacheBucket(length).getArray();
+            }
+            if (DO_STATS) {
+                cache.stats.oversize++;
+            }
+            if (DO_LOG_OVERSIZE) {
+                logInfo(getLogPrefix(clean) + "FloatArrayCache: "
+                        + "getArray[oversize]: length=\t" + length);
+            }
+            return createArray(length, clean);
+        }
+
+        float[] widenArray(final float[] array, final int usedSize,
+                          final int needSize)
+        {
+            final int length = array.length;
+            if (DO_CHECKS && length >= needSize) {
+                return array;
+            }
+            if (DO_STATS) {
+                cache.stats.resize++;
+            }
+
+            // maybe change bucket:
+            // ensure getNewSize() > newSize:
+            final float[] res = getArray(ArrayCacheConst.getNewSize(usedSize, needSize));
+
+            // use wrapper to ensure proper copy:
+            System.arraycopy(array, 0, res, 0, usedSize); // copy only used elements
+
+            // maybe return current array:
+            putArray(array, 0, usedSize); // ensure array is cleared
+
+            if (DO_LOG_WIDEN_ARRAY) {
+                logInfo(getLogPrefix(clean) + "FloatArrayCache: "
+                        + "widenArray[" + res.length
+                        + "]: usedSize=\t" + usedSize + "\tlength=\t" + length
+                        + "\tneeded length=\t" + needSize);
+            }
+            return res;
+        }
+
+        float[] putArray(final float[] array)
+        {
+            // dirty array helper:
+            return putArray(array, 0, array.length);
+        }
+
+        float[] putArray(final float[] array, final int fromIndex,
+                        final int toIndex)
+        {
+            if (array.length <= MAX_ARRAY_SIZE) {
+                if ((clean || DO_CLEAN_DIRTY) && (toIndex != 0)) {
+                    // clean-up array of dirty part[fromIndex; toIndex[
+                    fill(array, fromIndex, toIndex, (float) 0);
+                }
+                // ensure to never store initial arrays in cache:
+                if (array != initial) {
+                    cache.getCacheBucket(array.length).putArray(array);
+                }
+            }
+            return initial;
         }
     }
 
-    FloatArrayCache(final int arraySize) {
-        this.arraySize = arraySize;
-        // small but enough: almost 1 cache line
-        this.floatArrays = new ArrayDeque<float[]>(6);
-    }
+    static final class Bucket {
+
+        private int tail = 0;
+        private final int arraySize;
+        private final boolean clean;
+        private final float[][] arrays;
+        private final BucketStats stats;
 
-    float[] getArray() {
-        if (DO_STATS) {
-            getOp++;
+        Bucket(final boolean clean, final int arraySize,
+               final int capacity, final BucketStats stats)
+        {
+            this.arraySize = arraySize;
+            this.clean = clean;
+            this.stats = stats;
+            this.arrays = new float[capacity][];
         }
 
-        // use cache
-        final float[] array = floatArrays.pollLast();
-
-        if (array != null) {
-            return array;
+        float[] getArray() {
+            if (DO_STATS) {
+                stats.getOp++;
+            }
+            // use cache:
+            if (tail != 0) {
+                final float[] array = arrays[--tail];
+                arrays[tail] = null;
+                return array;
+            }
+            if (DO_STATS) {
+                stats.createOp++;
+            }
+            return createArray(arraySize, clean);
         }
 
-        if (DO_STATS) {
-            createOp++;
+        void putArray(final float[] array)
+        {
+            if (DO_CHECKS && (array.length != arraySize)) {
+                logInfo(getLogPrefix(clean) + "FloatArrayCache: "
+                        + "bad length = " + array.length);
+                return;
+            }
+            if (DO_STATS) {
+                stats.returnOp++;
+            }
+            // fill cache:
+            if (arrays.length > tail) {
+                arrays[tail++] = array;
+
+                if (DO_STATS) {
+                    stats.updateMaxSize(tail);
+                }
+            } else if (DO_CHECKS) {
+                logInfo(getLogPrefix(clean) + "FloatArrayCache: "
+                        + "array capacity exceeded !");
+            }
         }
-
-        return new float[arraySize];
     }
 
-    void putDirtyArray(final float[] array, final int length) {
-        if (length != arraySize) {
-            if (DO_CHECKS) {
-                MarlinUtils.logInfo("ArrayCache: bad length = " + length);
-            }
-            return;
-        }
-        if (DO_STATS) {
-            returnOp++;
-        }
-
-        // NO clean-up of array data = DIRTY ARRAY
-
-        if (DO_CLEAN_DIRTY) {
-            // Force zero-fill dirty arrays:
-            Arrays.fill(array, 0, array.length, 0f);
+    static float[] createArray(final int length, final boolean clean) {
+        if (clean) {
+            return new float[length];
         }
-
-        // fill cache:
-        floatArrays.addLast(array);
-    }
-
-    void putArray(final float[] array, final int length,
-                  final int fromIndex, final int toIndex)
-    {
-        if (length != arraySize) {
-            if (DO_CHECKS) {
-                MarlinUtils.logInfo("ArrayCache: bad length = " + length);
-            }
-            return;
-        }
-        if (DO_STATS) {
-            returnOp++;
-        }
-
-        // clean-up array of dirty part[fromIndex; toIndex[
-        fill(array, fromIndex, toIndex, 0f);
-
-        // fill cache:
-        floatArrays.addLast(array);
+       // use JDK9 Unsafe.allocateUninitializedArray(class, length):
+       return (float[]) OffHeapArray.UNSAFE.allocateUninitializedArray(float.class, length);
     }
 
     static void fill(final float[] array, final int fromIndex,
                      final int toIndex, final float value)
     {
         // clear array data:
-        /*
-         * Arrays.fill is faster than System.arraycopy(empty array)
-         * or Unsafe.setMemory(byte 0)
-         */
-        if (toIndex != 0) {
-            Arrays.fill(array, fromIndex, toIndex, value);
-        }
-
+        Arrays.fill(array, fromIndex, toIndex, value);
         if (DO_CHECKS) {
             check(array, fromIndex, toIndex, value);
         }
@@ -150,4 +264,8 @@
             }
         }
     }
+
+    static String getLogPrefix(final boolean clean) {
+        return (clean) ? "Clean" : "Dirty";
+    }
 }
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/IntArrayCache.java	Wed Aug 03 14:49:01 2016 +0530
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/IntArrayCache.java	Wed Aug 03 22:53:38 2016 +0200
@@ -22,109 +22,224 @@
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
-
 package sun.java2d.marlin;
 
-import java.util.ArrayDeque;
+import static sun.java2d.marlin.ArrayCacheConst.ARRAY_SIZES;
+import static sun.java2d.marlin.ArrayCacheConst.BUCKETS;
+import static sun.java2d.marlin.ArrayCacheConst.MAX_ARRAY_SIZE;
+import static sun.java2d.marlin.MarlinUtils.logInfo;
+import static sun.java2d.marlin.MarlinUtils.logException;
+
+import java.lang.ref.WeakReference;
 import java.util.Arrays;
-import static sun.java2d.marlin.MarlinUtils.logException;
-import static sun.java2d.marlin.MarlinUtils.logInfo;
+
+import sun.java2d.marlin.ArrayCacheConst.BucketStats;
+import sun.java2d.marlin.ArrayCacheConst.CacheStats;
+
+/*
+ * Note that the [BYTE/INT/FLOAT]ArrayCache files are nearly identical except
+ * for a few type and name differences. Typically, the [BYTE]ArrayCache.java file
+ * is edited manually and then [INT]ArrayCache.java and [FLOAT]ArrayCache.java
+ * files are generated with the following command lines:
+ */
+// % sed -e 's/(b\yte)[ ]*//g' -e 's/b\yte/int/g' -e 's/B\yte/Int/g' < B\yteArrayCache.java > IntArrayCache.java
+// % sed -e 's/(b\yte)[ ]*/(float) /g' -e 's/b\yte/float/g' -e 's/B\yte/Float/g' < B\yteArrayCache.java > FloatArrayCache.java
 
 final class IntArrayCache implements MarlinConst {
 
-    private final int arraySize;
-    private final ArrayDeque<int[]> intArrays;
-    // stats
-    private int getOp = 0;
-    private int createOp = 0;
-    private int returnOp = 0;
+    final boolean clean;
+    private final int bucketCapacity;
+    private WeakReference<Bucket[]> refBuckets = null;
+    final CacheStats stats;
+
+    IntArrayCache(final boolean clean, final int bucketCapacity) {
+        this.clean = clean;
+        this.bucketCapacity = bucketCapacity;
+        this.stats = (DO_STATS) ?
+            new CacheStats(getLogPrefix(clean) + "IntArrayCache") : null;
+    }
+
+    Bucket getCacheBucket(final int length) {
+        final int bucket = ArrayCacheConst.getBucket(length);
+        return getBuckets()[bucket];
+    }
+
+    private Bucket[] getBuckets() {
+        // resolve reference:
+        Bucket[] buckets = (refBuckets != null) ? refBuckets.get() : null;
+
+        // create a new buckets ?
+        if (buckets == null) {
+            buckets = new Bucket[BUCKETS];
+
+            for (int i = 0; i < BUCKETS; i++) {
+                buckets[i] = new Bucket(clean, ARRAY_SIZES[i], bucketCapacity,
+                        (DO_STATS) ? stats.bucketStats[i] : null);
+            }
+
+            // update weak reference:
+            refBuckets = new WeakReference<Bucket[]>(buckets);
+        }
+        return buckets;
+    }
+
+    Reference createRef(final int initialSize) {
+        return new Reference(this, initialSize);
+    }
+
+    static final class Reference {
+
+        // initial array reference (direct access)
+        final int[] initial;
+        private final boolean clean;
+        private final IntArrayCache cache;
+
+        Reference(final IntArrayCache cache, final int initialSize) {
+            this.cache = cache;
+            this.clean = cache.clean;
+            this.initial = createArray(initialSize, clean);
+            if (DO_STATS) {
+                cache.stats.totalInitial += initialSize;
+            }
+        }
 
-    void dumpStats() {
-        if (getOp > 0) {
-            logInfo("IntArrayCache[" + arraySize + "]: get: " + getOp
-                    + " created: " + createOp + " - returned: " + returnOp
-                    + " :: cache size: " + intArrays.size());
+        int[] getArray(final int length) {
+            if (length <= MAX_ARRAY_SIZE) {
+                return cache.getCacheBucket(length).getArray();
+            }
+            if (DO_STATS) {
+                cache.stats.oversize++;
+            }
+            if (DO_LOG_OVERSIZE) {
+                logInfo(getLogPrefix(clean) + "IntArrayCache: "
+                        + "getArray[oversize]: length=\t" + length);
+            }
+            return createArray(length, clean);
+        }
+
+        int[] widenArray(final int[] array, final int usedSize,
+                          final int needSize)
+        {
+            final int length = array.length;
+            if (DO_CHECKS && length >= needSize) {
+                return array;
+            }
+            if (DO_STATS) {
+                cache.stats.resize++;
+            }
+
+            // maybe change bucket:
+            // ensure getNewSize() > newSize:
+            final int[] res = getArray(ArrayCacheConst.getNewSize(usedSize, needSize));
+
+            // use wrapper to ensure proper copy:
+            System.arraycopy(array, 0, res, 0, usedSize); // copy only used elements
+
+            // maybe return current array:
+            putArray(array, 0, usedSize); // ensure array is cleared
+
+            if (DO_LOG_WIDEN_ARRAY) {
+                logInfo(getLogPrefix(clean) + "IntArrayCache: "
+                        + "widenArray[" + res.length
+                        + "]: usedSize=\t" + usedSize + "\tlength=\t" + length
+                        + "\tneeded length=\t" + needSize);
+            }
+            return res;
+        }
+
+        int[] putArray(final int[] array)
+        {
+            // dirty array helper:
+            return putArray(array, 0, array.length);
+        }
+
+        int[] putArray(final int[] array, final int fromIndex,
+                        final int toIndex)
+        {
+            if (array.length <= MAX_ARRAY_SIZE) {
+                if ((clean || DO_CLEAN_DIRTY) && (toIndex != 0)) {
+                    // clean-up array of dirty part[fromIndex; toIndex[
+                    fill(array, fromIndex, toIndex, 0);
+                }
+                // ensure to never store initial arrays in cache:
+                if (array != initial) {
+                    cache.getCacheBucket(array.length).putArray(array);
+                }
+            }
+            return initial;
         }
     }
 
-    IntArrayCache(final int arraySize) {
-        this.arraySize = arraySize;
-        // small but enough: almost 1 cache line
-        this.intArrays = new ArrayDeque<int[]>(6);
-    }
+    static final class Bucket {
+
+        private int tail = 0;
+        private final int arraySize;
+        private final boolean clean;
+        private final int[][] arrays;
+        private final BucketStats stats;
 
-    int[] getArray() {
-        if (DO_STATS) {
-            getOp++;
+        Bucket(final boolean clean, final int arraySize,
+               final int capacity, final BucketStats stats)
+        {
+            this.arraySize = arraySize;
+            this.clean = clean;
+            this.stats = stats;
+            this.arrays = new int[capacity][];
         }
 
-        // use cache:
-        final int[] array = intArrays.pollLast();
-        if (array != null) {
-            return array;
+        int[] getArray() {
+            if (DO_STATS) {
+                stats.getOp++;
+            }
+            // use cache:
+            if (tail != 0) {
+                final int[] array = arrays[--tail];
+                arrays[tail] = null;
+                return array;
+            }
+            if (DO_STATS) {
+                stats.createOp++;
+            }
+            return createArray(arraySize, clean);
         }
 
-        if (DO_STATS) {
-            createOp++;
+        void putArray(final int[] array)
+        {
+            if (DO_CHECKS && (array.length != arraySize)) {
+                logInfo(getLogPrefix(clean) + "IntArrayCache: "
+                        + "bad length = " + array.length);
+                return;
+            }
+            if (DO_STATS) {
+                stats.returnOp++;
+            }
+            // fill cache:
+            if (arrays.length > tail) {
+                arrays[tail++] = array;
+
+                if (DO_STATS) {
+                    stats.updateMaxSize(tail);
+                }
+            } else if (DO_CHECKS) {
+                logInfo(getLogPrefix(clean) + "IntArrayCache: "
+                        + "array capacity exceeded !");
+            }
         }
-
-        return new int[arraySize];
     }
 
-    void putDirtyArray(final int[] array, final int length) {
-        if (length != arraySize) {
-            if (DO_CHECKS) {
-                MarlinUtils.logInfo("ArrayCache: bad length = " + length);
-            }
-            return;
-        }
-        if (DO_STATS) {
-            returnOp++;
-        }
-
-        // NO clean-up of array data = DIRTY ARRAY
-
-        if (DO_CLEAN_DIRTY) {
-            // Force zero-fill dirty arrays:
-            Arrays.fill(array, 0, array.length, 0);
+    static int[] createArray(final int length, final boolean clean) {
+        if (clean) {
+            return new int[length];
         }
-
-        // fill cache:
-        intArrays.addLast(array);
-    }
-
-    void putArray(final int[] array, final int length,
-                  final int fromIndex, final int toIndex)
-    {
-        if (length != arraySize) {
-            if (DO_CHECKS) {
-                MarlinUtils.logInfo("ArrayCache: bad length = " + length);
-            }
-            return;
-        }
-        if (DO_STATS) {
-            returnOp++;
-        }
-
-        // clean-up array of dirty part[fromIndex; toIndex[
-        fill(array, fromIndex, toIndex, 0);
-
-        // fill cache:
-        intArrays.addLast(array);
+       // use JDK9 Unsafe.allocateUninitializedArray(class, length):
+       return (int[]) OffHeapArray.UNSAFE.allocateUninitializedArray(int.class, length);
     }
 
     static void fill(final int[] array, final int fromIndex,
                      final int toIndex, final int value)
     {
         // clear array data:
-        /*
-         * Arrays.fill is faster than System.arraycopy(empty array)
-         * or Unsafe.setMemory(byte 0)
-         */
-        if (toIndex != 0) {
-            Arrays.fill(array, fromIndex, toIndex, value);
-        }
-
+        Arrays.fill(array, fromIndex, toIndex, value);
         if (DO_CHECKS) {
             check(array, fromIndex, toIndex, value);
         }
@@ -149,4 +264,8 @@
             }
         }
     }
+
+    static String getLogPrefix(final boolean clean) {
+        return (clean) ? "Clean" : "Dirty";
+    }
 }
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinCache.java	Wed Aug 03 14:49:01 2016 +0530
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinCache.java	Wed Aug 03 22:53:38 2016 +0200
@@ -100,8 +100,8 @@
     // per-thread renderer context
     final RendererContext rdrCtx;
 
-    // large cached touchedTile (dirty)
-    final int[] touchedTile_initial = new int[INITIAL_ARRAY]; // 1 tile line
+    // touchedTile ref (clean)
+    private final IntArrayCache.Reference touchedTile_ref;
 
     int tileMin, tileMax;
 
@@ -110,9 +110,10 @@
     MarlinCache(final RendererContext rdrCtx) {
         this.rdrCtx = rdrCtx;
 
-        rowAAChunk = new OffHeapArray(rdrCtx.cleanerObj, INITIAL_CHUNK_ARRAY); // 64K
+        rowAAChunk = rdrCtx.newOffHeapArray(INITIAL_CHUNK_ARRAY); // 64K
 
-        touchedTile = touchedTile_initial;
+        touchedTile_ref = rdrCtx.newCleanIntArrayRef(INITIAL_ARRAY); // 1K = 1 tile line
+        touchedTile     = touchedTile_ref.initial;
 
         // tile used marks:
         tileMin = Integer.MAX_VALUE;
@@ -181,10 +182,9 @@
 
         if (nxTiles > INITIAL_ARRAY) {
             if (DO_STATS) {
-                rdrCtx.stats.stat_array_marlincache_touchedTile
-                    .add(nxTiles);
+                rdrCtx.stats.stat_array_marlincache_touchedTile.add(nxTiles);
             }
-            touchedTile = rdrCtx.getIntArray(nxTiles);
+            touchedTile = touchedTile_ref.getArray(nxTiles);
         }
     }
 
@@ -196,11 +196,13 @@
         // Reset touchedTile if needed:
         resetTileLine(0);
 
+        if (DO_STATS) {
+            rdrCtx.stats.totalOffHeap += rowAAChunk.length;
+        }
+
         // Return arrays:
-        if (touchedTile != touchedTile_initial) {
-            rdrCtx.putIntArray(touchedTile, 0, 0); // already zero filled
-            touchedTile = touchedTile_initial;
-        }
+        touchedTile = touchedTile_ref.putArray(touchedTile, 0, 0); // already zero filled
+
         // At last: resize back off-heap rowAA to initial size
         if (rowAAChunk.length != INITIAL_CHUNK_ARRAY) {
             // note: may throw OOME:
@@ -554,8 +556,7 @@
         addr_off += SIZE_INT;
 
         if (DO_STATS) {
-            rdrCtx.stats.hist_tile_generator_encoding_runLen
-                .add(runLen);
+            rdrCtx.stats.hist_tile_generator_encoding_runLen.add(runLen);
         }
 
         long len = (addr_off - _rowAAChunk.address);
@@ -612,12 +613,12 @@
 
     private void expandRowAAChunk(final long needSize) {
         if (DO_STATS) {
-            rdrCtx.stats.stat_array_marlincache_rowAAChunk
-                .add(needSize);
+            rdrCtx.stats.stat_array_marlincache_rowAAChunk.add(needSize);
         }
 
         // note: throw IOOB if neededSize > 2Gb:
-        final long newSize = ArrayCache.getNewLargeSize(rowAAChunk.length, needSize);
+        final long newSize = ArrayCacheConst.getNewLargeSize(rowAAChunk.length,
+                                                             needSize);
 
         rowAAChunk.resize(newSize);
     }
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinConst.java	Wed Aug 03 14:49:01 2016 +0530
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinConst.java	Wed Aug 03 22:53:38 2016 +0200
@@ -62,6 +62,8 @@
     // enable traces
     static final boolean DO_TRACE = ENABLE_LOGS && false;
 
+    // do flush stats
+    static final boolean DO_FLUSH_STATS = true;
     // do flush monitors
     static final boolean DO_FLUSH_MONITORS = true;
     // use one polling thread to dump statistics/monitors
@@ -78,7 +80,7 @@
     // flag to enable logs related bounds checks
     static final boolean DO_LOG_BOUNDS = ENABLE_LOGS && false;
 
-    // Initial Array sizing (initial context capacity) ~ 350K
+    // Initial Array sizing (initial context capacity) ~ 450K
 
     // 2048 pixel (width x height) for initial capacity
     static final int INITIAL_PIXEL_DIM
@@ -86,14 +88,17 @@
 
     // typical array sizes: only odd numbers allowed below
     static final int INITIAL_ARRAY        = 256;
-    static final int INITIAL_SMALL_ARRAY  = 1024;
-    static final int INITIAL_MEDIUM_ARRAY = 4096;
-    static final int INITIAL_LARGE_ARRAY  = 8192;
+
     // alpha row dimension
     static final int INITIAL_AA_ARRAY     = INITIAL_PIXEL_DIM;
 
-    // initial edges (24 bytes) = 24K [ints] = 96K
-    static final int INITIAL_EDGES_CAPACITY = 4096 * 24; // 6 ints per edges
+    // 4096 edges for initial capacity
+    static final int INITIAL_EDGES_COUNT = MarlinProperties.getInitialEdges();
+
+    // initial edges = 3/4 * edges count (4096)
+    // 6 ints per edges = 24 bytes
+    // edges capacity = 24 x initial edges = 18 * edges count (4096) = 72K
+    static final int INITIAL_EDGES_CAPACITY = INITIAL_EDGES_COUNT * 18;
 
     // zero value as byte
     static final byte BYTE_0 = (byte) 0;
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinProperties.java	Wed Aug 03 14:49:01 2016 +0530
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinProperties.java	Wed Aug 03 22:53:38 2016 +0200
@@ -42,13 +42,27 @@
     }
 
     /**
+     * Return the initial edge capacity used to define initial arrays
+     * (edges, polystack, crossings)
+     *
+     * @return 256 < initial edges < 65536 (4096 by default)
+     */
+    public static int getInitialEdges() {
+        return align(
+            getInteger("sun.java2d.renderer.edges", 4096, 64, 64 * 1024),
+            64);
+    }
+
+    /**
      * Return the initial pixel size used to define initial arrays
      * (tile AA chunk, alpha line, buckets)
      *
      * @return 64 < initial pixel size < 32768 (2048 by default)
      */
     public static int getInitialImageSize() {
-        return getInteger("sun.java2d.renderer.pixelsize", 2048, 64, 32 * 1024);
+        return align(
+            getInteger("sun.java2d.renderer.pixelsize", 2048, 64, 32 * 1024),
+            64);
     }
 
     /**
@@ -182,4 +196,8 @@
         return value;
     }
 
+    static int align(final int val, final int norm) {
+        final int ceil = FloatMath.ceil_int( ((float)val) / norm);
+        return ceil * norm;
+    }
 }
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinRenderingEngine.java	Wed Aug 03 14:49:01 2016 +0530
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinRenderingEngine.java	Wed Aug 03 22:53:38 2016 +0200
@@ -47,7 +47,38 @@
 public class MarlinRenderingEngine extends RenderingEngine
                                    implements MarlinConst
 {
-    private static enum NormMode {ON_WITH_AA, ON_NO_AA, OFF}
+    private static enum NormMode {
+        ON_WITH_AA {
+            @Override
+            PathIterator getNormalizingPathIterator(final RendererContext rdrCtx,
+                                                    final PathIterator src)
+            {
+                // NormalizingPathIterator NearestPixelCenter:
+                return rdrCtx.nPCPathIterator.init(src);
+            }
+        },
+        ON_NO_AA{
+            @Override
+            PathIterator getNormalizingPathIterator(final RendererContext rdrCtx,
+                                                    final PathIterator src)
+            {
+                // NearestPixel NormalizingPathIterator:
+                return rdrCtx.nPQPathIterator.init(src);
+            }
+        },
+        OFF{
+            @Override
+            PathIterator getNormalizingPathIterator(final RendererContext rdrCtx,
+                                                    final PathIterator src)
+            {
+                // return original path iterator if normalization is disabled:
+                return src;
+            }
+        };
+
+        abstract PathIterator getNormalizingPathIterator(RendererContext rdrCtx,
+                                                         PathIterator src);
+    }
 
     private static final float MIN_PEN_SIZE = 1f / NORM_SUBPIXELS;
 
@@ -91,13 +122,7 @@
         final RendererContext rdrCtx = getRendererContext();
         try {
             // initialize a large copyable Path2D to avoid a lot of array growing:
-            final Path2D.Float p2d =
-                    (rdrCtx.p2d == null) ?
-                    (rdrCtx.p2d = new Path2D.Float(Path2D.WIND_NON_ZERO,
-                                                   INITIAL_MEDIUM_ARRAY))
-                    : rdrCtx.p2d;
-            // reset
-            p2d.reset();
+            final Path2D.Float p2d = rdrCtx.getPath2D();
 
             strokeTo(rdrCtx,
                      src,
@@ -274,7 +299,7 @@
                         Shape src,
                         AffineTransform at,
                         float width,
-                        NormMode normalize,
+                        NormMode norm,
                         int caps,
                         int join,
                         float miterlimit,
@@ -338,13 +363,12 @@
                     dashLen = dashes.length;
                     final float[] newDashes;
                     if (dashLen <= INITIAL_ARRAY) {
-                        newDashes = rdrCtx.dasher.dashes_initial;
+                        newDashes = rdrCtx.dasher.dashes_ref.initial;
                     } else {
                         if (DO_STATS) {
-                            rdrCtx.stats.stat_array_dasher_dasher
-                                .add(dashLen);
+                            rdrCtx.stats.stat_array_dasher_dasher.add(dashLen);
                         }
-                        newDashes = rdrCtx.getDirtyFloatArray(dashLen);
+                        newDashes = rdrCtx.dasher.dashes_ref.getArray(dashLen);
                     }
                     System.arraycopy(dashes, 0, newDashes, 0, dashLen);
                     dashes = newDashes;
@@ -400,8 +424,8 @@
         }
         pc2d = transformerPC2D.inverseDeltaTransformConsumer(pc2d, strokerat);
 
-        final PathIterator pi = getNormalizingPathIterator(rdrCtx, normalize,
-                                    src.getPathIterator(at));
+        final PathIterator pi = norm.getNormalizingPathIterator(rdrCtx,
+                                         src.getPathIterator(at));
 
         pathTo(rdrCtx, pi, pc2d);
 
@@ -424,25 +448,6 @@
         return Math.abs(num) < 2.0 * Math.ulp(num);
     }
 
-    PathIterator getNormalizingPathIterator(final RendererContext rdrCtx,
-                                            final NormMode mode,
-                                            final PathIterator src)
-    {
-        switch (mode) {
-            case ON_WITH_AA:
-                // NormalizingPathIterator NearestPixelCenter:
-                return rdrCtx.nPCPathIterator.init(src);
-            case ON_NO_AA:
-                // NearestPixel NormalizingPathIterator:
-                return rdrCtx.nPQPathIterator.init(src);
-            case OFF:
-                // return original path iterator if normalization is disabled:
-                return src;
-            default:
-                throw new InternalError("Unrecognized normalization mode");
-        }
-    }
-
     abstract static class NormalizingPathIterator implements PathIterator {
 
         private PathIterator src;
@@ -792,8 +797,8 @@
 
             if (bs == null) {
                 // fill shape:
-                final PathIterator pi = getNormalizingPathIterator(rdrCtx, norm,
-                                            s.getPathIterator(_at));
+                final PathIterator pi = norm.getNormalizingPathIterator(rdrCtx,
+                                                 s.getPathIterator(_at));
 
                 // note: Winding rule may be EvenOdd ONLY for fill operations !
                 r = rdrCtx.renderer.init(clip.getLoX(), clip.getLoY(),
@@ -1021,8 +1026,11 @@
         logInfo("sun.java2d.renderer.useRef           = "
                 + refType);
 
+        logInfo("sun.java2d.renderer.edges            = "
+                + MarlinConst.INITIAL_EDGES_COUNT);
         logInfo("sun.java2d.renderer.pixelsize        = "
                 + MarlinConst.INITIAL_PIXEL_DIM);
+
         logInfo("sun.java2d.renderer.subPixel_log2_X  = "
                 + MarlinConst.SUBPIXEL_LG_POSITIONS_X);
         logInfo("sun.java2d.renderer.subPixel_log2_Y  = "
@@ -1076,6 +1084,11 @@
         logInfo("CUB_INC_BND  = " + Renderer.CUB_INC_BND);
         logInfo("QUAD_DEC_BND = " + Renderer.QUAD_DEC_BND);
 
+        logInfo("INITIAL_EDGES_CAPACITY               = "
+                + MarlinConst.INITIAL_EDGES_CAPACITY);
+        logInfo("INITIAL_CROSSING_COUNT               = "
+                + Renderer.INITIAL_CROSSING_COUNT);
+
         logInfo("=========================================================="
                 + "=====================");
     }
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Renderer.java	Wed Aug 03 14:49:01 2016 +0530
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Renderer.java	Wed Aug 03 22:53:38 2016 +0200
@@ -43,9 +43,9 @@
     private static final double POWER_2_TO_32 = 0x1.0p32;
 
     // use float to make tosubpix methods faster (no int to float conversion)
-    public static final float f_SUBPIXEL_POSITIONS_X
+    public static final float F_SUBPIXEL_POSITIONS_X
         = (float) SUBPIXEL_POSITIONS_X;
-    public static final float f_SUBPIXEL_POSITIONS_Y
+    public static final float F_SUBPIXEL_POSITIONS_Y
         = (float) SUBPIXEL_POSITIONS_Y;
     public static final int SUBPIXEL_MASK_X = SUBPIXEL_POSITIONS_X - 1;
     public static final int SUBPIXEL_MASK_Y = SUBPIXEL_POSITIONS_Y - 1;
@@ -58,6 +58,9 @@
     static final int INITIAL_BUCKET_ARRAY
         = INITIAL_PIXEL_DIM * SUBPIXEL_POSITIONS_Y;
 
+    // crossing capacity = edges count / 8 ~ 512
+    static final int INITIAL_CROSSING_COUNT = INITIAL_EDGES_COUNT >> 3;
+
     public static final int WIND_EVEN_ODD = 0;
     public static final int WIND_NON_ZERO = 1;
 
@@ -136,14 +139,15 @@
     // max used for both edgePtrs and crossings (stats only)
     private int activeEdgeMaxUsed;
 
-    // per-thread initial arrays (large enough to satisfy most usages) (1024)
-    private final int[] crossings_initial = new int[INITIAL_SMALL_ARRAY]; // 4K
-    // +1 to avoid recycling in Helpers.widenArray()
-    private final int[] edgePtrs_initial  = new int[INITIAL_SMALL_ARRAY + 1]; // 4K
+    // crossings ref (dirty)
+    private final IntArrayCache.Reference crossings_ref;
+    // edgePtrs ref (dirty)
+    private final IntArrayCache.Reference edgePtrs_ref;
     // merge sort initial arrays (large enough to satisfy most usages) (1024)
-    private final int[] aux_crossings_initial = new int[INITIAL_SMALL_ARRAY]; // 4K
-    // +1 to avoid recycling in Helpers.widenArray()
-    private final int[] aux_edgePtrs_initial  = new int[INITIAL_SMALL_ARRAY + 1]; // 4K
+    // aux_crossings ref (dirty)
+    private final IntArrayCache.Reference aux_crossings_ref;
+    // aux_edgePtrs ref (dirty)
+    private final IntArrayCache.Reference aux_edgePtrs_ref;
 
 //////////////////////////////////////////////////////////////////////////////
 //  EDGE LIST
@@ -164,11 +168,10 @@
     // sum of each edge delta Y (subpixels)
     private int edgeSumDeltaY;
 
-    // +1 to avoid recycling in Helpers.widenArray()
-    private final int[] edgeBuckets_initial
-        = new int[INITIAL_BUCKET_ARRAY + 1]; // 64K
-    private final int[] edgeBucketCounts_initial
-        = new int[INITIAL_BUCKET_ARRAY + 1]; // 64K
+    // edgeBuckets ref (clean)
+    private final IntArrayCache.Reference edgeBuckets_ref;
+    // edgeBucketCounts ref (clean)
+    private final IntArrayCache.Reference edgeBucketCounts_ref;
 
     // Flattens using adaptive forward differencing. This only carries out
     // one iteration of the AFD loop. All it does is update AFD variables (i.e.
@@ -402,7 +405,8 @@
             // suppose _edges.length > _SIZEOF_EDGE_BYTES
             // so doubling size is enough to add needed bytes
             // note: throw IOOB if neededSize > 2Gb:
-            final long edgeNewSize = ArrayCache.getNewLargeSize(_edges.length,
+            final long edgeNewSize = ArrayCacheConst.getNewLargeSize(
+                                        _edges.length,
                                         edgePtr + _SIZEOF_EDGE_BYTES);
 
             if (DO_STATS) {
@@ -514,28 +518,52 @@
     // dirty curve
     private final Curve curve;
 
+    // clean alpha array (zero filled)
+    private int[] alphaLine;
+
+    // alphaLine ref (clean)
+    private final IntArrayCache.Reference alphaLine_ref;
+
+    private boolean enableBlkFlags = false;
+    private boolean prevUseBlkFlags = false;
+
+    /* block flags (0|1) */
+    private int[] blkFlags;
+
+    // blkFlags ref (clean)
+    private final IntArrayCache.Reference blkFlags_ref;
+
     Renderer(final RendererContext rdrCtx) {
         this.rdrCtx = rdrCtx;
 
-        this.edges = new OffHeapArray(rdrCtx.cleanerObj, INITIAL_EDGES_CAPACITY); // 96K
+        this.edges = rdrCtx.newOffHeapArray(INITIAL_EDGES_CAPACITY); // 96K
 
         this.curve = rdrCtx.curve;
 
-        edgeBuckets = edgeBuckets_initial;
-        edgeBucketCounts = edgeBucketCounts_initial;
+        edgeBuckets_ref      = rdrCtx.newCleanIntArrayRef(INITIAL_BUCKET_ARRAY); // 64K
+        edgeBucketCounts_ref = rdrCtx.newCleanIntArrayRef(INITIAL_BUCKET_ARRAY); // 64K
 
-        alphaLine  = alphaLine_initial;
+        edgeBuckets      = edgeBuckets_ref.initial;
+        edgeBucketCounts = edgeBucketCounts_ref.initial;
+
+        // 2048 (pixelsize) pixel large
+        alphaLine_ref = rdrCtx.newCleanIntArrayRef(INITIAL_AA_ARRAY); // 8K
+        alphaLine     = alphaLine_ref.initial;
 
         this.cache = rdrCtx.cache;
 
-        // ScanLine:
-        crossings     = crossings_initial;
-        aux_crossings = aux_crossings_initial;
-        edgePtrs      = edgePtrs_initial;
-        aux_edgePtrs  = aux_edgePtrs_initial;
+        crossings_ref     = rdrCtx.newDirtyIntArrayRef(INITIAL_CROSSING_COUNT); // 2K
+        aux_crossings_ref = rdrCtx.newDirtyIntArrayRef(INITIAL_CROSSING_COUNT); // 2K
+        edgePtrs_ref      = rdrCtx.newDirtyIntArrayRef(INITIAL_CROSSING_COUNT); // 2K
+        aux_edgePtrs_ref  = rdrCtx.newDirtyIntArrayRef(INITIAL_CROSSING_COUNT); // 2K
 
-        edgeCount = 0;
-        activeEdgeMaxUsed = 0;
+        crossings     = crossings_ref.initial;
+        aux_crossings = aux_crossings_ref.initial;
+        edgePtrs      = edgePtrs_ref.initial;
+        aux_edgePtrs  = aux_edgePtrs_ref.initial;
+
+        blkFlags_ref = rdrCtx.newCleanIntArrayRef(INITIAL_ARRAY); // 1K = 1 tile line
+        blkFlags     = blkFlags_ref.initial;
     }
 
     Renderer init(final int pix_boundsX, final int pix_boundsY,
@@ -569,8 +597,8 @@
                 rdrCtx.stats.stat_array_renderer_edgeBucketCounts
                     .add(edgeBucketsLength);
             }
-            edgeBuckets = rdrCtx.getIntArray(edgeBucketsLength);
-            edgeBucketCounts = rdrCtx.getIntArray(edgeBucketsLength);
+            edgeBuckets = edgeBuckets_ref.getArray(edgeBucketsLength);
+            edgeBucketCounts = edgeBucketCounts_ref.getArray(edgeBucketsLength);
         }
 
         edgeMinY = Integer.MAX_VALUE;
@@ -595,41 +623,19 @@
         if (DO_STATS) {
             rdrCtx.stats.stat_rdr_activeEdges.add(activeEdgeMaxUsed);
             rdrCtx.stats.stat_rdr_edges.add(edges.used);
-            rdrCtx.stats.stat_rdr_edges_count
-                .add(edges.used / SIZEOF_EDGE_BYTES);
-        }
-        if (DO_CLEAN_DIRTY) {
-            // Force zero-fill dirty arrays:
-            Arrays.fill(crossings,     0);
-            Arrays.fill(aux_crossings, 0);
-            Arrays.fill(edgePtrs,      0);
-            Arrays.fill(aux_edgePtrs,  0);
+            rdrCtx.stats.stat_rdr_edges_count.add(edges.used / SIZEOF_EDGE_BYTES);
+            rdrCtx.stats.hist_rdr_edges_count.add(edges.used / SIZEOF_EDGE_BYTES);
+            rdrCtx.stats.totalOffHeap += edges.length;
         }
         // Return arrays:
-        if (crossings != crossings_initial) {
-            rdrCtx.putDirtyIntArray(crossings);
-            crossings = crossings_initial;
-            if (aux_crossings != aux_crossings_initial) {
-                rdrCtx.putDirtyIntArray(aux_crossings);
-                aux_crossings = aux_crossings_initial;
-            }
-        }
-        if (edgePtrs != edgePtrs_initial) {
-            rdrCtx.putDirtyIntArray(edgePtrs);
-            edgePtrs = edgePtrs_initial;
-            if (aux_edgePtrs != aux_edgePtrs_initial) {
-                rdrCtx.putDirtyIntArray(aux_edgePtrs);
-                aux_edgePtrs = aux_edgePtrs_initial;
-            }
-        }
-        if (alphaLine != alphaLine_initial) {
-            rdrCtx.putIntArray(alphaLine, 0, 0); // already zero filled
-            alphaLine = alphaLine_initial;
-        }
-        if (blkFlags != blkFlags_initial) {
-            rdrCtx.putIntArray(blkFlags, 0, 0); // already zero filled
-            blkFlags = blkFlags_initial;
-        }
+        crossings = crossings_ref.putArray(crossings);
+        aux_crossings = aux_crossings_ref.putArray(aux_crossings);
+
+        edgePtrs = edgePtrs_ref.putArray(edgePtrs);
+        aux_edgePtrs = aux_edgePtrs_ref.putArray(aux_edgePtrs);
+
+        alphaLine = alphaLine_ref.putArray(alphaLine, 0, 0); // already zero filled
+        blkFlags  = blkFlags_ref.putArray(blkFlags, 0, 0); // already zero filled
 
         if (edgeMinY != Integer.MAX_VALUE) {
             // if context is maked as DIRTY:
@@ -639,30 +645,16 @@
                 buckets_minY = 0;
                 buckets_maxY = boundsMaxY - boundsMinY;
             }
-            // clear used part
-            if (edgeBuckets == edgeBuckets_initial) {
-                // fill only used part
-                IntArrayCache.fill(edgeBuckets,      buckets_minY,
-                                                     buckets_maxY,     0);
-                IntArrayCache.fill(edgeBucketCounts, buckets_minY,
-                                                     buckets_maxY + 1, 0);
-            } else {
-                 // clear only used part
-                rdrCtx.putIntArray(edgeBuckets,      buckets_minY,
-                                                     buckets_maxY);
-                edgeBuckets = edgeBuckets_initial;
-
-                rdrCtx.putIntArray(edgeBucketCounts, buckets_minY,
-                                                     buckets_maxY + 1);
-                edgeBucketCounts = edgeBucketCounts_initial;
-            }
-        } else if (edgeBuckets != edgeBuckets_initial) {
+            // clear only used part
+            edgeBuckets = edgeBuckets_ref.putArray(edgeBuckets, buckets_minY,
+                                                                buckets_maxY);
+            edgeBucketCounts = edgeBucketCounts_ref.putArray(edgeBucketCounts,
+                                                             buckets_minY,
+                                                             buckets_maxY + 1);
+        } else {
             // unused arrays
-            rdrCtx.putIntArray(edgeBuckets, 0, 0);
-            edgeBuckets = edgeBuckets_initial;
-
-            rdrCtx.putIntArray(edgeBucketCounts, 0, 0);
-            edgeBucketCounts = edgeBucketCounts_initial;
+            edgeBuckets = edgeBuckets_ref.putArray(edgeBuckets, 0, 0);
+            edgeBucketCounts = edgeBucketCounts_ref.putArray(edgeBucketCounts, 0, 0);
         }
 
         // At last: resize back off-heap edges to initial size
@@ -680,12 +672,12 @@
     }
 
     private static float tosubpixx(final float pix_x) {
-        return f_SUBPIXEL_POSITIONS_X * pix_x;
+        return F_SUBPIXEL_POSITIONS_X * pix_x;
     }
 
     private static float tosubpixy(final float pix_y) {
         // shift y by -0.5 for fast ceil(y - 0.5):
-        return f_SUBPIXEL_POSITIONS_Y * pix_y - 0.5f;
+        return F_SUBPIXEL_POSITIONS_Y * pix_y - 0.5f;
     }
 
     @Override
@@ -749,11 +741,6 @@
         throw new InternalError("Renderer does not use a native consumer.");
     }
 
-    // clean alpha array (zero filled)
-    private int[] alphaLine;
-    // 2048 (pixelsize) pixel large
-    private final int[] alphaLine_initial = new int[INITIAL_AA_ARRAY]; // 8K
-
     private void _endRendering(final int ymin, final int ymax) {
         if (DISABLE_RENDER) {
             return;
@@ -857,8 +844,7 @@
             // bucketCount indicates new edge / edge end:
             if (bucketcount != 0) {
                 if (DO_STATS) {
-                    rdrCtx.stats.stat_rdr_activeEdges_updates
-                        .add(numCrossings);
+                    rdrCtx.stats.stat_rdr_activeEdges_updates.add(numCrossings);
                 }
 
                 // last bit set to 1 means that edges ends
@@ -883,38 +869,33 @@
 
                 if (ptrLen != 0) {
                     if (DO_STATS) {
-                        rdrCtx.stats.stat_rdr_activeEdges_adds
-                            .add(ptrLen);
+                        rdrCtx.stats.stat_rdr_activeEdges_adds.add(ptrLen);
                         if (ptrLen > 10) {
-                            rdrCtx.stats.stat_rdr_activeEdges_adds_high
-                                .add(ptrLen);
+                            rdrCtx.stats.stat_rdr_activeEdges_adds_high.add(ptrLen);
                         }
                     }
                     ptrEnd = numCrossings + ptrLen;
 
                     if (edgePtrsLen < ptrEnd) {
                         if (DO_STATS) {
-                            rdrCtx.stats.stat_array_renderer_edgePtrs
-                                .add(ptrEnd);
+                            rdrCtx.stats.stat_array_renderer_edgePtrs.add(ptrEnd);
                         }
                         this.edgePtrs = _edgePtrs
-                            = rdrCtx.widenDirtyIntArray(_edgePtrs, numCrossings,
-                                                        ptrEnd);
+                            = edgePtrs_ref.widenArray(_edgePtrs, numCrossings,
+                                                      ptrEnd);
 
                         edgePtrsLen = _edgePtrs.length;
                         // Get larger auxiliary storage:
-                        if (_aux_edgePtrs != aux_edgePtrs_initial) {
-                            rdrCtx.putDirtyIntArray(_aux_edgePtrs);
-                        }
+                        aux_edgePtrs_ref.putArray(_aux_edgePtrs);
+
                         // use ArrayCache.getNewSize() to use the same growing
-                        // factor than widenDirtyIntArray():
+                        // factor than widenArray():
                         if (DO_STATS) {
-                            rdrCtx.stats.stat_array_renderer_aux_edgePtrs
-                                .add(ptrEnd);
+                            rdrCtx.stats.stat_array_renderer_aux_edgePtrs.add(ptrEnd);
                         }
                         this.aux_edgePtrs = _aux_edgePtrs
-                            = rdrCtx.getDirtyIntArray(
-                                ArrayCache.getNewSize(numCrossings, ptrEnd)
+                            = aux_edgePtrs_ref.getArray(
+                                ArrayCacheConst.getNewSize(numCrossings, ptrEnd)
                             );
                     }
 
@@ -933,26 +914,24 @@
 
                     if (crossingsLen < numCrossings) {
                         // Get larger array:
-                        if (_crossings != crossings_initial) {
-                            rdrCtx.putDirtyIntArray(_crossings);
-                        }
+                        crossings_ref.putArray(_crossings);
+
                         if (DO_STATS) {
                             rdrCtx.stats.stat_array_renderer_crossings
                                 .add(numCrossings);
                         }
                         this.crossings = _crossings
-                            = rdrCtx.getDirtyIntArray(numCrossings);
+                            = crossings_ref.getArray(numCrossings);
 
                         // Get larger auxiliary storage:
-                        if (_aux_crossings != aux_crossings_initial) {
-                            rdrCtx.putDirtyIntArray(_aux_crossings);
-                        }
+                        aux_crossings_ref.putArray(_aux_crossings);
+
                         if (DO_STATS) {
                             rdrCtx.stats.stat_array_renderer_aux_crossings
                                 .add(numCrossings);
                         }
                         this.aux_crossings = _aux_crossings
-                            = rdrCtx.getDirtyIntArray(numCrossings);
+                            = aux_crossings_ref.getArray(numCrossings);
 
                         crossingsLen = _crossings.length;
                     }
@@ -973,10 +952,8 @@
                  */
                 if ((ptrLen < 10) || (numCrossings < 40)) {
                     if (DO_STATS) {
-                        rdrCtx.stats.hist_rdr_crossings
-                            .add(numCrossings);
-                        rdrCtx.stats.hist_rdr_crossings_adds
-                            .add(ptrLen);
+                        rdrCtx.stats.hist_rdr_crossings.add(numCrossings);
+                        rdrCtx.stats.hist_rdr_crossings_adds.add(ptrLen);
                     }
 
                     /*
@@ -1019,23 +996,20 @@
                         _unsafe.putInt(addr + _OFF_ERROR, (err & _ERR_STEP_MAX));
 
                         if (DO_STATS) {
-                            rdrCtx.stats.stat_rdr_crossings_updates
-                                .add(numCrossings);
+                            rdrCtx.stats.stat_rdr_crossings_updates.add(numCrossings);
                         }
 
                         // insertion sort of crossings:
                         if (cross < lastCross) {
                             if (DO_STATS) {
-                                rdrCtx.stats.stat_rdr_crossings_sorts
-                                    .add(i);
+                                rdrCtx.stats.stat_rdr_crossings_sorts.add(i);
                             }
 
                             /* use binary search for newly added edges
                                in crossings if arrays are large enough */
                             if (useBinarySearch && (i >= prevNumCrossings)) {
                                 if (DO_STATS) {
-                                    rdrCtx.stats.
-                                        stat_rdr_crossings_bsearch.add(i);
+                                    rdrCtx.stats.stat_rdr_crossings_bsearch.add(i);
                                 }
                                 low = 0;
                                 high = i - 1;
@@ -1078,14 +1052,11 @@
                     }
                 } else {
                     if (DO_STATS) {
-                        rdrCtx.stats.stat_rdr_crossings_msorts
-                            .add(numCrossings);
+                        rdrCtx.stats.stat_rdr_crossings_msorts.add(numCrossings);
                         rdrCtx.stats.hist_rdr_crossings_ratio
                             .add((1000 * ptrLen) / numCrossings);
-                        rdrCtx.stats.hist_rdr_crossings_msorts
-                            .add(numCrossings);
-                        rdrCtx.stats.hist_rdr_crossings_msorts_adds
-                            .add(ptrLen);
+                        rdrCtx.stats.hist_rdr_crossings_msorts.add(numCrossings);
+                        rdrCtx.stats.hist_rdr_crossings_msorts_adds.add(ptrLen);
                     }
 
                     // Copy sorted data in auxiliary arrays
@@ -1125,8 +1096,7 @@
                         _unsafe.putInt(addr + _OFF_ERROR, (err & _ERR_STEP_MAX));
 
                         if (DO_STATS) {
-                            rdrCtx.stats.stat_rdr_crossings_updates
-                                .add(numCrossings);
+                            rdrCtx.stats.stat_rdr_crossings_updates.add(numCrossings);
                         }
 
                         if (i >= prevNumCrossings) {
@@ -1136,8 +1106,7 @@
 
                         } else if (cross < lastCross) {
                             if (DO_STATS) {
-                                rdrCtx.stats.stat_rdr_crossings_sorts
-                                    .add(i);
+                                rdrCtx.stats.stat_rdr_crossings_sorts.add(i);
                             }
 
                             // (straight) insertion sort of crossings:
@@ -1462,7 +1431,7 @@
                 // note: +2 to ensure enough space left at end
                 final int nxTiles = ((pmaxX - pminX) >> TILE_SIZE_LG) + 2;
                 if (nxTiles > INITIAL_ARRAY) {
-                    blkFlags = rdrCtx.getIntArray(nxTiles);
+                    blkFlags = blkFlags_ref.getArray(nxTiles);
                 }
             }
         }
@@ -1494,10 +1463,9 @@
         // Useful when processing tile line by tile line
         if (width > INITIAL_AA_ARRAY) {
             if (DO_STATS) {
-                rdrCtx.stats.stat_array_renderer_alphaline
-                    .add(width);
+                rdrCtx.stats.stat_array_renderer_alphaline.add(width);
             }
-            alphaLine = rdrCtx.getIntArray(width);
+            alphaLine = alphaLine_ref.getArray(width);
         }
 
         // process first tile line:
@@ -1532,13 +1500,6 @@
         }
     }
 
-    private boolean enableBlkFlags = false;
-    private boolean prevUseBlkFlags = false;
-
-    private final int[] blkFlags_initial = new int[INITIAL_ARRAY]; // 1 tile line
-    /* block flags (0|1) */
-    private int[] blkFlags = blkFlags_initial;
-
     void copyAARow(final int[] alphaRow,
                    final int pix_y, final int pix_from, final int pix_to,
                    final boolean useBlockFlags)
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/RendererContext.java	Wed Aug 03 14:49:01 2016 +0530
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/RendererContext.java	Wed Aug 03 22:53:38 2016 +0200
@@ -29,10 +29,8 @@
 import java.lang.ref.WeakReference;
 import java.util.concurrent.atomic.AtomicInteger;
 import sun.java2d.ReentrantContext;
-import sun.java2d.ReentrantContextProvider;
-import static sun.java2d.marlin.ArrayCache.*;
+import sun.java2d.marlin.ArrayCacheConst.CacheStats;
 import sun.java2d.marlin.MarlinRenderingEngine.NormalizingPathIterator;
-import static sun.java2d.marlin.MarlinUtils.logInfo;
 
 /**
  * This class is a renderer context dedicated to a single thread
@@ -41,12 +39,6 @@
 
     // RendererContext creation counter
     private static final AtomicInteger CTX_COUNT = new AtomicInteger(1);
-    // RendererContext statistics
-    final RendererStats stats = (DO_STATS || DO_MONITORS)
-                                       ? RendererStats.getInstance(): null;
-
-    private static final boolean USE_CACHE_HARD_REF = DO_STATS
-        || (MarlinRenderingEngine.REF_TYPE == ReentrantContextProvider.REF_WEAK);
 
     /**
      * Create a new renderer context
@@ -54,25 +46,14 @@
      * @return new RendererContext instance
      */
     static RendererContext createContext() {
-        final RendererContext newCtx = new RendererContext("ctx"
-                    + Integer.toString(CTX_COUNT.getAndIncrement()));
-
-        if (DO_STATS || DO_MONITORS) {
-            RendererStats.ALL_CONTEXTS.add(newCtx);
-        }
-        return newCtx;
+        return new RendererContext("ctx"
+                       + Integer.toString(CTX_COUNT.getAndIncrement()));
     }
 
-    // context name (debugging purposes)
-    final String name;
     // Smallest object used as Cleaner's parent reference
-    final Object cleanerObj = new Object();
+    private final Object cleanerObj;
     // dirty flag indicating an exception occured during pipeline in pathTo()
     boolean dirty = false;
-    // dynamic array caches kept using weak reference (low memory footprint)
-    WeakReference<ArrayCachesHolder> refArrayCaches = null;
-    // hard reference to array caches (for statistics)
-    ArrayCachesHolder hardRefArrayCaches = null;
     // shared data
     final float[] float6 = new float[6];
     // shared curve (dirty) (Renderer / Stroker)
@@ -83,8 +64,8 @@
     final NormalizingPathIterator nPQPathIterator;
     // MarlinRenderingEngine.TransformingPathConsumer2D
     final TransformingPathConsumer2D transformerPC2D;
-    // recycled Path2D instance
-    Path2D.Float p2d = null;
+    // recycled Path2D instance (weak)
+    private WeakReference<Path2D.Float> refPath2D = null;
     final Renderer renderer;
     final Stroker stroker;
     // Simplifies out collinear lines
@@ -95,6 +76,19 @@
     // flag indicating the shape is stroked (1) or filled (0)
     int stroking = 0;
 
+    // Array caches:
+    /* clean int[] cache (zero-filled) = 5 refs */
+    private final IntArrayCache cleanIntCache = new IntArrayCache(true, 5);
+    /* dirty int[] cache = 4 refs */
+    private final IntArrayCache dirtyIntCache = new IntArrayCache(false, 4);
+    /* dirty float[] cache = 3 refs */
+    private final FloatArrayCache dirtyFloatCache = new FloatArrayCache(false, 3);
+    /* dirty byte[] cache = 1 ref */
+    private final ByteArrayCache dirtyByteCache = new ByteArrayCache(false, 1);
+
+    // RendererContext statistics
+    final RendererStats stats;
+
     /**
      * Constructor
      *
@@ -104,8 +98,18 @@
         if (LOG_CREATE_CONTEXT) {
             MarlinUtils.logInfo("new RendererContext = " + name);
         }
+        this.cleanerObj = new Object();
 
-        this.name = name;
+        // create first stats (needed by newOffHeapArray):
+        if (DO_STATS || DO_MONITORS) {
+            stats = RendererStats.createInstance(cleanerObj, name);
+            // push cache stats:
+            stats.cacheStats = new CacheStats[] { cleanIntCache.stats,
+                dirtyIntCache.stats, dirtyFloatCache.stats, dirtyByteCache.stats
+            };
+        } else {
+            stats = null;
+        }
 
         // NormalizingPathIterator instances:
         nPCPathIterator = new NormalizingPathIterator.NearestPixelCenter(float6);
@@ -128,11 +132,13 @@
      * clean up before reusing this context
      */
     void dispose() {
+        if (DO_STATS) {
+            if (stats.totalOffHeap > stats.totalOffHeapMax) {
+                stats.totalOffHeapMax = stats.totalOffHeap;
+            }
+            stats.totalOffHeap = 0L;
+        }
         stroking = 0;
-        // reset hard reference to array caches if needed:
-        if (!USE_CACHE_HARD_REF) {
-            hardRefArrayCaches = null;
-        }
         // if context is maked as DIRTY:
         if (dirty) {
             // may happen if an exception if thrown in the pipeline processing:
@@ -151,300 +157,43 @@
         }
     }
 
-    // Array caches
-    ArrayCachesHolder getArrayCachesHolder() {
-        // Use hard reference first (cached resolved weak reference):
-        ArrayCachesHolder holder = hardRefArrayCaches;
-        if (holder == null) {
-            // resolve reference:
-            holder = (refArrayCaches != null)
-                     ? refArrayCaches.get()
-                     : null;
-            // create a new ArrayCachesHolder if none is available
-            if (holder == null) {
-                if (LOG_CREATE_CONTEXT) {
-                    MarlinUtils.logInfo("new ArrayCachesHolder for "
-                                        + "RendererContext = " + name);
-                }
-
-                holder = new ArrayCachesHolder();
-
-                if (USE_CACHE_HARD_REF) {
-                    // update hard reference:
-                    hardRefArrayCaches = holder;
-                } else {
-                    // update weak reference:
-                    refArrayCaches = new WeakReference<ArrayCachesHolder>(holder);
-                }
-            }
-        }
-        return holder;
-    }
+    Path2D.Float getPath2D() {
+        // resolve reference:
+        Path2D.Float p2d
+            = (refPath2D != null) ? refPath2D.get() : null;
 
-    // dirty byte array cache
-    ByteArrayCache getDirtyByteArrayCache(final int length) {
-        final int bucket = ArrayCache.getBucketDirtyBytes(length);
-        return getArrayCachesHolder().dirtyByteArrayCaches[bucket];
-    }
-
-    byte[] getDirtyByteArray(final int length) {
-        if (length <= MAX_DIRTY_BYTE_ARRAY_SIZE) {
-            return getDirtyByteArrayCache(length).getArray();
-        }
-
-        if (DO_STATS) {
-            incOversize();
-        }
-
-        if (DO_LOG_OVERSIZE) {
-            logInfo("getDirtyByteArray[oversize]: length=\t" + length);
-        }
+        // create a new Path2D ?
+        if (p2d == null) {
+            p2d = new Path2D.Float(Path2D.WIND_NON_ZERO, INITIAL_EDGES_COUNT); // 32K
 
-        return new byte[length];
-    }
-
-    void putDirtyByteArray(final byte[] array) {
-        final int length = array.length;
-        // odd sized array are non-cached arrays (initial arrays)
-        // ensure to never store initial arrays in cache:
-        if (((length & 0x1) == 0) && (length <= MAX_DIRTY_BYTE_ARRAY_SIZE)) {
-            getDirtyByteArrayCache(length).putDirtyArray(array, length);
-        }
-    }
-
-    byte[] widenDirtyByteArray(final byte[] in,
-                               final int usedSize, final int needSize)
-    {
-        final int length = in.length;
-        if (DO_CHECKS && length >= needSize) {
-            return in;
+            // update weak reference:
+            refPath2D = new WeakReference<Path2D.Float>(p2d);
         }
-        if (DO_STATS) {
-            incResizeDirtyByte();
-        }
-
-        // maybe change bucket:
-        // ensure getNewSize() > newSize:
-        final byte[] res = getDirtyByteArray(getNewSize(usedSize, needSize));
-
-        System.arraycopy(in, 0, res, 0, usedSize); // copy only used elements
-
-        // maybe return current array:
-        // NO clean-up of array data = DIRTY ARRAY
-        putDirtyByteArray(in);
-
-        if (DO_LOG_WIDEN_ARRAY) {
-            logInfo("widenDirtyByteArray[" + res.length + "]: usedSize=\t"
-                    + usedSize + "\tlength=\t" + length + "\tneeded length=\t"
-                    + needSize);
-        }
-        return res;
-    }
-
-    // int array cache
-    IntArrayCache getIntArrayCache(final int length) {
-        final int bucket = ArrayCache.getBucket(length);
-        return getArrayCachesHolder().intArrayCaches[bucket];
-    }
-
-    int[] getIntArray(final int length) {
-        if (length <= MAX_ARRAY_SIZE) {
-            return getIntArrayCache(length).getArray();
-        }
-
-        if (DO_STATS) {
-            incOversize();
-        }
-
-        if (DO_LOG_OVERSIZE) {
-            logInfo("getIntArray[oversize]: length=\t" + length);
-        }
-
-        return new int[length];
-    }
-
-    // unused
-    int[] widenIntArray(final int[] in, final int usedSize,
-                        final int needSize, final int clearTo)
-    {
-        final int length = in.length;
-        if (DO_CHECKS && length >= needSize) {
-            return in;
-        }
-        if (DO_STATS) {
-            incResizeInt();
-        }
-
-        // maybe change bucket:
-        // ensure getNewSize() > newSize:
-        final int[] res = getIntArray(getNewSize(usedSize, needSize));
-
-        System.arraycopy(in, 0, res, 0, usedSize); // copy only used elements
-
-        // maybe return current array:
-        putIntArray(in, 0, clearTo); // ensure all array is cleared (grow-reduce algo)
-
-        if (DO_LOG_WIDEN_ARRAY) {
-            logInfo("widenIntArray[" + res.length + "]: usedSize=\t"
-                    + usedSize + "\tlength=\t" + length + "\tneeded length=\t"
-                    + needSize);
-        }
-        return res;
+        // reset the path anyway:
+        p2d.reset();
+        return p2d;
     }
 
-    void putIntArray(final int[] array, final int fromIndex,
-                     final int toIndex)
-    {
-        final int length = array.length;
-        // odd sized array are non-cached arrays (initial arrays)
-        // ensure to never store initial arrays in cache:
-        if (((length & 0x1) == 0) && (length <= MAX_ARRAY_SIZE)) {
-            getIntArrayCache(length).putArray(array, length, fromIndex, toIndex);
+    OffHeapArray newOffHeapArray(final long initialSize) {
+        if (DO_STATS) {
+            stats.totalOffHeapInitial += initialSize;
         }
-    }
-
-    // dirty int array cache
-    IntArrayCache getDirtyIntArrayCache(final int length) {
-        final int bucket = ArrayCache.getBucket(length);
-        return getArrayCachesHolder().dirtyIntArrayCaches[bucket];
-    }
-
-    int[] getDirtyIntArray(final int length) {
-        if (length <= MAX_ARRAY_SIZE) {
-            return getDirtyIntArrayCache(length).getArray();
-        }
-
-        if (DO_STATS) {
-            incOversize();
-        }
-
-        if (DO_LOG_OVERSIZE) {
-            logInfo("getDirtyIntArray[oversize]: length=\t" + length);
-        }
-
-        return new int[length];
+        return new OffHeapArray(cleanerObj, initialSize);
     }
 
-    int[] widenDirtyIntArray(final int[] in,
-                             final int usedSize, final int needSize)
-    {
-        final int length = in.length;
-        if (DO_CHECKS && length >= needSize) {
-            return in;
-        }
-        if (DO_STATS) {
-            incResizeDirtyInt();
-        }
-
-        // maybe change bucket:
-        // ensure getNewSize() > newSize:
-        final int[] res = getDirtyIntArray(getNewSize(usedSize, needSize));
-
-        System.arraycopy(in, 0, res, 0, usedSize); // copy only used elements
-
-        // maybe return current array:
-        // NO clean-up of array data = DIRTY ARRAY
-        putDirtyIntArray(in);
-
-        if (DO_LOG_WIDEN_ARRAY) {
-            logInfo("widenDirtyIntArray[" + res.length + "]: usedSize=\t"
-                    + usedSize + "\tlength=\t" + length + "\tneeded length=\t"
-                    + needSize);
-        }
-        return res;
-    }
-
-    void putDirtyIntArray(final int[] array) {
-        final int length = array.length;
-        // odd sized array are non-cached arrays (initial arrays)
-        // ensure to never store initial arrays in cache:
-        if (((length & 0x1) == 0) && (length <= MAX_ARRAY_SIZE)) {
-            getDirtyIntArrayCache(length).putDirtyArray(array, length);
-        }
-    }
-
-    // dirty float array cache
-    FloatArrayCache getDirtyFloatArrayCache(final int length) {
-        final int bucket = ArrayCache.getBucket(length);
-        return getArrayCachesHolder().dirtyFloatArrayCaches[bucket];
+    IntArrayCache.Reference newCleanIntArrayRef(final int initialSize) {
+        return cleanIntCache.createRef(initialSize);
     }
 
-    float[] getDirtyFloatArray(final int length) {
-        if (length <= MAX_ARRAY_SIZE) {
-            return getDirtyFloatArrayCache(length).getArray();
-        }
-
-        if (DO_STATS) {
-            incOversize();
-        }
-
-        if (DO_LOG_OVERSIZE) {
-            logInfo("getDirtyFloatArray[oversize]: length=\t" + length);
-        }
-
-        return new float[length];
+    IntArrayCache.Reference newDirtyIntArrayRef(final int initialSize) {
+        return dirtyIntCache.createRef(initialSize);
     }
 
-    float[] widenDirtyFloatArray(final float[] in,
-                                 final int usedSize, final int needSize)
-    {
-        final int length = in.length;
-        if (DO_CHECKS && length >= needSize) {
-            return in;
-        }
-        if (DO_STATS) {
-            incResizeDirtyFloat();
-        }
-
-        // maybe change bucket:
-        // ensure getNewSize() > newSize:
-        final float[] res = getDirtyFloatArray(getNewSize(usedSize, needSize));
-
-        System.arraycopy(in, 0, res, 0, usedSize); // copy only used elements
-
-        // maybe return current array:
-        // NO clean-up of array data = DIRTY ARRAY
-        putDirtyFloatArray(in);
-
-        if (DO_LOG_WIDEN_ARRAY) {
-            logInfo("widenDirtyFloatArray[" + res.length + "]: usedSize=\t"
-                    + usedSize + "\tlength=\t" + length + "\tneeded length=\t"
-                    + needSize);
-        }
-        return res;
+    FloatArrayCache.Reference newDirtyFloatArrayRef(final int initialSize) {
+        return dirtyFloatCache.createRef(initialSize);
     }
 
-    void putDirtyFloatArray(final float[] array) {
-        final int length = array.length;
-        // odd sized array are non-cached arrays (initial arrays)
-        // ensure to never store initial arrays in cache:
-        if (((length & 0x1) == 0) && (length <= MAX_ARRAY_SIZE)) {
-            getDirtyFloatArrayCache(length).putDirtyArray(array, length);
-        }
-    }
-
-    /* class holding all array cache instances */
-    static final class ArrayCachesHolder {
-        // zero-filled int array cache:
-        final IntArrayCache[] intArrayCaches;
-        // dirty array caches:
-        final IntArrayCache[] dirtyIntArrayCaches;
-        final FloatArrayCache[] dirtyFloatArrayCaches;
-        final ByteArrayCache[] dirtyByteArrayCaches;
-
-        ArrayCachesHolder() {
-            intArrayCaches = new IntArrayCache[BUCKETS];
-            dirtyIntArrayCaches = new IntArrayCache[BUCKETS];
-            dirtyFloatArrayCaches = new FloatArrayCache[BUCKETS];
-            dirtyByteArrayCaches = new ByteArrayCache[BUCKETS];
-
-            for (int i = 0; i < BUCKETS; i++) {
-                intArrayCaches[i] = new IntArrayCache(ARRAY_SIZES[i]);
-                // dirty array caches:
-                dirtyIntArrayCaches[i] = new IntArrayCache(ARRAY_SIZES[i]);
-                dirtyFloatArrayCaches[i] = new FloatArrayCache(ARRAY_SIZES[i]);
-                dirtyByteArrayCaches[i] = new ByteArrayCache(DIRTY_BYTE_ARRAY_SIZES[i]);
-            }
-        }
+    ByteArrayCache.Reference newDirtyByteArrayRef(final int initialSize) {
+        return dirtyByteCache.createRef(initialSize);
     }
 }
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/RendererStats.java	Wed Aug 03 14:49:01 2016 +0530
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/RendererStats.java	Wed Aug 03 22:53:38 2016 +0200
@@ -30,6 +30,8 @@
 import java.util.Timer;
 import java.util.TimerTask;
 import java.util.concurrent.ConcurrentLinkedQueue;
+import jdk.internal.ref.CleanerFactory;
+import sun.java2d.marlin.ArrayCacheConst.CacheStats;
 import static sun.java2d.marlin.MarlinUtils.logInfo;
 import sun.java2d.marlin.stats.Histogram;
 import sun.java2d.marlin.stats.Monitor;
@@ -41,26 +43,22 @@
  */
 public final class RendererStats implements MarlinConst {
 
-    // singleton
-    private static volatile RendererStats SINGLETON = null;
+    static RendererStats createInstance(final Object parent, final String name)
+    {
+        final RendererStats stats = new RendererStats(name);
 
-    static RendererStats getInstance() {
-        if (SINGLETON == null) {
-            SINGLETON = new RendererStats();
-        }
-        return SINGLETON;
+        // Keep a strong reference to dump it later:
+        RendererStatsHolder.getInstance().add(parent, stats);
+
+        return stats;
     }
 
     public static void dumpStats() {
-        if (SINGLETON != null) {
-            SINGLETON.dump();
-        }
+        RendererStatsHolder.dumpStats();
     }
 
-    /* RendererContext collection as hard references
-       (only used for debugging purposes) */
-    static final ConcurrentLinkedQueue<RendererContext> ALL_CONTEXTS
-        = new ConcurrentLinkedQueue<RendererContext>();
+    // context name (debugging purposes)
+    final String name;
     // stats
     final StatLong stat_cache_rowAA
         = new StatLong("cache.rowAA");
@@ -118,7 +116,7 @@
     final StatLong stat_array_stroker_polystack_curveTypes
         = new StatLong("array.stroker.polystack.curveTypes.d_byte");
     final StatLong stat_array_marlincache_rowAAChunk
-        = new StatLong("array.marlincache.rowAAChunk.d_byte");
+        = new StatLong("array.marlincache.rowAAChunk.resize");
     final StatLong stat_array_marlincache_touchedTile
         = new StatLong("array.marlincache.touchedTile.int");
     final StatLong stat_array_renderer_alphaline
@@ -136,6 +134,10 @@
     final StatLong stat_array_renderer_aux_edgePtrs
         = new StatLong("array.renderer.aux_edgePtrs.int");
     // histograms
+    final Histogram hist_rdr_edges_count
+        = new Histogram("renderer.edges.count");
+    final Histogram hist_rdr_poly_stack_curves
+        = new Histogram("renderer.polystack.curves");
     final Histogram hist_rdr_crossings
         = new Histogram("renderer.crossings");
     final Histogram hist_rdr_crossings_ratio
@@ -181,6 +183,8 @@
         stat_rdr_crossings_sorts,
         stat_rdr_crossings_bsearch,
         stat_rdr_crossings_msorts,
+        hist_rdr_edges_count,
+        hist_rdr_poly_stack_curves,
         hist_rdr_crossings,
         hist_rdr_crossings_ratio,
         hist_rdr_crossings_adds,
@@ -233,95 +237,145 @@
         mon_ptg_getAlpha,
         mon_debug
     };
-
-    private RendererStats() {
-        super();
+    // offheap stats
+    long totalOffHeapInitial = 0L;
+     // live accumulator
+    long totalOffHeap = 0L;
+    long totalOffHeapMax = 0L;
+    // cache stats
+    CacheStats[] cacheStats = null;
 
-        AccessController.doPrivileged(
-            (PrivilegedAction<Void>) () -> {
-                final Thread hook = new Thread(
-                    ThreadGroupUtils.getRootThreadGroup(),
-                    new Runnable() {
-                        @Override
-                        public void run() {
-                            dump();
-                        }
-                    },
-                    "MarlinStatsHook"
-                );
-                hook.setContextClassLoader(null);
-                Runtime.getRuntime().addShutdownHook(hook);
-
-                if (USE_DUMP_THREAD) {
-                    final Timer statTimer = new Timer("RendererStats");
-                    statTimer.scheduleAtFixedRate(new TimerTask() {
-                        @Override
-                        public void run() {
-                            dump();
-                        }
-                    }, DUMP_INTERVAL, DUMP_INTERVAL);
-                }
-                return null;
-            }
-        );
+    private RendererStats(final String name) {
+        this.name = name;
     }
 
     void dump() {
-        if (DO_STATS) {
-            ArrayCache.dumpStats();
-        }
-        for (RendererContext rdrCtx : ALL_CONTEXTS) {
-            logInfo("RendererContext: " + rdrCtx.name);
+        logInfo("RendererContext: " + name);
 
-            if (DO_MONITORS) {
+        if (DO_MONITORS) {
+            for (Monitor monitor : monitors) {
+                if (monitor.count != 0) {
+                    logInfo(monitor.toString());
+                }
+            }
+            // As getAATileGenerator percents:
+            final long total = mon_pre_getAATileGenerator.sum;
+            if (total != 0L) {
                 for (Monitor monitor : monitors) {
-                    if (monitor.count != 0) {
-                        logInfo(monitor.toString());
-                    }
+                    logInfo(monitor.name + " : "
+                            + ((100d * monitor.sum) / total) + " %");
                 }
-                // As getAATileGenerator percents:
-                final long total = mon_pre_getAATileGenerator.sum;
-                if (total != 0L) {
-                    for (Monitor monitor : monitors) {
-                        logInfo(monitor.name + " : "
-                                + ((100d * monitor.sum) / total) + " %");
-                    }
+            }
+            if (DO_FLUSH_MONITORS) {
+                for (Monitor m : monitors) {
+                    m.reset();
                 }
-                if (DO_FLUSH_MONITORS) {
-                    for (Monitor m : monitors) {
-                        m.reset();
+            }
+        }
+
+        if (DO_STATS) {
+            for (StatLong stat : statistics) {
+                if (stat.count != 0) {
+                    logInfo(stat.toString());
+                    if (DO_FLUSH_STATS) {
+                        stat.reset();
                     }
                 }
             }
 
-            if (DO_STATS) {
-                for (StatLong stat : statistics) {
-                    if (stat.count != 0) {
-                        logInfo(stat.toString());
+            logInfo("OffHeap footprint: initial: " + totalOffHeapInitial
+                + " bytes - max: " + totalOffHeapMax + " bytes");
+            if (DO_FLUSH_STATS) {
+                totalOffHeapMax = 0L;
+            }
+
+            logInfo("Array caches for RendererContext: " + name);
+
+            long totalInitialBytes = totalOffHeapInitial;
+            long totalCacheBytes   = 0L;
+
+            if (cacheStats != null) {
+                for (CacheStats stat : cacheStats) {
+                    totalCacheBytes   += stat.dumpStats();
+                    totalInitialBytes += stat.getTotalInitialBytes();
+                    if (DO_FLUSH_STATS) {
                         stat.reset();
                     }
                 }
-                // IntArrayCaches stats:
-                final RendererContext.ArrayCachesHolder holder
-                    = rdrCtx.getArrayCachesHolder();
+            }
+            logInfo("Heap footprint: initial: " + totalInitialBytes
+                    + " bytes - cache: " + totalCacheBytes + " bytes");
+        }
+    }
+
+    static final class RendererStatsHolder {
+
+        // singleton
+        private static volatile RendererStatsHolder SINGLETON = null;
 
-                logInfo("Array caches for thread: " + rdrCtx.name);
+        static synchronized RendererStatsHolder getInstance() {
+            if (SINGLETON == null) {
+                SINGLETON = new RendererStatsHolder();
+            }
+            return SINGLETON;
+        }
 
-                for (IntArrayCache cache : holder.intArrayCaches) {
-                    cache.dumpStats();
-                }
+        static void dumpStats() {
+            if (SINGLETON != null) {
+                SINGLETON.dump();
+            }
+        }
+
+        /* RendererStats collection as hard references
+           (only used for debugging purposes) */
+        private final ConcurrentLinkedQueue<RendererStats> allStats
+            = new ConcurrentLinkedQueue<RendererStats>();
 
-                logInfo("Dirty Array caches for thread: " + rdrCtx.name);
+        private RendererStatsHolder() {
+            AccessController.doPrivileged(
+                (PrivilegedAction<Void>) () -> {
+                    final Thread hook = new Thread(
+                        ThreadGroupUtils.getRootThreadGroup(),
+                        new Runnable() {
+                            @Override
+                            public void run() {
+                                dump();
+                            }
+                        },
+                        "MarlinStatsHook"
+                    );
+                    hook.setContextClassLoader(null);
+                    Runtime.getRuntime().addShutdownHook(hook);
 
-                for (IntArrayCache cache : holder.dirtyIntArrayCaches) {
-                    cache.dumpStats();
+                    if (USE_DUMP_THREAD) {
+                        final Timer statTimer = new Timer("RendererStats");
+                        statTimer.scheduleAtFixedRate(new TimerTask() {
+                            @Override
+                            public void run() {
+                                dump();
+                            }
+                        }, DUMP_INTERVAL, DUMP_INTERVAL);
+                    }
+                    return null;
                 }
-                for (FloatArrayCache cache : holder.dirtyFloatArrayCaches) {
-                    cache.dumpStats();
-                }
-                for (ByteArrayCache cache : holder.dirtyByteArrayCaches) {
-                    cache.dumpStats();
-                }
+            );
+        }
+
+        void add(final Object parent, final RendererStats stats) {
+            allStats.add(stats);
+
+            // Register a cleaning function to ensure removing dead entries:
+            CleanerFactory.cleaner().register(parent, () -> remove(stats));
+        }
+
+        void remove(final RendererStats stats) {
+            stats.dump(); // dump anyway
+            allStats.remove(stats);
+        }
+
+        void dump() {
+            for (RendererStats stats : allStats) {
+                stats.dump();
             }
         }
     }
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Stroker.java	Wed Aug 03 14:49:01 2016 +0530
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Stroker.java	Wed Aug 03 22:53:38 2016 +0200
@@ -1205,6 +1205,12 @@
         private static final byte TYPE_QUADTO  = (byte) 1;
         private static final byte TYPE_CUBICTO = (byte) 2;
 
+        // curves capacity = edges count (4096) = half edges x 2 (coords)
+        private static final int INITIAL_CURVES_COUNT = INITIAL_EDGES_COUNT;
+
+        // types capacity = half edges count (2048)
+        private static final int INITIAL_TYPES_COUNT = INITIAL_EDGES_COUNT >> 1;
+
         float[] curves;
         int end;
         byte[] curveTypes;
@@ -1213,10 +1219,10 @@
         // per-thread renderer context
         final RendererContext rdrCtx;
 
-        // per-thread initial arrays (large enough to satisfy most usages: 8192)
-        // +1 to avoid recycling in Helpers.widenArray()
-        private final float[] curves_initial = new float[INITIAL_LARGE_ARRAY + 1]; // 32K
-        private final byte[] curveTypes_initial = new byte[INITIAL_LARGE_ARRAY + 1]; // 8K
+        // curves ref (dirty)
+        final FloatArrayCache.Reference curves_ref;
+        // curveTypes ref (dirty)
+        final ByteArrayCache.Reference curveTypes_ref;
 
         // used marks (stats only)
         int curveTypesUseMark;
@@ -1229,10 +1235,13 @@
         PolyStack(final RendererContext rdrCtx) {
             this.rdrCtx = rdrCtx;
 
-            curves = curves_initial;
-            curveTypes = curveTypes_initial;
+            curves_ref = rdrCtx.newDirtyFloatArrayRef(INITIAL_CURVES_COUNT); // 16K
+            curves     = curves_ref.initial;
+
+            curveTypes_ref = rdrCtx.newDirtyByteArrayRef(INITIAL_TYPES_COUNT); // 2K
+            curveTypes     = curveTypes_ref.initial;
+            numCurves = 0;
             end = 0;
-            numCurves = 0;
 
             if (DO_STATS) {
                 curveTypesUseMark = 0;
@@ -1249,10 +1258,10 @@
             numCurves = 0;
 
             if (DO_STATS) {
-                rdrCtx.stats.stat_rdr_poly_stack_types
-                    .add(curveTypesUseMark);
-                rdrCtx.stats.stat_rdr_poly_stack_curves
-                    .add(curvesUseMark);
+                rdrCtx.stats.stat_rdr_poly_stack_types.add(curveTypesUseMark);
+                rdrCtx.stats.stat_rdr_poly_stack_curves.add(curvesUseMark);
+                rdrCtx.stats.hist_rdr_poly_stack_curves.add(curvesUseMark);
+
                 // reset marks
                 curveTypesUseMark = 0;
                 curvesUseMark = 0;
@@ -1260,15 +1269,8 @@
 
             // Return arrays:
             // curves and curveTypes are kept dirty
-            if (curves != curves_initial) {
-                rdrCtx.putDirtyFloatArray(curves);
-                curves = curves_initial;
-            }
-
-            if (curveTypes != curveTypes_initial) {
-                rdrCtx.putDirtyByteArray(curveTypes);
-                curveTypes = curveTypes_initial;
-            }
+            curves     = curves_ref.putArray(curves);
+            curveTypes = curveTypes_ref.putArray(curveTypes);
         }
 
         private void ensureSpace(final int n) {
@@ -1278,16 +1280,16 @@
                     rdrCtx.stats.stat_array_stroker_polystack_curves
                         .add(end + n);
                 }
-                curves = rdrCtx.widenDirtyFloatArray(curves, end, end + n);
+                curves = curves_ref.widenArray(curves, end, end + n);
             }
             if (curveTypes.length <= numCurves) {
                 if (DO_STATS) {
                     rdrCtx.stats.stat_array_stroker_polystack_curveTypes
                         .add(numCurves + 1);
                 }
-                curveTypes = rdrCtx.widenDirtyByteArray(curveTypes,
-                                                        numCurves,
-                                                        numCurves + 1);
+                curveTypes = curveTypes_ref.widenArray(curveTypes,
+                                                       numCurves,
+                                                       numCurves + 1);
             }
         }
 
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Version.java	Wed Aug 03 14:49:01 2016 +0530
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Version.java	Wed Aug 03 22:53:38 2016 +0200
@@ -27,7 +27,7 @@
 
 public final class Version {
 
-    private static final String VERSION = "marlin-0.7.3.4-Unsafe-OpenJDK";
+    private static final String VERSION = "marlin-0.7.4-Unsafe-OpenJDK";
 
     public static String getVersion() {
         return VERSION;
--- a/jdk/test/sun/java2d/marlin/ArrayCacheSizeTest.java	Wed Aug 03 14:49:01 2016 +0530
+++ b/jdk/test/sun/java2d/marlin/ArrayCacheSizeTest.java	Wed Aug 03 22:53:38 2016 +0200
@@ -21,7 +21,7 @@
  * questions.
  */
 
-import sun.java2d.marlin.ArrayCache;
+import sun.java2d.marlin.ArrayCacheConst;
 
 /*
  * @test
@@ -72,7 +72,7 @@
     private static void testNewSize(final int curSize,
                                     final int needSize) {
 
-        int size = ArrayCache.getNewSize(curSize, needSize);
+        int size = ArrayCacheConst.getNewSize(curSize, needSize);
 
         System.out.println("getNewSize(" + curSize + ", " + needSize
             + ") = " + size);
@@ -118,7 +118,7 @@
     private static void testNewLargeSize(final long curSize,
                                          final long needSize) {
 
-        long size = ArrayCache.getNewLargeSize(curSize, needSize);
+        long size = ArrayCacheConst.getNewLargeSize(curSize, needSize);
 
         System.out.println("getNewLargeSize(" + curSize + ", " + needSize
             + ") = " + size);