8143849: Integrate Marlin renderer per JEP 265
author lbourges
Mon, 23 Nov 2015 15:02:19 -0800
changeset 34419 14108cfd0823
parent 34418 a947f6b4e0b3
child 34420 e72ce9484d9e
child 34784 43a8becc16f3
8143849: Integrate Marlin renderer per JEP 265 Reviewed-by: flar, prr
jdk/src/java.desktop/share/classes/sun/java2d/marlin/ArrayCache.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/ByteArrayCache.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/CollinearSimplifier.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/Curve.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/Dasher.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/FloatArrayCache.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/FloatMath.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/Helpers.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/IntArrayCache.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinCache.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinConst.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinProperties.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinRenderingEngine.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinTileGenerator.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinUtils.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/MergeSort.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/OffHeapArray.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/Renderer.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/RendererContext.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/RendererStats.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/Stroker.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/TransformingPathConsumer2D.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/Version.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/stats/Histogram.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/stats/Monitor.java
jdk/src/java.desktop/share/classes/sun/java2d/marlin/stats/StatLong.java
jdk/src/java.desktop/share/classes/sun/java2d/pipe/AAShapePipe.java
jdk/src/java.desktop/share/classes/sun/java2d/pipe/RenderingEngine.java
jdk/test/sun/java2d/marlin/CeilAndFloorTests.java
jdk/test/sun/java2d/marlin/CrashTest.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/ArrayCache.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.util.Arrays;
+import static sun.java2d.marlin.MarlinUtils.logInfo;
+
+public final class ArrayCache implements MarlinConst {
+
+    static final int BUCKETS = 4; // number of entries in each size table
+    static final int MIN_ARRAY_SIZE = 4096; // first (smallest) bucket size
+    static final int MAX_ARRAY_SIZE; // last (largest) bucket size
+    static final int MASK_CLR_1 = ~1; // clears bit 0 (rounds down to even)
+    // threshold to grow arrays only by (3/2) instead of 2
+    static final int THRESHOLD_ARRAY_SIZE;
+    static final int[] ARRAY_SIZES = new int[BUCKETS]; // x4 per bucket
+    // dirty byte array sizes
+    static final int MIN_DIRTY_BYTE_ARRAY_SIZE = 32 * 2048; // 32px x 2048px
+    static final int MAX_DIRTY_BYTE_ARRAY_SIZE;
+    static final int[] DIRTY_BYTE_ARRAY_SIZES = new int[BUCKETS]; // x2 per bucket
+    // large array thresholds:
+    static final long THRESHOLD_LARGE_ARRAY_SIZE;
+    static final long THRESHOLD_HUGE_ARRAY_SIZE;
+    // stats (written through the synchronized inc*() methods below)
+    private static int resizeInt = 0;
+    private static int resizeDirtyInt = 0;
+    private static int resizeDirtyFloat = 0;
+    private static int resizeDirtyByte = 0;
+    private static int oversize = 0;
+
+    static {
+        // initialize buckets for int/float arrays
+        int arraySize = MIN_ARRAY_SIZE;
+
+        for (int i = 0; i < BUCKETS; i++, arraySize <<= 2) {
+            ARRAY_SIZES[i] = arraySize;
+
+            if (doTrace) {
+                logInfo("arraySize[" + i + "]: " + arraySize);
+            }
+        }
+        MAX_ARRAY_SIZE = arraySize >> 2; // undo the last loop increment
+
+        /* initialize buckets for dirty byte arrays
+         (large AA chunk = 32 x 2048 pixels) */
+        arraySize = MIN_DIRTY_BYTE_ARRAY_SIZE;
+
+        for (int i = 0; i < BUCKETS; i++, arraySize <<= 1) {
+            DIRTY_BYTE_ARRAY_SIZES[i] = arraySize;
+
+            if (doTrace) {
+                logInfo("dirty arraySize[" + i + "]: " + arraySize);
+            }
+        }
+        MAX_DIRTY_BYTE_ARRAY_SIZE = arraySize >> 1; // undo the last loop increment
+
+        // threshold to grow arrays only by (3/2) instead of 2
+        THRESHOLD_ARRAY_SIZE = Math.max(2 * 1024 * 1024, MAX_ARRAY_SIZE); // 2M
+
+        THRESHOLD_LARGE_ARRAY_SIZE = 8L * THRESHOLD_ARRAY_SIZE; // 16M
+        THRESHOLD_HUGE_ARRAY_SIZE  = 8L * THRESHOLD_LARGE_ARRAY_SIZE; // 128M
+
+        if (doStats || doMonitors) {
+            logInfo("ArrayCache.BUCKETS        = " + BUCKETS);
+            logInfo("ArrayCache.MIN_ARRAY_SIZE = " + MIN_ARRAY_SIZE);
+            logInfo("ArrayCache.MAX_ARRAY_SIZE = " + MAX_ARRAY_SIZE);
+            logInfo("ArrayCache.ARRAY_SIZES = "
+                    + Arrays.toString(ARRAY_SIZES));
+            logInfo("ArrayCache.MIN_DIRTY_BYTE_ARRAY_SIZE = "
+                    + MIN_DIRTY_BYTE_ARRAY_SIZE);
+            logInfo("ArrayCache.MAX_DIRTY_BYTE_ARRAY_SIZE = "
+                    + MAX_DIRTY_BYTE_ARRAY_SIZE);
+            logInfo("ArrayCache.DIRTY_BYTE_ARRAY_SIZES = "
+                    + Arrays.toString(DIRTY_BYTE_ARRAY_SIZES));
+            logInfo("ArrayCache.THRESHOLD_ARRAY_SIZE = "
+                    + THRESHOLD_ARRAY_SIZE);
+            logInfo("ArrayCache.THRESHOLD_LARGE_ARRAY_SIZE = "
+                    + THRESHOLD_LARGE_ARRAY_SIZE);
+            logInfo("ArrayCache.THRESHOLD_HUGE_ARRAY_SIZE = "
+                    + THRESHOLD_HUGE_ARRAY_SIZE);
+        }
+    }
+
+    private ArrayCache() {
+        // Utility class
+    }
+
+    static synchronized void incResizeInt() {
+        resizeInt++;
+    }
+
+    static synchronized void incResizeDirtyInt() {
+        resizeDirtyInt++;
+    }
+
+    static synchronized void incResizeDirtyFloat() {
+        resizeDirtyFloat++;
+    }
+
+    static synchronized void incResizeDirtyByte() {
+        resizeDirtyByte++;
+    }
+
+    static synchronized void incOversize() {
+        oversize++;
+    }
+
+    static void dumpStats() {
+        // log only when at least one counter is non-zero
+        if (resizeInt != 0 || resizeDirtyInt != 0 || resizeDirtyFloat != 0
+                || resizeDirtyByte != 0 || oversize != 0) {
+            logInfo("ArrayCache: int resize: " + resizeInt
+                    + " - dirty int resize: " + resizeDirtyInt
+                    + " - dirty float resize: " + resizeDirtyFloat
+                    + " - dirty byte resize: " + resizeDirtyByte
+                    + " - oversize: " + oversize);
+        }
+    }
+
+    // small methods used a lot (to be inlined / optimized by hotspot)
+
+    static int getBucket(final int length) {
+        for (int i = 0; i < ARRAY_SIZES.length; i++) {
+            if (length <= ARRAY_SIZES[i]) {
+                return i; // first bucket large enough
+            }
+        }
+        return -1; // larger than every bucket
+    }
+
+    static int getBucketDirtyBytes(final int length) {
+        for (int i = 0; i < DIRTY_BYTE_ARRAY_SIZES.length; i++) {
+            if (length <= DIRTY_BYTE_ARRAY_SIZES[i]) {
+                return i; // first bucket large enough
+            }
+        }
+        return -1; // larger than every bucket
+    }
+
+    /**
+     * Return the new array size (~ x2), never overflowing the int range
+     * @param curSize current used size
+     * @param needSize needed size
+     * @return new array size (clamped to Integer.MAX_VALUE)
+     */
+    public static int getNewSize(final int curSize, final int needSize) {
+        final long initial = (curSize & MASK_CLR_1); // long: growth cannot wrap
+        long size;
+        if (initial > THRESHOLD_ARRAY_SIZE) {
+            size = initial + (initial >> 1); // x(3/2)
+        } else {
+            size = initial << 1; // x2
+        }
+        // ensure the new size is >= needed size:
+        if (size < needSize) {
+            // align to 4096 (computed in long to avoid int overflow):
+            size = (((long) needSize >> 12) + 1L) << 12;
+        }
+        return (int) Math.min(size, Integer.MAX_VALUE);
+    }
+
+    /**
+     * Return the new large array size (~ x2), capped at Integer.MAX_VALUE
+     * @param curSize current used size
+     * @param needSize needed size
+     * @return new array size (fails if curSize is already >= Integer.MAX_VALUE)
+     */
+    public static long getNewLargeSize(final long curSize, final long needSize) {
+        long size;
+        if (curSize > THRESHOLD_HUGE_ARRAY_SIZE) {
+            size = curSize + (curSize >> 2L); // x(5/4)
+        } else if (curSize > THRESHOLD_LARGE_ARRAY_SIZE) {
+            size = curSize + (curSize >> 1L); // x(3/2)
+        } else {
+            size = curSize << 1L; // x2
+        }
+        // ensure the new size is >= needed size:
+        if (size < needSize) {
+            // align to 4096:
+            size = ((needSize >> 12) + 1) << 12;
+        }
+        if (size >= Integer.MAX_VALUE) {
+            if (curSize >= Integer.MAX_VALUE) {
+                // hard overflow failure - we can't even accommodate
+                // new items without overflowing
+                throw new ArrayIndexOutOfBoundsException(
+                              "array exceeds maximum capacity !");
+            }
+            // resize to maximum capacity:
+            size = Integer.MAX_VALUE;
+        }
+        return size;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/ByteArrayCache.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.util.ArrayDeque;
+import java.util.Arrays;
+import static sun.java2d.marlin.MarlinUtils.logException;
+import static sun.java2d.marlin.MarlinUtils.logInfo;
+
+final class ByteArrayCache implements MarlinConst {
+
+    private final int arraySize; // length of every array handled by this cache
+    private final ArrayDeque<byte[]> byteArrays; // available arrays (used as a stack)
+    // stats
+    private int getOp = 0;
+    private int createOp = 0;
+    private int returnOp = 0;
+
+    void dumpStats() {
+        if (getOp > 0) {
+            logInfo("ByteArrayCache[" + arraySize + "]: get: " + getOp
+                    + " created: " + createOp + " - returned: " + returnOp
+                    + " :: cache size: " + byteArrays.size());
+        }
+    }
+
+    ByteArrayCache(final int arraySize) {
+        this.arraySize = arraySize;
+        // small but enough: almost 1 cache line
+        this.byteArrays = new ArrayDeque<byte[]>(6);
+    }
+
+    byte[] getArray() {
+        if (doStats) {
+            getOp++;
+        }
+
+        // use cache (most recently returned array first):
+        final byte[] array = byteArrays.pollLast();
+        if (array != null) {
+            return array;
+        }
+
+        if (doStats) {
+            createOp++;
+        }
+
+        return new byte[arraySize]; // cache is empty: allocate a new array
+    }
+
+    void putDirtyArray(final byte[] array, final int length) {
+        if (length != arraySize) {
+            if (doChecks) {
+                logInfo("ArrayCache: bad length = " + length);
+            }
+            return; // wrong size: the array is not cached
+        }
+        if (doStats) {
+            returnOp++;
+        }
+
+        // NO clean-up of array data = DIRTY ARRAY
+
+        if (doCleanDirty) {
+            // Force zero-fill dirty arrays:
+            Arrays.fill(array, 0, array.length, BYTE_0);
+        }
+
+        // fill cache:
+        byteArrays.addLast(array);
+    }
+
+    void putArray(final byte[] array, final int length,
+                  final int fromIndex, final int toIndex)
+    {
+        if (length != arraySize) {
+            if (doChecks) {
+                logInfo("ArrayCache: bad length = " + length);
+            }
+            return; // wrong size: the array is not cached
+        }
+        if (doStats) {
+            returnOp++;
+        }
+
+        // clean-up array of dirty part[fromIndex; toIndex[
+        fill(array, fromIndex, toIndex, BYTE_0);
+
+        // fill cache:
+        byteArrays.addLast(array);
+    }
+
+    static void fill(final byte[] array, final int fromIndex,
+                     final int toIndex, final byte value)
+    {
+        // clear array data:
+        /*
+         * Arrays.fill is faster than System.arraycopy(empty array)
+         * or Unsafe.setMemory(byte 0)
+         */
+        if (toIndex != 0) {
+            Arrays.fill(array, fromIndex, toIndex, value);
+        }
+
+        if (doChecks) {
+            check(array, 0, array.length, value); // verify the WHOLE array
+        }
+    }
+
+    static void check(final byte[] array, final int fromIndex,
+                      final int toIndex, final byte value)
+    {
+        if (doChecks) {
+            // check that [fromIndex; toIndex[ only contains the given value:
+            for (int i = fromIndex; i < toIndex; i++) {
+                if (array[i] != value) {
+                    logException("Invalid array value at " + i + "\n"
+                            + Arrays.toString(array), new Throwable());
+
+                    // ensure array is correctly filled:
+                    Arrays.fill(array, value);
+
+                    return;
+                }
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/CollinearSimplifier.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import sun.awt.geom.PathConsumer2D;
+
+final class CollinearSimplifier implements PathConsumer2D {
+    // PathConsumer2D filter: merges successive lineTo segments of (nearly) equal slope
+    enum SimplifierState {
+        // no point yet / last point in (px1,py1) / pending segment ending at (px2,py2)
+        Empty, PreviousPoint, PreviousLine
+    };
+    // slope precision threshold (absolute difference between two slopes)
+    static final float EPS = 1e-4f; // aaime proposed 1e-3f
+
+    PathConsumer2D delegate; // downstream consumer, set by init()
+    SimplifierState state;
+    float px1, py1, px2, py2; // (px1,py1): last point sent on; (px2,py2): end of stashed segment
+    float pslope; // slope of the stashed segment as first recorded (not updated on merge)
+
+    CollinearSimplifier() {
+    }
+
+    public CollinearSimplifier init(PathConsumer2D delegate) {
+        this.delegate = delegate;
+        this.state = SimplifierState.Empty; // reset the state machine
+
+        return this; // fluent API
+    }
+
+    @Override
+    public void pathDone() {
+        emitStashedLine(); // flush any pending segment before ending the path
+        state = SimplifierState.Empty;
+        delegate.pathDone();
+    }
+
+    @Override
+    public void closePath() {
+        emitStashedLine(); // flush any pending segment before closing
+        state = SimplifierState.Empty;
+        delegate.closePath();
+    }
+
+    @Override
+    public long getNativeConsumer() {
+        return 0; // always 0
+    }
+
+    @Override
+    public void quadTo(float x1, float y1, float x2, float y2) {
+        emitStashedLine(); // curves are never merged: flush, then forward as-is
+        delegate.quadTo(x1, y1, x2, y2);
+        // final end point:
+        state = SimplifierState.PreviousPoint;
+        px1 = x2;
+        py1 = y2;
+    }
+
+    @Override
+    public void curveTo(float x1, float y1, float x2, float y2,
+                        float x3, float y3) {
+        emitStashedLine(); // curves are never merged: flush, then forward as-is
+        delegate.curveTo(x1, y1, x2, y2, x3, y3);
+        // final end point:
+        state = SimplifierState.PreviousPoint;
+        px1 = x3;
+        py1 = y3;
+    }
+
+    @Override
+    public void moveTo(float x, float y) {
+        emitStashedLine(); // flush any pending segment of the previous subpath
+        delegate.moveTo(x, y);
+        state = SimplifierState.PreviousPoint;
+        px1 = x;
+        py1 = y;
+    }
+
+    @Override
+    public void lineTo(final float x, final float y) {
+        switch (state) {
+            case Empty: // no reference point: forward the line immediately
+                delegate.lineTo(x, y);
+                state = SimplifierState.PreviousPoint;
+                px1 = x;
+                py1 = y;
+                return;
+
+            case PreviousPoint: // stash this segment instead of emitting it
+                state = SimplifierState.PreviousLine;
+                px2 = x;
+                py2 = y;
+                pslope = getSlope(px1, py1, x, y);
+                return;
+
+            case PreviousLine: // compare the new segment with the stashed one
+                final float slope = getSlope(px2, py2, x, y);
+                // test for collinearity (== also covers equal infinite slopes)
+                if ((slope == pslope) || (Math.abs(pslope - slope) < EPS)) {
+                    // merge segments: only the stashed end point moves
+                    px2 = x;
+                    py2 = y;
+                    return;
+                }
+                // emit previous segment and stash the new one
+                delegate.lineTo(px2, py2);
+                px1 = px2;
+                py1 = py2;
+                px2 = x;
+                py2 = y;
+                pslope = slope;
+                return;
+            default:
+        }
+    }
+
+    private void emitStashedLine() {
+        if (state == SimplifierState.PreviousLine) { // only this state holds a pending segment
+            delegate.lineTo(px2, py2);
+        }
+    }
+
+    private static float getSlope(float x1, float y1, float x2, float y2) {
+        float dy = y2 - y1;
+        if (dy == 0f) { // horizontal: avoid 0/0, return +/-Infinity by x direction
+            return (x2 > x1) ? Float.POSITIVE_INFINITY
+                   : Float.NEGATIVE_INFINITY;
+        }
+        return (x2 - x1) / dy; // slope expressed as dx/dy
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Curve.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.util.Iterator;
+
+final class Curve {
+
+    float ax, ay, bx, by, cx, cy, dx, dy; // p(t) = a*t^3 + b*t^2 + c*t + d
+    float dax, day, dbx, dby; // p'(t) = da*t^2 + db*t + c
+    // shared iterator instance
+    private final BreakPtrIterator iterator = new BreakPtrIterator();
+
+    Curve() {
+    }
+
+    void set(float[] points, int type) {
+        switch(type) {
+        case 8: // 4 points = cubic
+            set(points[0], points[1],
+                points[2], points[3],
+                points[4], points[5],
+                points[6], points[7]);
+            return;
+        case 6: // 3 points = quadratic
+            set(points[0], points[1],
+                points[2], points[3],
+                points[4], points[5]);
+            return;
+        default:
+            throw new InternalError("Curves can only be cubic or quadratic");
+        }
+    }
+
+    void set(float x1, float y1,
+             float x2, float y2,
+             float x3, float y3,
+             float x4, float y4)
+    {
+        ax = 3f * (x2 - x3) + x4 - x1;
+        ay = 3f * (y2 - y3) + y4 - y1;
+        bx = 3f * (x1 - 2f * x2 + x3);
+        by = 3f * (y1 - 2f * y2 + y3);
+        cx = 3f * (x2 - x1);
+        cy = 3f * (y2 - y1);
+        dx = x1;
+        dy = y1;
+        dax = 3f * ax; day = 3f * ay;
+        dbx = 2f * bx; dby = 2f * by;
+    }
+
+    void set(float x1, float y1,
+             float x2, float y2,
+             float x3, float y3)
+    {
+        ax = 0f; ay = 0f; // no cubic term for a quadratic
+        bx = x1 - 2f * x2 + x3;
+        by = y1 - 2f * y2 + y3;
+        cx = 2f * (x2 - x1);
+        cy = 2f * (y2 - y1);
+        dx = x1;
+        dy = y1;
+        dax = 0f; day = 0f;
+        dbx = 2f * bx; dby = 2f * by;
+    }
+
+    float xat(float t) {
+        return t * (t * (t * ax + bx) + cx) + dx;
+    }
+    float yat(float t) {
+        return t * (t * (t * ay + by) + cy) + dy;
+    }
+
+    float dxat(float t) {
+        return t * (t * dax + dbx) + cx;
+    }
+
+    float dyat(float t) {
+        return t * (t * day + dby) + cy;
+    }
+
+    int dxRoots(float[] roots, int off) {
+        return Helpers.quadraticRoots(dax, dbx, cx, roots, off);
+    }
+
+    int dyRoots(float[] roots, int off) {
+        return Helpers.quadraticRoots(day, dby, cy, roots, off);
+    }
+
+    int infPoints(float[] pts, int off) {
+        // inflection point at t if -f'(t)x*f''(t)y + f'(t)y*f''(t)x == 0
+        // Fortunately, this turns out to be quadratic, so there are at
+        // most 2 inflection points.
+        final float a = dax * dby - dbx * day;
+        final float b = 2f * (cy * dax - day * cx);
+        final float c = cy * dbx - cx * dby;
+
+        return Helpers.quadraticRoots(a, b, c, pts, off);
+    }
+
+    // finds points where the first and second derivative are
+    // perpendicular. This happens when g(t) = f'(t)*f''(t) == 0 (where
+    // * is a dot product). Unfortunately, we have to solve a cubic.
+    private int perpendiculardfddf(float[] pts, int off) {
+        assert pts.length >= off + 4;
+
+        // these are the coefficients of some multiple of g(t) (not g(t),
+        // because the roots of a polynomial are not changed after multiplication
+        // by a constant, and this way we save a few multiplications).
+        final float a = 2f * (dax*dax + day*day);
+        final float b = 3f * (dax*dbx + day*dby);
+        final float c = 2f * (dax*cx + day*cy) + dbx*dbx + dby*dby;
+        final float d = dbx*cx + dby*cy;
+        return Helpers.cubicRootsInAB(a, b, c, d, pts, off, 0f, 1f);
+    }
+
+    // Tries to find the roots of the function ROC(t)-w in [0, 1). It uses
+    // a variant of the false position algorithm to find the roots. False
+    // position requires that 2 initial values x0,x1 be given, and that the
+    // function must have opposite signs at those values. To find such
+    // values, we need the local extrema of the ROC function, for which we
+    // need the roots of its derivative; however, it's harder to find the
+    // roots of the derivative in this case than it is to find the roots
+    // of the original function. So, we find all points where this curve's
+    // first and second derivative are perpendicular, and we pretend these
+    // are our local extrema. There are at most 3 of these, so we will check
+    // at most 4 sub-intervals of (0,1). ROC has asymptotes at inflection
+    // points, so roc-w can have at least 6 roots. This shouldn't be a
+    // problem for what we're trying to do (draw a nice looking curve).
+    int rootsOfROCMinusW(float[] roots, int off, final float w, final float err) {
+        // no OOB exception, because by now off<=6, and roots.length >= 10
+        assert off <= 6 && roots.length >= 10;
+        int ret = off;
+        int numPerpdfddf = perpendiculardfddf(roots, off);
+        float t0 = 0, ft0 = ROCsq(t0) - w*w;
+        roots[off + numPerpdfddf] = 1f; // always check interval end points
+        numPerpdfddf++;
+        for (int i = off; i < off + numPerpdfddf; i++) {
+            float t1 = roots[i], ft1 = ROCsq(t1) - w*w; // read before roots[ret] is overwritten
+            if (ft0 == 0f) {
+                roots[ret++] = t0;
+            } else if (ft1 * ft0 < 0f) { // have opposite signs
+                // (ROC(t)^2 == w^2) == (ROC(t) == w) is true because
+                // ROC(t) >= 0 for all t.
+                roots[ret++] = falsePositionROCsqMinusX(t0, t1, w*w, err);
+            }
+            t0 = t1;
+            ft0 = ft1;
+        }
+
+        return ret - off; // number of roots stored from roots[off]
+    }
+
+    // maps +/-Infinity to +/-Float.MAX_VALUE (Float.MIN_VALUE is positive, so not usable here)
+    private static float eliminateInf(float x) {
+        return (x == Float.POSITIVE_INFINITY ? Float.MAX_VALUE :
+            (x == Float.NEGATIVE_INFINITY ? -Float.MAX_VALUE : x));
+    }
+
+    // A slight modification of the false position algorithm on wikipedia.
+    // This only works for the ROCsq-x functions. It might be nice to have
+    // the function as an argument, but that would be awkward in java6.
+    // TODO: It is something to consider for java8 (or whenever lambda
+    // expressions make it into the language), depending on how closures
+    // turn out. Same goes for the newton's method algorithm in Helpers.java
+    private float falsePositionROCsqMinusX(float x0, float x1,
+                                           final float x, final float err)
+    {
+        final int iterLimit = 100;
+        int side = 0; // sign tells which end point was replaced in the last iterations
+        float t = x1, ft = eliminateInf(ROCsq(t) - x);
+        float s = x0, fs = eliminateInf(ROCsq(s) - x);
+        float r = s, fr;
+        for (int i = 0; i < iterLimit && Math.abs(t - s) > err * Math.abs(t + s); i++) {
+            r = (fs * t - ft * s) / (fs - ft);
+            fr = ROCsq(r) - x;
+            if (sameSign(fr, ft)) {
+                ft = fr; t = r;
+                if (side < 0) {
+                    fs /= (1 << (-side));
+                    side--;
+                } else {
+                    side = -1;
+                }
+            } else if (fr * fs > 0) {
+                fs = fr; s = r;
+                if (side > 0) {
+                    ft /= (1 << side);
+                    side++;
+                } else {
+                    side = 1;
+                }
+            } else {
+                break;
+            }
+        }
+        return r;
+    }
+
+    private static boolean sameSign(float x, float y) {
+        // another way is to test if x*y > 0. This is bad for small x, y.
+        return (x < 0f && y < 0f) || (x > 0f && y > 0f);
+    }
+
+    // returns the radius of curvature squared at t of this curve
+    // see http://en.wikipedia.org/wiki/Radius_of_curvature_(applications)
+    private float ROCsq(final float t) {
+        // dx=dxat(t) and dy=dyat(t). These calls have been inlined for efficiency
+        final float dx = t * (t * dax + dbx) + cx;
+        final float dy = t * (t * day + dby) + cy;
+        final float ddx = 2f * dax * t + dbx;
+        final float ddy = 2f * day * t + dby;
+        final float dx2dy2 = dx*dx + dy*dy;
+        final float ddx2ddy2 = ddx*ddx + ddy*ddy;
+        final float ddxdxddydy = ddx*dx + ddy*dy;
+        return dx2dy2*((dx2dy2*dx2dy2) / (dx2dy2 * ddx2ddy2 - ddxdxddydy*ddxdxddydy));
+    }
+
+    // curve to be broken should be in pts
+    // this will change the contents of pts but not Ts
+    // TODO: There's no reason for Ts to be an array. All we need is a sequence
+    // of t values at which to subdivide. An array satisfies this condition,
+    // but is unnecessarily restrictive. Ts should be an Iterator<Float> instead.
+    // Doing this will also make dashing easier, since we could easily make
+    // LengthIterator an Iterator<Float> and feed it to this function to simplify
+    // the loop in Dasher.somethingTo.
+    BreakPtrIterator breakPtsAtTs(final float[] pts, final int type,
+                                  final float[] Ts, final int numTs)
+    {
+        assert pts.length >= 2*type && numTs <= Ts.length;
+
+        // initialize shared iterator (the same instance is returned on every call):
+        iterator.init(pts, type, Ts, numTs);
+
+        return iterator;
+    }
+
+    static final class BreakPtrIterator {
+        private int nextCurveIdx; // index in ts of the next split parameter
+        private int curCurveOff; // offset in pts of the curve still to be split
+        private float prevT; // previous split parameter
+        private float[] pts;
+        private int type;
+        private float[] ts;
+        private int numTs;
+
+        void init(final float[] pts, final int type,
+                  final float[] ts, final int numTs) {
+            this.pts = pts;
+            this.type = type;
+            this.ts = ts;
+            this.numTs = numTs;
+
+            nextCurveIdx = 0;
+            curCurveOff = 0;
+            prevT = 0f;
+        }
+
+        public boolean hasNext() {
+            return nextCurveIdx <= numTs; // numTs splits yield numTs + 1 pieces
+        }
+
+        public int next() {
+            int ret; // offset in pts of the piece to use
+            if (nextCurveIdx < numTs) {
+                float curT = ts[nextCurveIdx];
+                float splitT = (curT - prevT) / (1f - prevT); // rescale t to the remaining curve
+                Helpers.subdivideAt(splitT,
+                                    pts, curCurveOff,
+                                    pts, 0,
+                                    pts, type, type);
+                prevT = curT;
+                ret = 0;
+                curCurveOff = type;
+            } else {
+                ret = curCurveOff; // last piece: the remainder left by the previous split
+            }
+            nextCurveIdx++;
+            return ret;
+        }
+    }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Dasher.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,702 @@
+/*
+ * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.util.Arrays;
+import sun.awt.geom.PathConsumer2D;
+
+/**
+ * The <code>Dasher</code> class takes a series of linear commands
+ * (<code>moveTo</code>, <code>lineTo</code>, <code>close</code> and
+ * <code>end</code>) and breaks them into smaller segments according to a
+ * dash pattern array and a starting dash phase.
+ *
+ * <p> Issues: in J2Se, a zero length dash segment is drawn as a very
+ * short dash, whereas Pisces does not draw anything.  The PostScript
+ * semantics are unclear.
+ *
+ */
+final class Dasher implements sun.awt.geom.PathConsumer2D, MarlinConst {
+
+    static final int recLimit = 4; // deepest recursion level of LengthIterator
+    static final float ERR = 0.01f; // flatness tolerance tested in LengthIterator.onLeaf()
+    static final float minTincrement = 1f / (1 << recLimit); // t span of a leaf at level recLimit
+
+    private PathConsumer2D out; // consumer receiving the dashed path
+    private float[] dash; // dash lengths; index 0 is an "on" dash, then on / off alternate
+    private int dashLen; // number of entries of dash in use
+    private float startPhase; // normalized phase restored by moveTo()
+    private boolean startDashOn; // on / off state restored by moveTo()
+    private int startIdx; // dash index restored by moveTo()
+
+    private boolean starting; // true until the first "off" dash of a subpath is reached
+    private boolean needsMoveTo; // a moveTo must precede the next emitted piece
+
+    private int idx; // index of the current dash in dash[]
+    private boolean dashOn; // true while the current dash is an "on" dash
+    private float phase; // length already consumed within the current dash
+
+    private float sx, sy; // start point of the current subpath
+    private float x0, y0; // current point
+
+    // temporary storage for the current curve
+    private final float[] curCurvepts;
+
+    // per-thread renderer context
+    final RendererContext rdrCtx;
+
+    // dashes array (dirty)
+    final float[] dashes_initial = new float[INITIAL_ARRAY];
+
+    // flag to recycle dash array copy
+    boolean recycleDashes;
+
+    // per-thread initial arrays (large enough to satisfy most usages)
+    // +1 to avoid recycling in Helpers.widenArray()
+    private final float[] firstSegmentsBuffer_initial = new float[INITIAL_ARRAY + 1];
+
+    /**
+     * Creates a <code>Dasher</code> bound to the given renderer context.
+     * @param rdrCtx per-thread renderer context
+     */
+    Dasher(final RendererContext rdrCtx) {
+        // room for 2 curves of at most 8 coordinates each, as dashing a
+        // curve subdivides it within this same array
+        this.curCurvepts = new float[2 * 8];
+
+        // start with the preallocated buffer of this instance
+        this.firstSegmentsBuffer = firstSegmentsBuffer_initial;
+        this.rdrCtx = rdrCtx;
+    }
+
+    /**
+     * (Re)initializes this <code>Dasher</code> for a new path.
+     *
+     * @param out the <code>PathConsumer2D</code> receiving the dashed path
+     * @param dash the dash pattern as an array of <code>float</code> lengths
+     * @param dashLen number of entries of the dash array to use
+     * @param phase the dash phase as a <code>float</code>; must be &gt;= 0
+     * @param recycleDashes true to indicate to recycle the given dash array
+     * @return this instance
+     */
+    Dasher init(final PathConsumer2D out, float[] dash, int dashLen,
+                float phase, boolean recycleDashes)
+    {
+        if (phase < 0f) {
+            throw new IllegalArgumentException("phase < 0 !");
+        }
+        this.out = out;
+
+        // Consume whole dashes until 0 <= phase < dash[firstIdx],
+        // toggling the on / off state for every dash skipped
+        int firstIdx = 0;
+        this.dashOn = true;
+        for (float d = dash[firstIdx]; phase >= d; d = dash[firstIdx]) {
+            phase -= d;
+            firstIdx = (firstIdx + 1) % dashLen;
+            this.dashOn = !this.dashOn;
+        }
+
+        // remember the normalized state: moveTo() restores it per subpath
+        this.startIdx = firstIdx;
+        this.startDashOn = this.dashOn;
+        this.startPhase = this.phase = phase;
+        this.dash = dash;
+        this.dashLen = dashLen;
+        this.recycleDashes = recycleDashes;
+        // nothing emitted nor buffered yet:
+        this.starting = true;
+        this.needsMoveTo = false;
+        this.firstSegidx = 0;
+        return this; // fluent API
+    }
+
+    /**
+     * Disposes this dasher:
+     * clean up before reusing this instance
+     */
+    void dispose() {
+        if (doCleanDirty) {
+            // Force zero-fill dirty arrays:
+            Arrays.fill(curCurvepts, 0f);
+            Arrays.fill(firstSegmentsBuffer, 0f);
+        }
+        // Return arrays:
+        if (recycleDashes && dash != dashes_initial) {
+            rdrCtx.putDirtyFloatArray(dash);
+            dash = null;
+        }
+
+        if (firstSegmentsBuffer != firstSegmentsBuffer_initial) {
+            rdrCtx.putDirtyFloatArray(firstSegmentsBuffer);
+            firstSegmentsBuffer = firstSegmentsBuffer_initial;
+        }
+    }
+
+    @Override
+    public void moveTo(float x0, float y0) {
+        // flush the first dash still buffered for the previous subpath
+        if (firstSegidx > 0) {
+            out.moveTo(sx, sy);
+            emitFirstSegments();
+        }
+        // the new subpath starts at (x0, y0) with the initial dash state:
+        sx = x0; sy = y0;
+        this.x0 = x0; this.y0 = y0;
+        idx = startIdx; dashOn = startDashOn; phase = startPhase;
+        starting = true;
+        needsMoveTo = true;
+    }
+
+    private void emitSeg(float[] buf, int off, int type) {
+        // type is the coordinate count of the segment including its start
+        // point; buf only holds the (type - 2) following coordinates from
+        // off on: 4 = line, 6 = quad, 8 = cubic
+        if (type == 8) {
+            out.curveTo(buf[off], buf[off + 1],
+                        buf[off + 2], buf[off + 3],
+                        buf[off + 4], buf[off + 5]);
+        } else if (type == 6) {
+            out.quadTo(buf[off], buf[off + 1],
+                       buf[off + 2], buf[off + 3]);
+        } else if (type == 4) {
+            out.lineTo(buf[off], buf[off + 1]);
+        } else {
+            // any other type is silently ignored
+        }
+    }
+
+    private void emitFirstSegments() {
+        final float[] buf = firstSegmentsBuffer;
+        int pos = 0;
+        while (pos < firstSegidx) { // records: [type, (type - 2) coordinates]
+            final int type = (int) buf[pos];
+            emitSeg(buf, pos + 1, type);
+            pos += type - 1;
+        }
+        firstSegidx = 0;
+    }
+    // We don't emit the first dash right away. If we did, caps would be
+    // drawn on it, but we need joins to be drawn if there's a closePath()
+    // So, we store the path elements that make up the first dash in the
+    // buffer below.
+    private float[] firstSegmentsBuffer; // dynamic array
+    private int firstSegidx;
+
+    // Handles the piece ending at (pts[off+type-4], pts[off+type-3]): it is
+    // buffered (first dash), emitted (dash on) or skipped (dash off).
+    private void goTo(float[] pts, int off, final int type) {
+        float x = pts[off + type - 4]; // end point = last 2 coordinates of the piece
+        float y = pts[off + type - 3];
+        if (dashOn) {
+            if (starting) { // still in the first dash: only record the piece
+                int len = type - 2 + 1; // (type - 2) coordinates + 1 slot for the type
+                int segIdx = firstSegidx;
+                float[] buf = firstSegmentsBuffer;
+                if (segIdx + len  > buf.length) { // buffer too small for this record
+                    if (doStats) {
+                        RendererContext.stats.stat_array_dasher_firstSegmentsBuffer
+                            .add(segIdx + len);
+                    }
+                    firstSegmentsBuffer = buf
+                        = rdrCtx.widenDirtyFloatArray(buf, segIdx, segIdx + len);
+                }
+                buf[segIdx++] = type; // the type is stored as a float in front of the coordinates
+                len--;
+                // small arraycopy (2, 4 or 6) but with offset:
+                System.arraycopy(pts, off, buf, segIdx, len);
+                segIdx += len;
+                firstSegidx = segIdx;
+            } else {
+                if (needsMoveTo) { // (x0, y0) is still the start point of the piece here
+                    out.moveTo(x0, y0);
+                    needsMoveTo = false;
+                }
+                emitSeg(pts, off, type);
+            }
+        } else {
+            starting = false; // an "off" dash ends the first dash
+            needsMoveTo = true; // the next emitted piece must start with a moveTo
+        }
+        this.x0 = x; // the end point becomes the current point
+        this.y0 = y;
+    }
+
+    @Override
+    public void lineTo(float x1, float y1) { // dashes the segment (x0,y0) -> (x1,y1)
+        float dx = x1 - x0;
+        float dy = y1 - y0;
+
+        float len = dx*dx + dy*dy; // squared length for now
+        if (len == 0f) {
+            return; // zero length segment: nothing to dash
+        }
+        len = (float) Math.sqrt(len);
+
+        // The scaling factors needed to get the dx and dy of the
+        // transformed dash segments.
+        final float cx = dx / len;
+        final float cy = dy / len;
+
+        final float[] _curCurvepts = curCurvepts;
+        final float[] _dash = dash;
+
+        float leftInThisDashSegment;
+        float dashdx, dashdy, p;
+
+        while (true) { // one iteration per dash crossed; len = length still to dash
+            leftInThisDashSegment = _dash[idx] - phase;
+
+            if (len <= leftInThisDashSegment) { // the segment ends within the current dash
+                _curCurvepts[0] = x1;
+                _curCurvepts[1] = y1;
+                goTo(_curCurvepts, 0, 4);
+
+                // Advance phase within current dash segment
+                phase += len;
+                // TODO: compare float values using epsilon:
+                if (len == leftInThisDashSegment) { // dash exactly used up: switch to the next one
+                    phase = 0f;
+                    idx = (idx + 1) % dashLen;
+                    dashOn = !dashOn;
+                }
+                return;
+            }
+
+            dashdx = _dash[idx] * cx; // full dash length projected on the x / y axes
+            dashdy = _dash[idx] * cy;
+
+            if (phase == 0f) {
+                _curCurvepts[0] = x0 + dashdx;
+                _curCurvepts[1] = y0 + dashdy;
+            } else {
+                p = leftInThisDashSegment / _dash[idx]; // fraction of the dash still to cover
+                _curCurvepts[0] = x0 + p * dashdx;
+                _curCurvepts[1] = y0 + p * dashdy;
+            }
+
+            goTo(_curCurvepts, 0, 4); // handles the piece and moves (x0,y0) to its end
+
+            len -= leftInThisDashSegment;
+            // Advance to next dash segment
+            idx = (idx + 1) % dashLen;
+            dashOn = !dashOn;
+            phase = 0f;
+        }
+    }
+
+    // shared instance in Dasher
+    private final LengthIterator li = new LengthIterator();
+
+    // preconditions: curCurvepts must be an array of length at least 2 * type,
+    // that contains the curve we want to dash in the first type elements
+    private void somethingTo(int type) { // called with 6 by quadTo() and 8 by curveTo()
+        if (pointCurve(curCurvepts, type)) {
+            return; // all points are identical: nothing to dash
+        }
+        li.initializeIterationOnCurve(curCurvepts, type);
+
+        // initially the current curve is at curCurvepts[0...type]
+        int curCurveoff = 0;
+        float lastSplitT = 0f;
+        float t;
+        float leftInThisDashSegment = dash[idx] - phase;
+
+        while ((t = li.next(leftInThisDashSegment)) < 1f) { // t < 1: the current dash ends on the curve
+            if (t != 0f) {
+                Helpers.subdivideAt((t - lastSplitT) / (1f - lastSplitT), // t relative to the remaining curve
+                                    curCurvepts, curCurveoff,
+                                    curCurvepts, 0,
+                                    curCurvepts, type, type);
+                lastSplitT = t;
+                goTo(curCurvepts, 2, type); // offset 2 skips the start point of the piece
+                curCurveoff = type; // the remaining curve is now read from offset type
+            }
+            // Advance to next dash segment
+            idx = (idx + 1) % dashLen;
+            dashOn = !dashOn;
+            phase = 0f;
+            leftInThisDashSegment = dash[idx];
+        }
+        goTo(curCurvepts, curCurveoff+2, type); // last piece: what is left of the curve
+        phase += li.lastSegLen();
+        if (phase >= dash[idx]) { // dash used up at the end of the curve: switch to the next one
+            phase = 0f;
+            idx = (idx + 1) % dashLen;
+            dashOn = !dashOn;
+        }
+        // reset LengthIterator:
+        li.reset();
+    }
+
+    private static boolean pointCurve(float[] curve, int type) {
+        // a point iff every coordinate equals the one 2 slots before it
+        int k = 2;
+        while (k < type && curve[k] == curve[k - 2]) {
+            k++;
+        }
+        return k >= type;
+    }
+
+    // Objects of this class are used to iterate through curves. They return
+    // t values where the left side of the curve has a specified length.
+    // It does this by subdividing the input curve until a certain error
+    // condition has been met. A recursive subdivision procedure would
+    // return as many as 1<<limit curves, but this is an iterator and we
+    // don't need all the curves all at once, so what we do is carry out a
+    // lazy inorder traversal of the recursion tree (meaning we only move
+    // through the tree when we need the next subdivided curve). This saves
+    // us a lot of memory because at any one time we only need to store
+    // limit+1 curves - one for each level of the tree + 1.
+    // NOTE: the way we do things here is not enough to traverse a general
+    // tree; however, the trees we are interested in have the property that
+    // every non leaf node has exactly 2 children
+    static final class LengthIterator {
+        private enum Side {LEFT, RIGHT};
+        // Holds the curves at various levels of the recursion. The root
+        // (i.e. the original curve) is at recCurveStack[0] (but then it
+        // gets subdivided, the left half is put at 1, so most of the time
+        // only the right half of the original curve is at 0)
+        private final float[][] recCurveStack; // dirty
+        // sides[i] indicates whether the node at level i+1 in the path from
+        // the root to the current leaf is a left or right child of its parent.
+        private final Side[] sides; // dirty
+        private int curveType;
+        // lastT and nextT delimit the current leaf.
+        private float nextT;
+        private float lenAtNextT;
+        private float lastT;
+        private float lenAtLastT;
+        private float lenAtLastSplit;
+        private float lastSegLen;
+        // the current level in the recursion tree. 0 is the root. limit
+        // is the deepest possible leaf.
+        private int recLevel;
+        private boolean done;
+
+        // the lengths of the lines of the control polygon. Only its first
+        // curveType/2 - 1 elements are valid. This is an optimization. See
+        // next(float) for more detail.
+        private final float[] curLeafCtrlPolyLengths = new float[3];
+
+        LengthIterator() {
+            // poison the state: using this object before it is initialized
+            // on a curve should then fail as soon as possible.
+            done = true;
+            recLevel = Integer.MIN_VALUE;
+            lenAtLastSplit = Float.MIN_VALUE;
+            nextT = lenAtNextT = lastSegLen = Float.MAX_VALUE;
+
+            // one curve (8 floats) per level incl. root, one side below each
+            sides = new Side[recLimit];
+            recCurveStack = new float[recLimit + 1][8];
+        }
+
+        /**
+         * Resets this LengthIterator (only cleans data in doCleanDirty mode).
+         */
+        void reset() {
+            if (!doCleanDirty) {
+                // keep data dirty as it appears not useful to reset data
+                return;
+            }
+            for (final float[] curve : recCurveStack) {
+                Arrays.fill(curve, 0f);
+            }
+            Arrays.fill(sides, Side.LEFT);
+            Arrays.fill(curLeafCtrlPolyLengths, 0f);
+            Arrays.fill(nextRoots, 0f);
+            // zero-fill then mark the coefficient cache as invalid again:
+            Arrays.fill(flatLeafCoefCache, 0f);
+            flatLeafCoefCache[2] = -1f;
+        }
+
+        void initializeIterationOnCurve(float[] pts, int type) {
+            // optimize arraycopy (8 values faster than 6 = type):
+            System.arraycopy(pts, 0, recCurveStack[0], 0, 8);
+            this.curveType = type;
+            this.recLevel = 0;
+            this.lastT = 0f;
+            this.lenAtLastT = 0f;
+            this.nextT = 0f;
+            this.lenAtNextT = 0f;
+            goLeft(); // initializes nextT and lenAtNextT properly
+            this.lenAtLastSplit = 0f;
+            if (recLevel > 0) {
+                this.sides[0] = Side.LEFT;
+                this.done = false;
+            } else {
+                // the root of the tree is a leaf so we're done.
+                this.sides[0] = Side.RIGHT;
+                this.done = true;
+            }
+            this.lastSegLen = 0f;
+        }
+
+        // 0 == false, 1 == true, -1 == invalid cached value.
+        private int cachedHaveLowAcceleration = -1;
+
+        private boolean haveLowAcceleration(float err) { // NOTE: once cached, the answer no longer depends on err
+            if (cachedHaveLowAcceleration == -1) { // not computed yet for the current leaf
+                final float len1 = curLeafCtrlPolyLengths[0];
+                final float len2 = curLeafCtrlPolyLengths[1];
+                // the test below is equivalent to !within(len1/len2, 1, err).
+                // It is using a multiplication instead of a division, so it
+                // should be a bit faster.
+                if (!Helpers.within(len1, len2, err*len2)) {
+                    cachedHaveLowAcceleration = 0;
+                    return false;
+                }
+                if (curveType == 8) { // cubic: a third control polygon side to compare
+                    final float len3 = curLeafCtrlPolyLengths[2];
+                    // if len1 is close to 2 and 2 is close to 3, that probably
+                    // means 1 is close to 3 so the second part of this test might
+                    // not be needed, but it doesn't hurt to include it.
+                    final float errLen3 = err * len3;
+                    if (!(Helpers.within(len2, len3, errLen3) &&
+                          Helpers.within(len1, len3, errLen3))) {
+                        cachedHaveLowAcceleration = 0;
+                        return false;
+                    }
+                }
+                cachedHaveLowAcceleration = 1;
+                return true;
+            }
+
+            return (cachedHaveLowAcceleration == 1); // cached answer (invalidated by goLeft())
+        }
+
+        // we want to avoid allocations/gc so we keep this array so we
+        // can put roots in it,
+        private final float[] nextRoots = new float[4];
+
+        // caches the coefficients of the current leaf in its flattened
+        // form (see inside next() for what that means). The cache is
+        // invalid when its third element is negative, since in any
+        // valid flattened curve, this would be >= 0.
+        private final float[] flatLeafCoefCache = new float[]{0f, 0f, -1f, 0f};
+
+        // returns the t value where the remaining curve should be split in
+        // order for the left subdivided curve to have length len. If len
+        // is >= the length of the uniterated curve, it returns 1.
+        float next(final float len) {
+            final float targetLength = lenAtLastSplit + len; // length measured from the curve start
+            while (lenAtNextT < targetLength) { // skip the leaves ending before the target length
+                if (done) {
+                    lastSegLen = lenAtNextT - lenAtLastSplit; // what was left of the curve
+                    return 1f;
+                }
+                goToNextLeaf();
+            }
+            lenAtLastSplit = targetLength;
+            final float leaflen = lenAtNextT - lenAtLastT;
+            float t = (targetLength - lenAtLastT) / leaflen; // linear estimate within the current leaf
+
+            // cubicRootsInAB is a fairly expensive call, so we just don't do it
+            // if the acceleration in this section of the curve is small enough.
+            if (!haveLowAcceleration(0.05f)) {
+                // We flatten the current leaf along the x axis, so that we're
+                // left with a, b, c which define a 1D Bezier curve. We then
+                // solve this to get the parameter of the original leaf that
+                // gives us the desired length.
+                final float[] _flatLeafCoefCache = flatLeafCoefCache;
+
+                if (_flatLeafCoefCache[2] < 0) { // negative third element = invalid cache
+                    float x = 0f + curLeafCtrlPolyLengths[0],
+                          y = x  + curLeafCtrlPolyLengths[1];
+                    if (curveType == 8) {
+                        float z = y + curLeafCtrlPolyLengths[2];
+                        _flatLeafCoefCache[0] = 3f * (x - y) + z;
+                        _flatLeafCoefCache[1] = 3f * (y - 2f * x);
+                        _flatLeafCoefCache[2] = 3f * x;
+                        _flatLeafCoefCache[3] = -z;
+                    } else if (curveType == 6) {
+                        _flatLeafCoefCache[0] = 0f;
+                        _flatLeafCoefCache[1] = y - 2f * x;
+                        _flatLeafCoefCache[2] = 2f * x;
+                        _flatLeafCoefCache[3] = -y;
+                    }
+                }
+                float a = _flatLeafCoefCache[0];
+                float b = _flatLeafCoefCache[1];
+                float c = _flatLeafCoefCache[2];
+                float d = t * _flatLeafCoefCache[3]; // cached value scaled by the linear estimate
+
+                // we use cubicRootsInAB here, because we want only roots in 0, 1,
+                // and our quadratic root finder doesn't filter, so it's just a
+                // matter of convenience.
+                int n = Helpers.cubicRootsInAB(a, b, c, d, nextRoots, 0, 0, 1);
+                if (n == 1 && !Float.isNaN(nextRoots[0])) { // otherwise keep the linear estimate
+                    t = nextRoots[0];
+                }
+            }
+            // t is relative to the current leaf, so we must make it a valid parameter
+            // of the original curve.
+            t = t * (nextT - lastT) + lastT;
+            if (t >= 1f) {
+                t = 1f;
+                done = true;
+            }
+            // even if done = true, if we're here, that means targetLength
+            // is equal to, or very, very close to the total length of the
+            // curve, so lastSegLen won't be too high. In cases where len
+            // overshoots the curve, this method will exit in the while
+            // loop, and lastSegLen will still be set to the right value.
+            lastSegLen = len;
+            return t;
+        }
+
+        float lastSegLen() { // length recorded by the last call to next()
+            return this.lastSegLen;
+        }
+
+        // go to the next leaf (in an inorder traversal) in the recursion tree
+        // preconditions: must be on a leaf, and that leaf must not be the root.
+        private void goToNextLeaf() {
+            // We must go to the first ancestor node that has an unvisited
+            // right child.
+            int _recLevel = recLevel;
+            final Side[] _sides = sides;
+
+            _recLevel--; // sides[i] describes the node at level i+1: start with the current leaf
+            while(_sides[_recLevel] == Side.RIGHT) {
+                if (_recLevel == 0) { // only right children up to the root: no leaf left
+                    recLevel = 0;
+                    done = true;
+                    return;
+                }
+                _recLevel--;
+            }
+
+            _sides[_recLevel] = Side.RIGHT; // now visiting the right child of that ancestor
+            // optimize arraycopy (8 values faster than 6 = type):
+            System.arraycopy(recCurveStack[_recLevel], 0,
+                             recCurveStack[_recLevel+1], 0, 8);
+            _recLevel++;
+
+            recLevel = _recLevel;
+            goLeft(); // then down to the leftmost leaf of that child
+        }
+
+        // go to the leftmost leaf under the current node; updates nextT / lenAtNextT.
+        private void goLeft() {
+            float len = onLeaf(); // negative when the current node is not a leaf
+            if (len >= 0f) {
+                lastT = nextT;
+                lenAtLastT = lenAtNextT;
+                nextT += (1 << (recLimit - recLevel)) * minTincrement; // = 1 / 2^recLevel
+                lenAtNextT += len;
+                // invalidate caches
+                flatLeafCoefCache[2] = -1f;
+                cachedHaveLowAcceleration = -1;
+            } else {
+                Helpers.subdivide(recCurveStack[recLevel], 0, // halves go to level+1 and back to this level
+                                  recCurveStack[recLevel+1], 0,
+                                  recCurveStack[recLevel], 0, curveType);
+                sides[recLevel] = Side.LEFT;
+                recLevel++;
+                goLeft(); // recursion depth is bounded: onLeaf() accepts any node at recLimit
+            }
+        }
+
+        // this is a bit of a hack. It returns -1 if we're not on a leaf, and
+        // the length of the leaf if we are on a leaf.
+        private float onLeaf() {
+            float[] curve = recCurveStack[recLevel];
+            float polyLen = 0f; // length of the control polygon
+
+            float x0 = curve[0], y0 = curve[1];
+            for (int i = 2; i < curveType; i += 2) { // one control polygon side per iteration
+                final float x1 = curve[i], y1 = curve[i+1];
+                final float len = Helpers.linelen(x0, y0, x1, y1);
+                polyLen += len;
+                curLeafCtrlPolyLengths[i/2 - 1] = len; // side lengths are reused by next()
+                x0 = x1;
+                y0 = y1;
+            }
+
+            final float lineLen = Helpers.linelen(curve[0], curve[1], // first point to last point
+                                                  curve[curveType-2],
+                                                  curve[curveType-1]);
+            if ((polyLen - lineLen) < ERR || recLevel == recLimit) { // flat enough or deepest level
+                return (polyLen + lineLen) / 2f; // leaf length = mean of both lengths
+            }
+            return -1f;
+        }
+    }
+
+    @Override
+    public void curveTo(float x1, float y1,
+                        float x2, float y2,
+                        float x3, float y3)
+    {
+        final float[] pts = curCurvepts; // cubic = current point + 3 given points
+        pts[0] = x0;    pts[1] = y0;
+        pts[2] = x1;    pts[3] = y1;
+        pts[4] = x2;    pts[5] = y2;
+        pts[6] = x3;    pts[7] = y3;
+        somethingTo(8);
+    }
+
+    @Override
+    public void quadTo(float x1, float y1, float x2, float y2) {
+        final float[] pts = curCurvepts; // quad = current point + 2 given points
+        pts[0] = x0;    pts[1] = y0;
+        pts[2] = x1;    pts[3] = y1;
+        pts[4] = x2;    pts[5] = y2;
+        somethingTo(6);
+    }
+
+    @Override
+    public void closePath() {
+        lineTo(sx, sy); // dash the closing segment back to the subpath start
+        if (firstSegidx > 0) {
+            // the buffered first dash gets its own moveTo when a moveTo
+            // is pending or when the current dash is off
+            if (needsMoveTo || !dashOn) { out.moveTo(sx, sy); }
+            emitFirstSegments();
+        }
+        moveTo(sx, sy); // also restores the initial dash state
+    }
+
+    @Override
+    public void pathDone() {
+        final PathConsumer2D consumer = out;
+        // flush the first dash still buffered for the last subpath, if any
+        if (firstSegidx > 0) {
+            consumer.moveTo(sx, sy);
+            emitFirstSegments();
+        }
+        consumer.pathDone();
+        dispose(); // Dispose this instance
+    }
+
+    @Override
+    public long getNativeConsumer() { // not supported by this consumer: always throws
+        throw new InternalError("Dasher does not use a native consumer");
+    }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/FloatArrayCache.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.util.ArrayDeque;
+import java.util.Arrays;
+import static sun.java2d.marlin.MarlinUtils.logException;
+import static sun.java2d.marlin.MarlinUtils.logInfo;
+
+final class FloatArrayCache implements MarlinConst {
+
+    private final int arraySize;
+    private final ArrayDeque<float[]> floatArrays;
+    // stats
+    private int getOp = 0;
+    private int createOp = 0;
+    private int returnOp = 0;
+
+    void dumpStats() {
+        if (getOp > 0) {
+            logInfo("FloatArrayCache[" + arraySize + "]: get: " + getOp
+                    + " created: " + createOp + " - returned: " + returnOp
+                    + " :: cache size: " + floatArrays.size());
+        }
+    }
+
+    FloatArrayCache(final int arraySize) {
+        this.arraySize = arraySize;
+        // small but enough: almost 1 cache line
+        this.floatArrays = new ArrayDeque<float[]>(6);
+    }
+
+    float[] getArray() {
+        if (doStats) {
+            getOp++;
+        }
+
+        // use cache
+        final float[] array = floatArrays.pollLast();
+
+        if (array != null) {
+            return array;
+        }
+
+        if (doStats) {
+            createOp++;
+        }
+
+        return new float[arraySize];
+    }
+
+    void putDirtyArray(final float[] array, final int length) {
+        if (length != arraySize) {
+            if (doChecks) {
+                System.out.println("ArrayCache: bad length = " + length);
+            }
+            return;
+        }
+        if (doStats) {
+            returnOp++;
+        }
+
+        // NO clean-up of array data = DIRTY ARRAY
+
+        if (doCleanDirty) {
+            // Force zero-fill dirty arrays:
+            Arrays.fill(array, 0, array.length, 0f);
+        }
+
+        // fill cache:
+        floatArrays.addLast(array);
+    }
+
+    void putArray(final float[] array, final int length,
+                  final int fromIndex, final int toIndex)
+    {
+        if (length != arraySize) {
+            if (doChecks) {
+                System.out.println("ArrayCache: bad length = " + length);
+            }
+            return;
+        }
+        if (doStats) {
+            returnOp++;
+        }
+
+        // clean-up array of dirty part[fromIndex; toIndex[
+        fill(array, fromIndex, toIndex, 0f);
+
+        // fill cache:
+        floatArrays.addLast(array);
+    }
+
+    static void fill(final float[] array, final int fromIndex,
+                     final int toIndex, final float value)
+    {
+        // clear array data:
+        /*
+         * Arrays.fill is faster than System.arraycopy(empty array)
+         * or Unsafe.setMemory(byte 0)
+         */
+        if (toIndex != 0) {
+            Arrays.fill(array, fromIndex, toIndex, value);
+        }
+
+        if (doChecks) {
+            check(array, 0, array.length, value);
+        }
+    }
+
+    static void check(final float[] array, final int fromIndex,
+                      final int toIndex, final float value)
+    {
+        if (doChecks) {
+            // check zero on full array:
+            for (int i = fromIndex; i < toIndex; i++) {
+                if (array[i] != value) {
+                    logException("Invalid array value at " + i + "\n"
+                            + Arrays.toString(array), new Throwable());
+
+                    // ensure array is correctly filled:
+                    Arrays.fill(array, value);
+
+                    return;
+                }
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/FloatMath.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package sun.java2d.marlin;
+
+import sun.misc.DoubleConsts;
+import sun.misc.FloatConsts;
+
+/**
+ * Faster Math ceil / floor routines derived from StrictMath
+ */
+public final class FloatMath implements MarlinConst {
+
+    // overflow / NaN handling enabled:
+    static final boolean CHECK_OVERFLOW = true; // guards the exponent >= 23 and Integer.MAX_VALUE / MIN_VALUE tests below
+    static final boolean CHECK_NAN = true; // guards the Float.isNaN tests in ceil_int / floor_int
+
+    private FloatMath() {
+        // utility class: the private constructor prevents instantiation from outside
+    }
+
+    // faster inlined min/max functions if the branch prediction is high
+    static float max(final float a, final float b) {
+        // no NaN handling: b is returned whenever (a >= b) is false, NaN operands included
+        return (a >= b) ? a : b;
+    }
+
+    static int max(final int a, final int b) { // larger of the two ints
+        return (a >= b) ? a : b;
+    }
+
+    static int min(final int a, final int b) { // smaller of the two ints
+        return (a <= b) ? a : b;
+    }
+
+    /**
+     * Returns the smallest (closest to negative infinity) {@code float} value
+     * that is greater than or equal to the argument and is equal to a
+     * mathematical integer. Special cases:
+     * <ul><li>If the argument value is already equal to a mathematical integer,
+     * then the result is the same as the argument.  <li>If the argument is NaN
+     * or an infinity or positive zero or negative zero, then the result is the
+     * same as the argument.  <li>If the argument value is less than zero but
+     * greater than -1.0, then the result is negative zero.</ul> Note that the
+     * value of {@code StrictMath.ceil(x)} is exactly the value of
+     * {@code -StrictMath.floor(-x)}.
+     *
+     * @param a a value.
+     * @return the smallest (closest to negative infinity) floating-point value
+     * that is greater than or equal to the argument and is equal to a
+     * mathematical integer.
+     */
+    public static float ceil_f(final float a) {
+        // Derived from StrictMath.ceil(double):
+
+        // Inline call to Math.getExponent(a) to
+        // compute only once Float.floatToRawIntBits(a)
+        final int doppel = Float.floatToRawIntBits(a);
+
+        final int exponent = ((doppel & FloatConsts.EXP_BIT_MASK)
+                >> (FloatConsts.SIGNIFICAND_WIDTH - 1))
+                - FloatConsts.EXP_BIAS;
+
+        if (exponent < 0) {
+            /*
+             * Absolute value of argument is less than 1.
+             * Zeros are returned unchanged (sign preserved); otherwise
+             * ceil is -0.0 for negative values and 1.0 for positive ones.
+             */
+            return ((a == 0) ? a :
+                    ( (a < 0f) ? -0f : 1f) );
+        }
+        if (CHECK_OVERFLOW && (exponent >= 23)) { // 52 for double
+            /*
+             * Infinity, NaN, or a value so large it must be integral.
+             */
+            return a;
+        }
+        // Else the argument is either an integral value already XOR it
+        // has to be rounded to one.
+        assert exponent >= 0 && exponent <= 22; // 51 for double
+
+        final int intpart = doppel
+                & (~(FloatConsts.SIGNIF_BIT_MASK >> exponent)); // clears the fractional significand bits: truncation toward zero
+
+        if (intpart == doppel) {
+            return a; // integral value (including 0)
+        }
+
+        // 0 handled above as an integer
+        // sign bit of intpart: 1 for negative, 0 for positive numbers
+        // (~intpart) >>> 31 adds 0 for negative and 1 for positive numbers
+        return Float.intBitsToFloat(intpart) + ((~intpart) >>> 31);
+    }
+
+    /**
+     * Returns the largest (closest to positive infinity) {@code float} value
+     * that is less than or equal to the argument and is equal to a mathematical
+     * integer. Special cases:
+     * <ul><li>If the argument value is already equal to a mathematical integer,
+     * then the result is the same as the argument.  <li>If the argument is NaN
+     * or an infinity or positive zero or negative zero, then the result is the
+     * same as the argument.</ul>
+     *
+     * @param a a value.
+     * @return the largest (closest to positive infinity) floating-point value
+     * that is less than or equal to the argument and is equal to a mathematical
+     * integer.
+     */
+    public static float floor_f(final float a) {
+        // Derived from StrictMath.floor(double):
+
+        // Inline call to Math.getExponent(a) to
+        // compute only once Float.floatToRawIntBits(a)
+        final int doppel = Float.floatToRawIntBits(a);
+
+        final int exponent = ((doppel & FloatConsts.EXP_BIT_MASK)
+                >> (FloatConsts.SIGNIFICAND_WIDTH - 1))
+                - FloatConsts.EXP_BIAS;
+
+        if (exponent < 0) {
+            /*
+             * Absolute value of argument is less than 1.
+             * Zeros are returned unchanged (sign preserved); otherwise
+             * floor is -1.0 for negative values and 0.0 for positive ones.
+             */
+            return ((a == 0) ? a :
+                    ( (a < 0f) ? -1f : 0f) );
+        }
+        if (CHECK_OVERFLOW && (exponent >= 23)) { // 52 for double
+            /*
+             * Infinity, NaN, or a value so large it must be integral.
+             */
+            return a;
+        }
+        // Else the argument is either an integral value already XOR it
+        // has to be rounded to one.
+        assert exponent >= 0 && exponent <= 22; // 51 for double
+
+        final int intpart = doppel
+                & (~(FloatConsts.SIGNIF_BIT_MASK >> exponent)); // clears the fractional significand bits: truncation toward zero
+
+        if (intpart == doppel) {
+            return a; // integral value (including 0)
+        }
+
+        // 0 handled above as an integer
+        // sign bit of intpart: 1 for negative, 0 for positive numbers
+        // intpart >> 31 adds -1 for negative and 0 for positive numbers
+        return Float.intBitsToFloat(intpart) + (intpart >> 31);
+    }
+
+    /**
+     * Faster alternative to ceil(float) optimized for the integer domain
+     * and supporting NaN and +/-Infinity.
+     *
+     * @param a a value.
+     * @return the smallest (closest to negative infinity) integer value
+     * that is greater than or equal to the argument; the plain (int) cast
+     * result is returned for NaN and for values beyond the int range.
+     */
+    public static int ceil_int(final float a) {
+        final int intpart = (int) a; // truncates toward zero; NaN gives 0, out-of-range values saturate
+
+        if (a <= intpart
+                || (CHECK_OVERFLOW && intpart == Integer.MAX_VALUE)
+                || CHECK_NAN && Float.isNaN(a)) {
+            return intpart; // already >= a, or saturated, or NaN: no adjustment
+        }
+        return intpart + 1; // truncation rounded down (a > intpart): step up
+    }
+
+    /**
+     * Faster alternative to floor(float) optimized for the integer domain
+     * and supporting NaN and +/-Infinity.
+     *
+     * @param a a value.
+     * @return the largest (closest to positive infinity) integer value
+     * that is less than or equal to the argument; the plain (int) cast
+     * result is returned for NaN and for values beyond the int range.
+     */
+    public static int floor_int(final float a) {
+        final int intpart = (int) a; // truncates toward zero; NaN gives 0, out-of-range values saturate
+
+        if (a >= intpart
+                || (CHECK_OVERFLOW && intpart == Integer.MIN_VALUE)
+                || CHECK_NAN && Float.isNaN(a)) {
+            return intpart; // already <= a, or saturated, or NaN: no adjustment
+        }
+        return intpart - 1; // truncation rounded up (a < intpart): step down
+    }
+
+    /**
+     * Returns the double power of two 2^n; n must lie in the normal exponent range.
+     */
+    static double powerOfTwoD(int n) {
+        assert (n >= DoubleConsts.MIN_EXPONENT && n <= DoubleConsts.MAX_EXPONENT);
+        return Double.longBitsToDouble((((long) n + (long) DoubleConsts.EXP_BIAS) // biased exponent
+                << (DoubleConsts.SIGNIFICAND_WIDTH - 1)) // shifted into the exponent field, significand bits left at zero
+                & DoubleConsts.EXP_BIT_MASK);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Helpers.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,441 @@
+/*
+ * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import static java.lang.Math.PI;
+import static java.lang.Math.cos;
+import static java.lang.Math.sqrt;
+import static java.lang.Math.cbrt;
+import static java.lang.Math.acos;
+
+final class Helpers implements MarlinConst {
+
+    private Helpers() { // static helpers only: any attempt to instantiate fails
+        throw new Error("This is a non instantiable class");
+    }
+
+    static boolean within(final float x, final float y, final float err) { // true when y - x lies in [-err, err]
+        final float d = y - x;
+        return (d <= err && d >= -err); // false if d is NaN
+    }
+
+    static boolean within(final double x, final double y, final double err) { // true when y - x lies in [-err, err]
+        final double d = y - x;
+        return (d <= err && d >= -err); // false if d is NaN
+    }
+
+    static int quadraticRoots(final float a, final float b,
+                              final float c, float[] zeroes, final int off)
+    { // stores the real roots of a*t^2 + b*t + c starting at zeroes[off]; returns how many were stored
+        int ret = off;
+        float t;
+        if (a != 0f) {
+            final float dis = b*b - 4*a*c; // discriminant; a negative value stores no root
+            if (dis > 0f) {
+                final float sqrtDis = (float)Math.sqrt(dis);
+                // depending on the sign of b we use a slightly different
+                // algorithm than the traditional one to find one of the roots
+                // so we can avoid adding numbers of different signs (which
+                // might result in loss of precision).
+                if (b >= 0f) {
+                    zeroes[ret++] = (2f * c) / (-b - sqrtDis);
+                    zeroes[ret++] = (-b - sqrtDis) / (2f * a);
+                } else {
+                    zeroes[ret++] = (-b + sqrtDis) / (2f * a);
+                    zeroes[ret++] = (2f * c) / (-b + sqrtDis);
+                }
+            } else if (dis == 0f) { // double root, stored once
+                t = (-b) / (2f * a);
+                zeroes[ret++] = t;
+            }
+        } else { // a == 0: the equation is linear, b*t + c = 0
+            if (b != 0f) {
+                t = (-c) / b;
+                zeroes[ret++] = t;
+            }
+        }
+        return ret - off; // number of roots written: 0, 1 or 2
+    }
+
+    // find the roots of g(t) = d*t^3 + a*t^2 + b*t + c in [A,B)
+    static int cubicRootsInAB(float d, float a, float b, float c,
+                              float[] pts, final int off,
+                              final float A, final float B)
+    { // roots are written to pts starting at off (up to 3 slots are used); returns how many lie in [A,B)
+        if (d == 0f) { // no cubic term: solve as a quadratic
+            int num = quadraticRoots(a, b, c, pts, off);
+            return filterOutNotInAB(pts, off, num, A, B) - off;
+        }
+        // From Graphics Gems:
+        // http://tog.acm.org/resources/GraphicsGems/gems/Roots3And4.c
+        // (also from awt.geom.CubicCurve2D. But here we don't need as
+        // much accuracy and we don't want to create arrays so we use
+        // our own customized version).
+
+        // normal form: x^3 + ax^2 + bx + c = 0
+        a /= d;
+        b /= d;
+        c /= d;
+
+        //  substitute x = y - A/3 to eliminate quadratic term:
+        //     x^3 +Px + Q = 0
+        //
+        // Since we actually need P/3 and Q/2 for all of the
+        // calculations that follow, we will calculate
+        // p = P/3
+        // q = Q/2
+        // instead and use those values for simplicity of the code.
+        double sq_A = a * a; // product is computed in float, then widened to double
+        double p = (1.0/3.0) * ((-1.0/3.0) * sq_A + b);
+        double q = (1.0/2.0) * ((2.0/27.0) * a * sq_A - (1.0/3.0) * a * b + c);
+
+        // use Cardano's formula
+
+        double cb_p = p * p * p;
+        double D = q * q + cb_p; // sign of D selects the branch below
+
+        int num;
+        if (D < 0.0) { // three roots via the trigonometric method
+            // see: http://en.wikipedia.org/wiki/Cubic_function#Trigonometric_.28and_hyperbolic.29_method
+            final double phi = (1.0/3.0) * acos(-q / sqrt(-cb_p));
+            final double t = 2.0 * sqrt(-p);
+
+            pts[ off+0 ] =  (float)( t * cos(phi));
+            pts[ off+1 ] =  (float)(-t * cos(phi + (PI / 3.0)));
+            pts[ off+2 ] =  (float)(-t * cos(phi - (PI / 3.0)));
+            num = 3;
+        } else { // D >= 0: one root from the cube-root form
+            final double sqrt_D = sqrt(D);
+            final double u = cbrt(sqrt_D - q);
+            final double v = - cbrt(sqrt_D + q);
+
+            pts[ off ] = (float)(u + v);
+            num = 1;
+
+            if (within(D, 0.0, 1e-8)) { // D is nearly zero: a second root is also reported
+                pts[off+1] = -(pts[off] / 2f);
+                num = 2;
+            }
+        }
+
+        final float sub = (1f/3f) * a;
+
+        for (int i = 0; i < num; ++i) { // undo the substitution: x = y - a/3
+            pts[ off+i ] -= sub;
+        }
+
+        return filterOutNotInAB(pts, off, num, A, B) - off;
+    }
+
+    static float evalCubic(final float a, final float b,
+                           final float c, final float d,
+                           final float t)
+    {
+        return t * (t * (t * a + b) + c) + d; // Horner form of a*t^3 + b*t^2 + c*t + d
+    }
+
+    static float evalQuad(final float a, final float b,
+                          final float c, final float t)
+    {
+        return t * (t * a + b) + c; // Horner form of a*t^2 + b*t + c
+    }
+
+    // compacts nums[off, off + len) in place, keeping only values in [a, b); returns the index 1 past the last kept element
+    static int filterOutNotInAB(float[] nums, final int off, final int len,
+                                final float a, final float b)
+    {
+        int ret = off; // next write position
+        for (int i = off, end = off + len; i < end; i++) {
+            if (nums[i] >= a && nums[i] < b) {
+                nums[ret++] = nums[i]; // kept values retain their relative order
+            }
+        }
+        return ret;
+    }
+
+    static float polyLineLength(float[] poly, final int off, final int nCoords) { // total length of the polyline stored as x,y pairs in poly[off, off + nCoords)
+        assert nCoords % 2 == 0 && poly.length >= off + nCoords : "";
+        float acc = 0;
+        for (int i = off + 2; i < off + nCoords; i += 2) { // starts at the second point
+            acc += linelen(poly[i], poly[i+1], poly[i-2], poly[i-1]); // segment from the previous point to point i
+        }
+        return acc;
+    }
+
+    static float linelen(float x1, float y1, float x2, float y2) { // Euclidean distance between (x1, y1) and (x2, y2)
+        final float dx = x2 - x1;
+        final float dy = y2 - y1;
+        return (float)Math.sqrt(dx*dx + dy*dy);
+    }
+
+    static void subdivide(float[] src, int srcoff, float[] left, int leftoff,
+                          float[] right, int rightoff, int type)
+    { // dispatches on type to the matching subdivision routine
+        switch(type) {
+        case 6: // quadratic curve: 6 coordinates
+            Helpers.subdivideQuad(src, srcoff, left, leftoff, right, rightoff);
+            return;
+        case 8: // cubic curve: 8 coordinates
+            Helpers.subdivideCubic(src, srcoff, left, leftoff, right, rightoff);
+            return;
+        default: // any other type is rejected
+            throw new InternalError("Unsupported curve type");
+        }
+    }
+
+    static void isort(float[] a, int off, int len) { // in-place ascending insertion sort of a[off, off + len)
+        for (int i = off + 1, end = off + len; i < end; i++) {
+            float ai = a[i]; // value to insert into the sorted prefix a[off, i)
+            int j = i - 1;
+            for (; j >= off && a[j] > ai; j--) {
+                a[j+1] = a[j]; // shift larger values one slot to the right
+            }
+            a[j+1] = ai;
+        }
+    }
+
+    // Most of these are copied from classes in java.awt.geom because we need
+    // float versions of these functions, and Line2D, CubicCurve2D,
+    // QuadCurve2D don't provide them.
+    /**
+     * Subdivides the cubic curve specified by the coordinates
+     * stored in the <code>src</code> array at indices <code>srcoff</code>
+     * through (<code>srcoff</code>&nbsp;+&nbsp;7) and stores the
+     * resulting two subdivided curves into the two result arrays at the
+     * corresponding indices.
+     * Either or both of the <code>left</code> and <code>right</code>
+     * arrays may be <code>null</code> or a reference to the same array
+     * as the <code>src</code> array.
+     * Note that the last point in the first subdivided curve is the
+     * same as the first point in the second subdivided curve. Thus,
+     * it is possible to pass the same array for <code>left</code>
+     * and <code>right</code> and to use offsets, such as <code>rightoff</code>
+     * equals (<code>leftoff</code> + 6), in order
+     * to avoid allocating extra storage for this common point.
+     * @param src the array holding the coordinates for the source curve
+     * @param srcoff the offset into the array of the beginning of the
+     * 8 source coordinates
+     * @param left the array for storing the coordinates for the first
+     * half of the subdivided curve
+     * @param leftoff the offset into the array of the beginning of the
+     * 8 left coordinates
+     * @param right the array for storing the coordinates for the second
+     * half of the subdivided curve
+     * @param rightoff the offset into the array of the beginning of the
+     * 8 right coordinates
+     * @since 1.7
+     */
+    static void subdivideCubic(float src[], int srcoff,
+                               float left[], int leftoff,
+                               float right[], int rightoff)
+    { // all source values are read into locals before any write, which is what allows aliased arrays
+        float x1 = src[srcoff + 0];
+        float y1 = src[srcoff + 1];
+        float ctrlx1 = src[srcoff + 2];
+        float ctrly1 = src[srcoff + 3];
+        float ctrlx2 = src[srcoff + 4];
+        float ctrly2 = src[srcoff + 5];
+        float x2 = src[srcoff + 6];
+        float y2 = src[srcoff + 7];
+        if (left != null) { // left half starts at the source start point
+            left[leftoff + 0] = x1;
+            left[leftoff + 1] = y1;
+        }
+        if (right != null) { // right half ends at the source end point
+            right[rightoff + 6] = x2;
+            right[rightoff + 7] = y2;
+        }
+        x1 = (x1 + ctrlx1) / 2f; // midpoint of start point and first control point
+        y1 = (y1 + ctrly1) / 2f;
+        x2 = (x2 + ctrlx2) / 2f; // midpoint of end point and second control point
+        y2 = (y2 + ctrly2) / 2f;
+        float centerx = (ctrlx1 + ctrlx2) / 2f; // midpoint of the two control points
+        float centery = (ctrly1 + ctrly2) / 2f;
+        ctrlx1 = (x1 + centerx) / 2f; // second level of midpoints
+        ctrly1 = (y1 + centery) / 2f;
+        ctrlx2 = (x2 + centerx) / 2f;
+        ctrly2 = (y2 + centery) / 2f;
+        centerx = (ctrlx1 + ctrlx2) / 2f; // third level: the point shared by both halves
+        centery = (ctrly1 + ctrly2) / 2f;
+        if (left != null) {
+            left[leftoff + 2] = x1;
+            left[leftoff + 3] = y1;
+            left[leftoff + 4] = ctrlx1;
+            left[leftoff + 5] = ctrly1;
+            left[leftoff + 6] = centerx;
+            left[leftoff + 7] = centery;
+        }
+        if (right != null) {
+            right[rightoff + 0] = centerx;
+            right[rightoff + 1] = centery;
+            right[rightoff + 2] = ctrlx2;
+            right[rightoff + 3] = ctrly2;
+            right[rightoff + 4] = x2;
+            right[rightoff + 5] = y2;
+        }
+    }
+
+
+    static void subdivideCubicAt(float t, float src[], int srcoff,
+                                 float left[], int leftoff,
+                                 float right[], int rightoff)
+    { // same scheme as subdivideCubic, but each midpoint is replaced by linear interpolation at parameter t
+        float x1 = src[srcoff + 0];
+        float y1 = src[srcoff + 1];
+        float ctrlx1 = src[srcoff + 2];
+        float ctrly1 = src[srcoff + 3];
+        float ctrlx2 = src[srcoff + 4];
+        float ctrly2 = src[srcoff + 5];
+        float x2 = src[srcoff + 6];
+        float y2 = src[srcoff + 7];
+        if (left != null) { // left part starts at the source start point
+            left[leftoff + 0] = x1;
+            left[leftoff + 1] = y1;
+        }
+        if (right != null) { // right part ends at the source end point
+            right[rightoff + 6] = x2;
+            right[rightoff + 7] = y2;
+        }
+        x1 = x1 + t * (ctrlx1 - x1); // start point toward first control point
+        y1 = y1 + t * (ctrly1 - y1);
+        x2 = ctrlx2 + t * (x2 - ctrlx2); // second control point toward end point
+        y2 = ctrly2 + t * (y2 - ctrly2);
+        float centerx = ctrlx1 + t * (ctrlx2 - ctrlx1); // first control point toward second
+        float centery = ctrly1 + t * (ctrly2 - ctrly1);
+        ctrlx1 = x1 + t * (centerx - x1); // second level of interpolation
+        ctrly1 = y1 + t * (centery - y1);
+        ctrlx2 = centerx + t * (x2 - centerx);
+        ctrly2 = centery + t * (y2 - centery);
+        centerx = ctrlx1 + t * (ctrlx2 - ctrlx1); // third level: the point shared by both parts
+        centery = ctrly1 + t * (ctrly2 - ctrly1);
+        if (left != null) {
+            left[leftoff + 2] = x1;
+            left[leftoff + 3] = y1;
+            left[leftoff + 4] = ctrlx1;
+            left[leftoff + 5] = ctrly1;
+            left[leftoff + 6] = centerx;
+            left[leftoff + 7] = centery;
+        }
+        if (right != null) {
+            right[rightoff + 0] = centerx;
+            right[rightoff + 1] = centery;
+            right[rightoff + 2] = ctrlx2;
+            right[rightoff + 3] = ctrly2;
+            right[rightoff + 4] = x2;
+            right[rightoff + 5] = y2;
+        }
+    }
+
+    static void subdivideQuad(float src[], int srcoff,
+                              float left[], int leftoff,
+                              float right[], int rightoff)
+    { // splits the quadratic curve in src[srcoff, srcoff + 6) into two halves; left or right may be null to skip that half
+        float x1 = src[srcoff + 0];
+        float y1 = src[srcoff + 1];
+        float ctrlx = src[srcoff + 2];
+        float ctrly = src[srcoff + 3];
+        float x2 = src[srcoff + 4];
+        float y2 = src[srcoff + 5];
+        if (left != null) { // left half starts at the source start point
+            left[leftoff + 0] = x1;
+            left[leftoff + 1] = y1;
+        }
+        if (right != null) { // right half ends at the source end point
+            right[rightoff + 4] = x2;
+            right[rightoff + 5] = y2;
+        }
+        x1 = (x1 + ctrlx) / 2f; // midpoint of start point and control point
+        y1 = (y1 + ctrly) / 2f;
+        x2 = (x2 + ctrlx) / 2f; // midpoint of end point and control point
+        y2 = (y2 + ctrly) / 2f;
+        ctrlx = (x1 + x2) / 2f; // midpoint of those two: the point shared by both halves
+        ctrly = (y1 + y2) / 2f;
+        if (left != null) {
+            left[leftoff + 2] = x1;
+            left[leftoff + 3] = y1;
+            left[leftoff + 4] = ctrlx;
+            left[leftoff + 5] = ctrly;
+        }
+        if (right != null) {
+            right[rightoff + 0] = ctrlx;
+            right[rightoff + 1] = ctrly;
+            right[rightoff + 2] = x2;
+            right[rightoff + 3] = y2;
+        }
+    }
+
+    static void subdivideQuadAt(float t, float src[], int srcoff,
+                                float left[], int leftoff,
+                                float right[], int rightoff)
+    { // same scheme as subdivideQuad, but each midpoint is replaced by linear interpolation at parameter t
+        float x1 = src[srcoff + 0];
+        float y1 = src[srcoff + 1];
+        float ctrlx = src[srcoff + 2];
+        float ctrly = src[srcoff + 3];
+        float x2 = src[srcoff + 4];
+        float y2 = src[srcoff + 5];
+        if (left != null) { // left part starts at the source start point
+            left[leftoff + 0] = x1;
+            left[leftoff + 1] = y1;
+        }
+        if (right != null) { // right part ends at the source end point
+            right[rightoff + 4] = x2;
+            right[rightoff + 5] = y2;
+        }
+        x1 = x1 + t * (ctrlx - x1); // start point toward control point
+        y1 = y1 + t * (ctrly - y1);
+        x2 = ctrlx + t * (x2 - ctrlx); // control point toward end point
+        y2 = ctrly + t * (y2 - ctrly);
+        ctrlx = x1 + t * (x2 - x1); // second level: the point shared by both parts
+        ctrly = y1 + t * (y2 - y1);
+        if (left != null) {
+            left[leftoff + 2] = x1;
+            left[leftoff + 3] = y1;
+            left[leftoff + 4] = ctrlx;
+            left[leftoff + 5] = ctrly;
+        }
+        if (right != null) {
+            right[rightoff + 0] = ctrlx;
+            right[rightoff + 1] = ctrly;
+            right[rightoff + 2] = x2;
+            right[rightoff + 3] = y2;
+        }
+    }
+
+    static void subdivideAt(float t, float src[], int srcoff,
+                            float left[], int leftoff,
+                            float right[], int rightoff, int size)
+    { // dispatches on size to the matching subdivision-at-t routine
+        switch(size) { // no default case: any other size is silently ignored, whereas subdivide throws InternalError
+        case 8: // cubic curve: 8 coordinates
+            subdivideCubicAt(t, src, srcoff, left, leftoff, right, rightoff);
+            return;
+        case 6: // quadratic curve: 6 coordinates
+            subdivideQuadAt(t, src, srcoff, left, leftoff, right, rightoff);
+            return;
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/IntArrayCache.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.util.ArrayDeque;
+import java.util.Arrays;
+import static sun.java2d.marlin.MarlinUtils.logException;
+import static sun.java2d.marlin.MarlinUtils.logInfo;
+
+final class IntArrayCache implements MarlinConst {
+
+    private final int arraySize; // length of every array created or accepted by this cache
+    private final ArrayDeque<int[]> intArrays; // cached arrays, used as a stack (addLast / pollLast)
+    // stats
+    private int getOp = 0; // calls to getArray, counted when doStats
+    private int createOp = 0; // arrays allocated by getArray on a cache miss, counted when doStats
+    private int returnOp = 0; // arrays accepted by putDirtyArray / putArray, counted when doStats
+
+    void dumpStats() { // logs the usage counters and the current cache size through logInfo
+        if (getOp > 0) { // nothing is logged when no get was counted
+            logInfo("IntArrayCache[" + arraySize + "]: get: " + getOp
+                    + " created: " + createOp + " - returned: " + returnOp
+                    + " :: cache size: " + intArrays.size());
+        }
+    }
+
+    IntArrayCache(final int arraySize) { // creates an empty cache for int arrays of length arraySize
+        this.arraySize = arraySize;
+        // small but enough: almost 1 cache line (6 is only the initial capacity of the deque)
+        this.intArrays = new ArrayDeque<int[]>(6);
+    }
+
+    int[] getArray() { // returns a cached array when one is available, else a new int[arraySize]
+        if (doStats) {
+            getOp++;
+        }
+
+        // use cache: take the most recently returned array, if any
+        final int[] array = intArrays.pollLast();
+        if (array != null) {
+            return array;
+        }
+
+        if (doStats) {
+            createOp++;
+        }
+
+        return new int[arraySize]; // cache miss: allocate a fresh zero-filled array
+    }
+
+    void putDirtyArray(final int[] array, final int length) { // returns array to this cache without cleaning it, unless doCleanDirty
+        if (length != arraySize) { // only arrays whose length equals arraySize are accepted
+            if (doChecks) {
+                System.out.println("ArrayCache: bad length = " + length);
+            }
+            return; // a rejected array is not cached
+        }
+        if (doStats) {
+            returnOp++;
+        }
+
+        // NO clean-up of array data = DIRTY ARRAY
+
+        if (doCleanDirty) {
+            // Force zero-fill dirty arrays:
+            Arrays.fill(array, 0, array.length, 0);
+        }
+
+        // fill cache:
+        intArrays.addLast(array);
+    }
+
+    void putArray(final int[] array, final int length,
+                  final int fromIndex, final int toIndex)
+    { // returns array to this cache after zeroing its dirty range
+        if (length != arraySize) { // only arrays whose length equals arraySize are accepted
+            if (doChecks) {
+                System.out.println("ArrayCache: bad length = " + length);
+            }
+            return; // a rejected array is not cached
+        }
+        if (doStats) {
+            returnOp++;
+        }
+
+        // zero-fill the dirty range [fromIndex, toIndex) before caching:
+        fill(array, fromIndex, toIndex, 0);
+
+        // fill cache:
+        intArrays.addLast(array);
+    }
+
+    static void fill(final int[] array, final int fromIndex,
+                     final int toIndex, final int value)
+    { // writes value into array[fromIndex, toIndex), then verifies the whole array when doChecks
+        // clear array data:
+        /*
+         * Arrays.fill is faster than System.arraycopy(empty array)
+         * or Unsafe.setMemory(byte 0)
+         */
+        if (toIndex != 0) { // toIndex == 0 is treated as nothing to fill
+            Arrays.fill(array, fromIndex, toIndex, value);
+        }
+
+        if (doChecks) {
+            check(array, 0, array.length, value); // every element is checked, not only the filled range
+        }
+    }
+
+    static void check(final int[] array, final int fromIndex,
+                      final int toIndex, final int value)
+    { // verifies that array[fromIndex, toIndex) holds only value; does nothing unless doChecks
+        if (doChecks) {
+            // scan the requested range for the first element that differs from value:
+            for (int i = fromIndex; i < toIndex; i++) {
+                if (array[i] != value) {
+                    logException("Invalid array value at " + i + "\n"
+                            + Arrays.toString(array), new Throwable());
+
+                    // repair: overwrite the whole array (not only the range) with value
+                    Arrays.fill(array, value);
+
+                    return; // only the first mismatch is reported
+                }
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinCache.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,676 @@
+/*
+ * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import jdk.internal.misc.Unsafe;
+
+/**
+ * An object used to cache pre-rendered complex paths.
+ *
+ * @see Renderer
+ */
+public final class MarlinCache implements MarlinConst {
+
+    static final boolean FORCE_RLE = MarlinProperties.isForceRLE(); // always use RLE (see init)
+    static final boolean FORCE_NO_RLE = MarlinProperties.isForceNoRLE(); // never use RLE; tested before FORCE_RLE in init
+    // minimum width to try using RLE encoding (never below BLOCK_SIZE):
+    static final int RLE_MIN_WIDTH
+        = Math.max(BLOCK_SIZE, MarlinProperties.getRLEMinWidth());
+    // maximum width (exclusive) for RLE encoding:
+    // entries are stored as int [x|alpha] where alpha uses the 8 low bits
+    static final int RLE_MAX_WIDTH = 1 << (24 - 1);
+
+    // 2048 (pixelSize) alpha values (width) x 32 rows (tile) = 64K bytes
+    // x1 instead of 4 bytes (RLE) ie 1/4 capacity or average good RLE compression
+    static final long INITIAL_CHUNK_ARRAY = TILE_SIZE * INITIAL_PIXEL_DIM; // 64K
+
+    // The alpha map used to convert pixel coverage counts
+    // (which are in the range [0, MAX_AA_ALPHA])
+    // into alpha values, which are in [0,256).
+    static final byte[] ALPHA_MAP;
+
+    static final OffHeapArray ALPHA_MAP_UNSAFE; // off-heap copy of ALPHA_MAP
+
+    static {
+        final byte[] alphaTable = buildAlphaMap(MAX_AA_ALPHA);
+        final int tableLen = alphaTable.length;
+        ALPHA_MAP = alphaTable;
+        ALPHA_MAP_UNSAFE = new OffHeapArray(alphaTable, tableLen);
+
+        // mirror the on-heap table into the off-heap array, byte per byte:
+        final Unsafe unsafe = OffHeapArray.unsafe;
+        final long base = ALPHA_MAP_UNSAFE.address;
+        for (int idx = 0; idx < tableLen; idx++) {
+            unsafe.putByte(base + idx, alphaTable[idx]);
+        }
+    }
+
+    int bboxX0, bboxY0, bboxX1, bboxY1; // bounding box set by init(); bboxY0 is moved by resetTileLine()
+
+    // 1D dirty arrays
+    // position (bytes) of each row's data in rowAAChunk
+    final long[] rowAAChunkIndex = new long[TILE_SIZE];
+    // first pixel (inclusive) for each row
+    final int[] rowAAx0 = new int[TILE_SIZE];
+    // last pixel (exclusive) for each row
+    final int[] rowAAx1 = new int[TILE_SIZE];
+    // encoding mode (0=raw, 1=RLE encoding) for each row
+    final int[] rowAAEnc = new int[TILE_SIZE];
+    // coded length in bytes (RLE encoding) for each row
+    final long[] rowAALen = new long[TILE_SIZE];
+    // last position in RLE decoding for each row (getAlpha):
+    final long[] rowAAPos = new long[TILE_SIZE];
+
+    // dirty off-heap array containing pixel coverages for (32) rows (packed)
+    // if encoding=raw, it contains alpha values (val) stored as 1 byte per pixel
+    // if encoding=RLE, it contains ints packed as [last x-coordinate exclusive | alpha]
+    // use rowAAChunkIndex to locate a row and rowAAx0/rowAAx1 for its pixel range
+    final OffHeapArray rowAAChunk;
+
+    // current position (bytes) in rowAAChunk array
+    long rowAAChunkPos;
+
+    // touchedTile[i] is the sum of all the coverage values in the tile with
+    // x=i*TILE_SIZE+bboxX0.
+    int[] touchedTile;
+
+    // per-thread renderer context
+    final RendererContext rdrCtx;
+
+    // initial (preallocated) touchedTile array (dirty)
+    final int[] touchedTile_initial = new int[INITIAL_ARRAY]; // 1 tile line
+
+    int tileMin, tileMax; // dirty range [tileMin; tileMax[ of touchedTile
+
+    boolean useRLE = false; // decided for each shape by init()
+
+    MarlinCache(final RendererContext rdrCtx) {
+        // no tile touched yet => empty [tileMin; tileMax[ range:
+        this.tileMin = Integer.MAX_VALUE;
+        this.tileMax = Integer.MIN_VALUE;
+
+        // start with the preallocated tile line:
+        this.touchedTile = this.touchedTile_initial;
+
+        this.rdrCtx = rdrCtx;
+        this.rowAAChunk = new OffHeapArray(rdrCtx, INITIAL_CHUNK_ARRAY);
+    }
+
+    // Prepares this cache for the bbox (minx, miny, maxx, maxy): selects RLE or
+    // raw row encoding and sizes touchedTile (one entry per tile along x).
+    void init(int minx, int miny, int maxx, int maxy, int edgeSumDeltaY)
+    {
+        // assert maxy >= miny && maxx >= minx;
+        bboxX0 = minx;
+        bboxY0 = miny;
+        bboxX1 = maxx;
+        bboxY1 = maxy;
+
+        final int width = (maxx - minx);
+
+        if (FORCE_NO_RLE) {
+            useRLE = false;
+        } else if (FORCE_RLE) {
+            useRLE = true;
+        } else {
+            // heuristics: use both bbox area and complexity
+            // ie number of primitives:
+
+            // fast check min and max width (maxx < 23bits):
+            if (width <= RLE_MIN_WIDTH || width >= RLE_MAX_WIDTH) {
+                useRLE = false;
+            } else {
+                // perimeter approach: how fit the total length into given height:
+
+                // if stroking: meanCrossings /= 2 => divide edgeSumDeltaY by 2
+                final int heightSubPixel
+                    = (((maxy - miny) << SUBPIXEL_LG_POSITIONS_Y) << rdrCtx.stroking);
+
+                // check meanDist > block size, ie width / (meanCrossings - 1) > BLOCK_SIZE
+                // (rewritten to avoid division); long math is required as the int
+                // product (width * heightSubPixel) overflows for large shapes:
+                final long area = ((long) width) * heightSubPixel;
+                final long criteria
+                    = ((long) (edgeSumDeltaY - heightSubPixel)) << BLOCK_SIZE_LG;
+
+                // fast case: (meanCrossingPerPixel <= 2) means 1 span only
+                useRLE = (edgeSumDeltaY <= (heightSubPixel << 1)) || (area > criteria);
+
+                if (doTrace && !useRLE) {
+                    final float meanCrossings
+                        = ((float) edgeSumDeltaY) / heightSubPixel;
+                    final float meanDist = width / (meanCrossings - 1);
+
+                    System.out.println("High complexity: "
+                        + " for bbox[width = " + width
+                        + " height = " + (maxy - miny)
+                        + "] edgeSumDeltaY = " + edgeSumDeltaY
+                        + " heightSubPixel = " + heightSubPixel
+                        + " meanCrossings = "+ meanCrossings
+                        + " meanDist = " + meanDist
+                        + " width =  " + area
+                        + " <= criteria:  " + criteria
+                    );
+                }
+            }
+        }
+
+        // number of tiles along x: (width / TILE_SIZE) + 1
+        final int nxTiles = (width + TILE_SIZE) >> TILE_SIZE_LG;
+
+        if (nxTiles > INITIAL_ARRAY) {
+            if (doStats) {
+                RendererContext.stats.stat_array_marlincache_touchedTile
+                    .add(nxTiles);
+            }
+            touchedTile = rdrCtx.getIntArray(nxTiles);
+        }
+    }
+
+    /**
+     * Disposes this cache:
+     * clean up before reusing this instance
+     */
+    void dispose() {
+        // Reset touchedTile if needed:
+        resetTileLine(0);
+
+        // Return arrays:
+        if (touchedTile != touchedTile_initial) {
+            rdrCtx.putIntArray(touchedTile, 0, 0); // already zero filled
+            touchedTile = touchedTile_initial;
+        }
+        // At last: resize back off-heap rowAA to initial size
+        if (rowAAChunk.length != INITIAL_CHUNK_ARRAY) {
+            // note: may throw OOME:
+            rowAAChunk.resize(INITIAL_CHUNK_ARRAY);
+        }
+        if (doCleanDirty) {
+            // Force zero-fill dirty arrays:
+            rowAAChunk.fill(BYTE_0);
+        }
+    }
+
+    void resetTileLine(final int pminY) {
+        // update bboxY0 to process a complete tile line [0 - 32]
+        bboxY0 = pminY;
+
+        // reset current pos
+        if (doStats) {
+            RendererContext.stats.stat_cache_rowAAChunk.add(rowAAChunkPos);
+        }
+        rowAAChunkPos = 0L;
+
+        // Reset touchedTile:
+        if (tileMin != Integer.MAX_VALUE) {
+            if (doStats) {
+                RendererContext.stats.stat_cache_tiles.add(tileMax - tileMin);
+            }
+            // clean only dirty touchedTile:
+            if (tileMax == 1) {
+                touchedTile[0] = 0;
+            } else {
+                IntArrayCache.fill(touchedTile, tileMin, tileMax, 0);
+            }
+            // reset tile used marks:
+            tileMin = Integer.MAX_VALUE;
+            tileMax = Integer.MIN_VALUE;
+        }
+
+        if (doCleanDirty) {
+            // Force zero-fill dirty arrays:
+            rowAAChunk.fill(BYTE_0);
+        }
+    }
+
+    // Marks the row at pixel line y as empty (raw encoding, no pixel).
+    void clearAARow(final int y) {
+        // row index within the current tile line [0 - 32]
+        final int rowIdx = y - bboxY0;
+
+        // raw encoding with the empty pixel range [0; 0[:
+        rowAAEnc[rowIdx] = 0;
+        rowAAx1[rowIdx] = 0;
+        rowAAx0[rowIdx] = 0;
+
+        // rowAAChunkIndex, rowAALen and rowAAPos are left untouched
+    }
+
+    /**
+     * Copy the given alpha data into the rowAA cache (raw encoding: 1 alpha byte per pixel)
+     * @param alphaRow coverage deltas to accumulate and copy from (its used range is zeroed on return)
+     * @param y y pixel coordinate
+     * @param px0 first pixel inclusive x0
+     * @param px1 last pixel exclusive x1 (clamped to bboxX1)
+     */
+    void copyAARowNoRLE(final int[] alphaRow, final int y,
+                   final int px0, final int px1)
+    {
+        if (doMonitors) {
+            RendererContext.stats.mon_rdr_copyAARow.start();
+        }
+
+        // skip useless pixels above boundary
+        final int px_bbox1 = FloatMath.min(px1, bboxX1);
+
+        if (doLogBounds) {
+            MarlinUtils.logInfo("row = [" + px0 + " ... " + px_bbox1
+                                + " (" + px1 + ") [ for y=" + y);
+        }
+
+        final int row = y - bboxY0; // row index within the current tile line
+
+        // update pixel range:
+        rowAAx0[row]  = px0;      // first pixel inclusive
+        rowAAx1[row]  = px_bbox1; //  last pixel exclusive
+        rowAAEnc[row] = 0; // raw encoding
+
+        // get current position (bytes):
+        final long pos = rowAAChunkPos;
+        // update row index to current position:
+        rowAAChunkIndex[row] = pos;
+
+        // determine need array size (may overflow): 1 byte per pixel
+        final long needSize = pos + (px_bbox1 - px0);
+
+        // update next position (bytes):
+        rowAAChunkPos = needSize;
+
+        // update row data:
+        final OffHeapArray _rowAAChunk = rowAAChunk;
+        // ensure rowAAChunk capacity:
+        if (_rowAAChunk.length < needSize) {
+            expandRowAAChunk(needSize);
+        }
+        if (doStats) {
+            RendererContext.stats.stat_cache_rowAA.add(px_bbox1 - px0);
+        }
+
+        // rowAA contains only alpha values for range[x0; x1[
+        final int[] _touchedTile = touchedTile;
+        final int _TILE_SIZE_LG = TILE_SIZE_LG;
+
+        final int from = px0      - bboxX0; // first pixel inclusive
+        final int to   = px_bbox1 - bboxX0; //  last pixel exclusive
+
+        final Unsafe _unsafe = OffHeapArray.unsafe;
+        final long SIZE_BYTE = 1L;
+        final long addr_alpha = ALPHA_MAP_UNSAFE.address;
+        long addr_off = _rowAAChunk.address + pos; // address is read after the possible resize above
+
+        // compute alpha sum into rowAA (val = running sum of the deltas):
+        for (int x = from, val = 0; x < to; x++) {
+            // alphaRow is in [0; MAX_COVERAGE]
+            val += alphaRow[x]; // [from; to[
+
+            // ensure values are in [0; MAX_AA_ALPHA] range
+            if (DO_AA_RANGE_CHECK) {
+                if (val < 0) {
+                    System.out.println("Invalid coverage = " + val);
+                    val = 0;
+                }
+                if (val > MAX_AA_ALPHA) {
+                    System.out.println("Invalid coverage = " + val);
+                    val = MAX_AA_ALPHA;
+                }
+            }
+
+            // store alpha sum (as byte):
+            if (val == 0) {
+                _unsafe.putByte(addr_off, (byte)0); // [0..255]
+            } else {
+                _unsafe.putByte(addr_off, _unsafe.getByte(addr_alpha + val)); // [0..255]
+
+                // update touchedTile (with the coverage, not the mapped alpha)
+                _touchedTile[x >> _TILE_SIZE_LG] += val;
+            }
+            addr_off += SIZE_BYTE;
+        }
+
+        // update tile used marks:
+        int tx = from >> _TILE_SIZE_LG; // inclusive
+        if (tx < tileMin) {
+            tileMin = tx;
+        }
+
+        tx = ((to - 1) >> _TILE_SIZE_LG) + 1; // exclusive (+1 to be sure)
+        if (tx > tileMax) {
+            tileMax = tx;
+        }
+
+        if (doLogBounds) {
+            MarlinUtils.logInfo("clear = [" + from + " ... " + to + "[");
+        }
+
+        // Clear alpha row for reuse (up to the unclamped px1):
+        IntArrayCache.fill(alphaRow, from, px1 - bboxX0, 0);
+
+        if (doMonitors) {
+            RendererContext.stats.mon_rdr_copyAARow.stop();
+        }
+    }
+
+    void copyAARowRLE_WithBlockFlags(final int[] blkFlags, final int[] alphaRow,
+                      final int y, final int px0, final int px1)
+    {
+        if (doMonitors) {
+            RendererContext.stats.mon_rdr_copyAARow.start();
+        }
+
+        // Encode the row [px0; px1[ as RLE entries into rowAAChunk; only flagged blocks are scanned
+        final int _bboxX0 = bboxX0;
+
+        // process tile line [0 - 32]
+        final int row  = y - bboxY0;
+        final int from = px0 - _bboxX0; // first pixel inclusive
+
+        // skip useless pixels above boundary
+        final int px_bbox1 = FloatMath.min(px1, bboxX1);
+        final int to       = px_bbox1 - _bboxX0; //  last pixel exclusive
+
+        if (doLogBounds) {
+            MarlinUtils.logInfo("row = [" + px0 + " ... " + px_bbox1
+                                + " (" + px1 + ") [ for y=" + y);
+        }
+
+        // get current position:
+        final long initialPos = startRLERow(row, px0, px_bbox1);
+
+        // determine need array size:
+        // pessimistic: max needed size = deltaX x 4 (1 int per pixel)
+        final int maxLen = (to - from);
+        final long needSize = initialPos + (maxLen << 2);
+
+        // update row data:
+        OffHeapArray _rowAAChunk = rowAAChunk;
+        // ensure rowAAChunk capacity:
+        if (_rowAAChunk.length < needSize) {
+            expandRowAAChunk(needSize);
+        }
+
+        final Unsafe _unsafe = OffHeapArray.unsafe;
+        final long SIZE_INT = 4L;
+        final long addr_alpha = ALPHA_MAP_UNSAFE.address;
+        long addr_off = _rowAAChunk.address + initialPos; // address is read after the possible resize above
+
+        final int[] _touchedTile = touchedTile;
+        final int _TILE_SIZE_LG = TILE_SIZE_LG;
+        final int _BLK_SIZE_LG  = BLOCK_SIZE_LG;
+
+        // traverse flagged blocks:
+        final int blkW = (from >> _BLK_SIZE_LG);
+        final int blkE = (to   >> _BLK_SIZE_LG) + 1;
+
+        // Perform run-length encoding and store results in rowAAChunk
+        int val = 0; // running coverage sum
+        int cx0 = from; // first pixel of the current run
+        int runLen;
+
+        final int _MAX_VALUE = Integer.MAX_VALUE;
+        int last_t0 = _MAX_VALUE; // first block of the pending flagged range (_MAX_VALUE = none)
+
+        int skip = 0; // unflagged blocks met outside a pending range (stats only)
+
+        for (int t = blkW, blk_x0, blk_x1, cx, delta; t <= blkE; t++) {
+            if (blkFlags[t] != 0) {
+                blkFlags[t] = 0; // clear the flag for reuse
+
+                if (last_t0 == _MAX_VALUE) {
+                    last_t0 = t;
+                }
+                continue;
+            }
+            if (last_t0 != _MAX_VALUE) {
+                // emit blocks:
+                blk_x0 = FloatMath.max(last_t0 << _BLK_SIZE_LG, from);
+                last_t0 = _MAX_VALUE;
+
+                // (last block pixel+1) inclusive => +1
+                blk_x1 = FloatMath.min((t << _BLK_SIZE_LG) + 1, to);
+
+                for (cx = blk_x0; cx < blk_x1; cx++) {
+                    if ((delta = alphaRow[cx]) != 0) {
+                        alphaRow[cx] = 0; // clear the delta for reuse
+
+                        // not first rle entry:
+                        if (cx != cx0) {
+                            runLen = cx - cx0;
+
+                            // store alpha coverage (ensure within bounds):
+                            // as [absX|val] where:
+                            // absX is the absolute x-coordinate:
+                            // note: last pixel exclusive (>= 0)
+                            // note: it should check X is smaller than 23bits (overflow)!
+
+                            // special case to encode entries into a single int:
+                            if (val == 0) {
+                                _unsafe.putInt(addr_off,
+                                    ((_bboxX0 + cx) << 8)
+                                );
+                            } else {
+                                _unsafe.putInt(addr_off,
+                                    ((_bboxX0 + cx) << 8)
+                                    | (((int) _unsafe.getByte(addr_alpha + val)) & 0xFF) // [0..255]
+                                );
+
+                                if (runLen == 1) {
+                                    _touchedTile[cx0 >> _TILE_SIZE_LG] += val;
+                                } else {
+                                    touchTile(cx0, val, cx, runLen, _touchedTile);
+                                }
+                            }
+                            addr_off += SIZE_INT;
+
+                            if (doStats) {
+                                RendererContext.stats.hist_tile_generator_encoding_runLen
+                                    .add(runLen);
+                            }
+                            cx0 = cx;
+                        }
+
+                        // alpha value = running sum of coverage delta:
+                        val += delta;
+
+                        // ensure values are in [0; MAX_AA_ALPHA] range
+                        if (DO_AA_RANGE_CHECK) {
+                            if (val < 0) {
+                                System.out.println("Invalid coverage = " + val);
+                                val = 0;
+                            }
+                            if (val > MAX_AA_ALPHA) {
+                                System.out.println("Invalid coverage = " + val);
+                                val = MAX_AA_ALPHA;
+                            }
+                        }
+                    }
+                }
+            } else if (doStats) {
+                skip++;
+            }
+        }
+
+        // Process remaining RLE run (always emitted, ends at 'to'):
+        runLen = to - cx0;
+
+        // store alpha coverage (ensure within bounds):
+        // as (int)[absX|val] where:
+        // absX is the absolute x-coordinate in bits 31 to 8 and val in bits 0..7
+        // note: last pixel exclusive (>= 0)
+        // note: it should check X is smaller than 23bits (overflow)!
+
+        // special case to encode entries into a single int:
+        if (val == 0) {
+            _unsafe.putInt(addr_off,
+                ((_bboxX0 + to) << 8)
+            );
+        } else {
+            _unsafe.putInt(addr_off,
+                ((_bboxX0 + to) << 8)
+                | (((int) _unsafe.getByte(addr_alpha + val)) & 0xFF) // [0..255]
+            );
+
+            if (runLen == 1) {
+                _touchedTile[cx0 >> _TILE_SIZE_LG] += val;
+            } else {
+                touchTile(cx0, val, to, runLen, _touchedTile);
+            }
+        }
+        addr_off += SIZE_INT;
+
+        if (doStats) {
+            RendererContext.stats.hist_tile_generator_encoding_runLen
+                .add(runLen);
+        }
+
+        long len = (addr_off - _rowAAChunk.address); // end position (bytes) in the chunk
+
+        // update coded length as bytes:
+        rowAALen[row] = (len - initialPos);
+
+        // update current position:
+        rowAAChunkPos = len;
+
+        if (doStats) {
+            RendererContext.stats.stat_cache_rowAA.add(rowAALen[row]);
+            RendererContext.stats.hist_tile_generator_encoding_ratio.add(
+                (100 * skip) / (blkE - blkW)
+            );
+        }
+
+        // update tile used marks:
+        int tx = from >> _TILE_SIZE_LG; // inclusive
+        if (tx < tileMin) {
+            tileMin = tx;
+        }
+
+        tx = ((to - 1) >> _TILE_SIZE_LG) + 1; // exclusive (+1 to be sure)
+        if (tx > tileMax) {
+            tileMax = tx;
+        }
+
+        // Clear alpha row for reuse (entries beyond the clamped boundary):
+        if (px1 > bboxX1) {
+            alphaRow[to    ] = 0;
+            alphaRow[to + 1] = 0;
+        }
+        if (doChecks) {
+            IntArrayCache.check(blkFlags, 0, blkFlags.length, 0);
+            IntArrayCache.check(alphaRow, 0, alphaRow.length, 0);
+        }
+
+        if (doMonitors) {
+            RendererContext.stats.mon_rdr_copyAARow.stop();
+        }
+    }
+
+    long startRLERow(final int row, final int x0, final int x1) {
+        // rows are supposed to be added by increasing y.
+        rowAAx0[row]  = x0; // first pixel inclusive
+        rowAAx1[row]  = x1; // last pixel exclusive
+        rowAAEnc[row] = 1; // RLE encoding
+        rowAAPos[row] = 0L; // position = 0
+
+        // update row index to current position:
+        return (rowAAChunkIndex[row] = rowAAChunkPos);
+    }
+
+    // Resizes rowAAChunk to the size computed by ArrayCache for needSize bytes.
+    private void expandRowAAChunk(final long needSize) {
+        final OffHeapArray chunk = rowAAChunk;
+
+        if (doStats) {
+            RendererContext.stats.stat_array_marlincache_rowAAChunk
+                .add(needSize);
+        }
+        // note: throw IOOB if neededSize > 2Gb:
+        chunk.resize(ArrayCache.getNewLargeSize(chunk.length, needSize));
+    }
+
+    // Adds val for each pixel of the run [x0; x1[ (runLen pixels) to the
+    // touchedTile entries of the tiles that the run overlaps.
+    private void touchTile(final int x0, final int val, final int x1,
+                           final int runLen,
+                           final int[] _touchedTile)
+    {
+        // x coordinates are relative to bboxX0
+        final int lg = TILE_SIZE_LG;
+        final int firstTile = (x0 >> lg);
+
+        // trivial case: x0 and x1 fall into the same tile
+        if (firstTile == (x1 >> lg)) {
+            _touchedTile[firstTile] += val * runLen;
+            return;
+        }
+
+        // tile of the last pixel (x1 is exclusive):
+        final int lastTile = (x1 - 1) >> lg;
+        int tile = firstTile;
+
+        // first tile: covered from x0 up to the tile end
+        if (tile <= lastTile) {
+            final int firstTileEnd = (tile + 1) << lg;
+            _touchedTile[tile] += val * (firstTileEnd - x0);
+            tile++;
+        }
+
+        // middle tiles: fully covered (TILE_SIZE pixels each)
+        final int fullTileSum = (val << lg);
+        while (tile < lastTile) {
+            _touchedTile[tile] += fullTileSum;
+            tile++;
+        }
+
+        // last tile (unless it was already handled as the first one):
+        if (tile == lastTile) {
+            final int tileStart = tile << lg;
+            final int tileEnd   = (tile + 1) << lg;
+            _touchedTile[tile] += val * (((tileEnd <= x1) ? tileEnd : x1) - tileStart);
+        }
+    }
+
+    int alphaSumInTile(final int x) {
+        return this.touchedTile[(x - this.bboxX0) >> TILE_SIZE_LG]; // entry of the tile holding x
+    }
+
+    @Override
+    public String toString() {
+        final String from = bboxX0 + ", " + bboxY0;
+        final String to   = bboxX1 + ", " + bboxY1;
+        return "bbox = [" + from + " => " + to + "]\n";
+    }
+
+    // Builds the table mapping a coverage in [0; maxalpha] to an alpha byte.
+    private static byte[] buildAlphaMap(final int maxalpha) {
+        // twice the needed size: entries above maxalpha stay 0
+        final byte[] table = new byte[maxalpha << 1];
+        // bias added before the division: maxalpha / 4
+        final int bias = maxalpha >> 2;
+        for (int cov = 0; cov <= maxalpha; cov++) {
+            table[cov] = (byte) ((cov * 255 + bias) / maxalpha);
+        }
+        return table;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinConst.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+/**
+ * Marlin constants: compile-time flags and values read through MarlinProperties
+ */
+interface MarlinConst {
+    // master switch for the logs, stats and traces below (logger or stdout)
+    static final boolean enableLogs = false;
+    // enable Logger
+    static final boolean useLogger = enableLogs && MarlinProperties.isUseLogger();
+
+    // log new RendererContext
+    static final boolean logCreateContext = enableLogs
+        && MarlinProperties.isLogCreateContext();
+    // log misc.Unsafe alloc/realloc/free
+    static final boolean logUnsafeMalloc = enableLogs
+        && MarlinProperties.isLogUnsafeMalloc();
+
+    // do statistics
+    static final boolean doStats = enableLogs && MarlinProperties.isDoStats();
+    // do monitors
+    // disabled to reduce byte-code size a bit...
+    static final boolean doMonitors = enableLogs && false; // MarlinProperties.isDoMonitors();
+    // do checks
+    static final boolean doChecks = false; // MarlinProperties.isDoChecks();
+
+    // do AA range checks: disable when algorithm / code is stable
+    static final boolean DO_AA_RANGE_CHECK = false;
+
+    // enable logs
+    static final boolean doLogWidenArray = enableLogs && false;
+    // enable oversize logs
+    static final boolean doLogOverSize = enableLogs && false;
+    // enable traces
+    static final boolean doTrace = enableLogs && false;
+    // do flush monitors
+    static final boolean doFlushMonitors = true;
+    // use one polling thread to dump statistics/monitors
+    static final boolean useDumpThread = false;
+    // thread dump interval (ms)
+    static final long statDump = 5000L;
+
+    // do clean dirty array (force zero-fill of dirty arrays)
+    static final boolean doCleanDirty = false;
+
+    // flag to use line simplifier
+    static final boolean useSimplifier = MarlinProperties.isUseSimplifier();
+
+    // flag to enable logs related bounds checks
+    static final boolean doLogBounds = enableLogs && false;
+
+    // Initial Array sizing (initial context capacity) ~ 512K
+
+    // 2048 pixel (width x height) for initial capacity
+    static final int INITIAL_PIXEL_DIM
+        = MarlinProperties.getInitialImageSize();
+
+    // typical array sizes: only odd numbers allowed below (NOTE(review): all values are even - confirm)
+    static final int INITIAL_ARRAY        = 256;
+    static final int INITIAL_SMALL_ARRAY  = 1024;
+    static final int INITIAL_MEDIUM_ARRAY = 4096;
+    static final int INITIAL_LARGE_ARRAY  = 8192;
+    static final int INITIAL_ARRAY_16K    = 16384;
+    static final int INITIAL_ARRAY_32K    = 32768;
+    // alpha row dimension
+    static final int INITIAL_AA_ARRAY     = INITIAL_PIXEL_DIM;
+
+    // initial edges (24 bytes) = 24K [ints] = 96K
+    static final int INITIAL_EDGES_CAPACITY = 4096 * 24; // 6 ints per edges
+
+    // zero value as byte
+    static final byte BYTE_0 = (byte) 0;
+
+    // subpixels expressed as log2
+    public static final int SUBPIXEL_LG_POSITIONS_X
+        = MarlinProperties.getSubPixel_Log2_X();
+    public static final int SUBPIXEL_LG_POSITIONS_Y
+        = MarlinProperties.getSubPixel_Log2_Y();
+
+    // number of subpixels
+    public static final int SUBPIXEL_POSITIONS_X = 1 << (SUBPIXEL_LG_POSITIONS_X);
+    public static final int SUBPIXEL_POSITIONS_Y = 1 << (SUBPIXEL_LG_POSITIONS_Y);
+
+    public static final float NORM_SUBPIXELS
+        = (float)Math.sqrt(( SUBPIXEL_POSITIONS_X * SUBPIXEL_POSITIONS_X
+                           + SUBPIXEL_POSITIONS_Y * SUBPIXEL_POSITIONS_Y)/2.0); // quadratic mean of both subpixel counts
+
+    public static final int MAX_AA_ALPHA
+        = SUBPIXEL_POSITIONS_X * SUBPIXEL_POSITIONS_Y; // number of subpixels per pixel
+
+    public static final int TILE_SIZE_LG = MarlinProperties.getTileSize_Log2();
+    public static final int TILE_SIZE = 1 << TILE_SIZE_LG; // 32 by default
+
+    public static final int BLOCK_SIZE_LG = MarlinProperties.getBlockSize_Log2();
+    public static final int BLOCK_SIZE    = 1 << BLOCK_SIZE_LG;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinProperties.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.security.AccessController;
+import static sun.java2d.marlin.MarlinUtils.logInfo;
+import sun.security.action.GetPropertyAction;
+
+public final class MarlinProperties {
+
+    private MarlinProperties() {
+        // no-op
+    }
+
+    // marlin system properties
+
+    public static boolean isUseThreadLocal() {
+        return getBoolean("sun.java2d.renderer.useThreadLocal", "true");
+    }
+
+    /**
+     * Return the initial pixel size used to define initial arrays
+     * (tile AA chunk, alpha line, buckets)
+     *
+     * @return 64 < initial pixel size < 32768 (2048 by default)
+     */
+    public static int getInitialImageSize() {
+        return getInteger("sun.java2d.renderer.pixelsize", 2048, 64, 32 * 1024);
+    }
+
+    /**
+     * Return the log(2) corresponding to subpixel on x-axis (
+     *
+     * @return 1 (2 subpixels) < initial pixel size < 4 (256 subpixels)
+     * (3 by default ie 8 subpixels)
+     */
+    public static int getSubPixel_Log2_X() {
+        return getInteger("sun.java2d.renderer.subPixel_log2_X", 3, 1, 8);
+    }
+
+    /**
+     * Return the log(2) corresponding to subpixel on y-axis (
+     *
+     * @return 1 (2 subpixels) < initial pixel size < 8 (256 subpixels)
+     * (3 by default ie 8 subpixels)
+     */
+    public static int getSubPixel_Log2_Y() {
+        return getInteger("sun.java2d.renderer.subPixel_log2_Y", 3, 1, 8);
+    }
+
+    /**
+     * Return the log(2) corresponding to the square tile size in pixels
+     *
+     * @return 3 (8x8 pixels) < tile size < 8 (256x256 pixels)
+     * (5 by default ie 32x32 pixels)
+     */
+    public static int getTileSize_Log2() {
+        return getInteger("sun.java2d.renderer.tileSize_log2", 5, 3, 8);
+    }
+
+    /**
+     * Return the log(2) corresponding to the block size in pixels
+     *
+     * @return 3 (8 pixels) < block size < 8 (256 pixels)
+     * (5 by default ie 32 pixels)
+     */
+    public static int getBlockSize_Log2() {
+        return getInteger("sun.java2d.renderer.blockSize_log2", 5, 3, 8);
+    }
+
+    // RLE / blockFlags settings
+
+    public static boolean isForceRLE() {
+        return getBoolean("sun.java2d.renderer.forceRLE", "false");
+    }
+
+    public static boolean isForceNoRLE() {
+        return getBoolean("sun.java2d.renderer.forceNoRLE", "false");
+    }
+
+    public static boolean isUseTileFlags() {
+        return getBoolean("sun.java2d.renderer.useTileFlags", "true");
+    }
+
+    public static boolean isUseTileFlagsWithHeuristics() {
+        return isUseTileFlags()
+        && getBoolean("sun.java2d.renderer.useTileFlags.useHeuristics", "true");
+    }
+
+    public static int getRLEMinWidth() {
+        return getInteger("sun.java2d.renderer.rleMinWidth", 64, 0, Integer.MAX_VALUE);
+    }
+
+    // optimisation parameters
+
+    public static boolean isUseSimplifier() {
+        return getBoolean("sun.java2d.renderer.useSimplifier", "false");
+    }
+
+    // debugging parameters
+
+    public static boolean isDoStats() {
+        return getBoolean("sun.java2d.renderer.doStats", "false");
+    }
+
+    public static boolean isDoMonitors() {
+        return getBoolean("sun.java2d.renderer.doMonitors", "false");
+    }
+
+    public static boolean isDoChecks() {
+        return getBoolean("sun.java2d.renderer.doChecks", "false");
+    }
+
+    // logging parameters
+
+    public static boolean isUseLogger() {
+        return getBoolean("sun.java2d.renderer.useLogger", "false");
+    }
+
+    public static boolean isLogCreateContext() {
+        return getBoolean("sun.java2d.renderer.logCreateContext", "false");
+    }
+
+    public static boolean isLogUnsafeMalloc() {
+        return getBoolean("sun.java2d.renderer.logUnsafeMalloc", "false");
+    }
+
+    // system property utilities
+    static boolean getBoolean(final String key, final String def) {
+        return Boolean.valueOf(AccessController.doPrivileged(
+                  new GetPropertyAction(key, def)));
+    }
+
+    static int getInteger(final String key, final int def,
+                                 final int min, final int max)
+    {
+        final String property = AccessController.doPrivileged(
+                                    new GetPropertyAction(key));
+
+        int value = def;
+        if (property != null) {
+            try {
+                value = Integer.decode(property);
+            } catch (NumberFormatException e) {
+                logInfo("Invalid integer value for " + key + " = " + property);
+            }
+        }
+
+        // check for invalid values
+        if ((value < min) || (value > max)) {
+            logInfo("Invalid value for " + key + " = " + value
+                    + "; expected value in range[" + min + ", " + max + "] !");
+            value = def;
+        }
+        return value;
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinRenderingEngine.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,1064 @@
+/*
+ * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.awt.BasicStroke;
+import java.awt.Shape;
+import java.awt.geom.AffineTransform;
+import java.awt.geom.Path2D;
+import java.awt.geom.PathIterator;
+import java.lang.ref.Reference;
+import java.security.AccessController;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import static sun.java2d.marlin.MarlinUtils.logInfo;
+import sun.awt.geom.PathConsumer2D;
+import sun.java2d.pipe.AATileGenerator;
+import sun.java2d.pipe.Region;
+import sun.java2d.pipe.RenderingEngine;
+import sun.security.action.GetPropertyAction;
+
+/**
+ * Marlin RendererEngine implementation (derived from Pisces)
+ */
+public class MarlinRenderingEngine extends RenderingEngine
+                                   implements MarlinConst
+{
+    // stroke normalization modes: enabled with antialiasing, enabled
+    // without antialiasing, or disabled (see strokeTo)
+    private static enum NormMode {ON_WITH_AA, ON_NO_AA, OFF}
+
+    // device-space pen size used for thin antialiased strokes (see strokeTo):
+    // inverse of the subpixel norm
+    private static final float MIN_PEN_SIZE = 1f / NORM_SUBPIXELS;
+
+    /**
+     * Public constructor: creates the rendering engine and passes this
+     * class name to {@code logSettings} (defined outside this excerpt;
+     * presumably it reports the active Marlin settings)
+     */
+    public MarlinRenderingEngine() {
+        super();
+        logSettings(MarlinRenderingEngine.class.getName());
+    }
+
+    /**
+     * Create a widened path as specified by the parameters.
+     * <p>
+     * The specified {@code src} {@link Shape} is widened according
+     * to the specified attribute parameters as per the
+     * {@link BasicStroke} specification. No transform and no stroke
+     * normalization are applied.
+     *
+     * @param src the source path to be widened
+     * @param width the width of the widened path as per {@code BasicStroke}
+     * @param caps the end cap decorations as per {@code BasicStroke}
+     * @param join the segment join decorations as per {@code BasicStroke}
+     * @param miterlimit the miter limit as per {@code BasicStroke}
+     * @param dashes the dash length array as per {@code BasicStroke}
+     * @param dashphase the initial dash phase as per {@code BasicStroke}
+     * @return the widened path stored in a new {@code Shape} object
+     * @since 1.7
+     */
+    @Override
+    public Shape createStrokedShape(Shape src,
+                                    float width,
+                                    int caps,
+                                    int join,
+                                    float miterlimit,
+                                    float dashes[],
+                                    float dashphase)
+    {
+        final RendererContext rdrCtx = getRendererContext();
+        try {
+            // the context keeps one large Path2D, created on first use,
+            // to avoid a lot of array growing:
+            Path2D.Float path = rdrCtx.p2d;
+            if (path == null) {
+                path = new Path2D.Float(Path2D.WIND_NON_ZERO,
+                                        INITIAL_MEDIUM_ARRAY);
+                rdrCtx.p2d = path;
+            }
+            // discard any previous content
+            path.reset();
+
+            strokeTo(rdrCtx, src, null, width, NormMode.OFF,
+                     caps, join, miterlimit, dashes, dashphase,
+                     rdrCtx.transformerPC2D.wrapPath2d(path));
+
+            // the shared path stays in the context: return a copy
+            // made by the Path2D copy constructor (trim)
+            return new Path2D.Float(path);
+
+        } finally {
+            // always give the RendererContext instance back for reuse
+            returnRendererContext(rdrCtx);
+        }
+    }
+
+    /**
+     * Sends the geometry for a widened path as specified by the parameters
+     * to the specified consumer.
+     * <p>
+     * The specified {@code src} {@link Shape} is widened according
+     * to the parameters specified by the {@link BasicStroke} object.
+     * Adjustments are made to the path as appropriate for the
+     * {@code RenderingHints.VALUE_STROKE_NORMALIZE} hint if the
+     * {@code normalize} boolean parameter is true.
+     * Adjustments are made to the path as appropriate for the
+     * {@code RenderingHints.VALUE_ANTIALIAS_ON} hint if the
+     * {@code antialias} boolean parameter is true.
+     * <p>
+     * The geometry of the widened path is forwarded to the indicated
+     * {@link PathConsumer2D} object as it is calculated.
+     *
+     * @param src the source path to be widened
+     * @param at the transform applied to the path (may be null)
+     * @param bs the {@code BasicStroke} object specifying the
+     *           decorations to be applied to the widened path
+     * @param thin if true, the line width is derived from the transform
+     *             instead of being taken from {@code bs}
+     * @param normalize indicates whether stroke normalization should
+     *                  be applied
+     * @param antialias indicates whether or not adjustments appropriate
+     *                  to antialiased rendering should be applied
+     * @param consumer the {@code PathConsumer2D} instance to forward
+     *                 the widened geometry to
+     * @since 1.7
+     */
+    @Override
+    public void strokeTo(Shape src,
+                         AffineTransform at,
+                         BasicStroke bs,
+                         boolean thin,
+                         boolean normalize,
+                         boolean antialias,
+                         final PathConsumer2D consumer)
+    {
+        // map the two boolean hints onto the normalization mode
+        final NormMode norm;
+        if (!normalize) {
+            norm = NormMode.OFF;
+        } else if (antialias) {
+            norm = NormMode.ON_WITH_AA;
+        } else {
+            norm = NormMode.ON_NO_AA;
+        }
+
+        final RendererContext rdrCtx = getRendererContext();
+        try {
+            strokeTo(rdrCtx, src, at, bs, thin, norm, antialias, consumer);
+        } finally {
+            // always give the RendererContext instance back for reuse
+            returnRendererContext(rdrCtx);
+        }
+    }
+
+    /**
+     * Resolves the effective line width and the stroke attributes of the
+     * given {@code BasicStroke} then delegates to the attribute-based
+     * strokeTo variant.
+     * For thin strokes, the width is a device-space pen size
+     * (MIN_PEN_SIZE when antialiasing, 1 otherwise) converted to user
+     * space; otherwise it is the stroke's own line width.
+     */
+    final void strokeTo(final RendererContext rdrCtx,
+                        Shape src,
+                        AffineTransform at,
+                        BasicStroke bs,
+                        boolean thin,
+                        NormMode normalize,
+                        boolean antialias,
+                        PathConsumer2D pc2d)
+    {
+        final float lw;
+        if (!thin) {
+            lw = bs.getLineWidth();
+        } else {
+            final float penSize = (antialias) ? MIN_PEN_SIZE : 1.0f;
+            lw = userSpaceLineWidth(at, penSize);
+        }
+
+        strokeTo(rdrCtx, src, at, lw, normalize,
+                 bs.getEndCap(), bs.getLineJoin(), bs.getMiterLimit(),
+                 bs.getDashArray(), bs.getDashPhase(),
+                 pc2d);
+    }
+
+    /**
+     * Converts a line width into user space by dividing it by a scale
+     * factor derived from the given transform.
+     *
+     * @param at the transform (may be null, meaning no scaling)
+     * @param lw the line width to convert
+     * @return {@code lw} divided by the scale factor of the transform
+     */
+    private final float userSpaceLineWidth(AffineTransform at, float lw) {
+
+        float widthScale;
+
+        if (at == null) {
+            widthScale = 1.0f;
+        } else if ((at.getType() & (AffineTransform.TYPE_GENERAL_TRANSFORM  |
+                                    AffineTransform.TYPE_GENERAL_SCALE)) != 0) {
+            // The determinant is negative when the transform flips:
+            // use its absolute value to avoid sqrt() returning NaN.
+            widthScale = (float)Math.sqrt(Math.abs(at.getDeterminant()));
+        } else {
+            // First calculate the "maximum scale" of this transform.
+            double A = at.getScaleX();       // m00
+            double C = at.getShearX();       // m01
+            double B = at.getShearY();       // m10
+            double D = at.getScaleY();       // m11
+
+            /*
+             * Given a 2 x 2 affine matrix [ A B ] such that
+             *                             [ C D ]
+             * v' = [x' y'] = [Ax + Cy, Bx + Dy], we want to
+             * find the maximum magnitude (norm) of the vector v'
+             * with the constraint (x^2 + y^2 = 1).
+             * The equation to maximize is
+             *     |v'| = sqrt((Ax+Cy)^2+(Bx+Dy)^2)
+             * or  |v'| = sqrt((AA+BB)x^2 + 2(AC+BD)xy + (CC+DD)y^2).
+             * Since sqrt is monotonic we can maximize |v'|^2
+             * instead and plug in the substitution y = sqrt(1 - x^2).
+             * Trigonometric equalities can then be used to get
+             * rid of most of the sqrt terms.
+             */
+
+            double EA = A*A + B*B;          // x^2 coefficient
+            double EB = 2.0*(A*C + B*D);    // xy coefficient
+            double EC = C*C + D*D;          // y^2 coefficient
+
+            /*
+             * There is a lot of calculus omitted here.
+             *
+             * Conceptually, in the interests of understanding the
+             * terms that the calculus produced we can consider
+             * that EA and EC end up providing the lengths along
+             * the major axes and the hypot term ends up being an
+             * adjustment for the additional length along the off-axis
+             * angle of rotated or sheared ellipses as well as an
+             * adjustment for the fact that the equation below
+             * averages the two major axis lengths.  (Notice that
+             * the hypot term contains a part which resolves to the
+             * difference of these two axis lengths in the absence
+             * of rotation.)
+             *
+             * In the calculus, the ratio of the EB and (EA-EC) terms
+             * ends up being the tangent of 2*theta where theta is
+             * the angle that the long axis of the ellipse makes
+             * with the horizontal axis.  Thus, this equation is
+             * calculating the length of the hypotenuse of a triangle
+             * along that axis.
+             */
+
+            double hypot = Math.sqrt(EB*EB + (EA-EC)*(EA-EC));
+            // squared maximum scale; its square root is taken below.
+            double widthsquared = ((EA + EC + hypot)/2.0);
+
+            widthScale = (float)Math.sqrt(widthsquared);
+        }
+
+        return (lw / widthScale);
+    }
+
+    /**
+     * Builds the stroking pipeline (optional simplifier, transforming
+     * consumers, stroker, optional dasher) in front of the given consumer
+     * and feeds it with the path of the given shape.
+     * <p>
+     * If the transform is (nearly) singular, only {@code moveTo(0, 0)} and
+     * {@code pathDone()} are sent to the consumer and nothing is stroked.
+     *
+     * @param rdrCtx renderer context providing the reusable pipeline stages
+     * @param src the source shape to be widened
+     * @param at the transform to apply (may be null or identity)
+     * @param width the line width
+     * @param normalize the stroke normalization mode
+     * @param caps the end cap decorations as per {@code BasicStroke}
+     * @param join the segment join decorations as per {@code BasicStroke}
+     * @param miterlimit the miter limit as per {@code BasicStroke}
+     * @param dashes the dash length array (may be null: no dashing);
+     *               it is never modified: scaled values go into a copy
+     * @param dashphase the initial dash phase
+     * @param pc2d the consumer receiving the widened geometry
+     */
+    final void strokeTo(final RendererContext rdrCtx,
+                        Shape src,
+                        AffineTransform at,
+                        float width,
+                        NormMode normalize,
+                        int caps,
+                        int join,
+                        float miterlimit,
+                        float dashes[],
+                        float dashphase,
+                        PathConsumer2D pc2d)
+    {
+        // We use strokerat and outat so that in Stroker and Dasher we can work only
+        // with the pre-transformation coordinates. This will repeat a lot of
+        // computations done in the path iterator, but the alternative is to
+        // work with transformed paths and compute untransformed coordinates
+        // as needed. This would be faster but I do not think the complexity
+        // of working with both untransformed and transformed coordinates in
+        // the same code is worth it.
+        // However, if a path's width is constant after a transformation,
+        // we can skip all this untransforming.
+
+        // If normalization is off we save some transformations by not
+        // transforming the input to pisces. Instead, we apply the
+        // transformation after the path processing has been done.
+        // We can't do this if normalization is on, because it isn't a good
+        // idea to normalize before the transformation is applied.
+        AffineTransform strokerat = null;
+        AffineTransform outat = null;
+
+        PathIterator pi;
+        // number of valid dash entries (set below when dashes are used)
+        int dashLen = -1;
+        // true when dashes were copied into an array taken from the context
+        boolean recycleDashes = false;
+
+        if (at != null && !at.isIdentity()) {
+            final double a = at.getScaleX();
+            final double b = at.getShearX();
+            final double c = at.getShearY();
+            final double d = at.getScaleY();
+            final double det = a * d - c * b;
+
+            if (Math.abs(det) <= (2f * Float.MIN_VALUE)) {
+                // this rendering engine takes one dimensional curves and turns
+                // them into 2D shapes by giving them width.
+                // However, if everything is to be passed through a singular
+                // transformation, these 2D shapes will be squashed down to 1D
+                // again so, nothing can be drawn.
+
+                // Every path needs an initial moveTo and a pathDone. If these
+                // are not there this causes a SIGSEGV in libawt.so (at the time
+                // of writing of this comment (September 16, 2010)). Actually,
+                // I am not sure if the moveTo is necessary to avoid the SIGSEGV
+                // but the pathDone is definitely needed.
+                pc2d.moveTo(0f, 0f);
+                pc2d.pathDone();
+                return;
+            }
+
+            // If the transform is a constant multiple of an orthogonal transformation
+            // then every length is just multiplied by a constant, so we just
+            // need to transform input paths to stroker and tell stroker
+            // the scaled width. This condition is satisfied if
+            // a*b == -c*d && a*a+c*c == b*b+d*d. In the actual check below, we
+            // leave a bit of room for error.
+            if (nearZero(a*b + c*d) && nearZero(a*a + c*c - (b*b + d*d))) {
+                final float scale = (float) Math.sqrt(a*a + c*c);
+                if (dashes != null) {
+                    recycleDashes = true;
+                    dashLen = dashes.length;
+                    // scale a copy of the dashes: the caller's array is
+                    // left untouched
+                    final float[] newDashes;
+                    if (dashLen <= INITIAL_ARRAY) {
+                        newDashes = rdrCtx.dasher.dashes_initial;
+                    } else {
+                        if (doStats) {
+                            RendererContext.stats.stat_array_dasher_dasher
+                                .add(dashLen);
+                        }
+                        newDashes = rdrCtx.getDirtyFloatArray(dashLen);
+                    }
+                    System.arraycopy(dashes, 0, newDashes, 0, dashLen);
+                    dashes = newDashes;
+                    for (int i = 0; i < dashLen; i++) {
+                        dashes[i] = scale * dashes[i];
+                    }
+                    dashphase = scale * dashphase;
+                }
+                width = scale * width;
+                pi = getNormalizingPathIterator(rdrCtx, normalize,
+                                                src.getPathIterator(at));
+
+                // by now strokerat == null && outat == null. Input paths to
+                // stroker (and maybe dasher) will have the full transform at
+                // applied to them and nothing will happen to the output paths.
+            } else {
+                if (normalize != NormMode.OFF) {
+                    strokerat = at;
+                    pi = getNormalizingPathIterator(rdrCtx, normalize,
+                                                    src.getPathIterator(at));
+
+                    // by now strokerat == at && outat == null. Input paths to
+                    // stroker (and maybe dasher) will have the full transform at
+                    // applied to them, then they will be normalized, and then
+                    // the inverse of *only the non translation part of at* will
+                    // be applied to the normalized paths. This won't cause problems
+                    // in stroker, because, suppose at = T*A, where T is just the
+                    // translation part of at, and A is the rest. T*A has already
+                    // been applied to Stroker/Dasher's input. Then Ainv will be
+                    // applied. Ainv*T*A is not equal to T, but it is a translation,
+                    // which means that none of stroker's assumptions about its
+                    // input will be violated. After all this, A will be applied
+                    // to stroker's output.
+                } else {
+                    outat = at;
+                    pi = src.getPathIterator(null);
+                    // outat == at && strokerat == null. This is because if no
+                    // normalization is done, we can just apply all our
+                    // transformations to stroker's output.
+                }
+            }
+        } else {
+            // either at is null or it's the identity. In either case
+            // we don't transform the path.
+            pi = getNormalizingPathIterator(rdrCtx, normalize,
+                                            src.getPathIterator(null));
+        }
+
+        // The consumers are chained from the last stage to the first one:
+        // each init / wrap call below returns the stage feeding the previous pc2d.
+        if (useSimplifier) {
+            // Use simplifier after stroker before Renderer
+            // to remove collinear segments (notably due to cap square)
+            pc2d = rdrCtx.simplifier.init(pc2d);
+        }
+
+        // by now, at least one of outat and strokerat will be null. Unless at is not
+        // a constant multiple of an orthogonal transformation, they will both be
+        // null. In other cases, outat == at if normalization is off, and if
+        // normalization is on, strokerat == at.
+        final TransformingPathConsumer2D transformerPC2D = rdrCtx.transformerPC2D;
+        pc2d = transformerPC2D.transformConsumer(pc2d, outat);
+        pc2d = transformerPC2D.deltaTransformConsumer(pc2d, strokerat);
+
+        pc2d = rdrCtx.stroker.init(pc2d, width, caps, join, miterlimit);
+
+        if (dashes != null) {
+            if (!recycleDashes) {
+                dashLen = dashes.length;
+            }
+            pc2d = rdrCtx.dasher.init(pc2d, dashes, dashLen, dashphase,
+                                      recycleDashes);
+        }
+        pc2d = transformerPC2D.inverseDeltaTransformConsumer(pc2d, strokerat);
+        pathTo(rdrCtx, pi, pc2d);
+
+        /*
+         * Pipeline seems to be:
+         *    shape.getPathIterator
+         * -> NormalizingPathIterator
+         * -> inverseDeltaTransformConsumer
+         * -> Dasher
+         * -> Stroker
+         * -> deltaTransformConsumer OR transformConsumer
+         *
+         * -> CollinearSimplifier to remove redundant segments
+         *
+         * -> pc2d = Renderer (bounding box)
+         */
+    }
+
+    private static boolean nearZero(final double num) {
+        return Math.abs(num) < 2.0 * Math.ulp(num);
+    }
+
+    PathIterator getNormalizingPathIterator(final RendererContext rdrCtx,
+                                            final NormMode mode,
+                                            final PathIterator src)
+    {
+        switch (mode) {
+            case ON_WITH_AA:
+                // NormalizingPathIterator NearestPixelCenter:
+                return rdrCtx.nPCPathIterator.init(src);
+            case ON_NO_AA:
+                // NearestPixel NormalizingPathIterator:
+                return rdrCtx.nPQPathIterator.init(src);
+            case OFF:
+                // return original path iterator if normalization is disabled:
+                return src;
+            default:
+                throw new InternalError("Unrecognized normalization mode");
+        }
+    }
+
+    /**
+     * Reusable PathIterator wrapper that normalizes the end point of every
+     * segment using {@link #normCoord(float)} and shifts the control points
+     * of curves by the adjustments applied to the surrounding end points.
+     * An instance is bound to a source iterator by {@code init()} and
+     * releases it once the source reports it is done.
+     */
+    abstract static class NormalizingPathIterator implements PathIterator {
+
+        // wrapped path iterator (null once disposed)
+        private PathIterator src;
+
+        // the adjustment applied to the current position.
+        private float curx_adjust, cury_adjust;
+        // the adjustment applied to the last moveTo position.
+        private float movx_adjust, movy_adjust;
+
+        // scratch array used by currentSegment(double[])
+        // (must hold at least 6 values as 6 values are copied from it)
+        private final float[] tmp;
+
+        NormalizingPathIterator(final float[] tmp) {
+            this.tmp = tmp;
+        }
+
+        /**
+         * Binds this iterator to the given source iterator
+         * @param src path iterator to normalize
+         * @return this instance
+         */
+        final NormalizingPathIterator init(final PathIterator src) {
+            this.src = src;
+            return this; // fluent API
+        }
+
+        /**
+         * Disposes this path iterator:
+         * clean up before reusing this instance
+         */
+        final void dispose() {
+            // free source PathIterator:
+            this.src = null;
+        }
+
+        /**
+         * Reads the current segment from the source iterator and
+         * normalizes its coordinates in place.
+         * @param coords array receiving the normalized segment coordinates
+         * @return the segment type reported by the source iterator
+         */
+        @Override
+        public final int currentSegment(final float[] coords) {
+            if (doMonitors) {
+                RendererContext.stats.mon_npi_currentSegment.start();
+            }
+            // index of the x coordinate of the segment end point
+            int lastCoord;
+            final int type = src.currentSegment(coords);
+
+            switch(type) {
+                case PathIterator.SEG_MOVETO:
+                case PathIterator.SEG_LINETO:
+                    lastCoord = 0;
+                    break;
+                case PathIterator.SEG_QUADTO:
+                    lastCoord = 2;
+                    break;
+                case PathIterator.SEG_CUBICTO:
+                    lastCoord = 4;
+                    break;
+                case PathIterator.SEG_CLOSE:
+                    // we don't want to deal with this case later. We just exit now
+                    // the current position goes back to the last moveTo
+                    // position so its adjustment is restored too:
+                    curx_adjust = movx_adjust;
+                    cury_adjust = movy_adjust;
+
+                    if (doMonitors) {
+                        RendererContext.stats.mon_npi_currentSegment.stop();
+                    }
+                    return type;
+                default:
+                    throw new InternalError("Unrecognized curve type");
+            }
+
+            // TODO: handle NaN, Inf and overflow
+
+            // normalize endpoint
+            float coord, x_adjust, y_adjust;
+
+            coord = coords[lastCoord];
+            x_adjust = normCoord(coord); // new coord
+            coords[lastCoord] = x_adjust;
+            x_adjust -= coord; // now the shift applied to x
+
+            coord = coords[lastCoord + 1];
+            y_adjust = normCoord(coord); // new coord
+            coords[lastCoord + 1] = y_adjust;
+            y_adjust -= coord; // now the shift applied to y
+
+            // now that the end points are done, normalize the control points
+            switch(type) {
+                case PathIterator.SEG_MOVETO:
+                    // remember the adjustment for a later SEG_CLOSE
+                    movx_adjust = x_adjust;
+                    movy_adjust = y_adjust;
+                    break;
+                case PathIterator.SEG_LINETO:
+                    break;
+                case PathIterator.SEG_QUADTO:
+                    // single control point: shifted by the average of the
+                    // start point and end point adjustments
+                    coords[0] += (curx_adjust + x_adjust) / 2f;
+                    coords[1] += (cury_adjust + y_adjust) / 2f;
+                    break;
+                case PathIterator.SEG_CUBICTO:
+                    // first control point follows the start point,
+                    // second control point follows the end point
+                    coords[0] += curx_adjust;
+                    coords[1] += cury_adjust;
+                    coords[2] += x_adjust;
+                    coords[3] += y_adjust;
+                    break;
+                case PathIterator.SEG_CLOSE:
+                    // handled earlier
+                default:
+            }
+            curx_adjust = x_adjust;
+            cury_adjust = y_adjust;
+
+            if (doMonitors) {
+                RendererContext.stats.mon_npi_currentSegment.stop();
+            }
+            return type;
+        }
+
+        /**
+         * Normalizes one coordinate
+         * @param coord coordinate to normalize
+         * @return normalized coordinate
+         */
+        abstract float normCoord(final float coord);
+
+        /**
+         * Double variant: delegates to the float variant using the scratch
+         * array then copies its 6 values whatever the segment type.
+         * @param coords array receiving the segment coordinates
+         * (at least 6 values)
+         * @return the segment type
+         */
+        @Override
+        public final int currentSegment(final double[] coords) {
+            final float[] _tmp = tmp; // dirty
+            int type = this.currentSegment(_tmp);
+            for (int i = 0; i < 6; i++) {
+                coords[i] = _tmp[i];
+            }
+            return type;
+        }
+
+        @Override
+        public final int getWindingRule() {
+            return src.getWindingRule();
+        }
+
+        /**
+         * Tests whether the source iterator is done; if so, this instance
+         * is disposed (the source iterator is released).
+         * @return true if the iteration is complete
+         */
+        @Override
+        public final boolean isDone() {
+            if (src.isDone()) {
+                // Dispose this instance:
+                dispose();
+                return true;
+            }
+            return false;
+        }
+
+        @Override
+        public final void next() {
+            src.next();
+        }
+
+        /**
+         * Normalizing iterator used when antialiasing is enabled
+         * (mode ON_WITH_AA)
+         */
+        static final class NearestPixelCenter
+                                extends NormalizingPathIterator
+        {
+            NearestPixelCenter(final float[] tmp) {
+                super(tmp);
+            }
+
+            @Override
+            float normCoord(final float coord) {
+                // round to nearest pixel center
+                return FloatMath.floor_f(coord) + 0.5f;
+            }
+        }
+
+        /**
+         * Normalizing iterator used when antialiasing is disabled
+         * (mode ON_NO_AA)
+         */
+        static final class NearestPixelQuarter
+                                extends NormalizingPathIterator
+        {
+            NearestPixelQuarter(final float[] tmp) {
+                super(tmp);
+            }
+
+            @Override
+            float normCoord(final float coord) {
+                // round to nearest (0.25, 0.25) pixel quarter
+                return FloatMath.floor_f(coord + 0.25f) + 0.25f;
+            }
+        }
+    }
+
+    /**
+     * Feeds the given consumer with the whole path of the given iterator.
+     * The renderer context is flagged dirty while the path is processed;
+     * the flag is only cleared when processing completes normally.
+     *
+     * @param rdrCtx renderer context providing the coordinate array
+     * @param pi path iterator to read
+     * @param pc2d consumer receiving the path segments
+     */
+    private static void pathTo(final RendererContext rdrCtx, final PathIterator pi,
+                               final PathConsumer2D pc2d)
+    {
+        // DIRTY until the whole path is consumed:
+        rdrCtx.dirty = true;
+
+        pathToLoop(rdrCtx.float6, pi, pc2d);
+
+        // path fully consumed: CLEAN again
+        rdrCtx.dirty = false;
+    }
+
+    private static void pathToLoop(final float[] coords, final PathIterator pi,
+                                   final PathConsumer2D pc2d)
+    {
+        for (; !pi.isDone(); pi.next()) {
+            switch (pi.currentSegment(coords)) {
+                case PathIterator.SEG_MOVETO:
+                    pc2d.moveTo(coords[0], coords[1]);
+                    continue;
+                case PathIterator.SEG_LINETO:
+                    pc2d.lineTo(coords[0], coords[1]);
+                    continue;
+                case PathIterator.SEG_QUADTO:
+                    pc2d.quadTo(coords[0], coords[1],
+                                coords[2], coords[3]);
+                    continue;
+                case PathIterator.SEG_CUBICTO:
+                    pc2d.curveTo(coords[0], coords[1],
+                                 coords[2], coords[3],
+                                 coords[4], coords[5]);
+                    continue;
+                case PathIterator.SEG_CLOSE:
+                    pc2d.closePath();
+                    continue;
+                default:
+            }
+        }
+        pc2d.pathDone();
+    }
+
+    /**
+     * Construct an antialiased tile generator for the given shape with
+     * the given rendering attributes and store the bounds of the tile
+     * iteration in the bbox parameter.
+     * The {@code at} parameter specifies a transform that should affect
+     * both the shape and the {@code BasicStroke} attributes.
+     * The {@code clip} parameter specifies the current clip in effect
+     * in device coordinates and can be used to prune the data for the
+     * operation, but the renderer is not required to perform any
+     * clipping.
+     * If the {@code BasicStroke} parameter is null then the shape
+     * should be filled as is, otherwise the attributes of the
+     * {@code BasicStroke} should be used to specify a draw operation.
+     * The {@code thin} parameter indicates whether or not the
+     * transformed {@code BasicStroke} represents coordinates smaller
+     * than the minimum resolution of the antialiasing rasterizer as
+     * specified by the {@code getMinimumAAPenWidth()} method.
+     * <p>
+     * Upon returning, this method will fill the {@code bbox} parameter
+     * with 4 values indicating the bounds of the iteration of the
+     * tile generator.
+     * The iteration order of the tiles will be as specified by the
+     * pseudo-code:
+     * <pre>
+     *     for (y = bbox[1]; y < bbox[3]; y += tileheight) {
+     *         for (x = bbox[0]; x < bbox[2]; x += tilewidth) {
+     *         }
+     *     }
+     * </pre>
+     * If there is no output to be rendered, this method may return
+     * null.
+     * <p>
+     * Implementation note: when a generator is returned, the renderer and
+     * the {@code RendererContext} obtained by this call are neither
+     * disposed nor recycled here; that is left to
+     * {@code MarlinTileGenerator.dispose()}. Otherwise, once the renderer
+     * has been initialized, both are released before this method returns.
+     *
+     * @param s the shape to be rendered (fill or draw)
+     * @param at the transform to be applied to the shape and the
+     *           stroke attributes
+     * @param clip the current clip in effect in device coordinates
+     * @param bs if non-null, a {@code BasicStroke} whose attributes
+     *           should be applied to this operation
+     * @param thin true if the transformed stroke attributes are smaller
+     *             than the minimum dropout pen width
+     * @param normalize true if the {@code VALUE_STROKE_NORMALIZE}
+     *                  {@code RenderingHint} is in effect
+     * @param bbox returns the bounds of the iteration
+     * @return the {@code AATileGenerator} instance to be consulted
+     *         for tile coverages, or null if there is no output to render
+     * @since 1.7
+     */
+    @Override
+    public AATileGenerator getAATileGenerator(Shape s,
+                                              AffineTransform at,
+                                              Region clip,
+                                              BasicStroke bs,
+                                              boolean thin,
+                                              boolean normalize,
+                                              int bbox[])
+    {
+        MarlinTileGenerator ptg = null;
+        Renderer r = null;
+
+        final RendererContext rdrCtx = getRendererContext();
+        try {
+            // Test if at is identity: a null or identity transform is passed on as null
+            final AffineTransform _at = (at != null && !at.isIdentity()) ? at
+                                        : null;
+
+            final NormMode norm = (normalize) ? NormMode.ON_WITH_AA : NormMode.OFF;
+
+            if (bs == null) {
+                // fill shape: the renderer uses the winding rule of the path itself
+                final PathIterator pi = getNormalizingPathIterator(rdrCtx, norm,
+                                            s.getPathIterator(_at));
+
+                r = rdrCtx.renderer.init(clip.getLoX(), clip.getLoY(),
+                                         clip.getWidth(), clip.getHeight(),
+                                         pi.getWindingRule());
+
+                // TODO: subdivide quad/cubic curves into monotonic curves ?
+                pathTo(rdrCtx, pi, r);
+            } else {
+                // draw shape with given stroke (always rendered with the non-zero winding rule):
+                r = rdrCtx.renderer.init(clip.getLoX(), clip.getLoY(),
+                                         clip.getWidth(), clip.getHeight(),
+                                         PathIterator.WIND_NON_ZERO);
+
+                strokeTo(rdrCtx, s, _at, bs, thin, norm, true, r);
+            }
+            if (r.endRendering()) {
+                ptg = rdrCtx.ptg.init();
+                ptg.getBbox(bbox);
+                // note: do not returnRendererContext(rdrCtx)
+                // as it will be called later by MarlinTileGenerator.dispose()
+                r = null; // cleared so that the finally block below releases nothing
+            }
+        } finally {
+            if (r != null) { // endRendering() returned false or an exception was thrown after init()
+                // dispose renderer:
+                r.dispose();
+                // recycle the RendererContext instance
+                MarlinRenderingEngine.returnRendererContext(rdrCtx);
+            }
+        }
+
+        // Return null to cancel AA tile generation (nothing to render)
+        return ptg;
+    }
+
+    @Override
+    public final AATileGenerator getAATileGenerator(double x, double y,
+                                                    double dx1, double dy1,
+                                                    double dx2, double dy2,
+                                                    double lw1, double lw2,
+                                                    Region clip,
+                                                    int bbox[])
+    {
+        // REMIND: Deal with large coordinates!
+        double ldx1, ldy1, ldx2, ldy2;
+        boolean innerpgram = (lw1 > 0.0 && lw2 > 0.0); // an inner outline is only considered when both lw factors are positive
+
+        if (innerpgram) {
+            ldx1 = dx1 * lw1; // edge vectors scaled by the lw factors
+            ldy1 = dy1 * lw1;
+            ldx2 = dx2 * lw2;
+            ldy2 = dy2 * lw2;
+            x -= (ldx1 + ldx2) / 2.0; // move the origin outwards by half of both scaled vectors
+            y -= (ldy1 + ldy2) / 2.0;
+            dx1 += ldx1; // enlarge both edge vectors: (x, y, dx1, dy1, dx2, dy2) is now the outer outline
+            dy1 += ldy1;
+            dx2 += ldx2;
+            dy2 += ldy2;
+            if (lw1 > 1.0 && lw2 > 1.0) {
+                // Inner parallelogram was entirely consumed by stroke...
+                innerpgram = false;
+            }
+        } else {
+            ldx1 = ldy1 = ldx2 = ldy2 = 0.0;
+        }
+
+        MarlinTileGenerator ptg = null;
+        Renderer r = null;
+
+        final RendererContext rdrCtx = getRendererContext();
+        try {
+            r = rdrCtx.renderer.init(clip.getLoX(), clip.getLoY(),
+                                         clip.getWidth(), clip.getHeight(),
+                                         Renderer.WIND_EVEN_ODD); // even-odd rule: the inner outline added below acts as a hole
+
+            r.moveTo((float) x, (float) y); // outer outline
+            r.lineTo((float) (x+dx1), (float) (y+dy1));
+            r.lineTo((float) (x+dx1+dx2), (float) (y+dy1+dy2));
+            r.lineTo((float) (x+dx2), (float) (y+dy2));
+            r.closePath();
+
+            if (innerpgram) {
+                x += ldx1 + ldx2; // move the origin inwards by both scaled vectors
+                y += ldy1 + ldy2;
+                dx1 -= 2.0 * ldx1; // shrink both edge vectors to get the inner outline
+                dy1 -= 2.0 * ldy1;
+                dx2 -= 2.0 * ldx2;
+                dy2 -= 2.0 * ldy2;
+                r.moveTo((float) x, (float) y); // inner outline
+                r.lineTo((float) (x+dx1), (float) (y+dy1));
+                r.lineTo((float) (x+dx1+dx2), (float) (y+dy1+dy2));
+                r.lineTo((float) (x+dx2), (float) (y+dy2));
+                r.closePath();
+            }
+            r.pathDone();
+
+            if (r.endRendering()) {
+                ptg = rdrCtx.ptg.init();
+                ptg.getBbox(bbox);
+                // note: do not returnRendererContext(rdrCtx)
+                // as it will be called later by MarlinTileGenerator.dispose()
+                r = null; // cleared so that the finally block below releases nothing
+            }
+        } finally {
+            if (r != null) { // endRendering() returned false or an exception was thrown after init()
+                // dispose renderer:
+                r.dispose();
+                // recycle the RendererContext instance
+                MarlinRenderingEngine.returnRendererContext(rdrCtx);
+            }
+        }
+
+        // Return null to cancel AA tile generation (nothing to render)
+        return ptg;
+    }
+
+    /**
+     * Returns the minimum pen width that the antialiasing rasterizer
+     * can represent without dropouts occurring.
+     * @return the {@code MIN_PEN_SIZE} constant
+     * @since 1.7
+     */
+    @Override
+    public float getMinimumAAPenSize() {
+        return MIN_PEN_SIZE;
+    }
+
+    static {
+        if (PathIterator.WIND_NON_ZERO != Renderer.WIND_NON_ZERO ||
+            PathIterator.WIND_EVEN_ODD != Renderer.WIND_EVEN_ODD ||
+            BasicStroke.JOIN_MITER != Stroker.JOIN_MITER ||
+            BasicStroke.JOIN_ROUND != Stroker.JOIN_ROUND ||
+            BasicStroke.JOIN_BEVEL != Stroker.JOIN_BEVEL ||
+            BasicStroke.CAP_BUTT != Stroker.CAP_BUTT ||
+            BasicStroke.CAP_ROUND != Stroker.CAP_ROUND ||
+            BasicStroke.CAP_SQUARE != Stroker.CAP_SQUARE)
+        {
+            throw new InternalError("mismatched renderer constants");
+        }
+    }
+
+    // --- RendererContext handling ---
+    // use ThreadLocal or ConcurrentLinkedQueue to get one RendererContext
+    private static final boolean useThreadLocal;
+
+    // hard reference
+    static final int REF_HARD = 0;
+    // soft reference
+    static final int REF_SOFT = 1;
+    // weak reference
+    static final int REF_WEAK = 2;
+
+    // reference type stored in either TL or CLQ
+    static final int REF_TYPE;
+
+    // Per-thread RendererContext
+    private static final ThreadLocal<Object> rdrCtxThreadLocal;
+    // RendererContext queue when ThreadLocal is disabled
+    private static final ConcurrentLinkedQueue<Object> rdrCtxQueue;
+
+    // Static initializer to use TL or CLQ mode
+    static {
+        // CLQ mode by default:
+        useThreadLocal = MarlinProperties.isUseThreadLocal();
+        rdrCtxThreadLocal = (useThreadLocal) ? new ThreadLocal<Object>()
+                                             : null;
+        rdrCtxQueue = (!useThreadLocal) ? new ConcurrentLinkedQueue<Object>()
+                                        : null;
+
+        // Soft reference by default:
+        String refType = AccessController.doPrivileged(
+                            new GetPropertyAction("sun.java2d.renderer.useRef",
+                            "soft"));
+        switch (refType) {
+            default:
+            case "soft":
+                REF_TYPE = REF_SOFT;
+                break;
+            case "weak":
+                REF_TYPE = REF_WEAK;
+                break;
+            case "hard":
+                REF_TYPE = REF_HARD;
+                break;
+        }
+    }
+
+    private static boolean settingsLogged = !enableLogs;
+
+    /**
+     * Logs the Marlin settings through {@code logInfo()}, once only:
+     * the {@code settingsLogged} flag is set on the first call and any
+     * later call returns immediately. As that flag is initialized to
+     * {@code !enableLogs}, nothing is ever logged when logs are disabled.
+     *
+     * @param reClass value reported for the sun.java2d.renderer setting
+     */
+    private static void logSettings(final String reClass) {
+        // log information at startup
+        if (settingsLogged) {
+            return;
+        }
+        settingsLogged = true;
+
+        // textual form of REF_TYPE (any unknown value is reported as "hard"):
+        String refType;
+        switch (REF_TYPE) {
+            default:
+            case REF_HARD:
+                refType = "hard";
+                break;
+            case REF_SOFT:
+                refType = "soft";
+                break;
+            case REF_WEAK:
+                refType = "weak";
+                break;
+        }
+
+        logInfo("=========================================================="
+                + "=====================");
+
+        logInfo("Marlin software rasterizer           = ENABLED");
+        logInfo("Version                              = ["
+                + Version.getVersion() + "]");
+        logInfo("sun.java2d.renderer                  = "
+                + reClass);
+        logInfo("sun.java2d.renderer.useThreadLocal   = "
+                + useThreadLocal);
+        logInfo("sun.java2d.renderer.useRef           = "
+                + refType);
+
+        logInfo("sun.java2d.renderer.pixelsize        = "
+                + MarlinConst.INITIAL_PIXEL_DIM);
+        logInfo("sun.java2d.renderer.subPixel_log2_X  = "
+                + MarlinConst.SUBPIXEL_LG_POSITIONS_X);
+        logInfo("sun.java2d.renderer.subPixel_log2_Y  = "
+                + MarlinConst.SUBPIXEL_LG_POSITIONS_Y);
+        logInfo("sun.java2d.renderer.tileSize_log2    = "
+                + MarlinConst.TILE_SIZE_LG);
+
+        // note: this setting is reported once only
+        logInfo("sun.java2d.renderer.blockSize_log2   = "
+                + MarlinConst.BLOCK_SIZE_LG);
+
+        // RLE / blockFlags settings
+
+        logInfo("sun.java2d.renderer.forceRLE         = "
+                + MarlinProperties.isForceRLE());
+        logInfo("sun.java2d.renderer.forceNoRLE       = "
+                + MarlinProperties.isForceNoRLE());
+        logInfo("sun.java2d.renderer.useTileFlags     = "
+                + MarlinProperties.isUseTileFlags());
+        logInfo("sun.java2d.renderer.useTileFlags.useHeuristics = "
+                + MarlinProperties.isUseTileFlagsWithHeuristics());
+        logInfo("sun.java2d.renderer.rleMinWidth      = "
+                + MarlinCache.RLE_MIN_WIDTH);
+
+        // optimisation parameters
+        logInfo("sun.java2d.renderer.useSimplifier    = "
+                + MarlinConst.useSimplifier);
+
+        // debugging parameters
+        logInfo("sun.java2d.renderer.doStats          = "
+                + MarlinConst.doStats);
+        logInfo("sun.java2d.renderer.doMonitors       = "
+                + MarlinConst.doMonitors);
+        logInfo("sun.java2d.renderer.doChecks         = "
+                + MarlinConst.doChecks);
+
+        // logging parameters
+        logInfo("sun.java2d.renderer.useLogger        = "
+                + MarlinConst.useLogger);
+        logInfo("sun.java2d.renderer.logCreateContext = "
+                + MarlinConst.logCreateContext);
+        logInfo("sun.java2d.renderer.logUnsafeMalloc  = "
+                + MarlinConst.logUnsafeMalloc);
+
+        // quality settings
+        logInfo("Renderer settings:");
+        logInfo("CUB_COUNT_LG = " + Renderer.CUB_COUNT_LG);
+        logInfo("CUB_DEC_BND  = " + Renderer.CUB_DEC_BND);
+        logInfo("CUB_INC_BND  = " + Renderer.CUB_INC_BND);
+        logInfo("QUAD_DEC_BND = " + Renderer.QUAD_DEC_BND);
+
+        logInfo("=========================================================="
+                + "=====================");
+    }
+
+    /**
+     * Get a RendererContext instance: it is looked up in the thread local
+     * (TL mode) or polled from the shared queue (CLQ mode); a new context
+     * is created when none is available or when its reference was cleared.
+     * @return RendererContext instance
+     */
+    @SuppressWarnings({"unchecked"})
+    static RendererContext getRendererContext() {
+        final Object ref;
+        if (useThreadLocal) {
+            ref = rdrCtxThreadLocal.get();
+        } else {
+            ref = rdrCtxQueue.poll();
+        }
+
+        RendererContext rdrCtx = null;
+        if (ref != null) {
+            // hard mode stores the context itself, other modes a Reference:
+            if (REF_TYPE == REF_HARD) {
+                rdrCtx = (RendererContext) ref;
+            } else {
+                rdrCtx = ((Reference<RendererContext>) ref).get();
+            }
+        }
+
+        if (rdrCtx == null) {
+            // nothing reusable: allocate a fresh context
+            rdrCtx = RendererContext.createContext();
+            if (useThreadLocal) {
+                // remember it for the next calls made by this thread:
+                rdrCtxThreadLocal.set(rdrCtx.reference);
+            }
+        }
+
+        if (doMonitors) {
+            RendererContext.stats.mon_pre_getAATileGenerator.start();
+        }
+        return rdrCtx;
+    }
+
+    /**
+     * Dispose the given RendererContext instance and make it available
+     * for reuse: in CLQ mode its reference is put back into the queue,
+     * in TL mode nothing more is done here.
+     * @param rdrCtx RendererContext instance
+     */
+    static void returnRendererContext(final RendererContext rdrCtx) {
+        rdrCtx.dispose();
+
+        if (doMonitors) {
+            RendererContext.stats.mon_pre_getAATileGenerator.stop();
+        }
+        if (useThreadLocal) {
+            // TL mode: the queue is not used
+            return;
+        }
+        rdrCtxQueue.offer(rdrCtx.reference);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinTileGenerator.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,465 @@
+/*
+ * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import sun.java2d.pipe.AATileGenerator;
+import jdk.internal.misc.Unsafe;
+
+final class MarlinTileGenerator implements AATileGenerator, MarlinConst {
+
+    private static final int MAX_TILE_ALPHA_SUM = TILE_SIZE * TILE_SIZE
+                                                      * MAX_AA_ALPHA;
+
+    private final Renderer rdr;
+    private final MarlinCache cache;
+    private int x, y;
+
+    MarlinTileGenerator(Renderer r) {
+        this.rdr = r;
+        this.cache = r.cache; // direct handle on the MarlinCache of the given renderer
+    }
+
+    MarlinTileGenerator init() {
+        this.x = cache.bboxX0; // the current tile position restarts at (bboxX0, bboxY0)
+        this.y = cache.bboxY0;
+
+        return this; // fluent API
+    }
+
+    /**
+     * Disposes this tile generator:
+     * clean up before reusing this instance.
+     * The cache and the renderer are disposed, then the RendererContext
+     * of the renderer is handed back to
+     * {@code MarlinRenderingEngine.returnRendererContext()}.
+     */
+    @Override
+    public void dispose() {
+        if (doMonitors) {
+            // called from AAShapePipe.renderTiles() (render tiles end):
+            RendererContext.stats.mon_pipe_renderTiles.stop();
+        }
+        // dispose cache:
+        cache.dispose();
+        // dispose renderer:
+        rdr.dispose();
+        // recycle the RendererContext instance
+        MarlinRenderingEngine.returnRendererContext(rdr.rdrCtx);
+    }
+
+    void getBbox(int bbox[]) {
+        bbox[0] = cache.bboxX0; // layout: [x0, y0, x1, y1], copied from the cache bounding box
+        bbox[1] = cache.bboxY0;
+        bbox[2] = cache.bboxX1;
+        bbox[3] = cache.bboxY1;
+    }
+
+    /**
+     * Gets the width of the tiles that the generator batches output into.
+     * When monitors are enabled ({@code doMonitors}), this call also starts
+     * the {@code mon_pipe_renderTiles} monitor, which is stopped by
+     * {@code dispose()}.
+     * @return the width of the standard alpha tile ({@code TILE_SIZE})
+     */
+    @Override
+    public int getTileWidth() {
+        if (doMonitors) {
+            // called from AAShapePipe.renderTiles() (render tiles start):
+            RendererContext.stats.mon_pipe_renderTiles.start();
+        }
+        return TILE_SIZE;
+    }
+
+    /**
+     * Gets the height of the tiles that the generator batches output into.
+     * @return the height of the standard alpha tile ({@code TILE_SIZE})
+     */
+    @Override
+    public int getTileHeight() {
+        return TILE_SIZE;
+    }
+
+    /**
+     * Gets the typical alpha value that will characterize the current
+     * tile.
+     * The answer is 0x00 when the alpha sum of the tile is zero (no
+     * coverage in any of its pixels), 0xff when that sum equals
+     * {@code MAX_TILE_ALPHA_SUM} (tile completely covered by the path),
+     * and 0x80 in every other (non-trivial coverage) case.
+     * @return 0x00 for no coverage, 0xff for total coverage, or 0x80
+     *         for partial coverage of the tile
+     */
+    @Override
+    public int getTypicalAlpha() {
+        final int alphaSum = cache.alphaSumInTile(x);
+        // Note: for a filled rectangle that does not end on a tile border,
+        // 0xff could still be returned although alphaSum differs from
+        // MAX_TILE_ALPHA_SUM: callers answered with 0xff fill a rectangle
+        // starting at x,y whose width is Math.min(TILE_SIZE, bboxX1-x) and
+        // whose height is min(TILE_SIZE,bboxY1-y), which is what should
+        // happen. Supporting this would however cost 2 Math.min's and
+        // 2 multiplications per tile, instead of just the 2 multiplications
+        // behind MAX_TILE_ALPHA_SUM, which is probably not worth it given
+        // how rare this case is.
+        // Note: this has not been tested; if it turns out to be worth it,
+        // it should be implemented. Perhaps the interface of this method
+        // should then take the width and height of the current tile as
+        // arguments: the caller needs to compute these 2 values anyway,
+        // which would spare the 2 Math.min calls here.
+        final int alpha;
+        if (alphaSum == 0x00) {
+            alpha = 0x00;
+        } else if (alphaSum == MAX_TILE_ALPHA_SUM) {
+            alpha = 0xff;
+        } else {
+            alpha = 0x80;
+        }
+
+        if (doStats) {
+            RendererContext.stats.hist_tile_generator_alpha.add(alpha);
+        }
+        return alpha;
+    }
+
+    /**
+     * Skips the current tile and moves on to the next tile: one tile to
+     * the right, or back to the left bound on the next tile line once the
+     * right bound of the bounding box is reached.
+     * Either this method, or the getAlpha() method should be called
+     * once per tile, but not both.
+     */
+    @Override
+    public void nextTile() {
+        x += TILE_SIZE;
+        if (x < cache.bboxX1) {
+            // still inside the current tile line
+            return;
+        }
+        // wrap around to the beginning of the next tile line:
+        x = cache.bboxX0;
+        y += TILE_SIZE;
+
+        if (y < cache.bboxY1) {
+            // compute for the tile line
+            // [ y; min(y + TILE_SIZE, bboxY1) [
+            this.rdr.endRendering(y);
+        }
+    }
+
+    /**
+     * Gets the alpha coverage values for the current tile.
+     * Either this method, or the nextTile() method should be called
+     * once per tile, but not both.
+     * The work is delegated to the RLE or to the raw decoder depending on
+     * {@code cache.useRLE}; both of them end by calling nextTile().
+     *
+     * @param tile array receiving the alpha values
+     * @param offset index in {@code tile} of the first value written
+     * @param rowstride distance in {@code tile} between two consecutive rows
+     */
+    @Override
+    public void getAlpha(final byte tile[], final int offset,
+                                            final int rowstride)
+    {
+        if (cache.useRLE) {
+            getAlphaRLE(tile, offset, rowstride);
+        } else {
+            getAlphaNoRLE(tile, offset, rowstride);
+        }
+    }
+
+    /**
+     * Gets the alpha coverage values for the current tile.
+     * Either this method, or the nextTile() method should be called
+     * once per tile, but not both.
+     * <p>
+     * Raw variant: for every row, one byte per pixel is read for the
+     * columns [rowAAx0; rowAAx1[ that intersect the tile; all other
+     * columns of the tile row are set to 0. This method ends by calling
+     * nextTile().
+     *
+     * @param tile array receiving the alpha values
+     * @param offset index in {@code tile} of the first value written
+     * @param rowstride distance in {@code tile} between two consecutive rows
+     */
+    private void getAlphaNoRLE(final byte tile[], final int offset,
+                               final int rowstride)
+    {
+        if (doMonitors) {
+            RendererContext.stats.mon_ptg_getAlpha.start();
+        }
+
+        // local vars for performance:
+        final MarlinCache _cache = this.cache;
+        final long[] rowAAChunkIndex = _cache.rowAAChunkIndex;
+        final int[] rowAAx0 = _cache.rowAAx0;
+        final int[] rowAAx1 = _cache.rowAAx1;
+
+        final int x0 = this.x;
+        final int x1 = FloatMath.min(x0 + TILE_SIZE, _cache.bboxX1);
+
+        // note: rows are indexed relative to the current tile line: [0 - TILE_SIZE[
+        final int y0 = 0;
+        final int y1 = FloatMath.min(this.y + TILE_SIZE, _cache.bboxY1) - this.y;
+
+        if (doLogBounds) {
+            MarlinUtils.logInfo("getAlpha = [" + x0 + " ... " + x1
+                                + "[ [" + y0 + " ... " + y1 + "[");
+        }
+
+        final Unsafe _unsafe = OffHeapArray.unsafe;
+        final long SIZE = 1L;
+        final long addr_rowAA = _cache.rowAAChunk.address;
+        long addr;
+
+        final int skipRowPixels = (rowstride - (x1 - x0)); // gap between the end of a tile row and the start of the next one
+
+        int aax0, aax1, end;
+        int idx = offset;
+
+        for (int cy = y0, cx; cy < y1; cy++) {
+            // empty line (default)
+            cx = x0;
+
+            aax1 = rowAAx1[cy]; // exclusive
+
+            // quick check if there is AA data
+            // corresponding to this tile [x0; x1[
+            if (aax1 > x0) {
+                aax0 = rowAAx0[cy]; // inclusive
+
+                if (aax0 < x1) {
+                    // note: cx is the cursor pointer in the tile array
+                    // (left to right)
+                    cx = aax0;
+
+                    // ensure cx >= x0
+                    if (cx <= x0) {
+                        cx = x0;
+                    } else {
+                        // fill line start until first AA pixel rowAA exclusive:
+                        for (end = x0; end < cx; end++) {
+                            tile[idx++] = 0;
+                        }
+                    }
+
+                    // now: cx == max(x0, aax0), i.e. the first tile column backed by AA data
+
+                    // Copy AA data (sum alpha data):
+                    addr = addr_rowAA + rowAAChunkIndex[cy] + (cx - aax0);
+
+                    for (end = (aax1 <= x1) ? aax1 : x1; cx < end; cx++) {
+                        // cx inside tile[x0; x1[ :
+                        tile[idx++] = _unsafe.getByte(addr); // [0..255]
+                        addr += SIZE;
+                    }
+                }
+            }
+
+            // fill line end
+            while (cx < x1) {
+                tile[idx++] = 0;
+                cx++;
+            }
+
+            if (doTrace) {
+                for (int i = idx - (x1 - x0); i < idx; i++) {
+                    System.out.print(hex(tile[i], 2));
+                }
+                System.out.println();
+            }
+
+            idx += skipRowPixels;
+        }
+
+        nextTile();
+
+        if (doMonitors) {
+            RendererContext.stats.mon_ptg_getAlpha.stop();
+        }
+    }
+
+    /**
+     * Gets the alpha coverage values for the current tile.
+     * Either this method, or the nextTile() method should be called
+     * once per tile, but not both.
+     * <p>
+     * Variant used when the cache has RLE enabled: every row is either
+     * raw (one byte per pixel) or run-length encoded, as told by
+     * {@code rowAAEnc}. For RLE rows, {@code rowAAx0} and {@code rowAAPos}
+     * are updated so that decoding resumes from the last entry read when
+     * the next tile of the same line is requested. This method ends by
+     * calling nextTile().
+     *
+     * @param tile array receiving the alpha values
+     * @param offset index in {@code tile} of the first value written
+     * @param rowstride distance in {@code tile} between two consecutive rows
+     */
+    private void getAlphaRLE(final byte tile[], final int offset,
+                             final int rowstride)
+    {
+        if (doMonitors) {
+            RendererContext.stats.mon_ptg_getAlpha.start();
+        }
+
+        // Decode the alpha mask rows: a row is either raw (rowAAEnc[cy] == 0,
+        // 1 byte per pixel) or run-length encoded. The data of a RLE row begins
+        // at rowAAChunkIndex[cy], spans rowAALen[cy] bytes and is made of 4-byte
+        // entries packing (exclusive end x-coordinate << 8) | alpha value.
+
+        // local vars for performance:
+        final MarlinCache _cache = this.cache;
+        final long[] rowAAChunkIndex = _cache.rowAAChunkIndex;
+        final int[] rowAAx0 = _cache.rowAAx0;
+        final int[] rowAAx1 = _cache.rowAAx1;
+        final int[] rowAAEnc = _cache.rowAAEnc;
+        final long[] rowAALen = _cache.rowAALen;
+        final long[] rowAAPos = _cache.rowAAPos;
+
+        final int x0 = this.x;
+        final int x1 = FloatMath.min(x0 + TILE_SIZE, _cache.bboxX1);
+
+        // note: rows are indexed relative to the current tile line: [0 - TILE_SIZE[
+        final int y0 = 0;
+        final int y1 = FloatMath.min(this.y + TILE_SIZE, _cache.bboxY1) - this.y;
+
+        if (doLogBounds) {
+            MarlinUtils.logInfo("getAlpha = [" + x0 + " ... " + x1
+                                + "[ [" + y0 + " ... " + y1 + "[");
+        }
+
+        final Unsafe _unsafe = OffHeapArray.unsafe;
+        final long SIZE_BYTE = 1L;
+        final long SIZE_INT = 4L;
+        final long addr_rowAA = _cache.rowAAChunk.address;
+        long addr, addr_row, last_addr, addr_end;
+
+        final int skipRowPixels = (rowstride - (x1 - x0)); // gap between the end of a tile row and the start of the next one
+
+        int cx, cy, cx1;
+        int rx0, rx1, runLen, end;
+        int packed;
+        byte val;
+        int idx = offset;
+
+        for (cy = y0; cy < y1; cy++) {
+            // empty line (default)
+            cx = x0;
+
+            if (rowAAEnc[cy] == 0) {
+                // Raw encoding:
+
+                final int aax1 = rowAAx1[cy]; // exclusive
+
+                // quick check if there is AA data
+                // corresponding to this tile [x0; x1[
+                if (aax1 > x0) {
+                    final int aax0 = rowAAx0[cy]; // inclusive
+
+                    if (aax0 < x1) {
+                        // note: cx is the cursor pointer in the tile array
+                        // (left to right)
+                        cx = aax0;
+
+                        // ensure cx >= x0
+                        if (cx <= x0) {
+                            cx = x0;
+                        } else {
+                            // fill line start until first AA pixel rowAA exclusive:
+                            for (end = x0; end < cx; end++) {
+                                tile[idx++] = 0;
+                            }
+                        }
+
+                        // now: cx == max(x0, aax0), i.e. the first tile column backed by AA data
+
+                        // Copy AA data (sum alpha data):
+                        addr = addr_rowAA + rowAAChunkIndex[cy] + (cx - aax0);
+
+                        for (end = (aax1 <= x1) ? aax1 : x1; cx < end; cx++) {
+                            tile[idx++] = _unsafe.getByte(addr); // [0..255]
+                            addr += SIZE_BYTE;
+                        }
+                    }
+                }
+            } else {
+                // RLE encoding:
+
+                // quick check if there is AA data
+                // corresponding to this tile [x0; x1[
+                if (rowAAx1[cy] > x0) { // last pixel exclusive
+
+                    cx = rowAAx0[cy]; // inclusive
+                    if (cx > x1) {
+                        cx = x1;
+                    }
+
+                    // fill line start until first AA pixel rowAA exclusive:
+                    for (int i = x0; i < cx; i++) {
+                        tile[idx++] = 0;
+                    }
+
+                    // get row address:
+                    addr_row = addr_rowAA + rowAAChunkIndex[cy];
+                    // get row end address:
+                    addr_end = addr_row + rowAALen[cy]; // coded length
+
+                    // reuse previous iteration position:
+                    addr = addr_row + rowAAPos[cy];
+
+                    last_addr = 0L; // stays 0 when no entry is read for this row
+
+                    while ((cx < x1) && (addr < addr_end)) {
+                        // keep current position:
+                        last_addr = addr;
+
+                        // packed value:
+                        packed = _unsafe.getInt(addr);
+
+                        // last exclusive pixel x-coordinate:
+                        cx1 = (packed >> 8);
+                        // as bytes:
+                        addr += SIZE_INT;
+
+                        rx0 = cx; // the run is clipped to the tile columns [x0; x1[
+                        if (rx0 < x0) {
+                            rx0 = x0;
+                        }
+                        rx1 = cx = cx1;
+                        if (rx1 > x1) {
+                            rx1 = x1;
+                            cx  = x1; // fix last x
+                        }
+                        // adjust runLen:
+                        runLen = rx1 - rx0;
+
+                        // ensure rx1 > rx0:
+                        if (runLen > 0) {
+                            val = (byte)(packed & 0xFF); // [0..255]
+
+                            do {
+                                tile[idx++] = val;
+                            } while (--runLen > 0);
+                        }
+                    }
+
+                    // Update last position in RLE entries:
+                    if (last_addr != 0L) {
+                        // Fix x0:
+                        rowAAx0[cy]  = cx; // inclusive
+                        // Fix position:
+                        rowAAPos[cy] = (last_addr - addr_row); // offset of the last entry read: a later call starts from it again
+                    }
+                }
+            }
+
+            // fill line end
+            while (cx < x1) {
+                tile[idx++] = 0;
+                cx++;
+            }
+
+            if (doTrace) {
+                for (int i = idx - (x1 - x0); i < idx; i++) {
+                    System.out.print(hex(tile[i], 2));
+                }
+                System.out.println();
+            }
+
+            idx += skipRowPixels;
+        }
+
+        nextTile();
+
+        if (doMonitors) {
+            RendererContext.stats.mon_ptg_getAlpha.stop();
+        }
+    }
+
+    static String hex(int v, int d) {
+        String s = Integer.toHexString(v);
+        while (s.length() < d) {
+            s = "0" + s;
+        }
+        return s.substring(0, d);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinUtils.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import jdk.internal.misc.JavaLangAccess;
+import jdk.internal.misc.SharedSecrets;
+
+public final class MarlinUtils {
+    // TODO: use sun.util.logging.PlatformLogger once in JDK9
+    private static final java.util.logging.Logger log;
+
+    static {
+        if (MarlinConst.useLogger) {
+            log = java.util.logging.Logger.getLogger("sun.java2d.marlin");
+        } else {
+            log = null;
+        }
+    }
+
+    private MarlinUtils() {
+        // no-op
+    }
+
+    public static void logInfo(final String msg) {
+        if (MarlinConst.useLogger) {
+            log.info(msg);
+        } else if (MarlinConst.enableLogs) {
+            System.out.print("INFO: ");
+            System.out.println(msg);
+        }
+    }
+
+    public static void logException(final String msg, final Throwable th) {
+        if (MarlinConst.useLogger) {
+//            log.warning(msg, th);
+            log.log(java.util.logging.Level.WARNING, msg, th);
+        } else if (MarlinConst.enableLogs) {
+            System.out.print("WARNING: ");
+            System.out.println(msg);
+            th.printStackTrace(System.err);
+        }
+    }
+
+    // Returns the caller's class and method's name; best effort
+    // if cannot infer, return the logger's name.
+    static String getCallerInfo(String className) {
+        String sourceClassName = null;
+        String sourceMethodName = null;
+
+        JavaLangAccess access = SharedSecrets.getJavaLangAccess();
+        Throwable throwable = new Throwable();
+        int depth = access.getStackTraceDepth(throwable);
+
+        boolean lookingForClassName = true;
+        for (int ix = 0; ix < depth; ix++) {
+            // Calling getStackTraceElement directly prevents the VM
+            // from paying the cost of building the entire stack frame.
+            StackTraceElement frame = access.getStackTraceElement(throwable, ix);
+            String cname = frame.getClassName();
+            if (lookingForClassName) {
+                // Skip all frames until we have found the first frame having the class name.
+                if (cname.equals(className)) {
+                    lookingForClassName = false;
+                }
+            } else {
+                if (!cname.equals(className)) {
+                    // We've found the relevant frame.
+                    sourceClassName = cname;
+                    sourceMethodName = frame.getMethodName();
+                    break;
+                }
+            }
+        }
+
+        if (sourceClassName != null) {
+            return sourceClassName + " " + sourceMethodName;
+        } else {
+            return "unknown";
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MergeSort.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+/**
+ * MergeSort adapted from (OpenJDK 8) java.util.Array.legacyMergeSort(Object[])
+ * to swap two arrays at the same time (x & y)
+ * and use external auxiliary storage for temporary arrays
+ */
+final class MergeSort {
+
+    // insertion sort threshold
+    public static final int INSERTION_SORT_THRESHOLD = 14;
+
+    /**
+     * Modified merge sort:
+     * Input arrays are in both auxX/auxY (sorted: 0 to insertionSortIndex)
+     *                     and x/y (unsorted: insertionSortIndex to toIndex)
+     * Outputs are stored in x/y arrays
+     */
+    static void mergeSortNoCopy(final int[] x, final int[] y,
+                                final int[] auxX, final int[] auxY,
+                                final int toIndex,
+                                final int insertionSortIndex)
+    {
+        if ((toIndex > x.length) || (toIndex > y.length)
+                || (toIndex > auxX.length) || (toIndex > auxY.length)) {
+            // explicit check to avoid bound checks within hot loops (below):
+            throw new ArrayIndexOutOfBoundsException("bad arguments: toIndex="
+                                                     + toIndex);
+        }
+
+        // sort second part only using merge / insertion sort
+        // in auxiliary storage (auxX/auxY)
+        mergeSort(x, y, x, auxX, y, auxY, insertionSortIndex, toIndex);
+
+        // final pass to merge both
+        // Merge sorted parts (auxX/auxY) into x/y arrays
+        if ((insertionSortIndex == 0)
+            || (auxX[insertionSortIndex - 1] <= auxX[insertionSortIndex])) {
+//            System.out.println("mergeSortNoCopy: ordered");
+            // 34 occurences
+            // no initial left part or both sublists (auxX, auxY) are sorted:
+            // copy back data into (x, y):
+            System.arraycopy(auxX, 0, x, 0, toIndex);
+            System.arraycopy(auxY, 0, y, 0, toIndex);
+            return;
+        }
+
+        for (int i = 0, p = 0, q = insertionSortIndex; i < toIndex; i++) {
+            if ((q >= toIndex) || ((p < insertionSortIndex)
+                                   && (auxX[p] <= auxX[q]))) {
+                x[i] = auxX[p];
+                y[i] = auxY[p];
+                p++;
+            } else {
+                x[i] = auxX[q];
+                y[i] = auxY[q];
+                q++;
+            }
+        }
+    }
+
+    /**
+     * Src is the source array that starts at index 0
+     * Dest is the (possibly larger) array destination with a possible offset
+     * low is the index in dest to start sorting
+     * high is the end index in dest to end sorting
+     */
+    private static void mergeSort(final int[] refX, final int[] refY,
+                                  final int[] srcX, final int[] dstX,
+                                  final int[] srcY, final int[] dstY,
+                                  final int low, final int high)
+    {
+        final int length = high - low;
+
+        /*
+         * Tuning parameter: list size at or below which insertion sort
+         * will be used in preference to mergesort.
+         */
+        if (length <= INSERTION_SORT_THRESHOLD) {
+            // Insertion sort on smallest arrays
+            dstX[low] = refX[low];
+            dstY[low] = refY[low];
+
+            for (int i = low + 1, j = low, x, y; i < high; j = i++) {
+                x = refX[i];
+                y = refY[i];
+
+                while (dstX[j] > x) {
+                    // swap element
+                    dstX[j + 1] = dstX[j];
+                    dstY[j + 1] = dstY[j];
+                    if (j-- == low) {
+                        break;
+                    }
+                }
+                dstX[j + 1] = x;
+                dstY[j + 1] = y;
+            }
+            return;
+        }
+
+        // Recursively sort halves of dest into src
+
+        // note: use signed shift (not >>>) for performance
+        // as indices are small enough to exceed Integer.MAX_VALUE
+        final int mid = (low + high) >> 1;
+
+        mergeSort(refX, refY, dstX, srcX, dstY, srcY, low, mid);
+        mergeSort(refX, refY, dstX, srcX, dstY, srcY, mid, high);
+
+        // If arrays are inverted ie all(A) > all(B) do swap A and B to dst
+        if (srcX[high - 1] <= srcX[low]) {
+//            System.out.println("mergeSort: inverse ordered");
+            // 1561 occurences
+            final int left = mid - low;
+            final int right = high - mid;
+            final int off = (left != right) ? 1 : 0;
+            // swap parts:
+            System.arraycopy(srcX, low, dstX, mid + off, left);
+            System.arraycopy(srcX, mid, dstX, low, right);
+            System.arraycopy(srcY, low, dstY, mid + off, left);
+            System.arraycopy(srcY, mid, dstY, low, right);
+            return;
+        }
+
+        // If arrays are already sorted, just copy from src to dest.  This is an
+        // optimization that results in faster sorts for nearly ordered lists.
+        if (srcX[mid - 1] <= srcX[mid]) {
+//            System.out.println("mergeSort: ordered");
+            // 14 occurences
+            System.arraycopy(srcX, low, dstX, low, length);
+            System.arraycopy(srcY, low, dstY, low, length);
+            return;
+        }
+
+        // Merge sorted halves (now in src) into dest
+        for (int i = low, p = low, q = mid; i < high; i++) {
+            if ((q >= high) || ((p < mid) && (srcX[p] <= srcX[q]))) {
+                dstX[i] = srcX[p];
+                dstY[i] = srcY[p];
+                p++;
+            } else {
+                dstX[i] = srcX[q];
+                dstY[i] = srcY[q];
+                q++;
+            }
+        }
+    }
+
+    private MergeSort() {
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/OffHeapArray.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.lang.ref.PhantomReference;
+import java.lang.ref.ReferenceQueue;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.Vector;
+import static sun.java2d.marlin.MarlinConst.logUnsafeMalloc;
+import sun.awt.util.ThreadGroupUtils;
+import jdk.internal.misc.Unsafe;
+
+/**
+ * Block of native (off-heap) memory allocated through Unsafe.
+ *
+ * Every instance is registered with a phantom reference on its parent
+ * object: once the parent is enqueued by the garbage collector, the
+ * "MarlinRenderer Disposer" daemon thread frees the native block.
+ *
+ * @author bourgesl
+ */
+final class OffHeapArray  {
+
+    // unsafe reference
+    static final Unsafe unsafe;
+    // size of int / float
+    static final int SIZE_INT;
+
+    // RendererContext reference queue
+    private static final ReferenceQueue<Object> rdrQueue
+        = new ReferenceQueue<Object>();
+    // reference list (keeps the phantom references strongly reachable
+    // until they are processed by the disposer thread)
+    private static final Vector<OffHeapReference> refList
+        = new Vector<OffHeapReference>(32);
+
+    static {
+        unsafe   = Unsafe.getUnsafe();
+        SIZE_INT = Unsafe.ARRAY_INT_INDEX_SCALE;
+
+        // Mimics Java2D Disposer:
+        AccessController.doPrivileged(
+            (PrivilegedAction<Void>) () -> {
+                /*
+                 * The thread must be a member of a thread group
+                 * which will not get GCed before VM exit.
+                 * Make its parent the top-level thread group.
+                 */
+                final ThreadGroup rootTG
+                    = ThreadGroupUtils.getRootThreadGroup();
+                final Thread t = new Thread(rootTG, new OffHeapDisposer(),
+                    "MarlinRenderer Disposer");
+                t.setContextClassLoader(null);
+                t.setDaemon(true);
+                t.setPriority(Thread.MAX_PRIORITY);
+                t.start();
+                return null;
+            }
+        );
+    }
+
+    /* members */
+    // address of the native block
+    long address;
+    // size of the native block (bytes)
+    long length;
+    // used part; initialized to 0 and maintained by the users of this array
+    int  used;
+
+    /**
+     * Allocates a native block and registers it for automatic disposal.
+     *
+     * @param parent object whose garbage collection triggers the freeing
+     *               of the native block
+     * @param len block size in bytes
+     * @throws OutOfMemoryError if the allocation is refused by the system
+     */
+    OffHeapArray(final Object parent, final long len) {
+        // note: may throw OOME:
+        this.address = unsafe.allocateMemory(len);
+        this.length  = len;
+        this.used    = 0;
+        if (logUnsafeMalloc) {
+            MarlinUtils.logInfo(System.currentTimeMillis()
+                                + ": OffHeapArray.allocateMemory = "
+                                + len + " to addr = " + this.address);
+        }
+
+        // Create the phantom reference to ensure freeing off-heap memory:
+        refList.add(new OffHeapReference(parent, this));
+    }
+
+    /*
+     * As realloc may change the address, updating address is MANDATORY
+     * @param len new array length
+     * @throws OutOfMemoryError if the allocation is refused by the system
+     */
+    void resize(final long len) {
+        // note: may throw OOME:
+        this.address = unsafe.reallocateMemory(address, len);
+        this.length  = len;
+        if (logUnsafeMalloc) {
+            MarlinUtils.logInfo(System.currentTimeMillis()
+                                + ": OffHeapArray.reallocateMemory = "
+                                + len + " to addr = " + this.address);
+        }
+    }
+
+    /*
+     * Frees the native block: address must not be used anymore
+     */
+    void free() {
+        unsafe.freeMemory(this.address);
+        if (logUnsafeMalloc) {
+            // same class label as the allocate / reallocate messages:
+            MarlinUtils.logInfo(System.currentTimeMillis()
+                                + ": OffHeapArray.free = "
+                                + this.length
+                                + " at addr = " + this.address);
+        }
+    }
+
+    /*
+     * Sets every byte of the native block to the given value
+     * @param val byte value
+     */
+    void fill(final byte val) {
+        unsafe.setMemory(this.address, this.length, val);
+    }
+
+    /*
+     * Phantom reference on the parent object which keeps
+     * the off-heap array to free once the parent is collected
+     */
+    static final class OffHeapReference extends PhantomReference<Object> {
+
+        private final OffHeapArray array;
+
+        OffHeapReference(final Object parent, final OffHeapArray edges) {
+            super(parent, rdrQueue);
+            this.array = edges;
+        }
+
+        void dispose() {
+            // free off-heap blocks
+            this.array.free();
+        }
+    }
+
+    /*
+     * Task of the disposer thread: waits for enqueued references
+     * and frees their off-heap arrays until the thread is interrupted
+     */
+    static final class OffHeapDisposer implements Runnable {
+        @Override
+        public void run() {
+            final Thread currentThread = Thread.currentThread();
+            OffHeapReference ref;
+
+            // check interrupted:
+            for (; !currentThread.isInterrupted();) {
+                try {
+                    ref = (OffHeapReference)rdrQueue.remove();
+                    ref.dispose();
+
+                    refList.remove(ref);
+
+                } catch (InterruptedException ie) {
+                    MarlinUtils.logException("OffHeapDisposer interrupted:",
+                                             ie);
+                    // remove() cleared the interrupt status when throwing:
+                    // restore it so that the loop condition sees the
+                    // interruption and this thread terminates
+                    currentThread.interrupt();
+                }
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Renderer.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,1546 @@
+/*
+ * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.util.Arrays;
+import sun.awt.geom.PathConsumer2D;
+import static sun.java2d.marlin.OffHeapArray.SIZE_INT;
+import jdk.internal.misc.Unsafe;
+
+final class Renderer implements PathConsumer2D, MarlinConst {
+
+    static final boolean DISABLE_RENDER = false;
+
+    static final boolean ENABLE_BLOCK_FLAGS = MarlinProperties.isUseTileFlags();
+    static final boolean ENABLE_BLOCK_FLAGS_HEURISTICS = MarlinProperties.isUseTileFlagsWithHeuristics();
+
+    private static final int ALL_BUT_LSB = 0xfffffffe;
+    private static final int ERR_STEP_MAX = 0x7fffffff; // = 2^31 - 1
+
+    private static final double POWER_2_TO_32 = FloatMath.powerOfTwoD(32);
+
+    // use float to make tosubpix methods faster (no int to float conversion)
+    public static final float f_SUBPIXEL_POSITIONS_X
+        = (float) SUBPIXEL_POSITIONS_X;
+    public static final float f_SUBPIXEL_POSITIONS_Y
+        = (float) SUBPIXEL_POSITIONS_Y;
+    public static final int SUBPIXEL_MASK_X = SUBPIXEL_POSITIONS_X - 1;
+    public static final int SUBPIXEL_MASK_Y = SUBPIXEL_POSITIONS_Y - 1;
+
+    // number of subpixels corresponding to a tile line
+    private static final int SUBPIXEL_TILE
+        = TILE_SIZE << SUBPIXEL_LG_POSITIONS_Y;
+
+    // 2048 (pixelSize) pixels (height) x 8 subpixels = 64K
+    static final int INITIAL_BUCKET_ARRAY
+        = INITIAL_PIXEL_DIM * SUBPIXEL_POSITIONS_Y;
+
+    public static final int WIND_EVEN_ODD = 0;
+    public static final int WIND_NON_ZERO = 1;
+
+    // common to all types of input path segments.
+    // OFFSET as bytes
+    // only integer values:
+    public static final long OFF_CURX_OR  = 0;
+    public static final long OFF_ERROR    = OFF_CURX_OR  + SIZE_INT;
+    public static final long OFF_BUMP_X   = OFF_ERROR    + SIZE_INT;
+    public static final long OFF_BUMP_ERR = OFF_BUMP_X   + SIZE_INT;
+    public static final long OFF_NEXT     = OFF_BUMP_ERR + SIZE_INT;
+    public static final long OFF_YMAX     = OFF_NEXT     + SIZE_INT;
+
+    // size of one edge in bytes
+    public static final int SIZEOF_EDGE_BYTES = (int)(OFF_YMAX + SIZE_INT);
+
+    // curve break into lines
+    // cubic error in subpixels to decrement step
+    private static final float CUB_DEC_ERR_SUBPIX
+        = 2.5f * (NORM_SUBPIXELS / 8f); // 2.5 subpixel for typical 8x8 subpixels
+    // cubic error in subpixels to increment step
+    private static final float CUB_INC_ERR_SUBPIX
+        = 1f * (NORM_SUBPIXELS / 8f); // 1 subpixel for typical 8x8 subpixels
+
+    // cubic bound length to decrement step = 8 * error in subpixels
+    // pisces: 20 / 8
+    // openjfx pisces: 8 / 3.2
+    // multiply by 8 = error scale factor:
+    public static final float CUB_DEC_BND
+        = 8f * CUB_DEC_ERR_SUBPIX; // 20f means 2.5 subpixel error
+    // cubic bound length to increment step = 8 * error in subpixels
+    public static final float CUB_INC_BND
+        = 8f * CUB_INC_ERR_SUBPIX; // 8f means 1 subpixel error
+
+    // cubic countlg
+    public static final int CUB_COUNT_LG = 2;
+    // cubic count = 2^countlg
+    private static final int CUB_COUNT = 1 << CUB_COUNT_LG;
+    // cubic count^2 = 4^countlg
+    private static final int CUB_COUNT_2 = 1 << (2 * CUB_COUNT_LG);
+    // cubic count^3 = 8^countlg
+    private static final int CUB_COUNT_3 = 1 << (3 * CUB_COUNT_LG);
+    // cubic dt = 1 / count
+    private static final float CUB_INV_COUNT = 1f / CUB_COUNT;
+    // cubic dt^2 = 1 / count^2 = 1 / 4^countlg
+    private static final float CUB_INV_COUNT_2 = 1f / CUB_COUNT_2;
+    // cubic dt^3 = 1 / count^3 = 1 / 8^countlg
+    private static final float CUB_INV_COUNT_3 = 1f / CUB_COUNT_3;
+
+    // quad break into lines
+    // quadratic error in subpixels
+    private static final float QUAD_DEC_ERR_SUBPIX
+        = 1f * (NORM_SUBPIXELS / 8f); // 1 subpixel for typical 8x8 subpixels
+
+    // quadratic bound length to decrement step = 8 * error in subpixels
+    // pisces and openjfx pisces: 32
+    public static final float QUAD_DEC_BND
+        = 8f * QUAD_DEC_ERR_SUBPIX; // 8f means 1 subpixel error
+
+//////////////////////////////////////////////////////////////////////////////
+//  SCAN LINE
+//////////////////////////////////////////////////////////////////////////////
+    // crossings ie subpixel edge x coordinates
+    private int[] crossings;
+    // auxiliary storage for crossings (merge sort)
+    private int[] aux_crossings;
+
+    // indices into the segment pointer lists. They indicate the "active"
+    // sublist in the segment lists (the portion of the list that contains
+    // all the segments that cross the next scan line).
+    private int edgeCount;
+    private int[] edgePtrs;
+    // auxiliary storage for edge pointers (merge sort)
+    private int[] aux_edgePtrs;
+
+    // max used for both edgePtrs and crossings (stats only)
+    private int activeEdgeMaxUsed;
+
+    // per-thread initial arrays (large enough to satisfy most usages) (1024)
+    private final int[] crossings_initial = new int[INITIAL_SMALL_ARRAY]; // 4K
+    // +1 to avoid recycling in Helpers.widenArray()
+    private final int[] edgePtrs_initial  = new int[INITIAL_SMALL_ARRAY + 1]; // 4K
+    // merge sort initial arrays (large enough to satisfy most usages) (1024)
+    private final int[] aux_crossings_initial = new int[INITIAL_SMALL_ARRAY]; // 4K
+    // +1 to avoid recycling in Helpers.widenArray()
+    private final int[] aux_edgePtrs_initial  = new int[INITIAL_SMALL_ARRAY + 1]; // 4K
+
+//////////////////////////////////////////////////////////////////////////////
+//  EDGE LIST
+//////////////////////////////////////////////////////////////////////////////
+    private float edgeMinY = Float.POSITIVE_INFINITY;
+    private float edgeMaxY = Float.NEGATIVE_INFINITY;
+    private float edgeMinX = Float.POSITIVE_INFINITY;
+    private float edgeMaxX = Float.NEGATIVE_INFINITY;
+
+    // edges [floats|ints] stored in off-heap memory
+    private final OffHeapArray edges;
+
+    private int[] edgeBuckets;
+    private int[] edgeBucketCounts; // 2*newedges + (1 if pruning needed)
+    // used range for edgeBuckets / edgeBucketCounts
+    private int buckets_minY;
+    private int buckets_maxY;
+    // sum of each edge delta Y (subpixels)
+    private int edgeSumDeltaY;
+
+    // +1 to avoid recycling in Helpers.widenArray()
+    private final int[] edgeBuckets_initial
+        = new int[INITIAL_BUCKET_ARRAY + 1]; // 64K
+    private final int[] edgeBucketCounts_initial
+        = new int[INITIAL_BUCKET_ARRAY + 1]; // 64K
+
+    // Flattens using adaptive forward differencing. This only carries out
+    // one iteration of the AFD loop. All it does is update AFD variables (i.e.
+    // X0, Y0, D*[X|Y], COUNT; not variables used for computing scanline crossings).
+    private void quadBreakIntoLinesAndAdd(float x0, float y0,
+                                          final Curve c,
+                                          final float x2, final float y2)
+    {
+        int count = 1; // dt = 1 / count
+
+        // maximum(ddX|Y) = norm(dbx, dby) * dt^2 (= 1)
+        float maxDD = FloatMath.max(Math.abs(c.dbx), Math.abs(c.dby));
+
+        final float _DEC_BND = QUAD_DEC_BND;
+
+        while (maxDD >= _DEC_BND) {
+            // divide step by half:
+            maxDD /= 4f; // error divided by 2^2 = 4
+
+            count <<= 1;
+            if (doStats) {
+                RendererContext.stats.stat_rdr_quadBreak_dec.add(count);
+            }
+        }
+
+        int nL = 0; // line count
+        if (count > 1) {
+            final float icount = 1f / count; // dt
+            final float icount2 = icount * icount; // dt^2
+
+            final float ddx = c.dbx * icount2;
+            final float ddy = c.dby * icount2;
+            float dx = c.bx * icount2 + c.cx * icount;
+            float dy = c.by * icount2 + c.cy * icount;
+
+            float x1, y1;
+
+            while (--count > 0) {
+                x1 = x0 + dx;
+                dx += ddx;
+                y1 = y0 + dy;
+                dy += ddy;
+
+                addLine(x0, y0, x1, y1);
+
+                if (doStats) { nL++; }
+                x0 = x1;
+                y0 = y1;
+            }
+        }
+        addLine(x0, y0, x2, y2);
+
+        if (doStats) {
+            RendererContext.stats.stat_rdr_quadBreak.add(nL + 1);
+        }
+    }
+
+    // Flattens a cubic curve into line segments using adaptive forward
+    // differencing (AFD) and adds each segment through addLine():
+    // the step is halved while the second differences are too large
+    // (>= CUB_DEC_BND) and doubled while the first differences are small
+    // enough (<= CUB_INC_BND).
+    //
+    // x0, y0 and x3,y3 are the endpoints of the curve. We could compute these
+    // using c.xat(0),c.yat(0) and c.xat(1),c.yat(1), but this might introduce
+    // numerical errors, and our callers already have the exact values.
+    // Another alternative would be to pass all the control points, and call
+    // c.set here, but then too many numbers are passed around.
+    private void curveBreakIntoLinesAndAdd(float x0, float y0,
+                                           final Curve c,
+                                           final float x3, final float y3)
+    {
+        // remaining number of steps at the current step size (dt = 1 / count)
+        int count           = CUB_COUNT;
+        final float icount  = CUB_INV_COUNT;   // dt
+        final float icount2 = CUB_INV_COUNT_2; // dt^2
+        final float icount3 = CUB_INV_COUNT_3; // dt^3
+
+        // the dx and dy refer to forward differencing variables, not the last
+        // coefficients of the "points" polynomial
+        // (dx, ddx, dddx = first, second and third differences)
+        float dddx, dddy, ddx, ddy, dx, dy;
+        dddx = 2f * c.dax * icount3;
+        dddy = 2f * c.day * icount3;
+        ddx = dddx + c.dbx * icount2;
+        ddy = dddy + c.dby * icount2;
+        dx = c.ax * icount3 + c.bx * icount2 + c.cx * icount;
+        dy = c.ay * icount3 + c.by * icount2 + c.cy * icount;
+
+        // we use x0, y0 to walk the line
+        float x1 = x0, y1 = y0;
+        int nL = 0; // line count (stats only)
+
+        final float _DEC_BND = CUB_DEC_BND;
+        final float _INC_BND = CUB_INC_BND;
+
+        // one segment is added per iteration:
+        while (count > 0) {
+            // divide step by half:
+            // (the differences are rescaled for the halved step and
+            // twice as many steps remain)
+            while (Math.abs(ddx) >= _DEC_BND || Math.abs(ddy) >= _DEC_BND) {
+                dddx /= 8f;
+                dddy /= 8f;
+                ddx = ddx/4f - dddx;
+                ddy = ddy/4f - dddy;
+                dx = (dx - ddx) / 2f;
+                dy = (dy - ddy) / 2f;
+
+                count <<= 1;
+                if (doStats) {
+                    RendererContext.stats.stat_rdr_curveBreak_dec.add(count);
+                }
+            }
+
+            // double step:
+            // (exact inverse of the halving transformation above)
+            // TODO: why use first derivative dX|Y instead of second ddX|Y ?
+            // both scale changes should use speed or acceleration to have the same metric.
+
+            // can only do this on even "count" values, because we must divide count by 2
+            while (count % 2 == 0
+                   && Math.abs(dx) <= _INC_BND && Math.abs(dy) <= _INC_BND)
+            {
+                dx = 2f * dx + ddx;
+                dy = 2f * dy + ddy;
+                ddx = 4f * (ddx + dddx);
+                ddy = 4f * (ddy + dddy);
+                dddx *= 8f;
+                dddy *= 8f;
+
+                count >>= 1;
+                if (doStats) {
+                    RendererContext.stats.stat_rdr_curveBreak_inc.add(count);
+                }
+            }
+            if (--count > 0) {
+                // intermediate point: advance the forward differences
+                x1 += dx;
+                dx += ddx;
+                ddx += dddx;
+                y1 += dy;
+                dy += ddy;
+                ddy += dddy;
+            } else {
+                // last step: use the exact end point given by the caller
+                // to avoid accumulated numerical errors
+                x1 = x3;
+                y1 = y3;
+            }
+
+            addLine(x0, y0, x1, y1);
+
+            if (doStats) { nL++; }
+            // the end of this segment is the start of the next one
+            x0 = x1;
+            y0 = y1;
+        }
+        if (doStats) {
+            RendererContext.stats.stat_rdr_curveBreak.add(nL);
+        }
+    }
+    /**
+     * Adds the line segment (x1, y1) - (x2, y2) to the edge list.
+     * <p>
+     * The end points are swapped if needed so that the stored edge always runs
+     * towards increasing y; the original direction is remembered in the
+     * orientation bit. A segment that covers no scanline within
+     * [boundsMinY; boundsMaxY[ is dropped. Otherwise the edge bounding box
+     * (edgeMinX/Y, edgeMaxX/Y) is updated, the off-heap edge array is grown
+     * when it is full, one edge record (six ints) is written and the record
+     * is linked at the head of the bucket of its first scanline.
+     *
+     * @param x1 x coordinate of the first point (subpixel space, see tosubpixx)
+     * @param y1 y coordinate of the first point (subpixel space, see tosubpixy)
+     * @param x2 x coordinate of the second point (subpixel space, see tosubpixx)
+     * @param y2 y coordinate of the second point (subpixel space, see tosubpixy)
+     */
+    private void addLine(float x1, float y1, float x2, float y2) {
+        if (doMonitors) {
+            RendererContext.stats.mon_rdr_addLine.start();
+        }
+        if (doStats) {
+            RendererContext.stats.stat_rdr_addLine.add(1);
+        }
+        int or = 1; // orientation of the line. 1 if y increases, 0 otherwise.
+        if (y2 < y1) { // swap the end points so that y1 <= y2
+            or = 0;
+            float tmp = y2;
+            y2 = y1;
+            y1 = tmp;
+            tmp = x2;
+            x2 = x1;
+            x1 = tmp;
+        }
+
+        // convert subpixel coordinates (float) into pixel positions (int)
+
+        // The index of the pixel that holds the next HPC is at ceil(trueY - 0.5)
+        // Since y1 and y2 are biased by -0.5 in tosubpixy(), this is simply
+        // ceil(y1) or ceil(y2)
+        // upper integer (inclusive), clipped to boundsMinY
+        final int firstCrossing = FloatMath.max(FloatMath.ceil_int(y1), boundsMinY);
+
+        // note: use boundsMaxY (last Y exclusive) to compute correct coverage
+        // upper integer (exclusive), clipped to boundsMaxY
+        final int lastCrossing  = FloatMath.min(FloatMath.ceil_int(y2), boundsMaxY);
+
+        /* skip horizontal lines in pixel space and clip edges
+           out of y range [boundsMinY; boundsMaxY] */
+        if (firstCrossing >= lastCrossing) {
+            if (doMonitors) {
+                RendererContext.stats.mon_rdr_addLine.stop();
+            }
+            if (doStats) {
+                RendererContext.stats.stat_rdr_addLine_skip.add(1);
+            }
+            return;
+        }
+        // edge min/max X/Y are in subpixel space (inclusive)
+        if (y1 < edgeMinY) {
+            edgeMinY = y1;
+        }
+        if (y2 > edgeMaxY) {
+            edgeMaxY = y2;
+        }
+
+        // Use double-precision for improved accuracy:
+        final double x1d   = x1;
+        final double y1d   = y1;
+        final double slope = (x2 - x1d) / (y2 - y1d);
+
+        if (slope >= 0.0) { // <==> x1 <= x2 (y1 < y2 is guaranteed at this point)
+            if (x1 < edgeMinX) {
+                edgeMinX = x1;
+            }
+            if (x2 > edgeMaxX) {
+                edgeMaxX = x2;
+            }
+        } else {
+            if (x2 < edgeMinX) {
+                edgeMinX = x2;
+            }
+            if (x1 > edgeMaxX) {
+                edgeMaxX = x1;
+            }
+        }
+
+        // local variables for performance:
+        final int _SIZEOF_EDGE_BYTES = SIZEOF_EDGE_BYTES;
+
+        final OffHeapArray _edges = edges;
+
+        // get free pointer (ie length in bytes)
+        final int edgePtr = _edges.used;
+
+        // use subtraction to avoid integer overflow:
+        if (_edges.length - edgePtr < _SIZEOF_EDGE_BYTES) {
+            // suppose _edges.length > _SIZEOF_EDGE_BYTES
+            // so doubling size is enough to add needed bytes
+            // note: throw IOOB if neededSize > 2Gb:
+            final long edgeNewSize = ArrayCache.getNewLargeSize(_edges.length,
+                                        edgePtr + _SIZEOF_EDGE_BYTES);
+
+            if (doStats) {
+                RendererContext.stats.stat_rdr_edges_resizes.add(edgeNewSize);
+            }
+            _edges.resize(edgeNewSize);
+        }
+
+
+        final Unsafe _unsafe = OffHeapArray.unsafe;
+        final long SIZE_INT = 4L;
+        long addr   = _edges.address + edgePtr;
+
+        // The x value must be bumped up to its position at the next HPC we will evaluate.
+        // "firstcrossing" is the (sub)pixel number where the next crossing occurs
+        // thus, the actual coordinate of the next HPC is "firstcrossing + 0.5"
+        // so the Y distance we cover is "firstcrossing + 0.5 - trueY".
+        // Note that since y1 (and y2) are already biased by -0.5 in tosubpixy(), we have
+        // y1 = trueY - 0.5
+        // trueY = y1 + 0.5
+        // firstcrossing + 0.5 - trueY = firstcrossing + 0.5 - (y1 + 0.5)
+        //                             = firstcrossing - y1
+        // The x coordinate at that HPC is then:
+        // x1_intercept = x1 + (firstcrossing - y1) * slope
+        // The next VPC is then given by:
+        // VPC index = ceil(x1_intercept - 0.5), or alternately
+        // VPC index = floor(x1_intercept - 0.5 + 1 - epsilon)
+        // epsilon is hard to pin down in floating point, but easy in fixed point, so if
+        // we convert to fixed point then these operations get easier:
+        // long x1_fixed = x1_intercept * 2^32;  (fixed point 32.32 format)
+        // curx = next VPC = fixed_floor(x1_fixed - 2^31 + 2^32 - 1)
+        //                 = fixed_floor(x1_fixed + 2^31 - 1)
+        //                 = fixed_floor(x1_fixed + 0x7fffffff)
+        // and error       = fixed_fract(x1_fixed + 0x7fffffff)
+        final double x1_intercept = x1d + (firstCrossing - y1d) * slope;
+
+        // inlined scalb(x1_intercept, 32):
+        final long x1_fixed_biased = ((long) (POWER_2_TO_32 * x1_intercept))
+                                     + 0x7fffffffL;
+        // curx: integer part of the biased x, shifted left by one bit;
+        // last bit corresponds to the orientation
+        _unsafe.putInt(addr, (((int) (x1_fixed_biased >> 31L)) & ALL_BUT_LSB) | or);
+        addr += SIZE_INT;
+        _unsafe.putInt(addr,  ((int)  x1_fixed_biased) >>> 1); // error: fraction of x
+        addr += SIZE_INT;
+
+        // inlined scalb(slope, 32):
+        final long slope_fixed = (long) (POWER_2_TO_32 * slope);
+
+        // integer part of the slope; last bit set to 0 to keep orientation:
+        _unsafe.putInt(addr, (((int) (slope_fixed >> 31L)) & ALL_BUT_LSB));
+        addr += SIZE_INT;
+        _unsafe.putInt(addr,  ((int)  slope_fixed) >>> 1); // fraction of the slope
+        addr += SIZE_INT;
+
+        final int[] _edgeBuckets      = edgeBuckets;
+        final int[] _edgeBucketCounts = edgeBucketCounts;
+
+        final int _boundsMinY = boundsMinY;
+
+        // each bucket is a linked list. this method adds ptr to the
+        // start of the "bucket"th linked list.
+        final int bucketIdx = firstCrossing - _boundsMinY;
+
+        // next edge: previous head of the bucket list (pointer from bucket)
+        _unsafe.putInt(addr, _edgeBuckets[bucketIdx]);
+        addr += SIZE_INT;
+        // y max (exclusive): _endRendering() keeps an edge only while ymax > y
+        _unsafe.putInt(addr,  lastCrossing);
+
+        // Update buckets:
+        // directly the edge struct "pointer"
+        _edgeBuckets[bucketIdx]       = edgePtr;
+        _edgeBucketCounts[bucketIdx] += 2; // 1 << 1: the count sits above the 'edge end' bit
+        // last bit means edge end
+        _edgeBucketCounts[lastCrossing - _boundsMinY] |= 0x1;
+
+        // update sum of delta Y (subpixels):
+        edgeSumDeltaY += (lastCrossing - firstCrossing);
+
+        // update free pointer (ie length in bytes)
+        _edges.used += _SIZEOF_EDGE_BYTES;
+
+        if (doMonitors) {
+            RendererContext.stats.mon_rdr_addLine.stop();
+        }
+    }
+
+// END EDGE LIST
+//////////////////////////////////////////////////////////////////////////////
+
+    // Cache to store the RLE-encoded coverage mask of the current primitive
+    final MarlinCache cache;
+
+    // Bounds of the drawing region, at subpixel precision (half-open: min <= v < max, see init)
+    private int boundsMinX, boundsMinY, boundsMaxX, boundsMaxY;
+
+    // Current winding rule (set by init)
+    private int windingRule;
+
+    // Current drawing position (subpixel space), i.e., final point of last segment
+    private float x0, y0;
+
+    // Position of most recent 'moveTo' command (pixel coordinates, as received)
+    private float pix_sx0, pix_sy0;
+
+    // per-thread renderer context
+    final RendererContext rdrCtx;
+    // curve instance taken from the renderer context; overwritten ('dirty') by each curveTo / quadTo
+    private final Curve curve;
+
+    /**
+     * Creates a renderer bound to the given renderer context.
+     * The off-heap edge storage is allocated at its initial capacity and every
+     * working array starts as the matching *_initial array of this instance.
+     *
+     * @param rdrCtx per-thread renderer context providing the shared cache
+     *               and curve instances
+     */
+    Renderer(final RendererContext rdrCtx) {
+        // shared state coming from the context:
+        this.rdrCtx = rdrCtx;
+        this.cache  = rdrCtx.cache;
+        this.curve  = rdrCtx.curve;
+
+        // off-heap edge storage:
+        this.edges = new OffHeapArray(rdrCtx, INITIAL_EDGES_CAPACITY); // 96K
+
+        // edge buckets:
+        this.edgeBuckets      = edgeBuckets_initial;
+        this.edgeBucketCounts = edgeBucketCounts_initial;
+
+        // alpha accumulation line:
+        this.alphaLine = alphaLine_initial;
+
+        // ScanLine: crossings and active edge pointers with their
+        // merge sort auxiliary storage
+        this.crossings     = crossings_initial;
+        this.aux_crossings = aux_crossings_initial;
+        this.edgePtrs      = edgePtrs_initial;
+        this.aux_edgePtrs  = aux_edgePtrs_initial;
+
+        // nothing is in use yet:
+        this.edgeCount = 0;
+        this.activeEdgeMaxUsed = 0;
+    }
+
+    /**
+     * Prepares this renderer for a new shape: stores the winding rule,
+     * converts the pixel bounds into subpixel bounds, obtains larger edge
+     * bucket arrays from the renderer context when the initial ones are too
+     * small and resets the edge bounding box and the edge counters.
+     *
+     * @param pix_boundsX left pixel coordinate of the drawing region
+     * @param pix_boundsY top pixel coordinate of the drawing region
+     * @param pix_boundsWidth width of the drawing region in pixels
+     * @param pix_boundsHeight height of the drawing region in pixels
+     * @param windingRule winding rule to use
+     * @return this renderer (fluent API)
+     */
+    Renderer init(final int pix_boundsX, final int pix_boundsY,
+                  final int pix_boundsWidth, final int pix_boundsHeight,
+                  final int windingRule) {
+
+        this.windingRule = windingRule;
+
+        // exclusive upper pixel bounds:
+        final int pix_boundsMaxX = pix_boundsX + pix_boundsWidth;
+        final int pix_boundsMaxY = pix_boundsY + pix_boundsHeight;
+
+        // subpixel bounds are half-open intervals:
+        // minX <= x < maxX and minY <= y < maxY
+        this.boundsMinX = pix_boundsX    << SUBPIXEL_LG_POSITIONS_X;
+        this.boundsMaxX = pix_boundsMaxX << SUBPIXEL_LG_POSITIONS_X;
+        this.boundsMinY = pix_boundsY    << SUBPIXEL_LG_POSITIONS_Y;
+        this.boundsMaxY = pix_boundsMaxY << SUBPIXEL_LG_POSITIONS_Y;
+
+        if (doLogBounds) {
+            MarlinUtils.logInfo("boundsXY = ["
+                                + boundsMinX + " ... " + boundsMaxX
+                                + "[ ["
+                                + boundsMinY + " ... " + boundsMaxY
+                                + "[");
+        }
+
+        // one bucket per subpixel row plus one extra slot:
+        // addLine may flag an edge end at row boundsMaxY in edgeBucketCounts
+        final int bucketsNeeded = boundsMaxY - boundsMinY + 1;
+
+        if (bucketsNeeded > INITIAL_BUCKET_ARRAY) {
+            // initial arrays are too small: get larger ones from the context
+            if (doStats) {
+                RendererContext.stats.stat_array_renderer_edgeBuckets
+                    .add(bucketsNeeded);
+                RendererContext.stats.stat_array_renderer_edgeBucketCounts
+                    .add(bucketsNeeded);
+            }
+            this.edgeBuckets      = rdrCtx.getIntArray(bucketsNeeded);
+            this.edgeBucketCounts = rdrCtx.getIntArray(bucketsNeeded);
+        }
+
+        // empty edge bounding box:
+        this.edgeMinX = Float.POSITIVE_INFINITY;
+        this.edgeMinY = Float.POSITIVE_INFINITY;
+        this.edgeMaxX = Float.NEGATIVE_INFINITY;
+        this.edgeMaxY = Float.NEGATIVE_INFINITY;
+
+        // reset used marks and accumulators:
+        this.edges.used = 0;
+        this.edgeCount = 0;
+        this.activeEdgeMaxUsed = 0;
+        this.edgeSumDeltaY = 0;
+
+        return this; // fluent API
+    }
+
+    /**
+     * Disposes this renderer and recycle it clean up before reusing this instance
+     */
+    void dispose() {
+        if (doStats) {
+            RendererContext.stats.stat_rdr_activeEdges.add(activeEdgeMaxUsed);
+            RendererContext.stats.stat_rdr_edges.add(edges.used);
+            RendererContext.stats.stat_rdr_edges_count
+                .add(edges.used / SIZEOF_EDGE_BYTES);
+        }
+        if (doCleanDirty) {
+            // Force zero-fill dirty arrays:
+            Arrays.fill(crossings,     0);
+            Arrays.fill(aux_crossings, 0);
+            Arrays.fill(edgePtrs,      0);
+            Arrays.fill(aux_edgePtrs,  0);
+        }
+        // Return arrays:
+        if (crossings != crossings_initial) {
+            rdrCtx.putDirtyIntArray(crossings);
+            crossings = crossings_initial;
+            if (aux_crossings != aux_crossings_initial) {
+                rdrCtx.putDirtyIntArray(aux_crossings);
+                aux_crossings = aux_crossings_initial;
+            }
+        }
+        if (edgePtrs != edgePtrs_initial) {
+            rdrCtx.putDirtyIntArray(edgePtrs);
+            edgePtrs = edgePtrs_initial;
+            if (aux_edgePtrs != aux_edgePtrs_initial) {
+                rdrCtx.putDirtyIntArray(aux_edgePtrs);
+                aux_edgePtrs = aux_edgePtrs_initial;
+            }
+        }
+        if (alphaLine != alphaLine_initial) {
+            rdrCtx.putIntArray(alphaLine, 0, 0); // already zero filled
+            alphaLine = alphaLine_initial;
+        }
+        if (blkFlags != blkFlags_initial) {
+            rdrCtx.putIntArray(blkFlags, 0, 0); // already zero filled
+            blkFlags = blkFlags_initial;
+        }
+
+        if (edgeMinY != Float.POSITIVE_INFINITY) {
+            // clear used part
+            if (edgeBuckets == edgeBuckets_initial) {
+                // fill only used part
+                IntArrayCache.fill(edgeBuckets,      buckets_minY,
+                                                     buckets_maxY,     0);
+                IntArrayCache.fill(edgeBucketCounts, buckets_minY,
+                                                     buckets_maxY + 1, 0);
+            } else {
+                 // clear only used part
+                rdrCtx.putIntArray(edgeBuckets,      buckets_minY,
+                                                     buckets_maxY);
+                edgeBuckets = edgeBuckets_initial;
+
+                rdrCtx.putIntArray(edgeBucketCounts, buckets_minY,
+                                                     buckets_maxY + 1);
+                edgeBucketCounts = edgeBucketCounts_initial;
+            }
+        } else if (edgeBuckets != edgeBuckets_initial) {
+            // unused arrays
+            rdrCtx.putIntArray(edgeBuckets, 0, 0);
+            edgeBuckets = edgeBuckets_initial;
+
+            rdrCtx.putIntArray(edgeBucketCounts, 0, 0);
+            edgeBucketCounts = edgeBucketCounts_initial;
+        }
+
+        // At last: resize back off-heap edges to initial size
+        if (edges.length != INITIAL_EDGES_CAPACITY) {
+            // note: may throw OOME:
+            edges.resize(INITIAL_EDGES_CAPACITY);
+        }
+        if (doCleanDirty) {
+            // Force zero-fill dirty arrays:
+            edges.fill(BYTE_0);
+        }
+        if (doMonitors) {
+            RendererContext.stats.mon_rdr_endRendering.stop();
+        }
+    }
+
+    /**
+     * Converts a pixel x coordinate into subpixel space.
+     *
+     * @param pix_x x coordinate in pixels
+     * @return pix_x scaled by f_SUBPIXEL_POSITIONS_X
+     */
+    private static float tosubpixx(final float pix_x) {
+        return pix_x * f_SUBPIXEL_POSITIONS_X;
+    }
+
+    /**
+     * Converts a pixel y coordinate into subpixel space, biased by -0.5.
+     *
+     * @param pix_y y coordinate in pixels
+     * @return pix_y scaled by f_SUBPIXEL_POSITIONS_Y, minus 0.5
+     */
+    private static float tosubpixy(final float pix_y) {
+        // the -0.5 bias lets callers use ceil(y) as a fast ceil(y - 0.5):
+        return pix_y * f_SUBPIXEL_POSITIONS_Y - 0.5f;
+    }
+
+    /**
+     * Closes the current sub-path and starts a new one at the given point.
+     *
+     * @param pix_x0 x coordinate of the new starting point, in pixels
+     * @param pix_y0 y coordinate of the new starting point, in pixels
+     */
+    @Override
+    public void moveTo(float pix_x0, float pix_y0) {
+        // finish the previous sub-path first (uses the former start point):
+        closePath();
+
+        // remember the start point in pixel coordinates for closePath():
+        pix_sx0 = pix_x0;
+        pix_sy0 = pix_y0;
+
+        // the current position is tracked in subpixel space:
+        x0 = tosubpixx(pix_x0);
+        y0 = tosubpixy(pix_y0);
+    }
+
+    /**
+     * Adds a line from the current position to the given point, which
+     * becomes the new current position.
+     *
+     * @param pix_x1 x coordinate of the end point, in pixels
+     * @param pix_y1 y coordinate of the end point, in pixels
+     */
+    @Override
+    public void lineTo(float pix_x1, float pix_y1) {
+        // end point in subpixel space:
+        final float endX = tosubpixx(pix_x1);
+        final float endY = tosubpixy(pix_y1);
+
+        addLine(x0, y0, endX, endY);
+
+        this.x0 = endX;
+        this.y0 = endY;
+    }
+
+    /**
+     * Adds a cubic curve from the current position to (x3, y3) using the
+     * control points (x1, y1) and (x2, y2); the curve is broken into lines
+     * and its end point becomes the new current position.
+     * All arguments are pixel coordinates.
+     */
+    @Override
+    public void curveTo(float x1, float y1,
+            float x2, float y2,
+            float x3, float y3)
+    {
+        // control points and end point in subpixel space:
+        final float ctrlX1 = tosubpixx(x1);
+        final float ctrlY1 = tosubpixy(y1);
+        final float ctrlX2 = tosubpixx(x2);
+        final float ctrlY2 = tosubpixy(y2);
+        final float endX   = tosubpixx(x3);
+        final float endY   = tosubpixy(y3);
+
+        curve.set(x0, y0, ctrlX1, ctrlY1, ctrlX2, ctrlY2, endX, endY);
+        curveBreakIntoLinesAndAdd(x0, y0, curve, endX, endY);
+
+        this.x0 = endX;
+        this.y0 = endY;
+    }
+
+    /**
+     * Adds a quadratic curve from the current position to (x2, y2) using the
+     * control point (x1, y1); the curve is broken into lines and its end
+     * point becomes the new current position.
+     * All arguments are pixel coordinates.
+     */
+    @Override
+    public void quadTo(float x1, float y1, float x2, float y2) {
+        // control point and end point in subpixel space:
+        final float ctrlX = tosubpixx(x1);
+        final float ctrlY = tosubpixy(y1);
+        final float endX  = tosubpixx(x2);
+        final float endY  = tosubpixy(y2);
+
+        curve.set(x0, y0, ctrlX, ctrlY, endX, endY);
+        quadBreakIntoLinesAndAdd(x0, y0, curve, endX, endY);
+
+        this.x0 = endX;
+        this.y0 = endY;
+    }
+
+    /**
+     * Closes the current sub-path by adding a line back to the point of the
+     * most recent moveTo.
+     */
+    @Override
+    public void closePath() {
+        // the start point is kept in pixel coordinates, as lineTo expects:
+        this.lineTo(this.pix_sx0, this.pix_sy0);
+    }
+
+    /**
+     * Ends the path: the current sub-path is closed.
+     */
+    @Override
+    public void pathDone() {
+        this.closePath();
+    }
+
+    /**
+     * Not supported by this renderer.
+     *
+     * @return never returns normally
+     * @throws InternalError always
+     */
+    @Override
+    public long getNativeConsumer() {
+        final String message = "Renderer does not use a native consumer.";
+        throw new InternalError(message);
+    }
+
+    // alpha array currently in use; kept clean (zero filled) between uses
+    private int[] alphaLine;
+    // initial alpha array of this renderer: 2048 (pixelsize) pixel large
+    private final int[] alphaLine_initial = new int[INITIAL_AA_ARRAY]; // 8K
+
+    private void _endRendering(final int ymin, final int ymax) {
+        if (DISABLE_RENDER) {
+            return;
+        }
+
+        // Get X bounds as true pixel boundaries to compute correct pixel coverage:
+        final int bboxx0 = bbox_spminX;
+        final int bboxx1 = bbox_spmaxX;
+
+        final boolean windingRuleEvenOdd = (windingRule == WIND_EVEN_ODD);
+
+        // Useful when processing tile line by tile line
+        final int[] _alpha = alphaLine;
+
+        // local vars (performance):
+        final MarlinCache _cache = cache;
+        final OffHeapArray _edges = edges;
+        final int[] _edgeBuckets = edgeBuckets;
+        final int[] _edgeBucketCounts = edgeBucketCounts;
+
+        int[] _crossings = this.crossings;
+        int[] _edgePtrs  = this.edgePtrs;
+
+        // merge sort auxiliary storage:
+        int[] _aux_crossings = this.aux_crossings;
+        int[] _aux_edgePtrs  = this.aux_edgePtrs;
+
+        // copy constants:
+        final long _OFF_ERROR    = OFF_ERROR;
+        final long _OFF_BUMP_X   = OFF_BUMP_X;
+        final long _OFF_BUMP_ERR = OFF_BUMP_ERR;
+
+        final long _OFF_NEXT     = OFF_NEXT;
+        final long _OFF_YMAX     = OFF_YMAX;
+
+        final int _ALL_BUT_LSB   = ALL_BUT_LSB;
+        final int _ERR_STEP_MAX  = ERR_STEP_MAX;
+
+        // unsafe I/O:
+        final Unsafe _unsafe = OffHeapArray.unsafe;
+        final long    addr0  = _edges.address;
+        long addr;
+        final int _SUBPIXEL_LG_POSITIONS_X = SUBPIXEL_LG_POSITIONS_X;
+        final int _SUBPIXEL_LG_POSITIONS_Y = SUBPIXEL_LG_POSITIONS_Y;
+        final int _SUBPIXEL_MASK_X = SUBPIXEL_MASK_X;
+        final int _SUBPIXEL_MASK_Y = SUBPIXEL_MASK_Y;
+        final int _SUBPIXEL_POSITIONS_X = SUBPIXEL_POSITIONS_X;
+
+        final int _MIN_VALUE = Integer.MIN_VALUE;
+        final int _MAX_VALUE = Integer.MAX_VALUE;
+
+        // Now we iterate through the scanlines. We must tell emitRow the coord
+        // of the first non-transparent pixel, so we must keep accumulators for
+        // the first and last pixels of the section of the current pixel row
+        // that we will emit.
+        // We also need to accumulate pix_bbox, but the iterator does it
+        // for us. We will just get the values from it once this loop is done
+        int minX = _MAX_VALUE;
+        int maxX = _MIN_VALUE;
+
+        int y = ymin;
+        int bucket = y - boundsMinY;
+
+        int numCrossings = this.edgeCount;
+        int edgePtrsLen = _edgePtrs.length;
+        int crossingsLen = _crossings.length;
+        int _arrayMaxUsed = activeEdgeMaxUsed;
+        int ptrLen = 0, newCount, ptrEnd;
+
+        int bucketcount, i, j, ecur;
+        int cross, lastCross;
+        int x0, x1, tmp, sum, prev, curx, curxo, crorientation, err;
+        int pix_x, pix_xmaxm1, pix_xmax;
+
+        int low, high, mid, prevNumCrossings;
+        boolean useBinarySearch;
+
+        final int[] _blkFlags = blkFlags;
+        final int _BLK_SIZE_LG = BLOCK_SIZE_LG;
+        final int _BLK_SIZE = BLOCK_SIZE;
+
+        final boolean _enableBlkFlagsHeuristics = ENABLE_BLOCK_FLAGS_HEURISTICS && this.enableBlkFlags;
+
+        // Use block flags if large pixel span and few crossings:
+        // ie mean(distance between crossings) is high
+        boolean useBlkFlags = this.prevUseBlkFlags;
+
+        final int stroking = rdrCtx.stroking;
+
+        int lastY = -1; // last emited row
+
+
+        // Iteration on scanlines
+        for (; y < ymax; y++, bucket++) {
+            // --- from former ScanLineIterator.next()
+            bucketcount = _edgeBucketCounts[bucket];
+
+            // marker on previously sorted edges:
+            prevNumCrossings = numCrossings;
+
+            // bucketCount indicates new edge / edge end:
+            if (bucketcount != 0) {
+                if (doStats) {
+                    RendererContext.stats.stat_rdr_activeEdges_updates
+                        .add(numCrossings);
+                }
+
+                // last bit set to 1 means that edges ends
+                if ((bucketcount & 0x1) != 0) {
+                    // eviction in active edge list
+                    // cache edges[] address + offset
+                    addr = addr0 + _OFF_YMAX;
+
+                    for (i = 0, newCount = 0; i < numCrossings; i++) {
+                        // get the pointer to the edge
+                        ecur = _edgePtrs[i];
+                        // random access so use unsafe:
+                        if (_unsafe.getInt(addr + ecur) > y) {
+                            _edgePtrs[newCount++] = ecur;
+                        }
+                    }
+                    // update marker on sorted edges minus removed edges:
+                    prevNumCrossings = numCrossings = newCount;
+                }
+
+                ptrLen = bucketcount >> 1; // number of new edge
+
+                if (ptrLen != 0) {
+                    if (doStats) {
+                        RendererContext.stats.stat_rdr_activeEdges_adds
+                            .add(ptrLen);
+                        if (ptrLen > 10) {
+                            RendererContext.stats.stat_rdr_activeEdges_adds_high
+                                .add(ptrLen);
+                        }
+                    }
+                    ptrEnd = numCrossings + ptrLen;
+
+                    if (edgePtrsLen < ptrEnd) {
+                        if (doStats) {
+                            RendererContext.stats.stat_array_renderer_edgePtrs
+                                .add(ptrEnd);
+                        }
+                        this.edgePtrs = _edgePtrs
+                            = rdrCtx.widenDirtyIntArray(_edgePtrs, numCrossings,
+                                                        ptrEnd);
+
+                        edgePtrsLen = _edgePtrs.length;
+                        // Get larger auxiliary storage:
+                        if (_aux_edgePtrs != aux_edgePtrs_initial) {
+                            rdrCtx.putDirtyIntArray(_aux_edgePtrs);
+                        }
+                        // use ArrayCache.getNewSize() to use the same growing
+                        // factor than widenDirtyIntArray():
+                        if (doStats) {
+                            RendererContext.stats.stat_array_renderer_aux_edgePtrs
+                                .add(ptrEnd);
+                        }
+                        this.aux_edgePtrs = _aux_edgePtrs
+                            = rdrCtx.getDirtyIntArray(
+                                ArrayCache.getNewSize(numCrossings, ptrEnd)
+                            );
+                    }
+
+                    // cache edges[] address + offset
+                    addr = addr0 + _OFF_NEXT;
+
+                    // add new edges to active edge list:
+                    for (ecur = _edgeBuckets[bucket];
+                         numCrossings < ptrEnd; numCrossings++)
+                    {
+                        // store the pointer to the edge
+                        _edgePtrs[numCrossings] = ecur;
+                        // random access so use unsafe:
+                        ecur = _unsafe.getInt(addr + ecur);
+                    }
+
+                    if (crossingsLen < numCrossings) {
+                        // Get larger array:
+                        if (_crossings != crossings_initial) {
+                            rdrCtx.putDirtyIntArray(_crossings);
+                        }
+                        if (doStats) {
+                            RendererContext.stats.stat_array_renderer_crossings
+                                .add(numCrossings);
+                        }
+                        this.crossings = _crossings
+                            = rdrCtx.getDirtyIntArray(numCrossings);
+
+                        // Get larger auxiliary storage:
+                        if (_aux_crossings != aux_crossings_initial) {
+                            rdrCtx.putDirtyIntArray(_aux_crossings);
+                        }
+                        if (doStats) {
+                            RendererContext.stats.stat_array_renderer_aux_crossings
+                                .add(numCrossings);
+                        }
+                        this.aux_crossings = _aux_crossings
+                            = rdrCtx.getDirtyIntArray(numCrossings);
+
+                        crossingsLen = _crossings.length;
+                    }
+                    if (doStats) {
+                        // update max used mark
+                        if (numCrossings > _arrayMaxUsed) {
+                            _arrayMaxUsed = numCrossings;
+                        }
+                    }
+                } // ptrLen != 0
+            } // bucketCount != 0
+
+
+            if (numCrossings != 0) {
+                /*
+                 * thresholds to switch to optimized merge sort
+                 * for newly added edges + final merge pass.
+                 */
+                if ((ptrLen < 10) || (numCrossings < 40)) {
+                    if (doStats) {
+                        RendererContext.stats.hist_rdr_crossings
+                            .add(numCrossings);
+                        RendererContext.stats.hist_rdr_crossings_adds
+                            .add(ptrLen);
+                    }
+
+                    /*
+                     * threshold to use binary insertion sort instead of
+                     * straight insertion sort (to reduce minimize comparisons).
+                     */
+                    useBinarySearch = (numCrossings >= 20);
+
+                    // if small enough:
+                    lastCross = _MIN_VALUE;
+
+                    for (i = 0; i < numCrossings; i++) {
+                        // get the pointer to the edge
+                        ecur = _edgePtrs[i];
+
+                        /* convert subpixel coordinates (float) into pixel
+                            positions (int) for coming scanline */
+                        /* note: it is faster to always update edges even
+                           if it is removed from AEL for coming or last scanline */
+
+                        // random access so use unsafe:
+                        addr = addr0 + ecur; // ecur + OFF_F_CURX
+
+                        // get current crossing:
+                        curx = _unsafe.getInt(addr);
+
+                        // update crossing with orientation at last bit:
+                        cross = curx;
+
+                        // Increment x using DDA (fixed point):
+                        curx += _unsafe.getInt(addr + _OFF_BUMP_X);
+
+                        // Increment error:
+                        err  =  _unsafe.getInt(addr + _OFF_ERROR)
+                              + _unsafe.getInt(addr + _OFF_BUMP_ERR);
+
+                        // Manual carry handling:
+                        // keep sign and carry bit only and ignore last bit (preserve orientation):
+                        _unsafe.putInt(addr,               curx - ((err >> 30) & _ALL_BUT_LSB));
+                        _unsafe.putInt(addr + _OFF_ERROR, (err & _ERR_STEP_MAX));
+
+                        if (doStats) {
+                            RendererContext.stats.stat_rdr_crossings_updates
+                                .add(numCrossings);
+                        }
+
+                        // insertion sort of crossings:
+                        if (cross < lastCross) {
+                            if (doStats) {
+                                RendererContext.stats.stat_rdr_crossings_sorts
+                                    .add(i);
+                            }
+
+                            /* use binary search for newly added edges
+                               in crossings if arrays are large enough */
+                            if (useBinarySearch && (i >= prevNumCrossings)) {
+                                if (doStats) {
+                                    RendererContext.stats.
+                                        stat_rdr_crossings_bsearch.add(i);
+                                }
+                                low = 0;
+                                high = i - 1;
+
+                                do {
+                                    // note: use signed shift (not >>>) for performance
+                                    // as indices are small enough to exceed Integer.MAX_VALUE
+                                    mid = (low + high) >> 1;
+
+                                    if (_crossings[mid] < cross) {
+                                        low = mid + 1;
+                                    } else {
+                                        high = mid - 1;
+                                    }
+                                } while (low <= high);
+
+                                for (j = i - 1; j >= low; j--) {
+                                    _crossings[j + 1] = _crossings[j];
+                                    _edgePtrs [j + 1] = _edgePtrs[j];
+                                }
+                                _crossings[low] = cross;
+                                _edgePtrs [low] = ecur;
+
+                            } else {
+                                j = i - 1;
+                                _crossings[i] = _crossings[j];
+                                _edgePtrs[i] = _edgePtrs[j];
+
+                                while ((--j >= 0) && (_crossings[j] > cross)) {
+                                    _crossings[j + 1] = _crossings[j];
+                                    _edgePtrs [j + 1] = _edgePtrs[j];
+                                }
+                                _crossings[j + 1] = cross;
+                                _edgePtrs [j + 1] = ecur;
+                            }
+
+                        } else {
+                            _crossings[i] = lastCross = cross;
+                        }
+                    }
+                } else {
+                    if (doStats) {
+                        RendererContext.stats.stat_rdr_crossings_msorts
+                            .add(numCrossings);
+                        RendererContext.stats.hist_rdr_crossings_ratio
+                            .add((1000 * ptrLen) / numCrossings);
+                        RendererContext.stats.hist_rdr_crossings_msorts
+                            .add(numCrossings);
+                        RendererContext.stats.hist_rdr_crossings_msorts_adds
+                            .add(ptrLen);
+                    }
+
+                    // Copy sorted data in auxiliary arrays
+                    // and perform insertion sort on almost sorted data
+                    // (ie i < prevNumCrossings):
+
+                    lastCross = _MIN_VALUE;
+
+                    for (i = 0; i < numCrossings; i++) {
+                        // get the pointer to the edge
+                        ecur = _edgePtrs[i];
+
+                        /* convert subpixel coordinates (float) into pixel
+                            positions (int) for coming scanline */
+                        /* note: it is faster to always update edges even
+                           if it is removed from AEL for coming or last scanline */
+
+                        // random access so use unsafe:
+                        addr = addr0 + ecur; // ecur + OFF_F_CURX
+
+                        // get current crossing:
+                        curx = _unsafe.getInt(addr);
+
+                        // update crossing with orientation at last bit:
+                        cross = curx;
+
+                        // Increment x using DDA (fixed point):
+                        curx += _unsafe.getInt(addr + _OFF_BUMP_X);
+
+                        // Increment error:
+                        err  =  _unsafe.getInt(addr + _OFF_ERROR)
+                              + _unsafe.getInt(addr + _OFF_BUMP_ERR);
+
+                        // Manual carry handling:
+                        // keep sign and carry bit only and ignore last bit (preserve orientation):
+                        _unsafe.putInt(addr,               curx - ((err >> 30) & _ALL_BUT_LSB));
+                        _unsafe.putInt(addr + _OFF_ERROR, (err & _ERR_STEP_MAX));
+
+                        if (doStats) {
+                            RendererContext.stats.stat_rdr_crossings_updates
+                                .add(numCrossings);
+                        }
+
+                        if (i >= prevNumCrossings) {
+                            // simply store crossing as edgePtrs is in-place:
+                            // will be copied and sorted efficiently by mergesort later:
+                            _crossings[i]     = cross;
+
+                        } else if (cross < lastCross) {
+                            if (doStats) {
+                                RendererContext.stats.stat_rdr_crossings_sorts
+                                    .add(i);
+                            }
+
+                            // (straight) insertion sort of crossings:
+                            j = i - 1;
+                            _aux_crossings[i] = _aux_crossings[j];
+                            _aux_edgePtrs[i] = _aux_edgePtrs[j];
+
+                            while ((--j >= 0) && (_aux_crossings[j] > cross)) {
+                                _aux_crossings[j + 1] = _aux_crossings[j];
+                                _aux_edgePtrs [j + 1] = _aux_edgePtrs[j];
+                            }
+                            _aux_crossings[j + 1] = cross;
+                            _aux_edgePtrs [j + 1] = ecur;
+
+                        } else {
+                            // auxiliary storage:
+                            _aux_crossings[i] = lastCross = cross;
+                            _aux_edgePtrs [i] = ecur;
+                        }
+                    }
+
+                    // use Mergesort using auxiliary arrays (sort only right part)
+                    MergeSort.mergeSortNoCopy(_crossings,     _edgePtrs,
+                                              _aux_crossings, _aux_edgePtrs,
+                                              numCrossings,   prevNumCrossings);
+                }
+
+                // reset ptrLen
+                ptrLen = 0;
+                // --- from former ScanLineIterator.next()
+
+
+                /* note: bboxx0 and bboxx1 must be pixel boundaries
+                   to have correct coverage computation */
+
+                // right shift on crossings to get the x-coordinate:
+                curxo = _crossings[0];
+                x0    = curxo >> 1;
+                if (x0 < minX) {
+                    minX = x0; // subpixel coordinate
+                }
+
+                x1 = _crossings[numCrossings - 1] >> 1;
+                if (x1 > maxX) {
+                    maxX = x1; // subpixel coordinate
+                }
+
+
+                // compute pixel coverages
+                prev = curx = x0;
+                // to turn {0, 1} into {-1, 1}, multiply by 2 and subtract 1.
+                // last bit contains orientation (0 or 1)
+                crorientation = ((curxo & 0x1) << 1) - 1;
+
+                if (windingRuleEvenOdd) {
+                    sum = crorientation;
+
+                    // Even Odd winding rule: take care of mask ie sum(orientations)
+                    for (i = 1; i < numCrossings; i++) {
+                        curxo = _crossings[i];
+                        curx  =  curxo >> 1;
+                        // to turn {0, 1} into {-1, 1}, multiply by 2 and subtract 1.
+                        // last bit contains orientation (0 or 1)
+                        crorientation = ((curxo & 0x1) << 1) - 1;
+
+                        if ((sum & 0x1) != 0) {
+                            // TODO: perform line clipping on left-right sides
+                            // to avoid such bound checks:
+                            x0 = (prev > bboxx0) ? prev : bboxx0;
+                            x1 = (curx < bboxx1) ? curx : bboxx1;
+
+                            if (x0 < x1) {
+                                x0 -= bboxx0; // turn x0, x1 from coords to indices
+                                x1 -= bboxx0; // in the alpha array.
+
+                                pix_x      =  x0      >> _SUBPIXEL_LG_POSITIONS_X;
+                                pix_xmaxm1 = (x1 - 1) >> _SUBPIXEL_LG_POSITIONS_X;
+
+                                if (pix_x == pix_xmaxm1) {
+                                    // Start and end in same pixel
+                                    tmp = (x1 - x0); // number of subpixels
+                                    _alpha[pix_x    ] += tmp;
+                                    _alpha[pix_x + 1] -= tmp;
+
+                                    if (useBlkFlags) {
+                                        // flag used blocks:
+                                        _blkFlags[pix_x >> _BLK_SIZE_LG] = 1;
+                                    }
+                                } else {
+                                    tmp = (x0 & _SUBPIXEL_MASK_X);
+                                    _alpha[pix_x    ]
+                                        += (_SUBPIXEL_POSITIONS_X - tmp);
+                                    _alpha[pix_x + 1]
+                                        += tmp;
+
+                                    pix_xmax = x1 >> _SUBPIXEL_LG_POSITIONS_X;
+
+                                    tmp = (x1 & _SUBPIXEL_MASK_X);
+                                    _alpha[pix_xmax    ]
+                                        -= (_SUBPIXEL_POSITIONS_X - tmp);
+                                    _alpha[pix_xmax + 1]
+                                        -= tmp;
+
+                                    if (useBlkFlags) {
+                                        // flag used blocks:
+                                        _blkFlags[pix_x    >> _BLK_SIZE_LG] = 1;
+                                        _blkFlags[pix_xmax >> _BLK_SIZE_LG] = 1;
+                                    }
+                                }
+                            }
+                        }
+
+                        sum += crorientation;
+                        prev = curx;
+                    }
+                } else {
+                    // Non-zero winding rule: optimize that case (default)
+                    // and avoid processing intermediate crossings
+                    for (i = 1, sum = 0;; i++) {
+                        sum += crorientation;
+
+                        if (sum != 0) {
+                            // prev = min(curx)
+                            if (prev > curx) {
+                                prev = curx;
+                            }
+                        } else {
+                            // TODO: perform line clipping on left-right sides
+                            // to avoid such bound checks:
+                            x0 = (prev > bboxx0) ? prev : bboxx0;
+                            x1 = (curx < bboxx1) ? curx : bboxx1;
+
+                            if (x0 < x1) {
+                                x0 -= bboxx0; // turn x0, x1 from coords to indices
+                                x1 -= bboxx0; // in the alpha array.
+
+                                pix_x      =  x0      >> _SUBPIXEL_LG_POSITIONS_X;
+                                pix_xmaxm1 = (x1 - 1) >> _SUBPIXEL_LG_POSITIONS_X;
+
+                                if (pix_x == pix_xmaxm1) {
+                                    // Start and end in same pixel
+                                    tmp = (x1 - x0); // number of subpixels
+                                    _alpha[pix_x    ] += tmp;
+                                    _alpha[pix_x + 1] -= tmp;
+
+                                    if (useBlkFlags) {
+                                        // flag used blocks:
+                                        _blkFlags[pix_x >> _BLK_SIZE_LG] = 1;
+                                    }
+                                } else {
+                                    tmp = (x0 & _SUBPIXEL_MASK_X);
+                                    _alpha[pix_x    ]
+                                        += (_SUBPIXEL_POSITIONS_X - tmp);
+                                    _alpha[pix_x + 1]
+                                        += tmp;
+
+                                    pix_xmax = x1 >> _SUBPIXEL_LG_POSITIONS_X;
+
+                                    tmp = (x1 & _SUBPIXEL_MASK_X);
+                                    _alpha[pix_xmax    ]
+                                        -= (_SUBPIXEL_POSITIONS_X - tmp);
+                                    _alpha[pix_xmax + 1]
+                                        -= tmp;
+
+                                    if (useBlkFlags) {
+                                        // flag used blocks:
+                                        _blkFlags[pix_x    >> _BLK_SIZE_LG] = 1;
+                                        _blkFlags[pix_xmax >> _BLK_SIZE_LG] = 1;
+                                    }
+                                }
+                            }
+                            prev = _MAX_VALUE;
+                        }
+
+                        if (i == numCrossings) {
+                            break;
+                        }
+
+                        curxo = _crossings[i];
+                        curx  =  curxo >> 1;
+                        // to turn {0, 1} into {-1, 1}, multiply by 2 and subtract 1.
+                        // last bit contains orientation (0 or 1)
+                        crorientation = ((curxo & 0x1) << 1) - 1;
+                    }
+                }
+            } // numCrossings > 0
+
+            // even if this last row had no crossings, alpha will be zeroed
+            // from the last emitRow call. But this doesn't matter because
+            // maxX < minX, so no row will be emitted to the MarlinCache.
+            if ((y & _SUBPIXEL_MASK_Y) == _SUBPIXEL_MASK_Y) {
+                lastY = y >> _SUBPIXEL_LG_POSITIONS_Y;
+
+                // convert subpixel to pixel coordinate within boundaries:
+                minX = FloatMath.max(minX, bboxx0) >> _SUBPIXEL_LG_POSITIONS_X;
+                maxX = FloatMath.min(maxX, bboxx1) >> _SUBPIXEL_LG_POSITIONS_X;
+
+                if (maxX >= minX) {
+                    // note: alpha array will be zeroed by copyAARow()
+                    // +2 because alpha [pix_minX; pix_maxX+1]
+                    // fix range [x0; x1[
+                    copyAARow(_alpha, lastY, minX, maxX + 2, useBlkFlags);
+
+                    // speculative for next pixel row (scanline coherence):
+                    if (_enableBlkFlagsHeuristics) {
+                        // Use block flags if large pixel span and few crossings:
+                        // ie mean(distance between crossings) is larger than
+                        // 1 block size;
+
+                        // fast check width:
+                        maxX -= minX;
+
+                        // if stroking: numCrossings /= 2
+                        // => shift numCrossings by 1
+                        // condition = (width / (numCrossings - 1)) > blockSize
+                        useBlkFlags = (maxX > _BLK_SIZE) && (maxX >
+                            (((numCrossings >> stroking) - 1) << _BLK_SIZE_LG));
+
+                        if (doStats) {
+                            tmp = FloatMath.max(1,
+                                    ((numCrossings >> stroking) - 1));
+                            RendererContext.stats.hist_tile_generator_encoding_dist
+                                .add(maxX / tmp);
+                        }
+                    }
+                } else {
+                    _cache.clearAARow(lastY);
+                }
+                minX = _MAX_VALUE;
+                maxX = _MIN_VALUE;
+            }
+        } // scan line iterator
+
+        // Emit final row
+        y--;
+        y >>= _SUBPIXEL_LG_POSITIONS_Y;
+
+        // convert subpixel to pixel coordinate within boundaries:
+        minX = FloatMath.max(minX, bboxx0) >> _SUBPIXEL_LG_POSITIONS_X;
+        maxX = FloatMath.min(maxX, bboxx1) >> _SUBPIXEL_LG_POSITIONS_X;
+
+        if (maxX >= minX) {
+            // note: alpha array will be zeroed by copyAARow()
+            // +2 because alpha [pix_minX; pix_maxX+1]
+            // fix range [x0; x1[
+            copyAARow(_alpha, y, minX, maxX + 2, useBlkFlags);
+        } else if (y != lastY) {
+            _cache.clearAARow(y);
+        }
+
+        // update member:
+        edgeCount = numCrossings;
+        prevUseBlkFlags = useBlkFlags;
+
+        if (doStats) {
+            // update max used mark
+            activeEdgeMaxUsed = _arrayMaxUsed;
+        }
+    }
+
+    boolean endRendering() { // prepares bounds and work arrays, then renders the first tile line
+        if (doMonitors) {
+            RendererContext.stats.mon_rdr_endRendering.start();
+        }
+        if (edgeMinY == Float.POSITIVE_INFINITY) {
+            return false; // undefined edges bounds: skip rendering
+        }
+
+        final int _boundsMinY = boundsMinY;
+        final int _boundsMaxY = boundsMaxY;
+
+        // subpixel bounds as inclusive intervals, clamped to [boundsMinX; boundsMaxX - 1]
+        final int spminX = FloatMath.max(FloatMath.ceil_int(edgeMinX - 0.5f), boundsMinX);
+        final int spmaxX = FloatMath.min(FloatMath.ceil_int(edgeMaxX - 0.5f), boundsMaxX - 1);
+
+        // y1 (and y2) are already biased by -0.5 in tosubpixy():
+        final int spminY = FloatMath.max(FloatMath.ceil_int(edgeMinY), _boundsMinY);
+        int maxY = FloatMath.ceil_int(edgeMaxY);
+
+        final int spmaxY;
+
+        if (maxY <= _boundsMaxY - 1) {
+            spmaxY = maxY;
+        } else { // edges extend past the bounds: clamp both values
+            spmaxY = _boundsMaxY - 1;
+            maxY   = _boundsMaxY;
+        }
+        buckets_minY = spminY - _boundsMinY; // bucket indices are relative to boundsMinY
+        buckets_maxY = maxY   - _boundsMinY;
+
+        if (doLogBounds) {
+            MarlinUtils.logInfo("edgesXY = [" + edgeMinX + " ... " + edgeMaxX
+                                + "][" + edgeMinY + " ... " + edgeMaxY + "]");
+            MarlinUtils.logInfo("spXY    = [" + spminX + " ... " + spmaxX
+                                + "][" + spminY + " ... " + spmaxY + "]");
+        }
+
+        // test clipping for shapes out of bounds (empty X or Y interval):
+        if ((spminX > spmaxX) || (spminY > spmaxY)) {
+            return false;
+        }
+
+        // half open intervals
+        // inclusive:
+        final int pminX =  spminX                    >> SUBPIXEL_LG_POSITIONS_X;
+        // exclusive:
+        final int pmaxX = (spmaxX + SUBPIXEL_MASK_X) >> SUBPIXEL_LG_POSITIONS_X;
+        // inclusive:
+        final int pminY =  spminY                    >> SUBPIXEL_LG_POSITIONS_Y;
+        // exclusive:
+        final int pmaxY = (spmaxY + SUBPIXEL_MASK_Y) >> SUBPIXEL_LG_POSITIONS_Y;
+
+        // store BBox to answer ptg.getBBox():
+        this.cache.init(pminX, pminY, pmaxX, pmaxY, edgeSumDeltaY);
+
+        // Heuristics for using block flags:
+        if (ENABLE_BLOCK_FLAGS) {
+            enableBlkFlags = this.cache.useRLE;
+            prevUseBlkFlags = enableBlkFlags && !ENABLE_BLOCK_FLAGS_HEURISTICS;
+
+            if (enableBlkFlags) {
+                // ensure blockFlags array is large enough (+2 to ensure enough space left at end):
+                // NOTE(review): block flags are written at (pix_x >> _BLK_SIZE_LG); confirm it equals TILE_SIZE_LG
+                final int nxTiles = ((pmaxX - pminX) >> TILE_SIZE_LG) + 2;
+                if (nxTiles > INITIAL_ARRAY) {
+                    blkFlags = rdrCtx.getIntArray(nxTiles);
+                }
+            }
+        }
+
+        // memorize the rendering bounding box:
+        /* note: bbox_spminX and bbox_spmaxX must be pixel boundaries
+           to have correct coverage computation */
+        // inclusive:
+        bbox_spminX = pminX << SUBPIXEL_LG_POSITIONS_X;
+        // exclusive:
+        bbox_spmaxX = pmaxX << SUBPIXEL_LG_POSITIONS_X;
+        // inclusive:
+        bbox_spminY = spminY;
+        // exclusive:
+        bbox_spmaxY = FloatMath.min(spmaxY + 1, pmaxY << SUBPIXEL_LG_POSITIONS_Y);
+
+        if (doLogBounds) {
+            MarlinUtils.logInfo("pXY       = [" + pminX + " ... " + pmaxX
+                                + "[ [" + pminY + " ... " + pmaxY + "[");
+            MarlinUtils.logInfo("bbox_spXY = [" + bbox_spminX + " ... "
+                                + bbox_spmaxX + "[ [" + bbox_spminY + " ... "
+                                + bbox_spmaxY + "[");
+        }
+
+        // Prepare alpha line:
+        // add 2 to better deal with the last pixel in a pixel row.
+        final int width = (pmaxX - pminX) + 2;
+
+        // Useful when processing tile line by tile line: only get a larger array if width exceeds INITIAL_AA_ARRAY
+        if (width > INITIAL_AA_ARRAY) {
+            if (doStats) {
+                RendererContext.stats.stat_array_renderer_alphaline
+                    .add(width);
+            }
+            alphaLine = rdrCtx.getIntArray(width);
+        }
+
+        // process first tile line:
+        endRendering(pminY);
+
+        return true;
+    }
+
+    private int bbox_spminX, bbox_spmaxX, bbox_spminY, bbox_spmaxY; // rendering bbox (subpixels) memorized by endRendering()
+
+    void endRendering(final int pminY) { // renders the tile line starting at pixel row pminY, clipped to the bbox
+        if (doMonitors) {
+            RendererContext.stats.mon_rdr_endRendering_Y.start();
+        }
+
+        final int spminY       = pminY << SUBPIXEL_LG_POSITIONS_Y;
+        final int fixed_spminY = FloatMath.max(bbox_spminY, spminY);
+
+        // avoid rendering for last call to nextTile() (first row is at or past bbox_spmaxY):
+        if (fixed_spminY < bbox_spmaxY) {
+            // process a complete tile line ie scanlines for 32 rows (SUBPIXEL_TILE subpixel rows at most)
+            final int spmaxY = FloatMath.min(bbox_spmaxY, spminY + SUBPIXEL_TILE);
+
+            // process tile line [0 - 32]
+            cache.resetTileLine(pminY);
+
+            // Process only one tile line:
+            _endRendering(fixed_spminY, spmaxY);
+        }
+        if (doMonitors) {
+            RendererContext.stats.mon_rdr_endRendering_Y.stop();
+        }
+    }
+
+    private boolean enableBlkFlags = false; // set from cache.useRLE in endRendering() when ENABLE_BLOCK_FLAGS
+    private boolean prevUseBlkFlags = false; // last useBlkFlags value stored when scanline processing ends
+
+    private final int[] blkFlags_initial = new int[INITIAL_ARRAY]; // 1 tile line
+    /* block flags (0|1); endRendering() switches to a larger array when INITIAL_ARRAY is too small */
+    private int[] blkFlags = blkFlags_initial;
+
+    void copyAARow(final int[] alphaRow, // hands one alpha row over to the cache
+                   final int pix_y, final int pix_from, final int pix_to,
+                   final boolean useBlockFlags)
+    {
+        if (useBlockFlags) { // RLE copy using the block flags
+            if (doStats) {
+                RendererContext.stats.hist_tile_generator_encoding.add(1);
+            }
+            cache.copyAARowRLE_WithBlockFlags(blkFlags, alphaRow, pix_y, pix_from, pix_to);
+        } else { // copy without RLE
+            if (doStats) {
+                RendererContext.stats.hist_tile_generator_encoding.add(0);
+            }
+            cache.copyAARowNoRLE(alphaRow, pix_y, pix_from, pix_to);
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/RendererContext.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,471 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.awt.geom.Path2D;
+import java.lang.ref.SoftReference;
+import java.lang.ref.WeakReference;
+import java.util.concurrent.atomic.AtomicInteger;
+import static sun.java2d.marlin.ArrayCache.*;
+import sun.java2d.marlin.MarlinRenderingEngine.NormalizingPathIterator;
+import static sun.java2d.marlin.MarlinUtils.getCallerInfo;
+import static sun.java2d.marlin.MarlinUtils.logInfo;
+
+/**
+ * This class is a renderer context dedicated to a single thread: it holds the pipeline stages and their array caches
+ */
+final class RendererContext implements MarlinConst {
+
+    private static final String className = RendererContext.class.getName();
+    // RendererContext creation counter (its value is appended to "ctx" to name new contexts)
+    private static final AtomicInteger contextCount = new AtomicInteger(1);
+    // RendererContext statistics (null unless doStats or doMonitors is enabled)
+    static final RendererStats stats = (doStats || doMonitors)
+                                       ? RendererStats.getInstance(): null;
+
+    private static final boolean USE_CACHE_HARD_REF = doStats // keep a hard reference to the array caches
+        || (MarlinRenderingEngine.REF_TYPE == MarlinRenderingEngine.REF_WEAK);
+
+    /**
+     * Create a new renderer context and add it to stats.allContexts when statistics are available
+     *
+     * @return new RendererContext instance
+     */
+    static RendererContext createContext() {
+        final RendererContext newCtx = new RendererContext("ctx"
+                    + Integer.toString(contextCount.getAndIncrement()));
+        if (RendererContext.stats != null) {
+            RendererContext.stats.allContexts.add(newCtx);
+        }
+        return newCtx;
+    }
+
+    // context name (debugging purposes)
+    final String name;
+    /*
+     * Reference to this instance (hard, soft or weak).
+     * @see MarlinRenderingEngine#REF_TYPE
+     */
+    final Object reference;
+    // dirty flag indicating an exception occurred during pipeline in pathTo()
+    boolean dirty = false;
+    // dynamic array caches kept using weak reference (low memory footprint)
+    WeakReference<ArrayCachesHolder> refArrayCaches = null;
+    // hard reference to array caches (for statistics; only set when USE_CACHE_HARD_REF is true)
+    ArrayCachesHolder hardRefArrayCaches = null;
+    // shared data (given to both NormalizingPathIterator instances)
+    final float[] float6 = new float[6];
+    // shared curve (dirty) (Renderer / Stroker)
+    final Curve curve = new Curve();
+    // MarlinRenderingEngine NormalizingPathIterator NearestPixelCenter:
+    final NormalizingPathIterator nPCPathIterator;
+    // MarlinRenderingEngine NearestPixelQuarter NormalizingPathIterator:
+    final NormalizingPathIterator nPQPathIterator;
+    // MarlinRenderingEngine.TransformingPathConsumer2D
+    final TransformingPathConsumer2D transformerPC2D;
+    // recycled Path2D instance
+    Path2D.Float p2d = null;
+    final Renderer renderer;
+    final Stroker stroker;
+    // Simplifies out collinear lines
+    final CollinearSimplifier simplifier = new CollinearSimplifier();
+    final Dasher dasher;
+    final MarlinTileGenerator ptg;
+    final MarlinCache cache;
+    // flag indicating the shape is stroked (1) or filled (0)
+    int stroking = 0;
+
+    /**
+     * Constructor: creates the pipeline stages and the reference to this instance
+     *
+     * @param name context name (debugging purposes)
+     */
+    RendererContext(final String name) {
+        if (logCreateContext) {
+            MarlinUtils.logInfo("new RendererContext = " + name);
+        }
+
+        this.name = name;
+
+        // NormalizingPathIterator instances:
+        nPCPathIterator = new NormalizingPathIterator.NearestPixelCenter(float6);
+        nPQPathIterator  = new NormalizingPathIterator.NearestPixelQuarter(float6);
+
+        // MarlinRenderingEngine.TransformingPathConsumer2D
+        transformerPC2D = new TransformingPathConsumer2D();
+
+        // Renderer:
+        cache = new MarlinCache(this);
+        renderer = new Renderer(this); // needs MarlinCache from rdrCtx.cache
+        ptg = new MarlinTileGenerator(renderer);
+
+        stroker = new Stroker(this);
+        dasher = new Dasher(this);
+
+        // Create the reference to this instance (hard, soft or weak):
+        switch (MarlinRenderingEngine.REF_TYPE) {
+            default: // any other value falls through to the hard reference
+            case MarlinRenderingEngine.REF_HARD:
+                reference = this;
+                break;
+            case MarlinRenderingEngine.REF_SOFT:
+                reference = new SoftReference<RendererContext>(this);
+                break;
+            case MarlinRenderingEngine.REF_WEAK:
+                reference = new WeakReference<RendererContext>(this);
+                break;
+        }
+    }
+
+    /**
+     * Disposes this renderer context:
+     * clean up before reusing this context
+     */
+    void dispose() {
+        stroking = 0;
+        // reset hard reference to array caches if needed:
+        if (!USE_CACHE_HARD_REF) {
+            hardRefArrayCaches = null;
+        }
+        // if context is marked as DIRTY:
+        if (dirty) {
+            // may happen if an exception is thrown in the pipeline processing:
+            // force cleanup of all possible pipelined blocks (except Renderer):
+
+            // NormalizingPathIterator instances:
+            this.nPCPathIterator.dispose();
+            this.nPQPathIterator.dispose();
+            // Dasher:
+            this.dasher.dispose();
+            // Stroker:
+            this.stroker.dispose();
+
+            // mark context as CLEAN:
+            dirty = false;
+        }
+    }
+
+    // Array caches: returns the current holder or creates a new one when none can be resolved
+    ArrayCachesHolder getArrayCachesHolder() {
+        // Use hard reference first (cached resolved weak reference):
+        ArrayCachesHolder holder = hardRefArrayCaches;
+        if (holder == null) {
+            // resolve reference:
+            holder = (refArrayCaches != null)
+                     ? refArrayCaches.get()
+                     : null;
+            // create a new ArrayCachesHolder if none is available
+            if (holder == null) {
+                if (logCreateContext) {
+                    MarlinUtils.logInfo("new ArrayCachesHolder for "
+                                        + "RendererContext = " + name);
+                }
+
+                holder = new ArrayCachesHolder();
+
+                if (USE_CACHE_HARD_REF) {
+                    // update hard reference:
+                    hardRefArrayCaches = holder;
+                }
+
+                // update weak reference:
+                refArrayCaches = new WeakReference<ArrayCachesHolder>(holder);
+            }
+        }
+        return holder;
+    }
+
+    // dirty byte array cache: selects the cache of the bucket computed for the given length
+    ByteArrayCache getDirtyByteArrayCache(final int length) {
+        final int bucket = ArrayCache.getBucketDirtyBytes(length);
+        return getArrayCachesHolder().dirtyByteArrayCaches[bucket];
+    }
+
+    byte[] getDirtyByteArray(final int length) { // lengths above MAX_DIRTY_BYTE_ARRAY_SIZE get a new array
+        if (length <= MAX_DIRTY_BYTE_ARRAY_SIZE) {
+            return getDirtyByteArrayCache(length).getArray();
+        }
+
+        if (doStats) {
+            incOversize();
+        }
+
+        if (doLogOverSize) {
+            logInfo("getDirtyByteArray[oversize]: length=\t" + length
+                    + "\tfrom=\t" + getCallerInfo(className));
+        }
+
+        return new byte[length];
+    }
+
+    void putDirtyByteArray(final byte[] array) {
+        final int length = array.length;
+        // odd sized array are non-cached arrays (initial arrays)
+        // ensure to never store initial arrays in cache:
+        if (((length & 0x1) == 0) && (length <= MAX_DIRTY_BYTE_ARRAY_SIZE)) {
+            getDirtyByteArrayCache(length).putDirtyArray(array, length);
+        }
+    }
+
+    byte[] widenDirtyByteArray(final byte[] in,
+                               final int usedSize, final int needSize)
+    {
+        final int length = in.length;
+        if (doChecks && length >= needSize) { // sanity check: the given array is already large enough
+            return in;
+        }
+        if (doStats) {
+            incResizeDirtyByte();
+        }
+
+        // maybe change bucket:
+        // ensure getNewSize() > needSize (expected from getNewSize() - TODO confirm):
+        final byte[] res = getDirtyByteArray(getNewSize(usedSize, needSize));
+
+        System.arraycopy(in, 0, res, 0, usedSize); // copy only used elements
+
+        // maybe return current array:
+        // NO clean-up of array data = DIRTY ARRAY
+        putDirtyByteArray(in);
+
+        if (doLogWidenArray) {
+            logInfo("widenDirtyByteArray[" + res.length + "]: usedSize=\t"
+                    + usedSize + "\tlength=\t" + length + "\tneeded length=\t"
+                    + needSize + "\tfrom=\t" + getCallerInfo(className));
+        }
+        return res;
+    }
+
+    // int array cache: selects the cache of the bucket computed for the given length
+    IntArrayCache getIntArrayCache(final int length) {
+        final int bucket = ArrayCache.getBucket(length);
+        return getArrayCachesHolder().intArrayCaches[bucket];
+    }
+
+    int[] getIntArray(final int length) { // lengths above MAX_ARRAY_SIZE get a new array
+        if (length <= MAX_ARRAY_SIZE) {
+            return getIntArrayCache(length).getArray();
+        }
+
+        if (doStats) {
+            incOversize();
+        }
+
+        if (doLogOverSize) {
+            logInfo("getIntArray[oversize]: length=\t" + length + "\tfrom=\t"
+                    + getCallerInfo(className));
+        }
+
+        return new int[length];
+    }
+
+    // unused
+    int[] widenIntArray(final int[] in, final int usedSize,
+                        final int needSize, final int clearTo)
+    {
+        final int length = in.length;
+        if (doChecks && length >= needSize) { // sanity check: the given array is already large enough
+            return in;
+        }
+        if (doStats) {
+            incResizeInt();
+        }
+
+        // maybe change bucket:
+        // ensure getNewSize() > needSize (expected from getNewSize() - TODO confirm):
+        final int[] res = getIntArray(getNewSize(usedSize, needSize));
+
+        System.arraycopy(in, 0, res, 0, usedSize); // copy only used elements
+
+        // maybe return current array:
+        putIntArray(in, 0, clearTo); // ensure all array is cleared (grow-reduce algo)
+
+        if (doLogWidenArray) {
+            logInfo("widenIntArray[" + res.length + "]: usedSize=\t"
+                    + usedSize + "\tlength=\t" + length + "\tneeded length=\t"
+                    + needSize + "\tfrom=\t" + getCallerInfo(className));
+        }
+        return res;
+    }
+
+    void putIntArray(final int[] array, final int fromIndex,
+                     final int toIndex)
+    {
+        final int length = array.length;
+        // odd sized array are non-cached arrays (initial arrays)
+        // ensure to never store initial arrays in cache:
+        if (((length & 0x1) == 0) && (length <= MAX_ARRAY_SIZE)) {
+            getIntArrayCache(length).putArray(array, length, fromIndex, toIndex);
+        }
+    }
+
+    // dirty int array cache: selects the cache of the bucket computed for the given length
+    IntArrayCache getDirtyIntArrayCache(final int length) {
+        final int bucket = ArrayCache.getBucket(length);
+        return getArrayCachesHolder().dirtyIntArrayCaches[bucket];
+    }
+
+    int[] getDirtyIntArray(final int length) { // lengths above MAX_ARRAY_SIZE get a new array
+        if (length <= MAX_ARRAY_SIZE) {
+            return getDirtyIntArrayCache(length).getArray();
+        }
+
+        if (doStats) {
+            incOversize();
+        }
+
+        if (doLogOverSize) {
+            logInfo("getDirtyIntArray[oversize]: length=\t" + length
+                    + "\tfrom=\t" + getCallerInfo(className));
+        }
+
+        return new int[length];
+    }
+
+    int[] widenDirtyIntArray(final int[] in,
+                             final int usedSize, final int needSize)
+    {
+        final int length = in.length;
+        if (doChecks && length >= needSize) { // sanity check: the given array is already large enough
+            return in;
+        }
+        if (doStats) {
+            incResizeDirtyInt();
+        }
+
+        // maybe change bucket:
+        // ensure getNewSize() > needSize (expected from getNewSize() - TODO confirm):
+        final int[] res = getDirtyIntArray(getNewSize(usedSize, needSize));
+
+        System.arraycopy(in, 0, res, 0, usedSize); // copy only used elements
+
+        // maybe return current array:
+        // NO clean-up of array data = DIRTY ARRAY
+        putDirtyIntArray(in);
+
+        if (doLogWidenArray) {
+            logInfo("widenDirtyIntArray[" + res.length + "]: usedSize=\t"
+                    + usedSize + "\tlength=\t" + length + "\tneeded length=\t"
+                    + needSize + "\tfrom=\t" + getCallerInfo(className));
+        }
+        return res;
+    }
+
+    void putDirtyIntArray(final int[] array) {
+        final int length = array.length;
+        // odd sized array are non-cached arrays (initial arrays)
+        // ensure to never store initial arrays in cache:
+        if (((length & 0x1) == 0) && (length <= MAX_ARRAY_SIZE)) {
+            getDirtyIntArrayCache(length).putDirtyArray(array, length);
+        }
+    }
+
+    // dirty float array cache: selects the cache of the bucket computed for the given length
+    FloatArrayCache getDirtyFloatArrayCache(final int length) {
+        final int bucket = ArrayCache.getBucket(length);
+        return getArrayCachesHolder().dirtyFloatArrayCaches[bucket];
+    }
+
+    float[] getDirtyFloatArray(final int length) { // lengths above MAX_ARRAY_SIZE get a new array
+        if (length <= MAX_ARRAY_SIZE) {
+            return getDirtyFloatArrayCache(length).getArray();
+        }
+
+        if (doStats) {
+            incOversize();
+        }
+
+        if (doLogOverSize) {
+            logInfo("getDirtyFloatArray[oversize]: length=\t" + length
+                    + "\tfrom=\t" + getCallerInfo(className));
+        }
+
+        return new float[length];
+    }
+
+    float[] widenDirtyFloatArray(final float[] in,
+                                 final int usedSize, final int needSize)
+    {
+        final int length = in.length;
+        if (doChecks && length >= needSize) { // sanity check: the given array is already large enough
+            return in;
+        }
+        if (doStats) {
+            incResizeDirtyFloat();
+        }
+
+        // maybe change bucket:
+        // ensure getNewSize() > needSize (expected from getNewSize() - TODO confirm):
+        final float[] res = getDirtyFloatArray(getNewSize(usedSize, needSize));
+
+        System.arraycopy(in, 0, res, 0, usedSize); // copy only used elements
+
+        // maybe return current array:
+        // NO clean-up of array data = DIRTY ARRAY
+        putDirtyFloatArray(in);
+
+        if (doLogWidenArray) {
+            logInfo("widenDirtyFloatArray[" + res.length + "]: usedSize=\t"
+                    + usedSize + "\tlength=\t" + length + "\tneeded length=\t"
+                    + needSize + "\tfrom=\t" + getCallerInfo(className));
+        }
+        return res;
+    }
+
+    void putDirtyFloatArray(final float[] array) {
+        final int length = array.length;
+        // odd sized array are non-cached arrays (initial arrays)
+        // ensure to never store initial arrays in cache:
+        if (((length & 0x1) == 0) && (length <= MAX_ARRAY_SIZE)) {
+            getDirtyFloatArrayCache(length).putDirtyArray(array, length);
+        }
+    }
+
+    /* class holding all array cache instances (one cache of each kind per bucket) */
+    static final class ArrayCachesHolder {
+        // zero-filled int array cache:
+        final IntArrayCache[] intArrayCaches;
+        // dirty array caches:
+        final IntArrayCache[] dirtyIntArrayCaches;
+        final FloatArrayCache[] dirtyFloatArrayCaches;
+        final ByteArrayCache[] dirtyByteArrayCaches;
+
+        ArrayCachesHolder() {
+            intArrayCaches = new IntArrayCache[BUCKETS];
+            dirtyIntArrayCaches = new IntArrayCache[BUCKETS];
+            dirtyFloatArrayCaches = new FloatArrayCache[BUCKETS];
+            dirtyByteArrayCaches = new ByteArrayCache[BUCKETS];
+
+            for (int i = 0; i < BUCKETS; i++) {
+                intArrayCaches[i] = new IntArrayCache(ARRAY_SIZES[i]);
+                // dirty array caches (byte caches use their own size table):
+                dirtyIntArrayCaches[i] = new IntArrayCache(ARRAY_SIZES[i]);
+                dirtyFloatArrayCaches[i] = new FloatArrayCache(ARRAY_SIZES[i]);
+                dirtyByteArrayCaches[i] = new ByteArrayCache(DIRTY_BYTE_ARRAY_SIZES[i]);
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/RendererStats.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import static sun.java2d.marlin.MarlinUtils.logInfo;
+import sun.java2d.marlin.stats.Histogram;
+import sun.java2d.marlin.stats.Monitor;
+import sun.java2d.marlin.stats.StatLong;
+
+/**
+ * This class gathers global rendering statistics for debugging purposes only.
+ * <p>
+ * A single instance is created lazily by {@link #getInstance()}; it holds
+ * every counter, histogram and monitor plus hard references to the
+ * registered {@code RendererContext} instances.
+ */
+public final class RendererStats implements MarlinConst {
+
+    // singleton (volatile: required by the double-checked locking below)
+    private static volatile RendererStats singleton = null;
+
+    /**
+     * Returns the shared instance, creating it on first use.
+     * <p>
+     * Creation is guarded by double-checked locking: the constructor
+     * registers a shutdown hook and may start a dump timer, so it must run
+     * at most once even when several threads race on the first call.
+     *
+     * @return the unique RendererStats instance (never null)
+     */
+    static RendererStats getInstance() {
+        RendererStats stats = singleton;
+        if (stats == null) {
+            synchronized (RendererStats.class) {
+                // check again: another thread may have won the race
+                stats = singleton;
+                if (stats == null) {
+                    stats = new RendererStats();
+                    singleton = stats;
+                }
+            }
+        }
+        return stats;
+    }
+
+    /**
+     * Dumps the statistics if the shared instance has been created;
+     * does nothing otherwise.
+     */
+    public static void dumpStats() {
+        if (singleton != null) {
+            singleton.dump();
+        }
+    }
+
+    /* RendererContext collection as hard references
+       (only used for debugging purposes) */
+    final ConcurrentLinkedQueue<RendererContext> allContexts
+        = new ConcurrentLinkedQueue<RendererContext>();
+    // stats
+    final StatLong stat_cache_rowAA
+        = new StatLong("cache.rowAA");
+    final StatLong stat_cache_rowAAChunk
+        = new StatLong("cache.rowAAChunk");
+    final StatLong stat_cache_tiles
+        = new StatLong("cache.tiles");
+    final StatLong stat_rdr_poly_stack_curves
+        = new StatLong("renderer.poly.stack.curves");
+    final StatLong stat_rdr_poly_stack_types
+        = new StatLong("renderer.poly.stack.types");
+    final StatLong stat_rdr_addLine
+        = new StatLong("renderer.addLine");
+    final StatLong stat_rdr_addLine_skip
+        = new StatLong("renderer.addLine.skip");
+    final StatLong stat_rdr_curveBreak
+        = new StatLong("renderer.curveBreakIntoLinesAndAdd");
+    final StatLong stat_rdr_curveBreak_dec
+        = new StatLong("renderer.curveBreakIntoLinesAndAdd.dec");
+    final StatLong stat_rdr_curveBreak_inc
+        = new StatLong("renderer.curveBreakIntoLinesAndAdd.inc");
+    final StatLong stat_rdr_quadBreak
+        = new StatLong("renderer.quadBreakIntoLinesAndAdd");
+    final StatLong stat_rdr_quadBreak_dec
+        = new StatLong("renderer.quadBreakIntoLinesAndAdd.dec");
+    final StatLong stat_rdr_edges
+        = new StatLong("renderer.edges");
+    final StatLong stat_rdr_edges_count
+        = new StatLong("renderer.edges.count");
+    final StatLong stat_rdr_edges_resizes
+        = new StatLong("renderer.edges.resize");
+    final StatLong stat_rdr_activeEdges
+        = new StatLong("renderer.activeEdges");
+    final StatLong stat_rdr_activeEdges_updates
+        = new StatLong("renderer.activeEdges.updates");
+    final StatLong stat_rdr_activeEdges_adds
+        = new StatLong("renderer.activeEdges.adds");
+    final StatLong stat_rdr_activeEdges_adds_high
+        = new StatLong("renderer.activeEdges.adds_high");
+    final StatLong stat_rdr_crossings_updates
+        = new StatLong("renderer.crossings.updates");
+    final StatLong stat_rdr_crossings_sorts
+        = new StatLong("renderer.crossings.sorts");
+    final StatLong stat_rdr_crossings_bsearch
+        = new StatLong("renderer.crossings.bsearch");
+    final StatLong stat_rdr_crossings_msorts
+        = new StatLong("renderer.crossings.msorts");
+    // growable arrays
+    final StatLong stat_array_dasher_dasher
+        = new StatLong("array.dasher.dasher.d_float");
+    final StatLong stat_array_dasher_firstSegmentsBuffer
+        = new StatLong("array.dasher.firstSegmentsBuffer.d_float");
+    final StatLong stat_array_stroker_polystack_curves
+        = new StatLong("array.stroker.polystack.curves.d_float");
+    final StatLong stat_array_stroker_polystack_curveTypes
+        = new StatLong("array.stroker.polystack.curveTypes.d_byte");
+    final StatLong stat_array_marlincache_rowAAChunk
+        = new StatLong("array.marlincache.rowAAChunk.d_byte");
+    final StatLong stat_array_marlincache_touchedTile
+        = new StatLong("array.marlincache.touchedTile.int");
+    final StatLong stat_array_renderer_alphaline
+        = new StatLong("array.renderer.alphaline.int");
+    final StatLong stat_array_renderer_crossings
+        = new StatLong("array.renderer.crossings.int");
+    final StatLong stat_array_renderer_aux_crossings
+        = new StatLong("array.renderer.aux_crossings.int");
+    final StatLong stat_array_renderer_edgeBuckets
+        = new StatLong("array.renderer.edgeBuckets.int");
+    final StatLong stat_array_renderer_edgeBucketCounts
+        = new StatLong("array.renderer.edgeBucketCounts.int");
+    final StatLong stat_array_renderer_edgePtrs
+        = new StatLong("array.renderer.edgePtrs.int");
+    final StatLong stat_array_renderer_aux_edgePtrs
+        = new StatLong("array.renderer.aux_edgePtrs.int");
+    // histograms
+    final Histogram hist_rdr_crossings
+        = new Histogram("renderer.crossings");
+    final Histogram hist_rdr_crossings_ratio
+        = new Histogram("renderer.crossings.ratio");
+    final Histogram hist_rdr_crossings_adds
+        = new Histogram("renderer.crossings.adds");
+    final Histogram hist_rdr_crossings_msorts
+        = new Histogram("renderer.crossings.msorts");
+    final Histogram hist_rdr_crossings_msorts_adds
+        = new Histogram("renderer.crossings.msorts.adds");
+    final Histogram hist_tile_generator_alpha
+        = new Histogram("tile_generator.alpha");
+    final Histogram hist_tile_generator_encoding
+        = new Histogram("tile_generator.encoding");
+    final Histogram hist_tile_generator_encoding_dist
+        = new Histogram("tile_generator.encoding.dist");
+    final Histogram hist_tile_generator_encoding_ratio
+        = new Histogram("tile_generator.encoding.ratio");
+    final Histogram hist_tile_generator_encoding_runLen
+        = new Histogram("tile_generator.encoding.runLen");
+    // all stats (counters and histograms) in dump order
+    final StatLong[] statistics = new StatLong[]{
+        stat_cache_rowAA,
+        stat_cache_rowAAChunk,
+        stat_cache_tiles,
+        stat_rdr_poly_stack_types,
+        stat_rdr_poly_stack_curves,
+        stat_rdr_addLine,
+        stat_rdr_addLine_skip,
+        stat_rdr_curveBreak,
+        stat_rdr_curveBreak_dec,
+        stat_rdr_curveBreak_inc,
+        stat_rdr_quadBreak,
+        stat_rdr_quadBreak_dec,
+        stat_rdr_edges,
+        stat_rdr_edges_count,
+        stat_rdr_edges_resizes,
+        stat_rdr_activeEdges,
+        stat_rdr_activeEdges_updates,
+        stat_rdr_activeEdges_adds,
+        stat_rdr_activeEdges_adds_high,
+        stat_rdr_crossings_updates,
+        stat_rdr_crossings_sorts,
+        stat_rdr_crossings_bsearch,
+        stat_rdr_crossings_msorts,
+        hist_rdr_crossings,
+        hist_rdr_crossings_ratio,
+        hist_rdr_crossings_adds,
+        hist_rdr_crossings_msorts,
+        hist_rdr_crossings_msorts_adds,
+        hist_tile_generator_alpha,
+        hist_tile_generator_encoding,
+        hist_tile_generator_encoding_dist,
+        hist_tile_generator_encoding_ratio,
+        hist_tile_generator_encoding_runLen,
+        stat_array_dasher_dasher,
+        stat_array_dasher_firstSegmentsBuffer,
+        stat_array_stroker_polystack_curves,
+        stat_array_stroker_polystack_curveTypes,
+        stat_array_marlincache_rowAAChunk,
+        stat_array_marlincache_touchedTile,
+        stat_array_renderer_alphaline,
+        stat_array_renderer_crossings,
+        stat_array_renderer_aux_crossings,
+        stat_array_renderer_edgeBuckets,
+        stat_array_renderer_edgeBucketCounts,
+        stat_array_renderer_edgePtrs,
+        stat_array_renderer_aux_edgePtrs
+    };
+    // monitors
+    final Monitor mon_pre_getAATileGenerator
+        = new Monitor("MarlinRenderingEngine.getAATileGenerator()");
+    final Monitor mon_npi_currentSegment
+        = new Monitor("NormalizingPathIterator.currentSegment()");
+    final Monitor mon_rdr_addLine
+        = new Monitor("Renderer.addLine()");
+    final Monitor mon_rdr_endRendering
+        = new Monitor("Renderer.endRendering()");
+    final Monitor mon_rdr_endRendering_Y
+        = new Monitor("Renderer._endRendering(Y)");
+    final Monitor mon_rdr_copyAARow
+        = new Monitor("Renderer.copyAARow()");
+    final Monitor mon_pipe_renderTiles
+        = new Monitor("AAShapePipe.renderTiles()");
+    final Monitor mon_ptg_getAlpha
+        = new Monitor("MarlinTileGenerator.getAlpha()");
+    final Monitor mon_debug
+        = new Monitor("DEBUG()");
+    // all monitors in dump order
+    final Monitor[] monitors = new Monitor[]{
+        mon_pre_getAATileGenerator,
+        mon_npi_currentSegment,
+        mon_rdr_addLine,
+        mon_rdr_endRendering,
+        mon_rdr_endRendering_Y,
+        mon_rdr_copyAARow,
+        mon_pipe_renderTiles,
+        mon_ptg_getAlpha,
+        mon_debug
+    };
+
+    /**
+     * Registers a JVM shutdown hook that dumps the statistics and, when
+     * useDumpThread is set, starts a timer that dumps them at a fixed rate
+     * (both the initial delay and the period are statDump).
+     */
+    private RendererStats() {
+        super();
+
+        Runtime.getRuntime().addShutdownHook(new Thread() {
+            @Override
+            public void run() {
+                dump();
+            }
+        });
+
+        if (useDumpThread) {
+            final Timer statTimer = new Timer("RendererStats");
+            statTimer.scheduleAtFixedRate(new TimerTask() {
+                @Override
+                public void run() {
+                    dump();
+                }
+            }, statDump, statDump);
+        }
+    }
+
+    /**
+     * Logs the gathered statistics through logInfo, once per registered
+     * RendererContext.
+     * <p>
+     * NOTE(review): monitors and statistics are fields of this shared
+     * instance, not of each context, and statistics are reset right after
+     * being logged; with several contexts the values presumably only show
+     * up under the first one - confirm this is intended.
+     */
+    void dump() {
+        if (doStats) {
+            ArrayCache.dumpStats();
+        }
+        // work on a snapshot of the registered contexts
+        final RendererContext[] all = allContexts.toArray(
+                                          new RendererContext[allContexts.size()]);
+        for (RendererContext rdrCtx : all) {
+            logInfo("RendererContext: " + rdrCtx.name);
+
+            if (doMonitors) {
+                // only monitors that were used are logged
+                for (Monitor monitor : monitors) {
+                    if (monitor.count != 0) {
+                        logInfo(monitor.toString());
+                    }
+                }
+                // As getAATileGenerator percents:
+                final long total = mon_pre_getAATileGenerator.sum;
+                if (total != 0L) {
+                    for (Monitor monitor : monitors) {
+                        logInfo(monitor.name + " : "
+                                + ((100d * monitor.sum) / total) + " %");
+                    }
+                }
+                if (doFlushMonitors) {
+                    for (Monitor m : monitors) {
+                        m.reset();
+                    }
+                }
+            }
+
+            if (doStats) {
+                // log then reset every statistic that was used
+                for (StatLong stat : statistics) {
+                    if (stat.count != 0) {
+                        logInfo(stat.toString());
+                        stat.reset();
+                    }
+                }
+                // IntArrayCaches stats:
+                final RendererContext.ArrayCachesHolder holder
+                    = rdrCtx.getArrayCachesHolder();
+
+                logInfo("Array caches for thread: " + rdrCtx.name);
+
+                for (IntArrayCache cache : holder.intArrayCaches) {
+                    cache.dumpStats();
+                }
+
+                logInfo("Dirty Array caches for thread: " + rdrCtx.name);
+
+                for (IntArrayCache cache : holder.dirtyIntArrayCaches) {
+                    cache.dumpStats();
+                }
+                for (FloatArrayCache cache : holder.dirtyFloatArrayCaches) {
+                    cache.dumpStats();
+                }
+                for (ByteArrayCache cache : holder.dirtyByteArrayCaches) {
+                    cache.dumpStats();
+                }
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Stroker.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,1388 @@
+/*
+ * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.util.Arrays;
+import static java.lang.Math.ulp;
+import static java.lang.Math.sqrt;
+
+import sun.awt.geom.PathConsumer2D;
+import sun.java2d.marlin.Curve.BreakPtrIterator;
+
+
+// TODO: some of the arithmetic here is too verbose and prone to
+// hard-to-debug typos. We should consider making a small Point/Vector class
+// that has methods like plus(Point), minus(Point), dot(Point), cross(Point)
+// and such.
+final class Stroker implements PathConsumer2D, MarlinConst {
+
+    private static final int MOVE_TO = 0;
+    private static final int DRAWING_OP_TO = 1; // ie. curve, line, or quad
+    private static final int CLOSE = 2;
+
+    /**
+     * Constant value for join style.
+     */
+    public static final int JOIN_MITER = 0;
+
+    /**
+     * Constant value for join style.
+     */
+    public static final int JOIN_ROUND = 1;
+
+    /**
+     * Constant value for join style.
+     */
+    public static final int JOIN_BEVEL = 2;
+
+    /**
+     * Constant value for end cap style.
+     */
+    public static final int CAP_BUTT = 0;
+
+    /**
+     * Constant value for end cap style.
+     */
+    public static final int CAP_ROUND = 1;
+
+    /**
+     * Constant value for end cap style.
+     */
+    public static final int CAP_SQUARE = 2;
+
+    // Pisces used to use fixed-point arithmetic with 16 fractional bits.
+    // The value of the constant below was kept unchanged when the code was
+    // converted to floating point, which is why the division by 2^16 is there.
+    private static final float ROUND_JOIN_THRESHOLD = 1000/65536f;
+
+    private static final float C = 0.5522847498307933f;
+
+    private static final int MAX_N_CURVES = 11;
+
+    private PathConsumer2D out;
+
+    private int capStyle;
+    private int joinStyle;
+
+    private float lineWidth2;
+
+    private final float[] offset0 = new float[2];
+    private final float[] offset1 = new float[2];
+    private final float[] offset2 = new float[2];
+    private final float[] miter = new float[2];
+    private float miterLimitSq;
+
+    private int prev;
+
+    // The starting point of the path, and the slope there.
+    private float sx0, sy0, sdx, sdy;
+    // the current point and the slope there.
+    private float cx0, cy0, cdx, cdy; // c stands for current
+    // vectors that when added to (sx0,sy0) and (cx0,cy0) respectively yield the
+    // first and last points on the left parallel path. Since this path is
+    // parallel, its slope at any point is parallel to the slope of the
+    // original path (though they may have different directions), so these
+    // could be computed from sdx,sdy and cdx,cdy (and vice versa), but that
+    // would be error prone and hard to read, so we keep these anyway.
+    private float smx, smy, cmx, cmy;
+
+    private final PolyStack reverse;
+
+    // This is where the curve to be processed is put. We give it
+    // enough room to store 2 curves: one for the current subdivision, the
+    // other for the rest of the curve.
+    private final float[] middle = new float[2 * 8];
+    private final float[] lp = new float[8];
+    private final float[] rp = new float[8];
+    private final float[] subdivTs = new float[MAX_N_CURVES - 1];
+
+    // per-thread renderer context
+    final RendererContext rdrCtx;
+
+    // dirty curve
+    final Curve curve;
+
+    /**
+     * Creates a <code>Stroker</code> bound to the given renderer context.
+     * @param rdrCtx per-thread renderer context
+     */
+    Stroker(final RendererContext rdrCtx) {
+        this.rdrCtx = rdrCtx;
+        // reuse the curve instance owned by the context
+        this.curve = rdrCtx.curve;
+        // stack collecting the segments emitted in reverse
+        this.reverse = new PolyStack(rdrCtx);
+    }
+
+    /**
+     * Prepares this <code>Stroker</code> for a new path.
+     *
+     * @param pc2d the <code>PathConsumer2D</code> receiving the output.
+     * @param lineWidth the desired line width in pixels
+     * @param capStyle the desired end cap style: <code>CAP_BUTT</code>,
+     * <code>CAP_ROUND</code> or <code>CAP_SQUARE</code>.
+     * @param joinStyle the desired line join style:
+     * <code>JOIN_MITER</code>, <code>JOIN_ROUND</code> or
+     * <code>JOIN_BEVEL</code>.
+     * @param miterLimit the desired miter limit
+     * @return this instance
+     */
+    Stroker init(PathConsumer2D pc2d,
+              float lineWidth,
+              int capStyle,
+              int joinStyle,
+              float miterLimit)
+    {
+        final float halfWidth = lineWidth / 2f;
+        // the miter limit is stored squared, scaled by the half width
+        final float limit = miterLimit * halfWidth;
+
+        this.out = pc2d;
+        this.lineWidth2 = halfWidth;
+        this.capStyle = capStyle;
+        this.joinStyle = joinStyle;
+        this.miterLimitSq = limit * limit;
+
+        // no subpath started yet
+        this.prev = CLOSE;
+
+        // flag the context as stroking
+        rdrCtx.stroking = 1;
+
+        return this; // fluent API
+    }
+
+    /**
+     * Disposes this stroker so that the instance can be reused:
+     * the reverse stack is disposed and, when doCleanDirty is set,
+     * every scratch array is zero-filled.
+     */
+    void dispose() {
+        reverse.dispose();
+
+        if (!doCleanDirty) {
+            return;
+        }
+        // Force zero-fill dirty arrays:
+        Arrays.fill(offset0, 0f);
+        Arrays.fill(offset1, 0f);
+        Arrays.fill(offset2, 0f);
+        Arrays.fill(miter, 0f);
+        Arrays.fill(middle, 0f);
+        Arrays.fill(lp, 0f);
+        Arrays.fill(rp, 0f);
+        Arrays.fill(subdivTs, 0f);
+    }
+
+    // Stores in m[0], m[1] the vector (ly, -lx) scaled to length w,
+    // i.e. a vector perpendicular to (lx, ly).
+    // A zero direction yields a zero offset.
+    private static void computeOffset(final float lx, final float ly,
+                                      final float w, final float[] m)
+    {
+        final float lenSq = lx*lx + ly*ly;
+        if (lenSq == 0f) {
+            m[0] = 0f;
+            m[1] = 0f;
+            return;
+        }
+        final float len = (float) sqrt(lenSq);
+        m[0] =  (ly * w) / len;
+        m[1] = -(lx * w) / len;
+    }
+
+    // Tells whether the vectors (dx1, dy1) and (dx2, dy2) are clockwise,
+    // that is whether (dx1, dy1) must be rotated clockwise to close the
+    // smallest angle between it and (dx2, dy2).
+    // Equivalently: given p2 = p1+(dx1,dy1) and q = p2+(dx2,dy2), it tells
+    // whether q lies on the right side of the line through p1 and p2,
+    // i.e. whether p1, p2, q are in clockwise order.
+    // NOTE: "clockwise" here assumes coordinates with 0,0 at the bottom left.
+    private static boolean isCW(final float dx1, final float dy1,
+                                final float dx2, final float dy2)
+    {
+        // same test as (dx1 * dy2 <= dy1 * dx2), operands swapped
+        return dy1 * dx2 >= dx1 * dy2;
+    }
+
+    // Draws a round join at (x, y) between the offsets (omx, omy) and
+    // (mx, my), unless one offset is zero or the squared distance between
+    // both offsets is below the threshold. When rev is set, both offsets
+    // are negated before drawing.
+    private void drawRoundJoin(float x, float y,
+                               float omx, float omy, float mx, float my,
+                               boolean rev,
+                               float threshold)
+    {
+        final boolean prevIsZero = (omx == 0 && omy == 0);
+        final boolean nextIsZero = (mx == 0 && my == 0);
+        if (prevIsZero || nextIsZero) {
+            return;
+        }
+
+        final float ddx = omx - mx;
+        final float ddy = omy - my;
+        final float distSq = ddx*ddx + ddy*ddy;
+        if (distSq < threshold) {
+            // both offsets are (nearly) identical: nothing to join
+            return;
+        }
+
+        if (rev) {
+            drawRoundJoin(x, y, -omx, -omy, -mx, -my, true);
+        } else {
+            drawRoundJoin(x, y, omx, omy, mx, my, false);
+        }
+    }
+
+    // Approximates the arc centered at (cx, cy) going from offset
+    // (omx, omy) to offset (mx, my) with one or two bezier curves.
+    private void drawRoundJoin(float cx, float cy,
+                               float omx, float omy,
+                               float mx, float my,
+                               boolean rev)
+    {
+        // The dot product of (omx,omy) and (mx,my) has the sign of the
+        // cosine of ext, the angle between both vectors.
+        final double cosext = omx * mx + omy * my;
+
+        if (cosext >= 0) {
+            // abs(ext) is <= 90 degrees: a single curve is enough to
+            // approximate the circle section joining both offsets.
+            drawBezApproxForArc(cx, cy, omx, omy, mx, my, rev);
+            return;
+        }
+
+        // Otherwise the arc is split into 2 arcs spanning the same angle.
+        // The split point is one of the 2 intersections of the circle with
+        // the perpendicular bisector of the chord (omx,omy)->(mx,my).
+        // Scaling (omx+mx, omy+my)/2 to length lineWidth2 would find it,
+        // but that has numerical problems when the angle between both
+        // offsets is close to 180 degrees. Instead a normal of
+        // (omx,omy)-(mx,my) is computed: it gives the direction of the
+        // bisector, which goes through the origin, so scaling it to length
+        // lineWidth2 yields an intersection. This scaling is numerically
+        // safe because lineWidth2 divided by the normal's length is at
+        // least 0.5 and at most sqrt(2)/2 (the arc angle is > 90 degrees).
+        final float nx = my - omy;
+        final float ny = omx - mx;
+        final float nlen = (float) sqrt(nx*nx + ny*ny);
+        final float scale = lineWidth2/nlen;
+        float midX = nx * scale;
+        float midY = ny * scale;
+
+        // if (isCW(omx, omy, mx, my) != isCW(midX, midY, mx, my)) the wrong
+        // intersection was computed and the other one is needed.
+        // That test is equivalent to if (rev).
+        if (rev) {
+            midX = -midX;
+            midY = -midY;
+        }
+        drawBezApproxForArc(cx, cy, omx, omy, midX, midY, rev);
+        drawBezApproxForArc(cx, cy, midX, midY, mx, my, rev);
+    }
+
+    // Emits one cubic bezier curve approximating the arc centered at
+    // (cx, cy) that goes from offset (omx, omy) to offset (mx, my).
+    // the input arc defined by omx,omy and mx,my must span <= 90 degrees.
+    // When rev is set the curve is pushed on the reverse stack instead of
+    // being sent to the output (see emitCurveTo).
+    private void drawBezApproxForArc(final float cx, final float cy,
+                                     final float omx, final float omy,
+                                     final float mx, final float my,
+                                     boolean rev)
+    {
+        // dot product of both offsets divided by 2 * lineWidth2^2
+        // (half the cosine of the arc angle, assuming both offsets have
+        // length lineWidth2 - TODO confirm with callers)
+        float cosext2 = (omx * mx + omy * my) / (2f * lineWidth2 * lineWidth2);
+        // cv is the length of P1-P0 and P2-P3 divided by the radius of the arc
+        // (so, cv assumes the arc has radius 1). P0, P1, P2, P3 are the points that
+        // define the bezier curve we're computing.
+        // It is computed using the constraints that P1-P0 and P3-P2 are parallel
+        // to the arc tangents at the endpoints, and that |P1-P0|=|P3-P2|.
+        float cv = (float) ((4.0 / 3.0) * sqrt(0.5-cosext2) /
+                            (1.0 + sqrt(cosext2+0.5)));
+        // if clockwise, we need to negate cv.
+        if (rev) { // rev is equivalent to isCW(omx, omy, mx, my)
+            cv = -cv;
+        }
+        // P0: start point of the arc
+        final float x1 = cx + omx;
+        final float y1 = cy + omy;
+        // P1: P0 moved by cv along the perpendicular of the start offset
+        final float x2 = x1 - cv * omy;
+        final float y2 = y1 + cv * omx;
+
+        // P3: end point of the arc
+        final float x4 = cx + mx;
+        final float y4 = cy + my;
+        // P2: P3 moved by cv along the perpendicular of the end offset
+        final float x3 = x4 + cv * my;
+        final float y3 = y4 - cv * mx;
+
+        emitCurveTo(x1, y1, x2, y2, x3, y3, x4, y4, rev);
+    }
+
+    // Emits a round cap around (cx, cy) as two cubic curves sent directly
+    // to the output: the first one ends at (cx-my, cy+mx), the second one
+    // at (cx-mx, cy-my), the point opposite to (cx+mx, cy+my).
+    private void drawRoundCap(float cx, float cy, float mx, float my) {
+        // NOTE(review): the previous comment here described leading
+        // arguments "ignored by emitCurveTo (because of the false)"; the
+        // 6-argument emitCurveTo overload used below takes no such
+        // arguments or flag, so that remark looked stale.
+        // C scales the control point offsets (presumably the usual
+        // quarter-circle bezier constant 4/3*(sqrt(2)-1) - confirm).
+        emitCurveTo(cx+mx-C*my, cy+my+C*mx,
+                    cx-my+C*mx, cy+mx+C*my,
+                    cx-my,      cy+mx);
+        emitCurveTo(cx-my-C*mx, cy+mx-C*my,
+                    cx-mx-C*my, cy-my+C*mx,
+                    cx-mx,      cy-my);
+    }
+
+    // Put the intersection point of the lines (x0, y0) -> (x1, y1)
+    // and (x0p, y0p) -> (x1p, y1p) in m[off] and m[off+1].
+    // If the lines are parallel, it will put a non finite number in m.
+    private static void computeIntersection(final float x0, final float y0,
+                                            final float x1, final float y1,
+                                            final float x0p, final float y0p,
+                                            final float x1p, final float y1p,
+                                            final float[] m, int off)
+    {
+        // direction vectors of both lines
+        float x10 = x1 - x0;
+        float y10 = y1 - y0;
+        float x10p = x1p - x0p;
+        float y10p = y1p - y0p;
+
+        // cross product of both directions: zero for parallel lines
+        float den = x10*y10p - x10p*y10;
+        // t is the position of the intersection along the first line,
+        // in units of its direction vector
+        float t = x10p*(y0-y0p) - y10p*(x0-x0p);
+        t /= den;
+        m[off++] = x0 + t*x10;
+        m[off]   = y0 + t*y10;
+    }
+
+    // Emits the miter point of the join at (x0, y0) between the previous
+    // direction (pdx, pdy) with offset (omx, omy) and the next direction
+    // (dx, dy) with offset (mx, my), provided it lies within the miter
+    // limit. When rev is set both offsets are negated and the point is
+    // pushed on the reverse stack instead of being sent to the output.
+    private void drawMiter(final float pdx, final float pdy,
+                           final float x0, final float y0,
+                           final float dx, final float dy,
+                           float omx, float omy, float mx, float my,
+                           boolean rev)
+    {
+        // nothing to draw when both offsets are equal or when one of the
+        // directions is zero
+        if ((mx == omx && my == omy) ||
+            (pdx == 0f && pdy == 0f) ||
+            (dx == 0f && dy == 0f))
+        {
+            return;
+        }
+
+        if (rev) {
+            omx = -omx;
+            omy = -omy;
+            mx  = -mx;
+            my  = -my;
+        }
+
+        // intersect the previous segment shifted by (omx, omy) with the
+        // next segment shifted by (mx, my); the result goes to miter[0..1]
+        computeIntersection((x0 - pdx) + omx, (y0 - pdy) + omy, x0 + omx, y0 + omy,
+                            (dx + x0) + mx, (dy + y0) + my, x0 + mx, y0 + my,
+                            miter, 0);
+
+        final float miterX = miter[0];
+        final float miterY = miter[1];
+        // squared distance between the join point and the miter point
+        float lenSq = (miterX-x0)*(miterX-x0) + (miterY-y0)*(miterY-y0);
+
+        // If the lines are parallel, lenSq will be either NaN or +inf
+        // (actually, I'm not sure if the latter is possible. The important
+        // thing is that -inf is not possible, because lenSq is a square).
+        // For both of those values, the comparison below will fail and
+        // no miter will be drawn, which is correct.
+        if (lenSq < miterLimitSq) {
+            emitLineTo(miterX, miterY, rev);
+        }
+    }
+
+    /**
+     * Starts a new subpath at (x0, y0); a subpath left open by a previous
+     * drawing operation is finished first.
+     */
+    @Override
+    public void moveTo(float x0, float y0) {
+        if (prev == DRAWING_OP_TO) {
+            finish();
+        }
+        // the start point and the current point are both (x0, y0)
+        this.sx0 = x0;
+        this.sy0 = y0;
+        this.cx0 = x0;
+        this.cy0 = y0;
+        // both slopes default to the direction (1, 0)
+        this.sdx = 1f;
+        this.sdy = 0f;
+        this.cdx = 1f;
+        this.cdy = 0f;
+        this.prev = MOVE_TO;
+    }
+
+    /**
+     * Strokes the segment from the current point to (x1, y1): draws the
+     * join with the previous segment, emits the offset segment on one side
+     * to the output and stacks the other side for reverse emission.
+     */
+    @Override
+    public void lineTo(float x1, float y1) {
+        float dx = x1 - cx0;
+        float dy = y1 - cy0;
+        // a zero-length segment gets the direction (1, 0)
+        if (dx == 0f && dy == 0f) {
+            dx = 1f;
+        }
+        computeOffset(dx, dy, lineWidth2, offset0);
+        final float offX = offset0[0];
+        final float offY = offset0[1];
+
+        drawJoin(cdx, cdy, cx0, cy0, dx, dy, cmx, cmy, offX, offY);
+
+        // side shifted by +offset: straight to the output
+        emitLineTo(cx0 + offX, cy0 + offY);
+        emitLineTo( x1 + offX,  y1 + offY);
+
+        // side shifted by -offset: pushed on the reverse stack
+        emitLineToRev(cx0 - offX, cy0 - offY);
+        emitLineToRev( x1 - offX,  y1 - offY);
+
+        // (x1, y1) becomes the current point
+        this.prev = DRAWING_OP_TO;
+        this.cx0 = x1;
+        this.cy0 = y1;
+        this.cdx = dx;
+        this.cdy = dy;
+        this.cmx = offX;
+        this.cmy = offY;
+    }
+
+    /**
+     * Closes the current subpath. Nothing happens when the previous
+     * operation was already a close; a subpath made of a lone moveTo is
+     * finished as a horizontal zero-length segment.
+     */
+    @Override
+    public void closePath() {
+        if (prev == CLOSE) {
+            return;
+        }
+        if (prev != DRAWING_OP_TO) {
+            // only a moveTo was seen
+            emitMoveTo(cx0, cy0 - lineWidth2);
+            this.smx = 0;
+            this.cmx = 0;
+            this.smy = -lineWidth2;
+            this.cmy = -lineWidth2;
+            this.sdx = 1;
+            this.cdx = 1;
+            this.sdy = 0;
+            this.cdy = 0;
+            finish();
+            return;
+        }
+
+        // go back to the starting point if needed
+        if (cx0 != sx0 || cy0 != sy0) {
+            lineTo(sx0, sy0);
+        }
+
+        // join the last segment with the first one
+        drawJoin(cdx, cdy, cx0, cy0, sdx, sdy, cmx, cmy, smx, smy);
+
+        emitLineTo(sx0 + smx, sy0 + smy);
+
+        // then emit the stacked side starting from the opposite offset
+        emitMoveTo(sx0 - smx, sy0 - smy);
+        emitReverse();
+
+        this.prev = CLOSE;
+        emitClose();
+    }
+
+    // Pops everything stored in the reverse stack into the output.
+    private void emitReverse() {
+        reverse.popAll(out);
+    }
+
+    /**
+     * Ends the path: finishes a subpath left open by a drawing operation,
+     * forwards pathDone to the output and disposes this instance.
+     */
+    @Override
+    public void pathDone() {
+        if (prev == DRAWING_OP_TO) {
+            finish();
+        }
+
+        out.pathDone();
+
+        // this shouldn't matter since this object won't be used
+        // after the call to this method.
+        this.prev = CLOSE;
+
+        // Dispose this instance:
+        dispose();
+    }
+
+    // Finishes an open subpath: caps the current end, emits the stacked
+    // reverse side, caps the start and closes the output path.
+    // Only CAP_ROUND and CAP_SQUARE emit extra geometry.
+    private void finish() {
+        // cap at the current point
+        switch (capStyle) {
+        case CAP_ROUND:
+            drawRoundCap(cx0, cy0, cmx, cmy);
+            break;
+        case CAP_SQUARE:
+            emitLineTo(cx0 - cmy + cmx, cy0 + cmx + cmy);
+            emitLineTo(cx0 - cmy - cmx, cy0 + cmx - cmy);
+            break;
+        default:
+        }
+
+        emitReverse();
+
+        // cap at the starting point
+        switch (capStyle) {
+        case CAP_ROUND:
+            drawRoundCap(sx0, sy0, -smx, -smy);
+            break;
+        case CAP_SQUARE:
+            emitLineTo(sx0 + smy - smx, sy0 - smx - smy);
+            emitLineTo(sx0 + smy + smx, sy0 - smx + smy);
+            break;
+        default:
+        }
+
+        emitClose();
+    }
+
+    // Forwards a moveTo to the output consumer.
+    private void emitMoveTo(final float x0, final float y0) {
+        out.moveTo(x0, y0);
+    }
+
+    // Forwards a lineTo to the output consumer.
+    private void emitLineTo(final float x1, final float y1) {
+        out.lineTo(x1, y1);
+    }
+
+    // Pushes a line point on the reverse stack.
+    private void emitLineToRev(final float x1, final float y1) {
+        reverse.pushLine(x1, y1);
+    }
+
+    // Routes a line point either to the reverse stack (rev set)
+    // or directly to the output consumer.
+    private void emitLineTo(final float x1, final float y1,
+                            final boolean rev)
+    {
+        if (!rev) {
+            out.lineTo(x1, y1);
+            return;
+        }
+        reverse.pushLine(x1, y1);
+    }
+
+    // Forwards a quadTo to the output consumer.
+    private void emitQuadTo(final float x1, final float y1,
+                            final float x2, final float y2)
+    {
+        out.quadTo(x1, y1, x2, y2);
+    }
+
+    /** Buffers the quad points (x0, y0), (x1, y1) on the reverse stack. */
+    private void emitQuadToRev(final float x0, final float y0,
+                               final float x1, final float y1)
+    {
+        this.reverse.pushQuad(x0, y0, x1, y1);
+    }
+
+    /** Forwards a cubic curveTo command directly to the output consumer. */
+    private void emitCurveTo(final float x1, final float y1,
+                             final float x2, final float y2,
+                             final float x3, final float y3)
+    {
+        this.out.curveTo(x1, y1, x2, y2, x3, y3);
+    }
+
+    /** Buffers the cubic points (x0, y0), (x1, y1), (x2, y2) on the reverse stack. */
+    private void emitCurveToRev(final float x0, final float y0,
+                                final float x1, final float y1,
+                                final float x2, final float y2)
+    {
+        this.reverse.pushCubic(x0, y0, x1, y1, x2, y2);
+    }
+
+    /**
+     * Emits a cubic either directly or through the reverse stack.
+     * When {@code rev} is true the points (x0, y0), (x1, y1), (x2, y2) are
+     * pushed on the reverse stack and (x3, y3) is unused; otherwise
+     * (x1, y1), (x2, y2), (x3, y3) are sent to the output consumer and
+     * (x0, y0) is unused.
+     */
+    private void emitCurveTo(final float x0, final float y0,
+                             final float x1, final float y1,
+                             final float x2, final float y2,
+                             final float x3, final float y3, final boolean rev)
+    {
+        if (!rev) {
+            emitCurveTo(x1, y1, x2, y2, x3, y3);
+            return;
+        }
+        emitCurveToRev(x0, y0, x1, y1, x2, y2);
+    }
+
+    /** Forwards a closePath() command to the output consumer. */
+    private void emitClose() {
+        this.out.closePath();
+    }
+
+    private void drawJoin(float pdx, float pdy,
+                          float x0, float y0,
+                          float dx, float dy,
+                          float omx, float omy,
+                          float mx, float my)
+    {
+        if (prev != DRAWING_OP_TO) {
+            emitMoveTo(x0 + mx, y0 + my);
+            this.sdx = dx;
+            this.sdy = dy;
+            this.smx = mx;
+            this.smy = my;
+        } else {
+            boolean cw = isCW(pdx, pdy, dx, dy);
+            if (joinStyle == JOIN_MITER) {
+                drawMiter(pdx, pdy, x0, y0, dx, dy, omx, omy, mx, my, cw);
+            } else if (joinStyle == JOIN_ROUND) {
+                drawRoundJoin(x0, y0,
+                              omx, omy,
+                              mx, my, cw,
+                              ROUND_JOIN_THRESHOLD);
+            }
+            emitLineTo(x0, y0, !cw);
+        }
+        prev = DRAWING_OP_TO;
+    }
+
+    /**
+     * Tells whether the points (x1, y1) and (x2, y2) are close to each
+     * other: both the x pair and the y pair must pass
+     * {@code Helpers.within} with the tolerance {@code ERR}.
+     * ERR is expected to be small, so a per-axis check is considered as
+     * good as the true distance (and avoids calling Math.abs here).
+     */
+    private static boolean within(final float x1, final float y1,
+                                  final float x2, final float y2,
+                                  final float ERR)
+    {
+        assert ERR > 0 : "";
+        if (!Helpers.within(x1, x2, ERR)) {
+            return false;
+        }
+        return Helpers.within(y1, y2, ERR);
+    }
+
+    /**
+     * Computes the two segments parallel to the line (x1, y1)-(x2, y2):
+     * the offset vector is obtained from {@code computeOffset} (using
+     * {@code lineWidth2}) and is added to both end points for
+     * {@code left} and subtracted from them for {@code right}.
+     * Indices 0..3 of each array receive (x1', y1', x2', y2').
+     */
+    private void getLineOffsets(float x1, float y1,
+                                float x2, float y2,
+                                float[] left, float[] right) {
+        computeOffset(x2 - x1, y2 - y1, lineWidth2, offset0);
+        final float ox = offset0[0];
+        final float oy = offset0[1];
+
+        // segment shifted by +offset
+        left[0] = x1 + ox;
+        left[1] = y1 + oy;
+        left[2] = x2 + ox;
+        left[3] = y2 + oy;
+
+        // segment shifted by -offset
+        right[0] = x1 - ox;
+        right[1] = y1 - oy;
+        right[2] = x2 - ox;
+        right[3] = y2 - oy;
+    }
+
+    /**
+     * Computes approximations of the two offset curves of the cubic stored
+     * in {@code pts[off .. off + 7]}; the offset vectors come from
+     * {@code computeOffset} called with {@code lineWidth2}.
+     *
+     * @param pts coordinates (x1, y1) ... (x4, y4) of the cubic, from off
+     * @param off index of x1 in pts
+     * @param leftOff receives the curve on the +offset side
+     * @param rightOff receives the curve on the -offset side
+     * @return the number of coordinates stored in each output array:
+     *         8 for a cubic, 4 when the input is treated as a line
+     */
+    private int computeOffsetCubic(float[] pts, final int off,
+                                   float[] leftOff, float[] rightOff)
+    {
+        // if p1=p2 or p3=p4 it means that the derivative at the endpoint
+        // vanishes, which creates problems with computeOffset. Usually
+        // this happens when this stroker object is trying to widen
+        // a curve with a cusp. What happens is that curveTo splits
+        // the input curve at the cusp, and passes it to this function.
+        // because of inaccuracies in the splitting, we consider points
+        // equal if they're very close to each other.
+        final float x1 = pts[off + 0], y1 = pts[off + 1];
+        final float x2 = pts[off + 2], y2 = pts[off + 3];
+        final float x3 = pts[off + 4], y3 = pts[off + 5];
+        final float x4 = pts[off + 6], y4 = pts[off + 7];
+
+        float dx4 = x4 - x3;
+        float dy4 = y4 - y3;
+        float dx1 = x2 - x1;
+        float dy1 = y2 - y1;
+
+        // if p1 == p2 && p3 == p4: draw line from p1->p4, unless p1 == p4,
+        // in which case ignore if p1 == p2
+        final boolean p1eqp2 = within(x1,y1,x2,y2, 6f * ulp(y2));
+        final boolean p3eqp4 = within(x3,y3,x4,y4, 6f * ulp(y4));
+        if (p1eqp2 && p3eqp4) {
+            getLineOffsets(x1, y1, x4, y4, leftOff, rightOff);
+            return 4;
+        } else if (p1eqp2) {
+            // start tangent vanishes: use p3 - p1 instead
+            dx1 = x3 - x1;
+            dy1 = y3 - y1;
+        } else if (p3eqp4) {
+            // end tangent vanishes: use p4 - p2 instead
+            dx4 = x4 - x2;
+            dy4 = y4 - y2;
+        }
+
+        // if p2-p1 and p4-p3 are parallel, that must mean this curve is a line
+        float dotsq = (dx1 * dx4 + dy1 * dy4);
+        dotsq *= dotsq;
+        float l1sq = dx1 * dx1 + dy1 * dy1, l4sq = dx4 * dx4 + dy4 * dy4;
+        if (Helpers.within(dotsq, l1sq * l4sq, 4f * ulp(dotsq))) {
+            getLineOffsets(x1, y1, x4, y4, leftOff, rightOff);
+            return 4;
+        }
+
+//      What we're trying to do in this function is to approximate an ideal
+//      offset curve (call it I) of the input curve B using a bezier curve Bp.
+//      The constraints I use to get the equations are:
+//
+//      1. The computed curve Bp should go through I(0) and I(1). These are
+//      x1p, y1p, x4p, y4p, which are p1p and p4p. We still need to find
+//      4 variables: the x and y components of p2p and p3p (i.e. x2p, y2p, x3p, y3p).
+//
+//      2. Bp should have slope equal in absolute value to I at the endpoints. So,
+//      (by the way, the operator || in the comments below means "aligned with".
+//      It is defined on vectors, so when we say I'(0) || Bp'(0) we mean that
+//      vectors I'(0) and Bp'(0) are aligned, which is the same as saying
+//      that the tangent lines of I and Bp at 0 are parallel. Mathematically
+//      this means (I'(t) || Bp'(t)) <==> (I'(t) = c * Bp'(t)) where c is some
+//      nonzero constant.)
+//      I'(0) || Bp'(0) and I'(1) || Bp'(1). Obviously, I'(0) || B'(0) and
+//      I'(1) || B'(1); therefore, Bp'(0) || B'(0) and Bp'(1) || B'(1).
+//      We know that Bp'(0) || (p2p-p1p) and Bp'(1) || (p4p-p3p) and the same
+//      is true for any bezier curve; therefore, we get the equations
+//          (1) p2p = c1 * (p2-p1) + p1p
+//          (2) p3p = c2 * (p4-p3) + p4p
+//      We know p1p, p4p, p2, p1, p3, and p4; therefore, this reduces the number
+//      of unknowns from 4 to 2 (i.e. just c1 and c2).
+//      To eliminate these 2 unknowns we use the following constraint:
+//
+//      3. Bp(0.5) == I(0.5). Bp(0.5)=(x,y) and I(0.5)=(xi,yi), and I should note
+//      that I(0.5) is *the only* reason for computing dxm,dym. This gives us
+//          (3) Bp(0.5) = (p1p + 3 * (p2p + p3p) + p4p)/8, which is equivalent to
+//          (4) p2p + p3p = (Bp(0.5)*8 - p1p - p4p) / 3
+//      We can substitute (1) and (2) from above into (4) and we get:
+//          (5) c1*(p2-p1) + c2*(p4-p3) = (Bp(0.5)*8 - p1p - p4p)/3 - p1p - p4p
+//      which is equivalent to
+//          (6) c1*(p2-p1) + c2*(p4-p3) = (4/3) * (Bp(0.5) * 2 - p1p - p4p)
+//
+//      The right side of this is a 2D vector, and we know I(0.5), which gives us
+//      Bp(0.5), which gives us the value of the right side.
+//      The left side is just a matrix vector multiplication in disguise. It is
+//
+//      [x2-x1, x4-x3][c1]
+//      [y2-y1, y4-y3][c2]
+//      which, is equal to
+//      [dx1, dx4][c1]
+//      [dy1, dy4][c2]
+//      At this point we are left with a simple linear system and we solve it by
+//      getting the inverse of the matrix above. Then we use [c1,c2] to compute
+//      p2p and p3p.
+
+        // (x, y) is B(0.5), the point of the input curve at t = 0.5
+        float x = (x1 + 3f * (x2 + x3) + x4) / 8f;
+        float y = (y1 + 3f * (y2 + y3) + y4) / 8f;
+        // (dxm,dym) is some tangent of B at t=0.5. This means it's equal to
+        // c*B'(0.5) for some constant c.
+        float dxm = x3 + x4 - x1 - x2, dym = y3 + y4 - y1 - y2;
+
+        // this computes the offsets at t=0, 0.5, 1, using the property that
+        // for any bezier curve the vectors p2-p1 and p4-p3 are parallel to
+        // the (dx/dt, dy/dt) vectors at the endpoints.
+        computeOffset(dx1, dy1, lineWidth2, offset0);
+        computeOffset(dxm, dym, lineWidth2, offset1);
+        computeOffset(dx4, dy4, lineWidth2, offset2);
+        float x1p = x1 + offset0[0]; // start
+        float y1p = y1 + offset0[1]; // point
+        float xi  = x  + offset1[0]; // interpolation
+        float yi  = y  + offset1[1]; // point
+        float x4p = x4 + offset2[0]; // end
+        float y4p = y4 + offset2[1]; // point
+
+        // (4/3) / determinant of the matrix [dx1, dx4; dy1, dy4], see (6)
+        float invdet43 = 4f / (3f * (dx1 * dy4 - dy1 * dx4));
+
+        // right side of (6) without the 4/3 factor: 2 * I(0.5) - p1p - p4p
+        float two_pi_m_p1_m_p4x = 2f * xi - x1p - x4p;
+        float two_pi_m_p1_m_p4y = 2f * yi - y1p - y4p;
+        float c1 = invdet43 * (dy4 * two_pi_m_p1_m_p4x - dx4 * two_pi_m_p1_m_p4y);
+        float c2 = invdet43 * (dx1 * two_pi_m_p1_m_p4y - dy1 * two_pi_m_p1_m_p4x);
+
+        float x2p, y2p, x3p, y3p;
+        x2p = x1p + c1*dx1;
+        y2p = y1p + c1*dy1;
+        x3p = x4p + c2*dx4;
+        y3p = y4p + c2*dy4;
+
+        leftOff[0] = x1p; leftOff[1] = y1p;
+        leftOff[2] = x2p; leftOff[3] = y2p;
+        leftOff[4] = x3p; leftOff[5] = y3p;
+        leftOff[6] = x4p; leftOff[7] = y4p;
+
+        // same computation for the other side: all offsets are negated
+        // (xi, yi currently hold +offset1, hence the factor 2)
+        x1p = x1 - offset0[0]; y1p = y1 - offset0[1];
+        xi = xi - 2f * offset1[0]; yi = yi - 2f * offset1[1];
+        x4p = x4 - offset2[0]; y4p = y4 - offset2[1];
+
+        two_pi_m_p1_m_p4x = 2f * xi - x1p - x4p;
+        two_pi_m_p1_m_p4y = 2f * yi - y1p - y4p;
+        c1 = invdet43 * (dy4 * two_pi_m_p1_m_p4x - dx4 * two_pi_m_p1_m_p4y);
+        c2 = invdet43 * (dx1 * two_pi_m_p1_m_p4y - dy1 * two_pi_m_p1_m_p4x);
+
+        x2p = x1p + c1*dx1;
+        y2p = y1p + c1*dy1;
+        x3p = x4p + c2*dx4;
+        y3p = y4p + c2*dy4;
+
+        rightOff[0] = x1p; rightOff[1] = y1p;
+        rightOff[2] = x2p; rightOff[3] = y2p;
+        rightOff[4] = x3p; rightOff[5] = y3p;
+        rightOff[6] = x4p; rightOff[7] = y4p;
+        return 8;
+    }
+
+    /**
+     * Computes the two offset curves of the quad stored in
+     * {@code pts[off .. off + 5]}; the offset vectors come from
+     * {@code computeOffset} called with {@code lineWidth2}.
+     *
+     * @param pts coordinates (x1, y1), (x2, y2), (x3, y3) of the quad, from off
+     * @param off index of x1 in pts
+     * @param leftOff receives the curve on the +offset side
+     * @param rightOff receives the curve on the -offset side
+     * @return the kind of curve in the right and left arrays, i.e. the
+     *         number of coordinates stored in each of them: 6 for a quad,
+     *         4 when the input is treated as a line
+     */
+    private int computeOffsetQuad(float[] pts, final int off,
+                                  float[] leftOff, float[] rightOff)
+    {
+        final float x1 = pts[off + 0], y1 = pts[off + 1];
+        final float x2 = pts[off + 2], y2 = pts[off + 3];
+        final float x3 = pts[off + 4], y3 = pts[off + 5];
+
+        final float dx3 = x3 - x2;
+        final float dy3 = y3 - y2;
+        final float dx1 = x2 - x1;
+        final float dy1 = y2 - y1;
+
+        // this computes the offsets at t = 0, 1
+        computeOffset(dx1, dy1, lineWidth2, offset0);
+        computeOffset(dx3, dy3, lineWidth2, offset1);
+
+        // end points of both offset curves (indices 2,3 are filled below)
+        leftOff[0]  = x1 + offset0[0]; leftOff[1]  = y1 + offset0[1];
+        leftOff[4]  = x3 + offset1[0]; leftOff[5]  = y3 + offset1[1];
+        rightOff[0] = x1 - offset0[0]; rightOff[1] = y1 - offset0[1];
+        rightOff[4] = x3 - offset1[0]; rightOff[5] = y3 - offset1[1];
+
+        float x1p = leftOff[0]; // start
+        float y1p = leftOff[1]; // point
+        float x3p = leftOff[4]; // end
+        float y3p = leftOff[5]; // point
+
+        // Corner cases:
+        // 1. If the two control vectors are parallel, we'll end up with NaN's
+        //    in leftOff (and rightOff in the body of the if below), so we'll
+        //    do getLineOffsets, which is right.
+        // 2. If the first or second two points are equal, then (dx1,dy1)==(0,0)
+        //    or (dx3,dy3)==(0,0), so (x1p, y1p)==(x1p+dx1, y1p+dy1)
+        //    or (x3p, y3p)==(x3p-dx3, y3p-dy3), which means that
+        //    computeIntersection will put NaN's in leftOff and right off, and
+        //    we will do getLineOffsets, which is right.
+        computeIntersection(x1p, y1p, x1p+dx1, y1p+dy1, x3p, y3p, x3p-dx3, y3p-dy3, leftOff, 2);
+        float cx = leftOff[2];
+        float cy = leftOff[3];
+
+        if (!(isFinite(cx) && isFinite(cy))) {
+            // maybe the right path is not degenerate.
+            x1p = rightOff[0];
+            y1p = rightOff[1];
+            x3p = rightOff[4];
+            y3p = rightOff[5];
+            computeIntersection(x1p, y1p, x1p+dx1, y1p+dy1, x3p, y3p, x3p-dx3, y3p-dy3, rightOff, 2);
+            cx = rightOff[2];
+            cy = rightOff[3];
+            if (!(isFinite(cx) && isFinite(cy))) {
+                // both are degenerate. This curve is a line.
+                getLineOffsets(x1, y1, x3, y3, leftOff, rightOff);
+                return 4;
+            }
+            // {left,right}Off[0,1,4,5] are already set to the correct values.
+            // the left control point is the right one mirrored around (x2, y2)
+            leftOff[2] = 2f * x2 - cx;
+            leftOff[3] = 2f * y2 - cy;
+            return 6;
+        }
+
+        // rightOff[2,3] = (x2,y2) - ((left_x2, left_y2) - (x2, y2))
+        // == 2*(x2, y2) - (left_x2, left_y2)
+        rightOff[2] = 2f * x2 - cx;
+        rightOff[3] = 2f * y2 - cy;
+        return 6;
+    }
+
+    /**
+     * Tells whether x is a finite value, i.e. neither NaN nor an infinity.
+     */
+    private static boolean isFinite(float x) {
+        return Float.isFinite(x);
+    }
+
+    // If this class is compiled with ecj, then Hotspot crashes when OSR
+    // compiling this function. See bugs 7004570 and 6675699
+    // TODO: until those are fixed, we should work around that by
+    // manually inlining this into curveTo and quadTo.
+/******************************* WORKAROUND **********************************
+    private void somethingTo(final int type) {
+        // need these so we can update the state at the end of this method
+        final float xf = middle[type-2], yf = middle[type-1];
+        float dxs = middle[2] - middle[0];
+        float dys = middle[3] - middle[1];
+        float dxf = middle[type - 2] - middle[type - 4];
+        float dyf = middle[type - 1] - middle[type - 3];
+        switch(type) {
+        case 6:
+            if ((dxs == 0f && dys == 0f) ||
+                (dxf == 0f && dyf == 0f)) {
+               dxs = dxf = middle[4] - middle[0];
+               dys = dyf = middle[5] - middle[1];
+            }
+            break;
+        case 8:
+            boolean p1eqp2 = (dxs == 0f && dys == 0f);
+            boolean p3eqp4 = (dxf == 0f && dyf == 0f);
+            if (p1eqp2) {
+                dxs = middle[4] - middle[0];
+                dys = middle[5] - middle[1];
+                if (dxs == 0f && dys == 0f) {
+                    dxs = middle[6] - middle[0];
+                    dys = middle[7] - middle[1];
+                }
+            }
+            if (p3eqp4) {
+                dxf = middle[6] - middle[2];
+                dyf = middle[7] - middle[3];
+                if (dxf == 0f && dyf == 0f) {
+                    dxf = middle[6] - middle[0];
+                    dyf = middle[7] - middle[1];
+                }
+            }
+        }
+        if (dxs == 0f && dys == 0f) {
+            // this happens iff the "curve" is just a point
+            lineTo(middle[0], middle[1]);
+            return;
+        }
+        // if these vectors are too small, normalize them, to avoid future
+        // precision problems.
+        if (Math.abs(dxs) < 0.1f && Math.abs(dys) < 0.1f) {
+            float len = (float) sqrt(dxs*dxs + dys*dys);
+            dxs /= len;
+            dys /= len;
+        }
+        if (Math.abs(dxf) < 0.1f && Math.abs(dyf) < 0.1f) {
+            float len = (float) sqrt(dxf*dxf + dyf*dyf);
+            dxf /= len;
+            dyf /= len;
+        }
+
+        computeOffset(dxs, dys, lineWidth2, offset0);
+        final float mx = offset0[0];
+        final float my = offset0[1];
+        drawJoin(cdx, cdy, cx0, cy0, dxs, dys, cmx, cmy, mx, my);
+
+        int nSplits = findSubdivPoints(curve, middle, subdivTs, type, lineWidth2);
+
+        int kind = 0;
+        BreakPtrIterator it = curve.breakPtsAtTs(middle, type, subdivTs, nSplits);
+        while(it.hasNext()) {
+            int curCurveOff = it.next();
+
+            switch (type) {
+            case 8:
+                kind = computeOffsetCubic(middle, curCurveOff, lp, rp);
+                break;
+            case 6:
+                kind = computeOffsetQuad(middle, curCurveOff, lp, rp);
+                break;
+            }
+            emitLineTo(lp[0], lp[1]);
+            switch(kind) {
+            case 8:
+                emitCurveTo(lp[2], lp[3], lp[4], lp[5], lp[6], lp[7]);
+                emitCurveToRev(rp[0], rp[1], rp[2], rp[3], rp[4], rp[5]);
+                break;
+            case 6:
+                emitQuadTo(lp[2], lp[3], lp[4], lp[5]);
+                emitQuadToRev(rp[0], rp[1], rp[2], rp[3]);
+                break;
+            case 4:
+                emitLineTo(lp[2], lp[3]);
+                emitLineTo(rp[0], rp[1], true);
+                break;
+            }
+            emitLineTo(rp[kind - 2], rp[kind - 1], true);
+        }
+
+        this.cmx = (lp[kind - 2] - rp[kind - 2]) / 2;
+        this.cmy = (lp[kind - 1] - rp[kind - 1]) / 2;
+        this.cdx = dxf;
+        this.cdy = dyf;
+        this.cx0 = xf;
+        this.cy0 = yf;
+        this.prev = DRAWING_OP_TO;
+    }
+****************************** END WORKAROUND *******************************/
+
+    /**
+     * Finds values of t where the curve in pts should be subdivided in order
+     * to get good offset curves a distance of w away from the middle curve.
+     * Stores the points in ts, and returns how many of them there were.
+     *
+     * @param c curve instance that is (re)initialized from pts by this method
+     * @param pts coordinates of the curve, starting at index 0
+     * @param ts receives the subdivision parameters
+     * @param type number of coordinates of the curve: 8 (cubic) or 6 (quad)
+     * @param w offset distance
+     * @return the number of values stored in ts
+     */
+    private static int findSubdivPoints(final Curve c, float[] pts, float[] ts,
+                                        final int type, final float w)
+    {
+        final float x12 = pts[2] - pts[0];
+        final float y12 = pts[3] - pts[1];
+        // if the curve is already parallel to either axis we gain nothing
+        // from rotating it.
+        if (y12 != 0f && x12 != 0f) {
+            // we rotate it so that the first vector in the control polygon is
+            // parallel to the x-axis. This will ensure that rotated quarter
+            // circles won't be subdivided.
+            final float hypot = (float) sqrt(x12 * x12 + y12 * y12);
+            final float cos = x12 / hypot;
+            final float sin = y12 / hypot;
+            final float x1 = cos * pts[0] + sin * pts[1];
+            final float y1 = cos * pts[1] - sin * pts[0];
+            final float x2 = cos * pts[2] + sin * pts[3];
+            final float y2 = cos * pts[3] - sin * pts[2];
+            final float x3 = cos * pts[4] + sin * pts[5];
+            final float y3 = cos * pts[5] - sin * pts[4];
+
+            switch(type) {
+            case 8:
+                final float x4 = cos * pts[6] + sin * pts[7];
+                final float y4 = cos * pts[7] - sin * pts[6];
+                c.set(x1, y1, x2, y2, x3, y3, x4, y4);
+                break;
+            case 6:
+                c.set(x1, y1, x2, y2, x3, y3);
+                break;
+            default:
+            }
+        } else {
+            c.set(pts, type);
+        }
+
+        // ret is both the running count and the index of the next free
+        // slot in ts for each root finder below
+        int ret = 0;
+        // we subdivide at values of t such that the remaining rotated
+        // curves are monotonic in x and y.
+        ret += c.dxRoots(ts, ret);
+        ret += c.dyRoots(ts, ret);
+        // subdivide at inflection points.
+        if (type == 8) {
+            // quadratic curves can't have inflection points
+            ret += c.infPoints(ts, ret);
+        }
+
+        // now we must subdivide at points where one of the offset curves will have
+        // a cusp. This happens at ts where the radius of curvature is equal to w.
+        ret += c.rootsOfROCMinusW(ts, ret, w, 0.0001f);
+
+        // presumably keeps only the values inside [0.0001, 0.9999] and then
+        // sorts them (see Helpers.filterOutNotInAB / Helpers.isort)
+        ret = Helpers.filterOutNotInAB(ts, 0, ret, 0.0001f, 0.9999f);
+        Helpers.isort(ts, 0, ret);
+        return ret;
+    }
+
+    /**
+     * Strokes the cubic going from the current point (cx0, cy0) through the
+     * control points (x1, y1) and (x2, y2) to (x3, y3): draws the join with
+     * the previous segment, splits the curve at the parameters given by
+     * {@code findSubdivPoints} and emits the offset curves of every piece
+     * (left side to the output, right side onto the reverse stack), then
+     * updates the current point, direction and offset.
+     */
+    @Override public void curveTo(float x1, float y1,
+                                  float x2, float y2,
+                                  float x3, float y3)
+    {
+        final float[] mid = middle;
+
+        mid[0] = cx0; mid[1] = cy0;
+        mid[2] = x1;  mid[3] = y1;
+        mid[4] = x2;  mid[5] = y2;
+        mid[6] = x3;  mid[7] = y3;
+
+        // inlined version of somethingTo(8);
+        // See the TODO on somethingTo
+
+        // need these so we can update the state at the end of this method
+        final float xf = mid[6], yf = mid[7];
+        // (dxs, dys): start direction; (dxf, dyf): end direction
+        float dxs = mid[2] - mid[0];
+        float dys = mid[3] - mid[1];
+        float dxf = mid[6] - mid[4];
+        float dyf = mid[7] - mid[5];
+
+        // a vanishing direction falls back to the next control point, then
+        // to the chord between the first and last points
+        boolean p1eqp2 = (dxs == 0f && dys == 0f);
+        boolean p3eqp4 = (dxf == 0f && dyf == 0f);
+        if (p1eqp2) {
+            dxs = mid[4] - mid[0];
+            dys = mid[5] - mid[1];
+            if (dxs == 0f && dys == 0f) {
+                dxs = mid[6] - mid[0];
+                dys = mid[7] - mid[1];
+            }
+        }
+        if (p3eqp4) {
+            dxf = mid[6] - mid[2];
+            dyf = mid[7] - mid[3];
+            if (dxf == 0f && dyf == 0f) {
+                dxf = mid[6] - mid[0];
+                dyf = mid[7] - mid[1];
+            }
+        }
+        if (dxs == 0f && dys == 0f) {
+            // this happens if the "curve" is just a point
+            lineTo(mid[0], mid[1]);
+            return;
+        }
+
+        // if these vectors are too small, normalize them, to avoid future
+        // precision problems.
+        if (Math.abs(dxs) < 0.1f && Math.abs(dys) < 0.1f) {
+            float len = (float) sqrt(dxs*dxs + dys*dys);
+            dxs /= len;
+            dys /= len;
+        }
+        if (Math.abs(dxf) < 0.1f && Math.abs(dyf) < 0.1f) {
+            float len = (float) sqrt(dxf*dxf + dyf*dyf);
+            dxf /= len;
+            dyf /= len;
+        }
+
+        computeOffset(dxs, dys, lineWidth2, offset0);
+        drawJoin(cdx, cdy, cx0, cy0, dxs, dys, cmx, cmy, offset0[0], offset0[1]);
+
+        int nSplits = findSubdivPoints(curve, mid, subdivTs, 8, lineWidth2);
+
+        final float[] l = lp;
+        final float[] r = rp;
+
+        // kind holds the number of coordinates of the last offset curves:
+        // 8 (cubic) or 4 (line)
+        int kind = 0;
+        BreakPtrIterator it = curve.breakPtsAtTs(mid, 8, subdivTs, nSplits);
+        while(it.hasNext()) {
+            int curCurveOff = it.next();
+
+            kind = computeOffsetCubic(mid, curCurveOff, l, r);
+            // connect the output to the start of the left offset curve
+            emitLineTo(l[0], l[1]);
+
+            switch(kind) {
+            case 8:
+                emitCurveTo(l[2], l[3], l[4], l[5], l[6], l[7]);
+                emitCurveToRev(r[0], r[1], r[2], r[3], r[4], r[5]);
+                break;
+            case 4:
+                emitLineTo(l[2], l[3]);
+                emitLineToRev(r[0], r[1]);
+                break;
+            default:
+            }
+            // push the end point of the right offset curve
+            emitLineToRev(r[kind - 2], r[kind - 1]);
+        }
+
+        // new current offset: half the vector between the end points of the
+        // right and left offset curves
+        this.cmx = (l[kind - 2] - r[kind - 2]) / 2f;
+        this.cmy = (l[kind - 1] - r[kind - 1]) / 2f;
+        this.cdx = dxf;
+        this.cdy = dyf;
+        this.cx0 = xf;
+        this.cy0 = yf;
+        this.prev = DRAWING_OP_TO;
+    }
+
+    /**
+     * Strokes the quad going from the current point (cx0, cy0) through the
+     * control point (x1, y1) to (x2, y2): draws the join with the previous
+     * segment, splits the curve at the parameters given by
+     * {@code findSubdivPoints} and emits the offset curves of every piece
+     * (left side to the output, right side onto the reverse stack), then
+     * updates the current point, direction and offset.
+     */
+    @Override public void quadTo(float x1, float y1, float x2, float y2) {
+        final float[] mid = middle;
+
+        mid[0] = cx0; mid[1] = cy0;
+        mid[2] = x1;  mid[3] = y1;
+        mid[4] = x2;  mid[5] = y2;
+
+        // inlined version of somethingTo(6);
+        // See the TODO on somethingTo
+
+        // need these so we can update the state at the end of this method
+        final float xf = mid[4], yf = mid[5];
+        // (dxs, dys): start direction; (dxf, dyf): end direction
+        float dxs = mid[2] - mid[0];
+        float dys = mid[3] - mid[1];
+        float dxf = mid[4] - mid[2];
+        float dyf = mid[5] - mid[3];
+        // if either direction vanishes, both fall back to the chord
+        if ((dxs == 0f && dys == 0f) || (dxf == 0f && dyf == 0f)) {
+            dxs = dxf = mid[4] - mid[0];
+            dys = dyf = mid[5] - mid[1];
+        }
+        if (dxs == 0f && dys == 0f) {
+            // this happens if the "curve" is just a point
+            lineTo(mid[0], mid[1]);
+            return;
+        }
+        // if these vectors are too small, normalize them, to avoid future
+        // precision problems.
+        if (Math.abs(dxs) < 0.1f && Math.abs(dys) < 0.1f) {
+            float len = (float) sqrt(dxs*dxs + dys*dys);
+            dxs /= len;
+            dys /= len;
+        }
+        if (Math.abs(dxf) < 0.1f && Math.abs(dyf) < 0.1f) {
+            float len = (float) sqrt(dxf*dxf + dyf*dyf);
+            dxf /= len;
+            dyf /= len;
+        }
+
+        computeOffset(dxs, dys, lineWidth2, offset0);
+        drawJoin(cdx, cdy, cx0, cy0, dxs, dys, cmx, cmy, offset0[0], offset0[1]);
+
+        int nSplits = findSubdivPoints(curve, mid, subdivTs, 6, lineWidth2);
+
+        final float[] l = lp;
+        final float[] r = rp;
+
+        // kind holds the number of coordinates of the last offset curves:
+        // 6 (quad) or 4 (line)
+        int kind = 0;
+        BreakPtrIterator it = curve.breakPtsAtTs(mid, 6, subdivTs, nSplits);
+        while(it.hasNext()) {
+            int curCurveOff = it.next();
+
+            kind = computeOffsetQuad(mid, curCurveOff, l, r);
+            // connect the output to the start of the left offset curve
+            emitLineTo(l[0], l[1]);
+
+            switch(kind) {
+            case 6:
+                emitQuadTo(l[2], l[3], l[4], l[5]);
+                emitQuadToRev(r[0], r[1], r[2], r[3]);
+                break;
+            case 4:
+                emitLineTo(l[2], l[3]);
+                emitLineToRev(r[0], r[1]);
+                break;
+            default:
+            }
+            // push the end point of the right offset curve
+            emitLineToRev(r[kind - 2], r[kind - 1]);
+        }
+
+        // new current offset: half the vector between the end points of the
+        // right and left offset curves
+        this.cmx = (l[kind - 2] - r[kind - 2]) / 2f;
+        this.cmy = (l[kind - 1] - r[kind - 1]) / 2f;
+        this.cdx = dxf;
+        this.cdy = dyf;
+        this.cx0 = xf;
+        this.cy0 = yf;
+        this.prev = DRAWING_OP_TO;
+    }
+
+    /**
+     * Not supported by this consumer.
+     *
+     * @throws InternalError on every call
+     */
+    @Override
+    public long getNativeConsumer() {
+        throw new InternalError("Stroker doesn't use a native consumer");
+    }
+
+    /**
+     * A stack of polynomial curves (lines, quads and cubics) where each
+     * curve shares endpoints with adjacent ones. Coordinates are stored in
+     * {@code curves} and the type of every pushed curve in
+     * {@code curveTypes}; {@code popAll} replays them in LIFO order.
+     */
+    static final class PolyStack {
+        private static final byte TYPE_LINETO  = (byte) 0;
+        private static final byte TYPE_QUADTO  = (byte) 1;
+        private static final byte TYPE_CUBICTO = (byte) 2;
+
+        // coordinates of the pushed curves
+        float[] curves;
+        // number of coordinates used in curves
+        int end;
+        // type (TYPE_xxx) of every pushed curve
+        byte[] curveTypes;
+        // number of entries used in curveTypes
+        int numCurves;
+
+        // per-thread renderer context
+        final RendererContext rdrCtx;
+
+        // per-thread initial arrays (large enough to satisfy most usages: 8192)
+        // +1 to avoid recycling in Helpers.widenArray()
+        private final float[] curves_initial = new float[INITIAL_LARGE_ARRAY + 1]; // 32K
+        private final byte[] curveTypes_initial = new byte[INITIAL_LARGE_ARRAY + 1]; // 8K
+
+        // used marks (stats only)
+        int curveTypesUseMark;
+        int curvesUseMark;
+
+        /**
+         * Constructor
+         * @param rdrCtx per-thread renderer context
+         */
+        PolyStack(final RendererContext rdrCtx) {
+            this.rdrCtx = rdrCtx;
+
+            curves = curves_initial;
+            curveTypes = curveTypes_initial;
+            end = 0;
+            numCurves = 0;
+
+            if (doStats) {
+                curveTypesUseMark = 0;
+                curvesUseMark = 0;
+            }
+        }
+
+        /**
+         * Disposes this PolyStack:
+         * clean up before reusing this instance
+         */
+        void dispose() {
+            end = 0;
+            numCurves = 0;
+
+            if (doStats) {
+                RendererContext.stats.stat_rdr_poly_stack_types
+                    .add(curveTypesUseMark);
+                RendererContext.stats.stat_rdr_poly_stack_curves
+                    .add(curvesUseMark);
+                // reset marks
+                curveTypesUseMark = 0;
+                curvesUseMark = 0;
+            }
+
+            // Return arrays:
+            // curves and curveTypes are kept dirty
+            if (curves != curves_initial) {
+                rdrCtx.putDirtyFloatArray(curves);
+                curves = curves_initial;
+            }
+
+            if (curveTypes != curveTypes_initial) {
+                rdrCtx.putDirtyByteArray(curveTypes);
+                curveTypes = curveTypes_initial;
+            }
+        }
+
+        /**
+         * Makes sure that n more coordinates and one more curve type can be
+         * stored, widening the arrays through the renderer context if needed.
+         * @param n number of coordinates about to be pushed
+         */
+        private void ensureSpace(final int n) {
+            if (end + n > curves.length) {
+                if (doStats) {
+                    RendererContext.stats.stat_array_stroker_polystack_curves
+                        .add(end + n);
+                }
+                curves = rdrCtx.widenDirtyFloatArray(curves, end, end + n);
+            }
+            if (numCurves + 1 > curveTypes.length) {
+                if (doStats) {
+                    RendererContext.stats.stat_array_stroker_polystack_curveTypes
+                        .add(numCurves + 1);
+                }
+                curveTypes = rdrCtx.widenDirtyByteArray(curveTypes,
+                                                        numCurves,
+                                                        numCurves + 1);
+            }
+        }
+
+        /**
+         * Pushes a cubic given by three points; popAll() will emit them as
+         * curveTo(x2, y2, x1, y1, x0, y0).
+         */
+        void pushCubic(float x0, float y0,
+                       float x1, float y1,
+                       float x2, float y2)
+        {
+            ensureSpace(6);
+            curveTypes[numCurves++] = TYPE_CUBICTO;
+            // we reverse the coordinate order to make popping easier
+            final float[] _curves = curves;
+            int e = end;
+            _curves[e++] = x2;    _curves[e++] = y2;
+            _curves[e++] = x1;    _curves[e++] = y1;
+            _curves[e++] = x0;    _curves[e++] = y0;
+            end = e;
+        }
+
+        /**
+         * Pushes a quad given by two points; popAll() will emit them as
+         * quadTo(x1, y1, x0, y0).
+         */
+        void pushQuad(float x0, float y0,
+                      float x1, float y1)
+        {
+            ensureSpace(4);
+            curveTypes[numCurves++] = TYPE_QUADTO;
+            // coordinate order is reversed, as in pushCubic()
+            final float[] _curves = curves;
+            int e = end;
+            _curves[e++] = x1;    _curves[e++] = y1;
+            _curves[e++] = x0;    _curves[e++] = y0;
+            end = e;
+        }
+
+        /**
+         * Pushes a line point; popAll() will emit it as lineTo(x, y).
+         */
+        void pushLine(float x, float y) {
+            ensureSpace(2);
+            curveTypes[numCurves++] = TYPE_LINETO;
+            curves[end++] = x;    curves[end++] = y;
+        }
+
+        /**
+         * Emits all the stored curves to the given consumer, last pushed
+         * first, and leaves this stack empty.
+         * @param io consumer receiving the lineTo / quadTo / curveTo calls
+         */
+        void popAll(PathConsumer2D io) {
+            if (doStats) {
+                // update used marks:
+                if (numCurves > curveTypesUseMark) {
+                    curveTypesUseMark = numCurves;
+                }
+                if (end > curvesUseMark) {
+                    curvesUseMark = end;
+                }
+            }
+            final byte[]  _curveTypes = curveTypes;
+            final float[] _curves = curves;
+            int nc = numCurves;
+            int e  = end;
+
+            while (nc != 0) {
+                switch(_curveTypes[--nc]) {
+                case TYPE_LINETO:
+                    e -= 2;
+                    io.lineTo(_curves[e], _curves[e+1]);
+                    continue;
+                case TYPE_QUADTO:
+                    e -= 4;
+                    io.quadTo(_curves[e+0], _curves[e+1],
+                              _curves[e+2], _curves[e+3]);
+                    continue;
+                case TYPE_CUBICTO:
+                    e -= 6;
+                    io.curveTo(_curves[e+0], _curves[e+1],
+                               _curves[e+2], _curves[e+3],
+                               _curves[e+4], _curves[e+5]);
+                    continue;
+                default:
+                }
+            }
+            numCurves = 0;
+            end = 0;
+        }
+
+        /**
+         * Returns one line per stored curve (last pushed first) made of its
+         * kind and its coordinates; the stack content is left untouched.
+         */
+        @Override
+        public String toString() {
+            // StringBuilder avoids re-copying the whole text for each curve
+            final StringBuilder sb = new StringBuilder();
+            int nc = numCurves;
+            int e  = end;
+            while (nc != 0) {
+                final int len;
+                switch(curveTypes[--nc]) {
+                case TYPE_LINETO:
+                    len = 2;
+                    sb.append("line: ");
+                    break;
+                case TYPE_QUADTO:
+                    len = 4;
+                    sb.append("quad: ");
+                    break;
+                case TYPE_CUBICTO:
+                    len = 6;
+                    sb.append("cubic: ");
+                    break;
+                default:
+                    len = 0;
+                }
+                e -= len;
+                sb.append(Arrays.toString(Arrays.copyOfRange(curves, e, e+len)))
+                  .append("\n");
+            }
+            return sb.toString();
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/TransformingPathConsumer2D.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,507 @@
+/*
+ * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import sun.awt.geom.PathConsumer2D;
+import java.awt.geom.AffineTransform;
+import java.awt.geom.Path2D;
+
+final class TransformingPathConsumer2D {
+
+    TransformingPathConsumer2D() {
+        // used by RendererContext
+    }
+
+    // recycled PathConsumer2D instance from wrapPath2d()
+    private final Path2DWrapper        wp_Path2DWrapper        = new Path2DWrapper();
+
+    /**
+     * Points the recycled Path2DWrapper at the given path.
+     *
+     * @param p2d path that will receive the path commands
+     * @return the wrapper instance owned by this object (the same instance
+     * is handed out on every call)
+     */
+    PathConsumer2D wrapPath2d(Path2D.Float p2d) {
+        final Path2DWrapper wrapper = wp_Path2DWrapper;
+        return wrapper.init(p2d);
+    }
+
+    // recycled PathConsumer2D instances from transformConsumer()
+    private final TranslateFilter      tx_TranslateFilter      = new TranslateFilter();
+    private final DeltaScaleFilter     tx_DeltaScaleFilter     = new DeltaScaleFilter();
+    private final ScaleFilter          tx_ScaleFilter          = new ScaleFilter();
+    private final DeltaTransformFilter tx_DeltaTransformFilter = new DeltaTransformFilter();
+    private final TransformFilter      tx_TransformFilter      = new TransformFilter();
+
+    /**
+     * Selects the cheapest recycled filter able to apply the given transform
+     * (scale, shear and translation) in front of the given consumer.
+     *
+     * @param out consumer receiving the transformed path
+     * @param at transform to apply; may be null
+     * @return out itself when at is null or is the identity once narrowed to
+     * float, otherwise one of the tx_* filter instances initialized on out
+     */
+    PathConsumer2D transformConsumer(PathConsumer2D out,
+                                     AffineTransform at)
+    {
+        if (at == null) {
+            return out;
+        }
+        final float mxx = (float) at.getScaleX();
+        final float mxy = (float) at.getShearX();
+        final float mxt = (float) at.getTranslateX();
+        final float myx = (float) at.getShearY();
+        final float myy = (float) at.getScaleY();
+        final float myt = (float) at.getTranslateY();
+
+        final boolean noShear = (mxy == 0f && myx == 0f);
+        final boolean noTranslate = (mxt == 0f && myt == 0f);
+
+        if (!noShear) {
+            // general matrix, with or without translation
+            if (noTranslate) {
+                return tx_DeltaTransformFilter.init(out, mxx, mxy, myx, myy);
+            }
+            return tx_TransformFilter.init(out, mxx, mxy, mxt, myx, myy, myt);
+        }
+        if (mxx == 1f && myy == 1f) {
+            // pure translation (or identity)
+            if (noTranslate) {
+                return out;
+            }
+            return tx_TranslateFilter.init(out, mxt, myt);
+        }
+        // axis-aligned scale, with or without translation
+        if (noTranslate) {
+            return tx_DeltaScaleFilter.init(out, mxx, myy);
+        }
+        return tx_ScaleFilter.init(out, mxx, myy, mxt, myt);
+    }
+
+    // recycled PathConsumer2D instances from deltaTransformConsumer()
+    private final DeltaScaleFilter     dt_DeltaScaleFilter     = new DeltaScaleFilter();
+    private final DeltaTransformFilter dt_DeltaTransformFilter = new DeltaTransformFilter();
+
+    /**
+     * Selects a recycled filter applying only the scale / shear part of the
+     * given transform (its translation is never read) in front of the given
+     * consumer.
+     *
+     * @param out consumer receiving the transformed path
+     * @param at transform whose scale and shear are applied; may be null
+     * @return out itself when at is null or has unit scale and no shear once
+     * narrowed to float, otherwise one of the dt_* filter instances
+     */
+    PathConsumer2D deltaTransformConsumer(PathConsumer2D out,
+                                          AffineTransform at)
+    {
+        if (at == null) {
+            return out;
+        }
+        final float mxx = (float) at.getScaleX();
+        final float mxy = (float) at.getShearX();
+        final float myx = (float) at.getShearY();
+        final float myy = (float) at.getScaleY();
+
+        if (mxy != 0f || myx != 0f) {
+            // shear present: full 2x2 matrix needed
+            return dt_DeltaTransformFilter.init(out, mxx, mxy, myx, myy);
+        }
+        if (mxx == 1f && myy == 1f) {
+            return out;
+        }
+        return dt_DeltaScaleFilter.init(out, mxx, myy);
+    }
+
+    // recycled PathConsumer2D instances from inverseDeltaTransformConsumer()
+    private final DeltaScaleFilter     iv_DeltaScaleFilter     = new DeltaScaleFilter();
+    private final DeltaTransformFilter iv_DeltaTransformFilter = new DeltaTransformFilter();
+
+    /**
+     * Selects a recycled filter applying the inverse of the scale / shear
+     * part of the given transform (its translation is never read) in front
+     * of the given consumer.
+     * <p>
+     * No check is made for a zero scale factor or a zero determinant: the
+     * divisions below then yield non-finite coefficients.
+     *
+     * @param out consumer receiving the transformed path
+     * @param at transform whose scale and shear are inverted; may be null
+     * @return out itself when at is null or has unit scale and no shear once
+     * narrowed to float, otherwise one of the iv_* filter instances
+     */
+    PathConsumer2D inverseDeltaTransformConsumer(PathConsumer2D out,
+                                                 AffineTransform at)
+    {
+        if (at == null) {
+            return out;
+        }
+        final float mxx = (float) at.getScaleX();
+        final float mxy = (float) at.getShearX();
+        final float myx = (float) at.getShearY();
+        final float myy = (float) at.getScaleY();
+
+        if (mxy != 0f || myx != 0f) {
+            // inverse of [mxx mxy; myx myy] = adjugate / determinant
+            final float det = mxx * myy - mxy * myx;
+            final float ixx =  myy / det;
+            final float ixy = -mxy / det;
+            final float iyx = -myx / det;
+            final float iyy =  mxx / det;
+            return iv_DeltaTransformFilter.init(out, ixx, ixy, iyx, iyy);
+        }
+        if (mxx == 1f && myy == 1f) {
+            return out;
+        }
+        // diagonal matrix: invert each scale factor
+        return iv_DeltaScaleFilter.init(out, 1.0f/mxx, 1.0f/myy);
+    }
+
+    /**
+     * Path consumer adding a fixed (tx, ty) offset to every coordinate before
+     * forwarding the command to the wrapped consumer.
+     */
+    static final class TranslateFilter implements PathConsumer2D {
+        private PathConsumer2D out;
+        private float tx, ty;
+
+        TranslateFilter() {}
+
+        /**
+         * (Re)configures this filter.
+         *
+         * @param out consumer receiving the translated commands
+         * @param tx offset added to x coordinates
+         * @param ty offset added to y coordinates
+         * @return this filter
+         */
+        TranslateFilter init(PathConsumer2D out,
+                             float tx, float ty)
+        {
+            this.tx = tx;
+            this.ty = ty;
+            this.out = out;
+            return this;
+        }
+
+        @Override
+        public void moveTo(float x0, float y0) {
+            final float px = x0 + tx;
+            final float py = y0 + ty;
+            out.moveTo(px, py);
+        }
+
+        @Override
+        public void lineTo(float x1, float y1) {
+            final float px = x1 + tx;
+            final float py = y1 + ty;
+            out.lineTo(px, py);
+        }
+
+        @Override
+        public void quadTo(float x1, float y1,
+                           float x2, float y2)
+        {
+            final float dx = tx;
+            final float dy = ty;
+            out.quadTo(x1 + dx, y1 + dy,
+                       x2 + dx, y2 + dy);
+        }
+
+        @Override
+        public void curveTo(float x1, float y1,
+                            float x2, float y2,
+                            float x3, float y3)
+        {
+            final float dx = tx;
+            final float dy = ty;
+            out.curveTo(x1 + dx, y1 + dy,
+                        x2 + dx, y2 + dy,
+                        x3 + dx, y3 + dy);
+        }
+
+        /** Forwarded unchanged. */
+        @Override
+        public void closePath() {
+            out.closePath();
+        }
+
+        /** Forwarded unchanged. */
+        @Override
+        public void pathDone() {
+            out.pathDone();
+        }
+
+        /** @return always 0 */
+        @Override
+        public long getNativeConsumer() {
+            return 0L;
+        }
+    }
+
+    /**
+     * Path consumer mapping every coordinate through
+     * (x * sx + tx, y * sy + ty) before forwarding the command to the
+     * wrapped consumer.
+     */
+    static final class ScaleFilter implements PathConsumer2D {
+        private PathConsumer2D out;
+        private float sx, sy, tx, ty;
+
+        ScaleFilter() {}
+
+        /**
+         * (Re)configures this filter.
+         *
+         * @param out consumer receiving the transformed commands
+         * @param sx factor applied to x coordinates
+         * @param sy factor applied to y coordinates
+         * @param tx offset added to scaled x coordinates
+         * @param ty offset added to scaled y coordinates
+         * @return this filter
+         */
+        ScaleFilter init(PathConsumer2D out,
+                         float sx, float sy,
+                         float tx, float ty)
+        {
+            this.sx = sx;
+            this.sy = sy;
+            this.tx = tx;
+            this.ty = ty;
+            this.out = out;
+            return this;
+        }
+
+        @Override
+        public void moveTo(float x0, float y0) {
+            final float px = x0 * sx + tx;
+            final float py = y0 * sy + ty;
+            out.moveTo(px, py);
+        }
+
+        @Override
+        public void lineTo(float x1, float y1) {
+            final float px = x1 * sx + tx;
+            final float py = y1 * sy + ty;
+            out.lineTo(px, py);
+        }
+
+        @Override
+        public void quadTo(float x1, float y1,
+                           float x2, float y2)
+        {
+            final float px1 = x1 * sx + tx;
+            final float py1 = y1 * sy + ty;
+            final float px2 = x2 * sx + tx;
+            final float py2 = y2 * sy + ty;
+            out.quadTo(px1, py1, px2, py2);
+        }
+
+        @Override
+        public void curveTo(float x1, float y1,
+                            float x2, float y2,
+                            float x3, float y3)
+        {
+            final float px1 = x1 * sx + tx;
+            final float py1 = y1 * sy + ty;
+            final float px2 = x2 * sx + tx;
+            final float py2 = y2 * sy + ty;
+            final float px3 = x3 * sx + tx;
+            final float py3 = y3 * sy + ty;
+            out.curveTo(px1, py1, px2, py2, px3, py3);
+        }
+
+        /** Forwarded unchanged. */
+        @Override
+        public void closePath() {
+            out.closePath();
+        }
+
+        /** Forwarded unchanged. */
+        @Override
+        public void pathDone() {
+            out.pathDone();
+        }
+
+        /** @return always 0 */
+        @Override
+        public long getNativeConsumer() {
+            return 0L;
+        }
+    }
+
+    /**
+     * Path consumer mapping every coordinate through the affine transform
+     * (x * mxx + y * mxy + mxt, x * myx + y * myy + myt) before forwarding
+     * the command to the wrapped consumer.
+     */
+    static final class TransformFilter implements PathConsumer2D {
+        private PathConsumer2D out;
+        private float mxx, mxy, mxt, myx, myy, myt;
+
+        TransformFilter() {}
+
+        /**
+         * (Re)configures this filter.
+         *
+         * @param out consumer receiving the transformed commands
+         * @param mxx x factor of the output x
+         * @param mxy y factor of the output x
+         * @param mxt offset of the output x
+         * @param myx x factor of the output y
+         * @param myy y factor of the output y
+         * @param myt offset of the output y
+         * @return this filter
+         */
+        TransformFilter init(PathConsumer2D out,
+                             float mxx, float mxy, float mxt,
+                             float myx, float myy, float myt)
+        {
+            this.mxx = mxx;
+            this.mxy = mxy;
+            this.mxt = mxt;
+            this.myx = myx;
+            this.myy = myy;
+            this.myt = myt;
+            this.out = out;
+            return this;
+        }
+
+        @Override
+        public void moveTo(float x0, float y0) {
+            final float px = x0 * mxx + y0 * mxy + mxt;
+            final float py = x0 * myx + y0 * myy + myt;
+            out.moveTo(px, py);
+        }
+
+        @Override
+        public void lineTo(float x1, float y1) {
+            final float px = x1 * mxx + y1 * mxy + mxt;
+            final float py = x1 * myx + y1 * myy + myt;
+            out.lineTo(px, py);
+        }
+
+        @Override
+        public void quadTo(float x1, float y1,
+                           float x2, float y2)
+        {
+            final float px1 = x1 * mxx + y1 * mxy + mxt;
+            final float py1 = x1 * myx + y1 * myy + myt;
+            final float px2 = x2 * mxx + y2 * mxy + mxt;
+            final float py2 = x2 * myx + y2 * myy + myt;
+            out.quadTo(px1, py1, px2, py2);
+        }
+
+        @Override
+        public void curveTo(float x1, float y1,
+                            float x2, float y2,
+                            float x3, float y3)
+        {
+            final float px1 = x1 * mxx + y1 * mxy + mxt;
+            final float py1 = x1 * myx + y1 * myy + myt;
+            final float px2 = x2 * mxx + y2 * mxy + mxt;
+            final float py2 = x2 * myx + y2 * myy + myt;
+            final float px3 = x3 * mxx + y3 * mxy + mxt;
+            final float py3 = x3 * myx + y3 * myy + myt;
+            out.curveTo(px1, py1, px2, py2, px3, py3);
+        }
+
+        /** Forwarded unchanged. */
+        @Override
+        public void closePath() {
+            out.closePath();
+        }
+
+        /** Forwarded unchanged. */
+        @Override
+        public void pathDone() {
+            out.pathDone();
+        }
+
+        /** @return always 0 */
+        @Override
+        public long getNativeConsumer() {
+            return 0L;
+        }
+    }
+
+    /**
+     * Path consumer multiplying every x coordinate by sx and every y
+     * coordinate by sy (no offset) before forwarding the command to the
+     * wrapped consumer.
+     */
+    static final class DeltaScaleFilter implements PathConsumer2D {
+        private PathConsumer2D out;
+        private float sx, sy;
+
+        DeltaScaleFilter() {}
+
+        /**
+         * (Re)configures this filter.
+         *
+         * @param out consumer receiving the scaled commands
+         * @param mxx factor applied to x coordinates
+         * @param myy factor applied to y coordinates
+         * @return this filter
+         */
+        DeltaScaleFilter init(PathConsumer2D out,
+                              float mxx, float myy)
+        {
+            this.sx = mxx;
+            this.sy = myy;
+            this.out = out;
+            return this;
+        }
+
+        @Override
+        public void moveTo(float x0, float y0) {
+            final float px = x0 * sx;
+            final float py = y0 * sy;
+            out.moveTo(px, py);
+        }
+
+        @Override
+        public void lineTo(float x1, float y1) {
+            final float px = x1 * sx;
+            final float py = y1 * sy;
+            out.lineTo(px, py);
+        }
+
+        @Override
+        public void quadTo(float x1, float y1,
+                           float x2, float y2)
+        {
+            final float fx = sx;
+            final float fy = sy;
+            out.quadTo(x1 * fx, y1 * fy,
+                       x2 * fx, y2 * fy);
+        }
+
+        @Override
+        public void curveTo(float x1, float y1,
+                            float x2, float y2,
+                            float x3, float y3)
+        {
+            final float fx = sx;
+            final float fy = sy;
+            out.curveTo(x1 * fx, y1 * fy,
+                        x2 * fx, y2 * fy,
+                        x3 * fx, y3 * fy);
+        }
+
+        /** Forwarded unchanged. */
+        @Override
+        public void closePath() {
+            out.closePath();
+        }
+
+        /** Forwarded unchanged. */
+        @Override
+        public void pathDone() {
+            out.pathDone();
+        }
+
+        /** @return always 0 */
+        @Override
+        public long getNativeConsumer() {
+            return 0L;
+        }
+    }
+
+    /**
+     * Path consumer mapping every coordinate through the 2x2 matrix
+     * (x * mxx + y * mxy, x * myx + y * myy), without any offset, before
+     * forwarding the command to the wrapped consumer.
+     */
+    static final class DeltaTransformFilter implements PathConsumer2D {
+        private PathConsumer2D out;
+        private float mxx, mxy, myx, myy;
+
+        DeltaTransformFilter() {}
+
+        /**
+         * (Re)configures this filter.
+         *
+         * @param out consumer receiving the transformed commands
+         * @param mxx x factor of the output x
+         * @param mxy y factor of the output x
+         * @param myx x factor of the output y
+         * @param myy y factor of the output y
+         * @return this filter
+         */
+        DeltaTransformFilter init(PathConsumer2D out,
+                                  float mxx, float mxy,
+                                  float myx, float myy)
+        {
+            this.mxx = mxx;
+            this.mxy = mxy;
+            this.myx = myx;
+            this.myy = myy;
+            this.out = out;
+            return this;
+        }
+
+        @Override
+        public void moveTo(float x0, float y0) {
+            final float px = x0 * mxx + y0 * mxy;
+            final float py = x0 * myx + y0 * myy;
+            out.moveTo(px, py);
+        }
+
+        @Override
+        public void lineTo(float x1, float y1) {
+            final float px = x1 * mxx + y1 * mxy;
+            final float py = x1 * myx + y1 * myy;
+            out.lineTo(px, py);
+        }
+
+        @Override
+        public void quadTo(float x1, float y1,
+                           float x2, float y2)
+        {
+            final float px1 = x1 * mxx + y1 * mxy;
+            final float py1 = x1 * myx + y1 * myy;
+            final float px2 = x2 * mxx + y2 * mxy;
+            final float py2 = x2 * myx + y2 * myy;
+            out.quadTo(px1, py1, px2, py2);
+        }
+
+        @Override
+        public void curveTo(float x1, float y1,
+                            float x2, float y2,
+                            float x3, float y3)
+        {
+            final float px1 = x1 * mxx + y1 * mxy;
+            final float py1 = x1 * myx + y1 * myy;
+            final float px2 = x2 * mxx + y2 * mxy;
+            final float py2 = x2 * myx + y2 * myy;
+            final float px3 = x3 * mxx + y3 * mxy;
+            final float py3 = x3 * myx + y3 * myy;
+            out.curveTo(px1, py1, px2, py2, px3, py3);
+        }
+
+        /** Forwarded unchanged. */
+        @Override
+        public void closePath() {
+            out.closePath();
+        }
+
+        /** Forwarded unchanged. */
+        @Override
+        public void pathDone() {
+            out.pathDone();
+        }
+
+        /** @return always 0 */
+        @Override
+        public long getNativeConsumer() {
+            return 0L;
+        }
+    }
+
+    /**
+     * Path consumer replaying every command it receives onto a
+     * Path2D.Float; pathDone() does nothing and no native consumer exists.
+     */
+    static final class Path2DWrapper implements PathConsumer2D {
+        private Path2D.Float p2d;
+
+        Path2DWrapper() {}
+
+        /**
+         * (Re)configures this wrapper.
+         *
+         * @param p2d path receiving the commands
+         * @return this wrapper
+         */
+        Path2DWrapper init(final Path2D.Float p2d) {
+            this.p2d = p2d;
+            return this;
+        }
+
+        @Override
+        public void moveTo(final float x0, final float y0) {
+            this.p2d.moveTo(x0, y0);
+        }
+
+        @Override
+        public void lineTo(final float x1, final float y1) {
+            this.p2d.lineTo(x1, y1);
+        }
+
+        @Override
+        public void quadTo(final float x1, final float y1,
+                           final float x2, final float y2)
+        {
+            this.p2d.quadTo(x1, y1, x2, y2);
+        }
+
+        @Override
+        public void curveTo(final float x1, final float y1,
+                            final float x2, final float y2,
+                            final float x3, final float y3)
+        {
+            this.p2d.curveTo(x1, y1, x2, y2, x3, y3);
+        }
+
+        @Override
+        public void closePath() {
+            this.p2d.closePath();
+        }
+
+        /** Nothing to finalize on the wrapped path. */
+        @Override
+        public void pathDone() {
+            // no-op
+        }
+
+        /**
+         * @return never returns normally
+         * @throws InternalError always
+         */
+        @Override
+        public long getNativeConsumer() {
+            throw new InternalError("Not using a native peer");
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Version.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+/**
+ * Non-instantiable holder of the version label of this package.
+ */
+public final class Version {
+
+    /** Version label returned by {@link #getVersion()}. */
+    private static final String VERSION = "marlin-0.7.2-Unsafe-OpenJDK";
+
+    private Version() {
+        // static access only
+    }
+
+    /**
+     * @return the version label
+     */
+    public static String getVersion() {
+        return VERSION;
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/stats/Histogram.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin.stats;
+
+import java.util.Arrays;
+
+/**
+ * Generic histogram based on long statistics: besides the global statistics
+ * inherited from StatLong, every sample is also recorded in one of MAX
+ * buckets whose lower bounds are given by STEPS (0, 1, then successive
+ * multiples by BUCKET).
+ */
+public final class Histogram extends StatLong {
+
+    static final int BUCKET = 2;
+    static final int MAX = 20;
+    static final int LAST = MAX - 1;
+    static final int[] STEPS = new int[MAX];
+
+    static {
+        STEPS[0] = 0;
+        STEPS[1] = 1;
+
+        for (int i = 2; i < MAX; i++) {
+            STEPS[i] = STEPS[i - 1] * BUCKET;
+        }
+//        System.out.println("Histogram.STEPS = " + Arrays.toString(STEPS));
+    }
+
+    /**
+     * Finds the bucket holding the given value.
+     *
+     * @param val sample value
+     * @return index {@code b} such that
+     * {@code STEPS[b] <= val < STEPS[b + 1]}; 0 for any value below 1
+     * (negative values included) and LAST for any value at or above
+     * {@code STEPS[LAST]}
+     */
+    static int bucket(int val) {
+        for (int i = 1; i < MAX; i++) {
+            if (val < STEPS[i]) {
+                return i - 1;
+            }
+        }
+        return LAST;
+    }
+
+    // per-bucket statistics, indexed like STEPS
+    private final StatLong[] stats = new StatLong[MAX];
+
+    /**
+     * Creates an empty histogram; each bucket statistic is named after its
+     * bounds ("~" stands for the open upper bound of the last bucket).
+     *
+     * @param name name of the global statistic
+     */
+    public Histogram(final String name) {
+        super(name);
+        for (int i = 0; i < MAX; i++) {
+            stats[i] = new StatLong(String.format("%5s .. %5s", STEPS[i],
+                                    ((i + 1 < MAX) ? STEPS[i + 1] : "~")));
+        }
+    }
+
+    /** Resets the global statistics and every bucket. */
+    @Override
+    public void reset() {
+        super.reset();
+        for (int i = 0; i < MAX; i++) {
+            stats[i].reset();
+        }
+    }
+
+    /**
+     * Records the sample globally and in its bucket.
+     *
+     * @param val sample value
+     */
+    @Override
+    public void add(int val) {
+        super.add(val);
+        stats[bucket(val)].add(val);
+    }
+
+    /**
+     * Records the sample globally and in its bucket.
+     *
+     * @param val sample value; values outside the int range are recorded
+     * unchanged and land in the first (negative) or last (positive) bucket
+     */
+    @Override
+    public void add(long val) {
+        // Do not narrow the sample itself: (int) val wraps around outside
+        // the int range and would corrupt sum / min / max and the bucket
+        // choice. Only the bucket lookup works on a clamped int.
+        super.add(val);
+        stats[bucket(clampToInt(val))].add(val);
+    }
+
+    /** Saturates the given long to the int range. */
+    private static int clampToInt(final long val) {
+        if (val > Integer.MAX_VALUE) {
+            return Integer.MAX_VALUE;
+        }
+        if (val < Integer.MIN_VALUE) {
+            return Integer.MIN_VALUE;
+        }
+        return (int) val;
+    }
+
+    /**
+     * @return the global statistics followed by one line per non-empty
+     * bucket, enclosed in braces
+     */
+    @Override
+    public String toString() {
+        final StringBuilder sb = new StringBuilder(2048);
+        super.toString(sb).append(" { ");
+
+        for (int i = 0; i < MAX; i++) {
+            // skip buckets that never received a sample
+            if (stats[i].count != 0L) {
+                sb.append("\n        ").append(stats[i].toString());
+            }
+        }
+
+        return sb.append(" }").toString();
+    }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/stats/Monitor.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin.stats;
+
+/**
+ * Generic monitor, i.e. a statistic gathering elapsed times in nanoseconds
+ * between paired start() / stop() calls.
+ */
+public final class Monitor extends StatLong {
+
+    /** Marker stored in {@code start} while no measurement is running. */
+    private static final long INVALID = -1L;
+
+    private long start = INVALID;
+
+    /**
+     * @param name name of this statistic
+     */
+    public Monitor(final String name) {
+        super(name);
+    }
+
+    /** Begins a measurement at the current System.nanoTime(). */
+    public void start() {
+        this.start = System.nanoTime();
+    }
+
+    /**
+     * Ends the running measurement: the elapsed time is recorded only when
+     * start() was called since the previous stop() and the elapsed time is
+     * strictly positive. The monitor is then marked as not running.
+     */
+    public void stop() {
+        final long now = System.nanoTime();
+        final long begin = this.start;
+        if (begin != INVALID) {
+            final long elapsed = now - begin;
+            if (elapsed > 0L) {
+                add(elapsed);
+            }
+        }
+        this.start = INVALID;
+    }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/stats/StatLong.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin.stats;
+
+/**
+ * Statistics as long values: sample count, sum, minimum and maximum.
+ * <p>
+ * While no sample has been recorded, {@code min} is Long.MAX_VALUE and
+ * {@code max} is Long.MIN_VALUE so that the first sample, whatever its
+ * magnitude, always replaces both.
+ */
+public class StatLong {
+
+    public final String name;
+    public long count = 0L;
+    public long sum = 0L;
+    // Sentinels cover the whole long range: with the int extremes, a first
+    // sample above Integer.MAX_VALUE would never update min (and a first
+    // sample below Integer.MIN_VALUE would never update max).
+    public long min = Long.MAX_VALUE;
+    public long max = Long.MIN_VALUE;
+
+    /**
+     * @param name label printed by toString()
+     */
+    public StatLong(final String name) {
+        this.name = name;
+    }
+
+    /** Forgets every recorded sample. */
+    public void reset() {
+        count = 0L;
+        sum = 0L;
+        min = Long.MAX_VALUE;
+        max = Long.MIN_VALUE;
+    }
+
+    /**
+     * Records one sample.
+     *
+     * @param val sample value
+     */
+    public void add(final int val) {
+        // must not call add(long): subclasses may override it
+        accumulate(val);
+    }
+
+    /**
+     * Records one sample.
+     *
+     * @param val sample value
+     */
+    public void add(final long val) {
+        accumulate(val);
+    }
+
+    /** Updates count, sum, min and max with the given sample. */
+    private void accumulate(final long val) {
+        count++;
+        sum += val;
+        if (val < min) {
+            min = val;
+        }
+        if (val > max) {
+            max = val;
+        }
+    }
+
+    /**
+     * @return {@code name[count] sum: S avg: A [min | max]}
+     */
+    @Override
+    public String toString() {
+        final StringBuilder sb = new StringBuilder(128);
+        toString(sb);
+        return sb.toString();
+    }
+
+    /**
+     * Appends the textual form of these statistics to the given builder.
+     *
+     * @param sb builder to append to
+     * @return the given builder
+     */
+    public final StringBuilder toString(final StringBuilder sb) {
+        sb.append(name).append('[').append(count);
+        sb.append("] sum: ").append(sum).append(" avg: ");
+        // count == 0 gives NaN here, which trimTo3Digits() turns into 0.0
+        sb.append(trimTo3Digits(((double) sum) / count));
+        sb.append(" [").append(min).append(" | ").append(max).append("]");
+        return sb;
+    }
+
+    /**
+     * Adjust the given double value to keep only 3 decimal digits
+     * (truncation toward zero, no rounding)
+     *
+     * @param value value to adjust
+     * @return double value with only 3 decimal digits
+     */
+    public static double trimTo3Digits(final double value) {
+        return ((long) (1e3d * value)) / 1e3d;
+    }
+}
+
--- a/jdk/src/java.desktop/share/classes/sun/java2d/pipe/AAShapePipe.java	Mon Nov 23 14:56:43 2015 -0800
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/pipe/AAShapePipe.java	Mon Nov 23 15:02:19 2015 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,14 +22,12 @@
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
-
 package sun.java2d.pipe;
 
 import java.awt.BasicStroke;
 import java.awt.Rectangle;
 import java.awt.Shape;
 import java.awt.geom.Rectangle2D;
-import java.awt.geom.PathIterator;
 import sun.awt.SunHints;
 import sun.java2d.SunGraphics2D;
 
@@ -45,6 +43,15 @@
 {
     static RenderingEngine renderengine = RenderingEngine.getInstance();
 
+    // Per-thread TileState (~1K very small so do not use any Weak Reference)
+    private static final ThreadLocal<TileState> tileStateThreadLocal =
+            new ThreadLocal<TileState>() {
+        @Override
+        protected TileState initialValue() {
+            return new TileState();
+        }
+    };
+
     CompositePipe outpipe;
 
     public AAShapePipe(CompositePipe pipe) {
@@ -68,20 +75,6 @@
         renderPath(sg, s, null);
     }
 
-    private static Rectangle2D computeBBox(double ux1, double uy1,
-                                           double ux2, double uy2)
-    {
-        if ((ux2 -= ux1) < 0) {
-            ux1 += ux2;
-            ux2 = -ux2;
-        }
-        if ((uy2 -= uy1) < 0) {
-            uy1 += uy2;
-            uy2 = -uy2;
-        }
-        return new Rectangle2D.Double(ux1, uy1, ux2, uy2);
-    }
-
     public void fillParallelogram(SunGraphics2D sg,
                                   double ux1, double uy1,
                                   double ux2, double uy2,
@@ -90,7 +83,9 @@
                                   double dx2, double dy2)
     {
         Region clip = sg.getCompClip();
-        int abox[] = new int[4];
+        final TileState ts = tileStateThreadLocal.get();
+        final int[] abox = ts.abox;
+
         AATileGenerator aatg =
             renderengine.getAATileGenerator(x, y, dx1, dy1, dx2, dy2, 0, 0,
                                             clip, abox);
@@ -99,7 +94,7 @@
             return;
         }
 
-        renderTiles(sg, computeBBox(ux1, uy1, ux2, uy2), aatg, abox);
+        renderTiles(sg, ts.computeBBox(ux1, uy1, ux2, uy2), aatg, abox, ts);
     }
 
     public void drawParallelogram(SunGraphics2D sg,
@@ -111,7 +106,9 @@
                                   double lw1, double lw2)
     {
         Region clip = sg.getCompClip();
-        int abox[] = new int[4];
+        final TileState ts = tileStateThreadLocal.get();
+        final int[] abox = ts.abox;
+
         AATileGenerator aatg =
             renderengine.getAATileGenerator(x, y, dx1, dy1, dx2, dy2, lw1, lw2,
                                             clip, abox);
@@ -122,23 +119,7 @@
 
         // Note that bbox is of the original shape, not the wide path.
         // This is appropriate for handing to Paint methods...
-        renderTiles(sg, computeBBox(ux1, uy1, ux2, uy2), aatg, abox);
-    }
-
-    private static byte[] theTile;
-
-    private static synchronized byte[] getAlphaTile(int len) {
-        byte[] t = theTile;
-        if (t == null || t.length < len) {
-            t = new byte[len];
-        } else {
-            theTile = null;
-        }
-        return t;
-    }
-
-    private static synchronized void dropAlphaTile(byte[] t) {
-        theTile = t;
+        renderTiles(sg, ts.computeBBox(ux1, uy1, ux2, uy2), aatg, abox, ts);
     }
 
     public void renderPath(SunGraphics2D sg, Shape s, BasicStroke bs) {
@@ -147,7 +128,9 @@
         boolean thin = (sg.strokeState <= SunGraphics2D.STROKE_THINDASHED);
 
         Region clip = sg.getCompClip();
-        int abox[] = new int[4];
+        final TileState ts = tileStateThreadLocal.get();
+        final int[] abox = ts.abox;
+
         AATileGenerator aatg =
             renderengine.getAATileGenerator(s, sg.transform, clip,
                                             bs, thin, adjust, abox);
@@ -156,31 +139,30 @@
             return;
         }
 
-        renderTiles(sg, s, aatg, abox);
+        renderTiles(sg, s, aatg, abox, ts);
     }
 
     public void renderTiles(SunGraphics2D sg, Shape s,
-                            AATileGenerator aatg, int abox[])
+                            AATileGenerator aatg, int abox[], TileState ts)
     {
         Object context = null;
-        byte alpha[] = null;
         try {
             context = outpipe.startSequence(sg, s,
-                                            new Rectangle(abox[0], abox[1],
-                                                          abox[2] - abox[0],
-                                                          abox[3] - abox[1]),
+                                            ts.computeDevBox(abox),
                                             abox);
 
-            int tw = aatg.getTileWidth();
-            int th = aatg.getTileHeight();
-            alpha = getAlphaTile(tw * th);
+            final int tw = aatg.getTileWidth();
+            final int th = aatg.getTileHeight();
 
+            // get tile from thread local storage:
+            final byte[] alpha = ts.getAlphaTile(tw * th);
             byte[] atile;
 
             for (int y = abox[1]; y < abox[3]; y += th) {
+                int h = Math.min(th, abox[3] - y);
+
                 for (int x = abox[0]; x < abox[2]; x += tw) {
                     int w = Math.min(tw, abox[2] - x);
-                    int h = Math.min(th, abox[3] - y);
 
                     int a = aatg.getTypicalAlpha();
                     if (a == 0x00 ||
@@ -207,9 +189,56 @@
             if (context != null) {
                 outpipe.endSequence(context);
             }
-            if (alpha != null) {
-                dropAlphaTile(alpha);
-            }
         }
     }
+
+    // Reusable scratch state for AAShapePipe; callers obtain it via tileStateThreadLocal.get()
+    static final class TileState {
+        // cached alpha tile buffer (32 x 32 bytes initially); swapped for a larger one on demand
+        private byte[] theTile = new byte[32 * 32];
+        // dirty (reused) box array of 4 ints; computeDevBox reads it as {x1, y1, x2, y2}
+        final int[] abox = new int[4];
+        // dirty (reused) Rectangle instance, filled and returned by computeDevBox
+        private final Rectangle dev = new Rectangle();
+        // dirty (reused) Rectangle2D.Double instance, filled and returned by computeBBox
+        private final Rectangle2D.Double bbox2D = new Rectangle2D.Double();
+
+        byte[] getAlphaTile(int len) { // returns a buffer of at least len bytes; contents are not cleared
+            byte[] t = theTile;
+            if (t.length < len) {
+                // cached tile is too small: allocate a larger one and cache it in place of the old tile
+                theTile = t = new byte[len];
+            }
+            return t;
+        }
+
+        Rectangle computeDevBox(final int[] abox) { // converts {x1, y1, x2, y2} into x / y / width / height
+            final Rectangle box = this.dev; // shared instance: overwritten by the next call
+            box.x = abox[0];
+            box.y = abox[1];
+            box.width = abox[2] - abox[0];
+            box.height = abox[3] - abox[1];
+            return box;
+        }
+
+        Rectangle2D computeBBox(double ux1, double uy1,
+                                double ux2, double uy2) // the two corners may be given in any order
+        {
+            if ((ux2 -= ux1) < 0.0) { // ux2 now holds the signed width; negative means ux1 was the max
+                ux1 += ux2;
+                ux2 = -ux2;
+            }
+            if ((uy2 -= uy1) < 0.0) { // same normalization for the height
+                uy1 += uy2;
+                uy2 = -uy2;
+            }
+            final Rectangle2D.Double box = this.bbox2D; // shared instance: overwritten by the next call
+            box.x = ux1;
+            box.y = uy1;
+            box.width = ux2;
+            box.height = uy2;
+            return box;
+        }
+    }
+
 }
--- a/jdk/src/java.desktop/share/classes/sun/java2d/pipe/RenderingEngine.java	Mon Nov 23 14:56:43 2015 -0800
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/pipe/RenderingEngine.java	Mon Nov 23 15:02:19 2015 -0800
@@ -96,9 +96,14 @@
      * </pre>
      *
      * If no specific {@code RenderingEngine} is specified on the command
-     * or Ductus renderer is specified, it will first attempt loading the
-     * sun.dc.DuctusRenderingEngine class using Class.forName, if that
-     * is not found, then it will look for Pisces.
+     * line or the requested class fails to load, then the Marlin
+     * renderer will be used as the default.
+     * <p>
+     * A printout of which RenderingEngine is loaded and used can be
+     * enabled by specifying the runtime flag:
+     * <pre>
+     *     java -Dsun.java2d.renderer.verbose=true
+     * </pre>
      * <p>
      * Runtime tracing of the actions of the {@code RenderingEngine}
      * can be enabled by specifying the runtime flag:
@@ -113,20 +118,23 @@
             return reImpl;
         }
 
-        /* Look first for ductus or an app-override renderer,
-         * if not specified or present, then look for pisces.
+        /* Look first for an app-override renderer,
+         * if not specified or present, then look for marlin.
          */
-        final String ductusREClass = "sun.dc.DuctusRenderingEngine";
-        final String piscesREClass = "sun.java2d.pisces.PiscesRenderingEngine";
         GetPropertyAction gpa =
-            new GetPropertyAction("sun.java2d.renderer", ductusREClass);
+            new GetPropertyAction("sun.java2d.renderer");
         String reClass = AccessController.doPrivileged(gpa);
-        try {
-            Class<?> cls = Class.forName(reClass);
-            reImpl = (RenderingEngine) cls.newInstance();
-        } catch (ReflectiveOperationException ignored0) {
+        if (reClass != null) {
             try {
-                Class<?> cls = Class.forName(piscesREClass);
+                Class<?> cls = Class.forName(reClass);
+                reImpl = (RenderingEngine) cls.newInstance();
+            } catch (ReflectiveOperationException ignored0) {
+            }
+        }
+        if (reImpl == null) {
+            final String marlinREClass = "sun.java2d.marlin.MarlinRenderingEngine";
+            try {
+                Class<?> cls = Class.forName(marlinREClass);
                 reImpl = (RenderingEngine) cls.newInstance();
             } catch (ReflectiveOperationException ignored1) {
             }
@@ -136,6 +144,12 @@
             throw new InternalError("No RenderingEngine module found");
         }
 
+        gpa = new GetPropertyAction("sun.java2d.renderer.verbose");
+        String verbose = AccessController.doPrivileged(gpa);
+        if (verbose != null && verbose.startsWith("t")) {
+            System.out.println("RenderingEngine = "+reImpl);
+        }
+
         gpa = new GetPropertyAction("sun.java2d.renderer.trace");
         String reTrace = AccessController.doPrivileged(gpa);
         if (reTrace != null) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/java2d/marlin/CeilAndFloorTests.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import sun.java2d.marlin.FloatMath;
+
+/*
+ * @test
+ * @summary Check for correct implementation of FloatMath.ceil/floor
+ * @run main CeilAndFloorTests
+ */
+public class CeilAndFloorTests {
+
+    public static String toHexString(float f) { // hex rendering of f used in failure messages
+        if (!Float.isNaN(f))
+            return Float.toHexString(f);
+        else // NaN: print the raw bit pattern so that different NaN payloads stay visible
+            return "NaN(0x" + Integer.toHexString(Float.floatToRawIntBits(f)) + ")";
+    }
+
+    public static int test(String testName, float input, // strict check: returns 1 on mismatch, else 0
+                           float result, float expected) {
+        if (Float.compare(expected, result) != 0) { // Float.compare separates +0f from -0f and equates NaNs
+            System.err.println("Failure for " + testName + ":\n" +
+                               "\tFor input " + input    + "\t(" + toHexString(input) + ")\n" +
+                               "\texpected  " + expected + "\t(" + toHexString(expected) + ")\n" +
+                               "\tgot       " + result   + "\t(" + toHexString(result) + ").");
+            return 1;
+        }
+        else
+            return 0;
+    }
+
+    public static int test_skip_0(String testName, float input, // like test(), but lenient when expected is zero
+                           float result, float expected)
+    {
+        // floor_int does not distinguish +0f and -0f
+        // but it is not critical for Marlin, so any mismatch is ignored when expected is +0f or -0f
+        if (Float.compare(expected, result) != 0 && (expected != 0f))
+        {
+            System.err.println("Failure for " + testName + ":\n" +
+                               "\tFor input " + input    + "\t(" + toHexString(input) + ")\n" +
+                               "\texpected  " + expected + "\t(" + toHexString(expected) + ")\n" +
+                               "\tgot       " + result   + "\t(" + toHexString(result) + ").");
+            return 1;
+        }
+        else
+            return 0;
+    }
+
+    private static int testCeilCase(float input, float expected) { // returns the number of failed ceil checks (0..3)
+        int failures = 0;
+        // float result: must match expected exactly (strict comparison via test())
+        failures += test("FloatMath.ceil_f", input, FloatMath.ceil_f(input), expected);
+        // int results: compared against the expectation narrowed with an (int) cast
+        failures += test("FloatMath.ceil_int", input, FloatMath.ceil_int(input), (int)expected);
+        failures += test("FloatMath.ceil_f (int)", input, (int)FloatMath.ceil_f(input), (int)expected);
+        return failures;
+    }
+
+    private static int testFloorCase(float input, float expected) { // returns the number of failed floor checks (0..3)
+        int failures = 0;
+        // float result: must match expected exactly (strict comparison via test())
+        failures += test       ("FloatMath.floor_f", input, FloatMath.floor_f(input), expected);
+        // int results: ignore difference between +0f and -0f (see test_skip_0)
+        failures += test_skip_0("FloatMath.floor_int", input, FloatMath.floor_int(input), (int)expected);
+        failures += test_skip_0("FloatMath.floor_f (int)", input, (int)FloatMath.floor_f(input), (int)expected);
+        return failures;
+    }
+
+    private static int nearIntegerTests() { // checks ceil/floor at and next to integral values; returns failure count
+        int failures = 0;
+
+        float [] fixedPoints = { // inputs expected to come back unchanged from both ceil and floor
+            -0.0f,
+             0.0f,
+            -1.0f,
+             1.0f,
+            -0x1.0p52f,
+             0x1.0p52f,
+            -Float.MAX_VALUE,
+             Float.MAX_VALUE,
+             Float.NEGATIVE_INFINITY,
+             Float.POSITIVE_INFINITY,
+             Float.NaN,
+        };
+
+        for(float fixedPoint : fixedPoints) {
+            failures += testCeilCase(fixedPoint, fixedPoint);
+            failures += testFloorCase(fixedPoint, fixedPoint);
+        }
+
+        for(int i = Float.MIN_EXPONENT; i <= Float.MAX_EXPONENT; i++) { // every normal power of two and its two neighbors
+            float powerOfTwo   = Math.scalb(1.0f, i);
+            float neighborDown = Math.nextDown(powerOfTwo);
+            float neighborUp   = Math.nextUp(powerOfTwo);
+
+            if (i < 0) { // 0 < 2^i < 1: results can only be 0 (signed) or +/-1
+                failures += testCeilCase( powerOfTwo,  1.0f);
+                failures += testCeilCase(-powerOfTwo, -0.0f);
+
+                failures += testFloorCase( powerOfTwo,  0.0f);
+                failures += testFloorCase(-powerOfTwo, -1.0f);
+
+                failures += testCeilCase( neighborDown, 1.0f);
+                failures += testCeilCase(-neighborDown, -0.0f);
+
+                failures += testFloorCase( neighborUp,  0.0f);
+                failures += testFloorCase(-neighborUp, -1.0f);
+            } else { // 2^i >= 1 is itself an integer
+                failures += testCeilCase(powerOfTwo, powerOfTwo);
+                failures += testFloorCase(powerOfTwo, powerOfTwo);
+
+                if (neighborDown==Math.rint(neighborDown)) { // neighbor is already integral: a fixed point
+                    failures += testCeilCase( neighborDown,  neighborDown);
+                    failures += testCeilCase(-neighborDown, -neighborDown);
+
+                    failures += testFloorCase( neighborDown, neighborDown);
+                    failures += testFloorCase(-neighborDown,-neighborDown);
+                } else { // fractional neighbor just below 2^i rounds up to 2^i
+                    failures += testCeilCase( neighborDown, powerOfTwo);
+                    failures += testFloorCase(-neighborDown, -powerOfTwo);
+                }
+
+                if (neighborUp==Math.rint(neighborUp)) { // neighbor is already integral: a fixed point
+                    failures += testCeilCase(neighborUp, neighborUp);
+                    failures += testCeilCase(-neighborUp, -neighborUp);
+
+                    failures += testFloorCase(neighborUp, neighborUp);
+                    failures += testFloorCase(-neighborUp, -neighborUp);
+                } else { // fractional neighbor just above 2^i rounds down to 2^i
+                    failures += testFloorCase(neighborUp, powerOfTwo);
+                    failures += testCeilCase(-neighborUp, -powerOfTwo);
+                }
+            }
+        }
+
+        for(int i = -(0x10000); i <= 0x10000; i++) { // every integer in [-65536, 65536] and its two neighbors
+            float f = (float) i;
+            float neighborDown = Math.nextDown(f);
+            float neighborUp   = Math.nextUp(f);
+
+            failures += testCeilCase( f, f);
+            failures += testCeilCase(-f, -f);
+
+            failures += testFloorCase( f, f);
+            failures += testFloorCase(-f, -f);
+
+            if (Math.abs(f) > 1.0) { // excludes 0 and +/-1, where a neighbor's ceil is -0.0f but the expression below gives +0.0f
+                failures += testCeilCase( neighborDown, f);
+                failures += testCeilCase(-neighborDown, -f+1);
+
+                failures += testFloorCase( neighborUp, f);
+                failures += testFloorCase(-neighborUp, -f-1);
+            }
+        }
+
+        return failures;
+    }
+
+    public static int roundingTests() { // table-driven ceil/floor checks; returns the failure count
+        int failures = 0;
+        float [][] testCases = { // each row: { input, expected ceil(input) }
+            { Float.MIN_VALUE,                           1.0f},
+            {-Float.MIN_VALUE,                          -0.0f},
+            { Math.nextDown(Float.MIN_NORMAL),           1.0f},
+            {-Math.nextDown(Float.MIN_NORMAL),          -0.0f},
+            { Float.MIN_NORMAL,                          1.0f},
+            {-Float.MIN_NORMAL,                         -0.0f},
+
+            { 0.1f,                                        1.0f},
+            {-0.1f,                                       -0.0f},
+
+            { 0.5f,                                        1.0f},
+            {-0.5f,                                       -0.0f},
+
+            { 1.5f,                                        2.0f},
+            {-1.5f,                                       -1.0f},
+
+            { 2.5f,                                        3.0f},
+            {-2.5f,                                       -2.0f},
+
+            { 12.3456789f,                                13.0f},
+            {-12.3456789f,                               -12.0f},
+
+            { Math.nextDown(1.0f),                         1.0f},
+            { Math.nextDown(-1.0f),                       -1.0f},
+
+            { Math.nextUp(1.0f),                           2.0f},
+            { Math.nextUp(-1.0f),                         -0.0f},
+
+            { 0x1.0p22f,                                 0x1.0p22f},
+            {-0x1.0p22f,                                -0x1.0p22f},
+
+            { Math.nextDown(0x1.0p22f),                  0x1.0p22f},
+            {-Math.nextUp(0x1.0p22f),                   -0x1.0p22f},
+
+            { Math.nextUp(0x1.0p22f),                    0x1.0p22f+1f},
+            {-Math.nextDown(0x1.0p22f),                 -0x1.0p22f+1f},
+
+            { Math.nextDown(0x1.0p23f),                  0x1.0p23f},
+            {-Math.nextUp(0x1.0p23f),                   -0x1.0p23f-1f},
+
+            { Math.nextUp(0x1.0p23f),                    0x1.0p23f+1f},
+            {-Math.nextDown(0x1.0p23f),                 -0x1.0p23f+1f},
+        };
+
+        for(float[] testCase : testCases) {
+            failures += testCeilCase(testCase[0], testCase[1]);
+            failures += testFloorCase(-testCase[0], -testCase[1]); // floor is derived via floor(-x) == -ceil(x)
+        }
+        return failures;
+    }
+
+    public static void main(String... args) { // runs both test groups; throws if any check failed
+        int failures = 0;
+
+        System.out.println("nearIntegerTests");
+        failures += nearIntegerTests();
+
+        System.out.println("roundingTests");
+        failures += roundingTests();
+
+        if (failures > 0) {
+            System.err.println("Testing {FloatMath}.ceil/floor incurred "
+                               + failures + " failures.");
+            throw new RuntimeException(); // uncaught exception makes the run fail; details were printed above
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/sun/java2d/marlin/CrashTest.java	Mon Nov 23 15:02:19 2015 -0800
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.awt.BasicStroke;
+import java.awt.Color;
+import java.awt.Graphics2D;
+import java.awt.RenderingHints;
+import java.awt.geom.Path2D;
+import static java.awt.geom.Path2D.WIND_NON_ZERO;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.IOException;
+import javax.imageio.ImageIO;
+import sun.java2d.pipe.RenderingEngine;
+
+/**
+ * Simple crash rendering test using huge GeneralPaths with marlin renderer
+ *
+ * run it with large heap (2g):
+ * java -Dsun.java2d.renderer=sun.java2d.marlin.MarlinRenderingEngine marlin.CrashTest
+ *
+ * @author bourgesl
+ */
+public class CrashTest {
+
+    static final boolean SAVE_IMAGE = false; // when true, each test writes its image as a BMP file (relative path)
+    static boolean USE_ROUND_CAPS_AND_JOINS = true; // createStroke: round caps/joins if true, else butt caps + miter joins
+
+    public static void main(String[] args) { // runs the huge-image case, two stroke cases, then the expected-failure case
+        // try insane image sizes:
+
+        // subpixel coords may overflow:
+//        testHugeImage((Integer.MAX_VALUE >> 3) + 1, 6);
+        // width larger than 23 bits (8388608 = 2^23): (RLE)
+        testHugeImage(8388608 + 1, 10);
+
+        test(0.1f, false, 0);
+        test(0.1f, true, 7f);
+
+        // Exceed 2Gb OffHeap buffer for edges:
+        try {
+            USE_ROUND_CAPS_AND_JOINS = true; // already the field's initial value; set explicitly for this case
+            test(0.1f, true, 0.1f);
+            System.out.println("Exception MISSING."); // NOTE(review): only logged; main still returns normally
+        }
+        catch (Throwable th) {
+            if (th instanceof ArrayIndexOutOfBoundsException) {
+                System.out.println("ArrayIndexOutOfBoundsException expected.");
+            } else { // NOTE(review): any other Throwable is printed and swallowed, not rethrown
+                System.out.println("Exception occured:");
+                th.printStackTrace();
+            }
+        }
+
+    }
+
+    private static void test(final float lineStroke, // stroke width passed to createStroke
+                             final boolean useDashes, // true: stroke with the 512-entry dash array
+                             final float dashMinLen) // first (smallest) dash length
+    throws ArrayIndexOutOfBoundsException
+    {
+        System.out.println("---\n" + "test: "
+            + "lineStroke=" + lineStroke
+            + ", useDashes=" + useDashes
+            +", dashMinLen=" + dashMinLen
+        );
+
+        final String renderer = RenderingEngine.getInstance().getClass().getSimpleName();
+        System.out.println("Testing renderer = " + renderer);
+
+        final BasicStroke stroke = createStroke(lineStroke, useDashes, dashMinLen);
+
+        // TODO: test Dasher.firstSegmentsBuffer resizing ?
+// array.dasher.firstSegmentsBuffer.d_float[2] sum: 6 avg: 3.0 [3 | 3]
+        /*
+         // Marlin growable arrays:
+         = new StatLong("array.dasher.firstSegmentsBuffer.d_float");
+         = new StatLong("array.stroker.polystack.curves.d_float");
+         = new StatLong("array.stroker.polystack.curveTypes.d_byte");
+         = new StatLong("array.marlincache.rowAAChunk.d_byte");
+         = new StatLong("array.marlincache.touchedTile.int");
+         = new StatLong("array.renderer.alphaline.int");
+         = new StatLong("array.renderer.crossings.int");
+         = new StatLong("array.renderer.aux_crossings.int");
+         = new StatLong("array.renderer.edgeBuckets.int");
+         = new StatLong("array.renderer.edgeBucketCounts.int");
+         = new StatLong("array.renderer.edgePtrs.int");
+         = new StatLong("array.renderer.aux_edgePtrs.int");
+         */
+        // size > 8192 (exceed both tile and buckets arrays)
+        final int size = 9000;
+        System.out.println("image size = " + size);
+
+        final BufferedImage image = new BufferedImage(size, size, BufferedImage.TYPE_INT_ARGB);
+
+        final Graphics2D g2d = (Graphics2D) image.getGraphics();
+        try {
+            g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
+            g2d.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
+
+            g2d.setClip(0, 0, size, size);
+            g2d.setBackground(Color.WHITE);
+            g2d.clearRect(0, 0, size, size);
+
+            g2d.setStroke(stroke);
+            g2d.setColor(Color.BLACK);
+
+            final long start = System.nanoTime();
+
+            paint(g2d, size - 10f); // path coordinates span [0, size - 10]
+
+            final long time = System.nanoTime() - start;
+
+            System.out.println("paint: duration= " + (1e-6 * time) + " ms.");
+
+            if (SAVE_IMAGE) {
+                try {
+                    final File file = new File("CrashTest-" + renderer + "-dash-" + useDashes + ".bmp");
+
+                    System.out.println("Writing file: " + file.getAbsolutePath());
+                    ImageIO.write(image, "BMP", file);
+                } catch (IOException ex) { // a failed image dump is reported but does not abort the test
+                    System.out.println("Writing file failure:");
+                    ex.printStackTrace();
+                }
+            }
+        } finally {
+            g2d.dispose(); // always release the graphics context, even when painting throws
+        }
+    }
+
+    private static void testHugeImage(final int width, final int height) // strokes the outline of a width x height gray image
+    throws ArrayIndexOutOfBoundsException
+    {
+        System.out.println("---\n" + "testHugeImage: "
+            + "width=" + width
+            + ", height=" + height
+        );
+
+        final String renderer = RenderingEngine.getInstance().getClass().getSimpleName();
+        System.out.println("Testing renderer = " + renderer);
+
+        final BasicStroke stroke = createStroke(2.5f, false, 0); // solid (undashed) 2.5 wide stroke
+
+        // size > 24bits (exceed both tile and buckets arrays)
+        System.out.println("image size = " + width + " x "+height);
+
+        final BufferedImage image = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY);
+
+        final Graphics2D g2d = (Graphics2D) image.getGraphics();
+        try {
+            g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
+            g2d.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
+
+            g2d.setBackground(Color.WHITE);
+            g2d.clearRect(0, 0, width, height);
+
+            g2d.setStroke(stroke);
+            g2d.setColor(Color.BLACK);
+
+            final Path2D.Float path = new Path2D.Float(WIND_NON_ZERO, 32); // rectangle along the image border
+            path.moveTo(0, 0);
+            path.lineTo(width, 0);
+            path.lineTo(width, height);
+            path.lineTo(0, height);
+            path.lineTo(0, 0);
+
+            final long start = System.nanoTime();
+
+            g2d.draw(path);
+
+            final long time = System.nanoTime() - start;
+
+            System.out.println("paint: duration= " + (1e-6 * time) + " ms.");
+
+            if (SAVE_IMAGE) {
+                try {
+                    final File file = new File("CrashTest-" + renderer +
+                                               "-huge-" + width + "x" +height + ".bmp");
+
+                    System.out.println("Writing file: " + file.getAbsolutePath());
+                    ImageIO.write(image, "BMP", file);
+                } catch (IOException ex) { // a failed image dump is reported but does not abort the test
+                    System.out.println("Writing file failure:");
+                    ex.printStackTrace();
+                }
+            }
+        } finally {
+            g2d.dispose(); // always release the graphics context, even when drawing throws
+        }
+    }
+
+    private static void paint(final Graphics2D g2d, final float size) { // builds one large path within [0, size] and draws it
+        final double halfSize = size / 2.0;
+
+        final Path2D.Float path = new Path2D.Float(WIND_NON_ZERO, 32 * 1024);
+
+        // show cross: both diagonals, then the top and left edges
+        path.moveTo(0, 0);
+        path.lineTo(size, size);
+
+        path.moveTo(size, 0);
+        path.lineTo(0, size);
+
+        path.moveTo(0, 0);
+        path.lineTo(size, 0);
+
+        path.moveTo(0, 0);
+        path.lineTo(0, size);
+
+        path.moveTo(0, 0);
+
+        double r = size; // side of the centered square filled on the current pass
+
+        final int ratio = 100;
+        int repeats = 1;
+
+        int n = 0; // number of lineTo segments added by the loop below
+
+        while (r > 1.0) { // r shrinks by halfSize on every pass
+            repeats *= ratio;
+
+            if (repeats > 10000) { // cap the number of zig-zags per pass
+                repeats = 10000;
+            }
+
+            for (int i = 0; i < repeats; i++) { // zig-zag between the top and bottom of the square
+                path.lineTo(halfSize - 0.5 * r + i * r / repeats,
+                            halfSize - 0.5 * r);
+                n++;
+                path.lineTo(halfSize - 0.5 * r + i * r / repeats + 0.1,
+                            halfSize + 0.5 * r);
+                n++;
+            }
+
+            r -= halfSize;
+        }
+        System.out.println("draw : " + n + " lines.");
+        g2d.draw(path);
+    }
+
+    private static BasicStroke createStroke(final float width, // stroke line width
+                                            final boolean useDashes, // false: solid stroke (null dash array)
+                                            final float dashMinLen) { // length of the first dash/gap pair
+        final float[] dashes;
+
+        if (useDashes) {
+            // huge dash array (exceed Dasher.INITIAL_ARRAY)
+            dashes = new float[512];
+
+            float cur = dashMinLen;
+            float step = 0.01f;
+
+            for (int i = 0; i < dashes.length; i += 2) { // each dash and its gap share a length, growing by step per pair
+                dashes[i] = cur;
+                dashes[i + 1] = cur;
+                cur += step;
+            }
+        } else {
+            dashes = null;
+        }
+
+        if (USE_ROUND_CAPS_AND_JOINS) {
+            // Use both round Caps & Joins:
+            return new BasicStroke(width, BasicStroke.CAP_ROUND, BasicStroke.JOIN_ROUND, 100.0f, dashes, 0.0f);
+        }
+        return new BasicStroke(width, BasicStroke.CAP_BUTT, BasicStroke.JOIN_MITER, 100.0f, dashes, 0.0f); // miter limit 100, dash phase 0
+    }
+}