7016856: dashing performance was reduced during latest changes to the OpenJDK rasterizer
authordlila
Tue, 08 Feb 2011 09:22:49 -0500 (2011-02-08)
changeset 8131 e2932d8114cb
parent 8130 c582063ba5bb
child 8137 efd894717b2c
child 8352 956b231fed62
7016856: dashing performance was reduced during latest changes to the OpenJDK rasterizer Summary: Optimized dashing, rasterizing, and the flow of transformed coordinates Reviewed-by: flar
jdk/src/share/classes/sun/java2d/pisces/Curve.java
jdk/src/share/classes/sun/java2d/pisces/Dasher.java
jdk/src/share/classes/sun/java2d/pisces/Helpers.java
jdk/src/share/classes/sun/java2d/pisces/PiscesCache.java
jdk/src/share/classes/sun/java2d/pisces/PiscesRenderingEngine.java
jdk/src/share/classes/sun/java2d/pisces/PiscesTileGenerator.java
jdk/src/share/classes/sun/java2d/pisces/Renderer.java
jdk/src/share/classes/sun/java2d/pisces/Stroker.java
jdk/src/share/classes/sun/java2d/pisces/TransformingPathConsumer2D.java
--- a/jdk/src/share/classes/sun/java2d/pisces/Curve.java	Thu Feb 03 19:15:30 2011 -0800
+++ b/jdk/src/share/classes/sun/java2d/pisces/Curve.java	Tue Feb 08 09:22:49 2011 -0500
@@ -27,7 +27,7 @@
 
 import java.util.Iterator;
 
-class Curve {
+final class Curve {
 
     float ax, ay, bx, by, cx, cy, dx, dy;
     float dax, day, dbx, dby;
@@ -101,14 +101,6 @@
         return t * (t * day + dby) + cy;
     }
 
-    private float ddxat(float t) {
-        return 2 * dax * t + dbx;
-    }
-
-    private float ddyat(float t) {
-        return 2 * day * t + dby;
-    }
-
     int dxRoots(float[] roots, int off) {
         return Helpers.quadraticRoots(dax, dbx, cx, roots, off);
     }
@@ -131,17 +123,17 @@
     // finds points where the first and second derivative are
     // perpendicular. This happens when g(t) = f'(t)*f''(t) == 0 (where
     // * is a dot product). Unfortunately, we have to solve a cubic.
-    private int perpendiculardfddf(float[] pts, int off, final float err) {
+    private int perpendiculardfddf(float[] pts, int off) {
         assert pts.length >= off + 4;
 
-        // these are the coefficients of g(t):
+        // these are the coefficients of some multiple of g(t) (not g(t),
+        // because the roots of a polynomial are not changed after multiplication
+        // by a constant, and this way we save a few multiplications).
         final float a = 2*(dax*dax + day*day);
         final float b = 3*(dax*dbx + day*dby);
         final float c = 2*(dax*cx + day*cy) + dbx*dbx + dby*dby;
         final float d = dbx*cx + dby*cy;
-        // TODO: We might want to divide the polynomial by a to make the
-        // coefficients smaller. This won't change the roots.
-        return Helpers.cubicRootsInAB(a, b, c, d, pts, off, err, 0f, 1f);
+        return Helpers.cubicRootsInAB(a, b, c, d, pts, off, 0f, 1f);
     }
 
     // Tries to find the roots of the function ROC(t)-w in [0, 1). It uses
@@ -161,7 +153,7 @@
         // no OOB exception, because by now off<=6, and roots.length >= 10
         assert off <= 6 && roots.length >= 10;
         int ret = off;
-        int numPerpdfddf = perpendiculardfddf(roots, off, err);
+        int numPerpdfddf = perpendiculardfddf(roots, off);
         float t0 = 0, ft0 = ROCsq(t0) - w*w;
         roots[off + numPerpdfddf] = 1f; // always check interval end points
         numPerpdfddf++;
@@ -189,8 +181,9 @@
     // A slight modification of the false position algorithm on wikipedia.
     // This only works for the ROCsq-x functions. It might be nice to have
     // the function as an argument, but that would be awkward in java6.
-    // It is something to consider for java7, depending on how closures
-    // and function objects turn out. Same goes for the newton's method
+    // TODO: It is something to consider for java8 (or whenever lambda
+    // expressions make it into the language), depending on how closures
+    // and turn out. Same goes for the newton's method
     // algorithm in Helpers.java
     private float falsePositionROCsqMinusX(float x0, float x1,
                                            final float x, final float err)
@@ -203,7 +196,7 @@
         for (int i = 0; i < iterLimit && Math.abs(t - s) > err * Math.abs(t + s); i++) {
             r = (fs * t - ft * s) / (fs - ft);
             fr = ROCsq(r) - x;
-            if (fr * ft > 0) {// have the same sign
+            if (sameSign(fr, ft)) {
                 ft = fr; t = r;
                 if (side < 0) {
                     fs /= (1 << (-side));
@@ -226,55 +219,65 @@
         return r;
     }
 
+    private static boolean sameSign(double x, double y) {
+        // another way is to test if x*y > 0. This is bad for small x, y.
+        return (x < 0 && y < 0) || (x > 0 && y > 0);
+    }
+
     // returns the radius of curvature squared at t of this curve
     // see http://en.wikipedia.org/wiki/Radius_of_curvature_(applications)
     private float ROCsq(final float t) {
-        final float dx = dxat(t);
-        final float dy = dyat(t);
-        final float ddx = ddxat(t);
-        final float ddy = ddyat(t);
+        // dx=xat(t) and dy=yat(t). These calls have been inlined for efficiency
+        final float dx = t * (t * dax + dbx) + cx;
+        final float dy = t * (t * day + dby) + cy;
+        final float ddx = 2 * dax * t + dbx;
+        final float ddy = 2 * day * t + dby;
         final float dx2dy2 = dx*dx + dy*dy;
         final float ddx2ddy2 = ddx*ddx + ddy*ddy;
         final float ddxdxddydy = ddx*dx + ddy*dy;
-        float ret = ((dx2dy2*dx2dy2) / (dx2dy2 * ddx2ddy2 - ddxdxddydy*ddxdxddydy))*dx2dy2;
-        return ret;
+        return dx2dy2*((dx2dy2*dx2dy2) / (dx2dy2 * ddx2ddy2 - ddxdxddydy*ddxdxddydy));
     }
 
-    // curve to be broken should be in pts[0]
-    // this will change the contents of both pts and Ts
+    // curve to be broken should be in pts
+    // this will change the contents of pts but not Ts
     // TODO: There's no reason for Ts to be an array. All we need is a sequence
     // of t values at which to subdivide. An array statisfies this condition,
     // but is unnecessarily restrictive. Ts should be an Iterator<Float> instead.
     // Doing this will also make dashing easier, since we could easily make
     // LengthIterator an Iterator<Float> and feed it to this function to simplify
     // the loop in Dasher.somethingTo.
-    static Iterator<float[]> breakPtsAtTs(final float[][] pts, final int type,
+    static Iterator<Integer> breakPtsAtTs(final float[] pts, final int type,
                                           final float[] Ts, final int numTs)
     {
-        assert pts.length >= 2 && pts[0].length >= 8 && numTs <= Ts.length;
-        return new Iterator<float[]>() {
-            int nextIdx = 0;
+        assert pts.length >= 2*type && numTs <= Ts.length;
+        return new Iterator<Integer>() {
+            // these prevent object creation and destruction during autoboxing.
+            // Because of this, the compiler should be able to completely
+            // eliminate the boxing costs.
+            final Integer i0 = 0;
+            final Integer itype = type;
             int nextCurveIdx = 0;
+            Integer curCurveOff = i0;
             float prevT = 0;
 
             @Override public boolean hasNext() {
                 return nextCurveIdx < numTs + 1;
             }
 
-            @Override public float[] next() {
-                float[] ret;
+            @Override public Integer next() {
+                Integer ret;
                 if (nextCurveIdx < numTs) {
                     float curT = Ts[nextCurveIdx];
                     float splitT = (curT - prevT) / (1 - prevT);
                     Helpers.subdivideAt(splitT,
-                                        pts[nextIdx], 0,
-                                        pts[nextIdx], 0,
-                                        pts[1-nextIdx], 0, type);
-                    updateTs(Ts, Ts[nextCurveIdx], nextCurveIdx + 1, numTs - nextCurveIdx - 1);
-                    ret = pts[nextIdx];
-                    nextIdx = 1 - nextIdx;
+                                        pts, curCurveOff,
+                                        pts, 0,
+                                        pts, type, type);
+                    prevT = curT;
+                    ret = i0;
+                    curCurveOff = itype;
                 } else {
-                    ret = pts[nextIdx];
+                    ret = curCurveOff;
                 }
                 nextCurveIdx++;
                 return ret;
@@ -283,12 +286,5 @@
             @Override public void remove() {}
         };
     }
-
-    // precondition: ts[off]...ts[off+len-1] must all be greater than t.
-    private static void updateTs(float[] ts, final float t, final int off, final int len) {
-        for (int i = off; i < off + len; i++) {
-            ts[i] = (ts[i] - t) / (1 - t);
-        }
-    }
 }
 
--- a/jdk/src/share/classes/sun/java2d/pisces/Dasher.java	Thu Feb 03 19:15:30 2011 -0800
+++ b/jdk/src/share/classes/sun/java2d/pisces/Dasher.java	Tue Feb 08 09:22:49 2011 -0500
@@ -38,7 +38,7 @@
  * semantics are unclear.
  *
  */
-public class Dasher implements sun.awt.geom.PathConsumer2D {
+final class Dasher implements sun.awt.geom.PathConsumer2D {
 
     private final PathConsumer2D out;
     private final float[] dash;
@@ -169,7 +169,7 @@
         float dx = x1 - x0;
         float dy = y1 - y0;
 
-        float len = (float) Math.hypot(dx, dy);
+        float len = (float) Math.sqrt(dx*dx + dy*dy);
 
         if (len == 0) {
             return;
@@ -226,7 +226,7 @@
             return;
         }
         if (li == null) {
-            li = new LengthIterator(4, 0.0001f);
+            li = new LengthIterator(4, 0.01f);
         }
         li.initializeIterationOnCurve(curCurvepts, type);
 
@@ -237,9 +237,9 @@
         while ((t = li.next(leftInThisDashSegment)) < 1) {
             if (t != 0) {
                 Helpers.subdivideAt((t - lastSplitT) / (1 - lastSplitT),
-                        curCurvepts, curCurveoff,
-                        curCurvepts, 0,
-                        curCurvepts, type, type);
+                                    curCurvepts, curCurveoff,
+                                    curCurvepts, 0,
+                                    curCurvepts, type, type);
                 lastSplitT = t;
                 goTo(curCurvepts, 2, type);
                 curCurveoff = type;
@@ -307,6 +307,11 @@
         private int recLevel;
         private boolean done;
 
+        // the lengths of the lines of the control polygon. Only its first
+        // curveType/2 - 1 elements are valid. This is an optimization. See
+        // next(float) for more detail.
+        private float[] curLeafCtrlPolyLengths = new float[3];
+
         public LengthIterator(int reclimit, float err) {
             this.limit = reclimit;
             this.minTincrement = 1f / (1 << limit);
@@ -344,11 +349,52 @@
             this.lastSegLen = 0;
         }
 
+        // 0 == false, 1 == true, -1 == invalid cached value.
+        private int cachedHaveLowAcceleration = -1;
+
+        private boolean haveLowAcceleration(float err) {
+            if (cachedHaveLowAcceleration == -1) {
+                final float len1 = curLeafCtrlPolyLengths[0];
+                final float len2 = curLeafCtrlPolyLengths[1];
+                // the test below is equivalent to !within(len1/len2, 1, err).
+                // It is using a multiplication instead of a division, so it
+                // should be a bit faster.
+                if (!Helpers.within(len1, len2, err*len2)) {
+                    cachedHaveLowAcceleration = 0;
+                    return false;
+                }
+                if (curveType == 8) {
+                    final float len3 = curLeafCtrlPolyLengths[2];
+                    // if len1 is close to 2 and 2 is close to 3, that probably
+                    // means 1 is close to 3 so the second part of this test might
+                    // not be needed, but it doesn't hurt to include it.
+                    if (!(Helpers.within(len2, len3, err*len3) &&
+                          Helpers.within(len1, len3, err*len3))) {
+                        cachedHaveLowAcceleration = 0;
+                        return false;
+                    }
+                }
+                cachedHaveLowAcceleration = 1;
+                return true;
+            }
+
+            return (cachedHaveLowAcceleration == 1);
+        }
+
+        // we want to avoid allocations/gc so we keep this array so we
+        // can put roots in it,
+        private float[] nextRoots = new float[4];
+
+        // caches the coefficients of the current leaf in its flattened
+        // form (see inside next() for what that means). The cache is
+        // invalid when it's third element is negative, since in any
+        // valid flattened curve, this would be >= 0.
+        private float[] flatLeafCoefCache = new float[] {0, 0, -1, 0};
         // returns the t value where the remaining curve should be split in
         // order for the left subdivided curve to have length len. If len
         // is >= than the length of the uniterated curve, it returns 1.
-        public float next(float len) {
-            float targetLength = lenAtLastSplit + len;
+        public float next(final float len) {
+            final float targetLength = lenAtLastSplit + len;
             while(lenAtNextT < targetLength) {
                 if (done) {
                     lastSegLen = lenAtNextT - lenAtLastSplit;
@@ -357,8 +403,46 @@
                 goToNextLeaf();
             }
             lenAtLastSplit = targetLength;
-            float t = binSearchForLen(lenAtLastSplit - lenAtLastT,
-                    recCurveStack[recLevel], curveType, lenAtNextT - lenAtLastT, ERR);
+            final float leaflen = lenAtNextT - lenAtLastT;
+            float t = (targetLength - lenAtLastT) / leaflen;
+
+            // cubicRootsInAB is a fairly expensive call, so we just don't do it
+            // if the acceleration in this section of the curve is small enough.
+            if (!haveLowAcceleration(0.05f)) {
+                // We flatten the current leaf along the x axis, so that we're
+                // left with a, b, c which define a 1D Bezier curve. We then
+                // solve this to get the parameter of the original leaf that
+                // gives us the desired length.
+
+                if (flatLeafCoefCache[2] < 0) {
+                    float x = 0+curLeafCtrlPolyLengths[0],
+                          y = x+curLeafCtrlPolyLengths[1];
+                    if (curveType == 8) {
+                        float z = y + curLeafCtrlPolyLengths[2];
+                        flatLeafCoefCache[0] = 3*(x - y) + z;
+                        flatLeafCoefCache[1] = 3*(y - 2*x);
+                        flatLeafCoefCache[2] = 3*x;
+                        flatLeafCoefCache[3] = -z;
+                    } else if (curveType == 6) {
+                        flatLeafCoefCache[0] = 0f;
+                        flatLeafCoefCache[1] = y - 2*x;
+                        flatLeafCoefCache[2] = 2*x;
+                        flatLeafCoefCache[3] = -y;
+                    }
+                }
+                float a = flatLeafCoefCache[0];
+                float b = flatLeafCoefCache[1];
+                float c = flatLeafCoefCache[2];
+                float d = t*flatLeafCoefCache[3];
+
+                // we use cubicRootsInAB here, because we want only roots in 0, 1,
+                // and our quadratic root finder doesn't filter, so it's just a
+                // matter of convenience.
+                int n = Helpers.cubicRootsInAB(a, b, c, d, nextRoots, 0, 0, 1);
+                if (n == 1 && !Float.isNaN(nextRoots[0])) {
+                    t = nextRoots[0];
+                }
+            }
             // t is relative to the current leaf, so we must make it a valid parameter
             // of the original curve.
             t = t * (nextT - lastT) + lastT;
@@ -379,36 +463,6 @@
             return lastSegLen;
         }
 
-        // Returns t such that if leaf is subdivided at t the left
-        // curve will have length len. leafLen must be the length of leaf.
-        private static Curve bsc = new Curve();
-        private static float binSearchForLen(float len, float[] leaf, int type,
-                                             float leafLen, float err)
-        {
-            assert len <= leafLen;
-            bsc.set(leaf, type);
-            float errBound = err*len;
-            float left = 0, right = 1;
-            while (left < right) {
-                float m = (left + right) / 2;
-                if (m == left || m == right) {
-                    return m;
-                }
-                float x = bsc.xat(m);
-                float y = bsc.yat(m);
-                float leftLen = Helpers.linelen(leaf[0], leaf[1], x, y);
-                if (Math.abs(leftLen - len) < errBound) {
-                    return m;
-                }
-                if (leftLen < len) {
-                    left = m;
-                } else {
-                    right = m;
-                }
-            }
-            return left;
-        }
-
         // go to the next leaf (in an inorder traversal) in the recursion tree
         // preconditions: must be on a leaf, and that leaf must not be the root.
         private void goToNextLeaf() {
@@ -437,6 +491,9 @@
                 lenAtLastT = lenAtNextT;
                 nextT += (1 << (limit - recLevel)) * minTincrement;
                 lenAtNextT += len;
+                // invalidate caches
+                flatLeafCoefCache[2] = -1;
+                cachedHaveLowAcceleration = -1;
             } else {
                 Helpers.subdivide(recCurveStack[recLevel], 0,
                                   recCurveStack[recLevel+1], 0,
@@ -450,11 +507,24 @@
         // this is a bit of a hack. It returns -1 if we're not on a leaf, and
         // the length of the leaf if we are on a leaf.
         private float onLeaf() {
-            float polylen = Helpers.polyLineLength(recCurveStack[recLevel], 0, curveType);
-            float linelen = Helpers.linelen(recCurveStack[recLevel][0], recCurveStack[recLevel][1],
-                    recCurveStack[recLevel][curveType - 2], recCurveStack[recLevel][curveType - 1]);
-            return (polylen - linelen < ERR || recLevel == limit) ?
-                   (polylen + linelen)/2 : -1;
+            float[] curve = recCurveStack[recLevel];
+            float polyLen = 0;
+
+            float x0 = curve[0], y0 = curve[1];
+            for (int i = 2; i < curveType; i += 2) {
+                final float x1 = curve[i], y1 = curve[i+1];
+                final float len = Helpers.linelen(x0, y0, x1, y1);
+                polyLen += len;
+                curLeafCtrlPolyLengths[i/2 - 1] = len;
+                x0 = x1;
+                y0 = y1;
+            }
+
+            final float lineLen = Helpers.linelen(curve[0], curve[1], curve[curveType-2], curve[curveType-1]);
+            if (polyLen - lineLen < ERR || recLevel == limit) {
+                return (polyLen + lineLen)/2;
+            }
+            return -1;
         }
     }
 
--- a/jdk/src/share/classes/sun/java2d/pisces/Helpers.java	Thu Feb 03 19:15:30 2011 -0800
+++ b/jdk/src/share/classes/sun/java2d/pisces/Helpers.java	Tue Feb 08 09:22:49 2011 -0500
@@ -26,6 +26,12 @@
 package sun.java2d.pisces;
 
 import java.util.Arrays;
+import static java.lang.Math.PI;
+import static java.lang.Math.cos;
+import static java.lang.Math.sqrt;
+import static java.lang.Math.cbrt;
+import static java.lang.Math.acos;
+
 
 final class Helpers {
     private Helpers() {
@@ -75,100 +81,74 @@
         return ret - off;
     }
 
-    // find the roots of g(t) = a*t^3 + b*t^2 + c*t + d in [A,B)
-    // We will not use Cardano's method, since it is complicated and
-    // involves too many square and cubic roots. We will use Newton's method.
-    // TODO: this should probably return ALL roots. Then the user can do
-    // his own filtering of roots outside [A,B).
-    static int cubicRootsInAB(final float a, final float b,
-                              final float c, final float d,
-                              float[] pts, final int off, final float E,
+    // find the roots of g(t) = d*t^3 + a*t^2 + b*t + c in [A,B)
+    static int cubicRootsInAB(float d, float a, float b, float c,
+                              float[] pts, final int off,
                               final float A, final float B)
     {
-        if (a == 0) {
-            return quadraticRoots(b, c, d, pts, off);
+        if (d == 0) {
+            int num = quadraticRoots(a, b, c, pts, off);
+            return filterOutNotInAB(pts, off, num, A, B) - off;
         }
-        // the coefficients of g'(t). no dc variable because dc=c
-        // we use these to get the critical points of g(t), which
-        // we then use to chose starting points for Newton's method. These
-        // should be very close to the actual roots.
-        final float da = 3 * a;
-        final float db = 2 * b;
-        int numCritPts = quadraticRoots(da, db, c, pts, off+1);
-        numCritPts = filterOutNotInAB(pts, off+1, numCritPts, A, B) - off - 1;
-        // need them sorted.
-        if (numCritPts == 2 && pts[off+1] > pts[off+2]) {
-            float tmp = pts[off+1];
-            pts[off+1] = pts[off+2];
-            pts[off+2] = tmp;
+        // From Graphics Gems:
+        // http://tog.acm.org/resources/GraphicsGems/gems/Roots3And4.c
+        // (also from awt.geom.CubicCurve2D. But here we don't need as
+        // much accuracy and we don't want to create arrays so we use
+        // our own customized version).
+
+        /* normal form: x^3 + ax^2 + bx + c = 0 */
+        a /= d;
+        b /= d;
+        c /= d;
+
+        //  substitute x = y - A/3 to eliminate quadratic term:
+        //     x^3 +Px + Q = 0
+        //
+        // Since we actually need P/3 and Q/2 for all of the
+        // calculations that follow, we will calculate
+        // p = P/3
+        // q = Q/2
+        // instead and use those values for simplicity of the code.
+        double sq_A = a * a;
+        double p = 1.0/3 * (-1.0/3 * sq_A + b);
+        double q = 1.0/2 * (2.0/27 * a * sq_A - 1.0/3 * a * b + c);
+
+        /* use Cardano's formula */
+
+        double cb_p = p * p * p;
+        double D = q * q + cb_p;
+
+        int num;
+        if (D < 0) {
+            // see: http://en.wikipedia.org/wiki/Cubic_function#Trigonometric_.28and_hyperbolic.29_method
+            final double phi = 1.0/3 * acos(-q / sqrt(-cb_p));
+            final double t = 2 * sqrt(-p);
+
+            pts[ off+0 ] =  (float)( t * cos(phi));
+            pts[ off+1 ] =  (float)(-t * cos(phi + PI / 3));
+            pts[ off+2 ] =  (float)(-t * cos(phi - PI / 3));
+            num = 3;
+        } else {
+            final double sqrt_D = sqrt(D);
+            final double u = cbrt(sqrt_D - q);
+            final double v = - cbrt(sqrt_D + q);
+
+            pts[ off ] = (float)(u + v);
+            num = 1;
+
+            if (within(D, 0, 1e-8)) {
+                pts[off+1] = -(pts[off] / 2);
+                num = 2;
+            }
         }
 
-        int ret = off;
-
-        // we don't actually care much about the extrema themselves. We
-        // only use them to ensure that g(t) is monotonic in each
-        // interval [pts[i],pts[i+1] (for i in off...off+numCritPts+1).
-        // This will allow us to determine intervals containing exactly
-        // one root.
-        // The end points of the interval are always local extrema.
-        pts[off] = A;
-        pts[off + numCritPts + 1] = B;
-        numCritPts += 2;
-
-        float x0 = pts[off], fx0 = evalCubic(a, b, c, d, x0);
-        for (int i = off; i < off + numCritPts - 1; i++) {
-            float x1 = pts[i+1], fx1 = evalCubic(a, b, c, d, x1);
-            if (fx0 == 0f) {
-                pts[ret++] = x0;
-            } else if (fx1 * fx0 < 0f) { // have opposite signs
-                pts[ret++] = CubicNewton(a, b, c, d,
-                        x0 + fx0 * (x1 - x0) / (fx0 - fx1), E);
-            }
-            x0 = x1;
-            fx0 = fx1;
-        }
-        return ret - off;
-    }
+        final float sub = 1.0f/3 * a;
 
-    // precondition: the polynomial to be evaluated must not be 0 at x0.
-    static float CubicNewton(final float a, final float b,
-                             final float c, final float d,
-                             float x0, final float err)
-    {
-        // considering how this function is used, 10 should be more than enough
-        final int itlimit = 10;
-        float fx0 = evalCubic(a, b, c, d, x0);
-        float x1;
-        int count = 0;
-        while(true) {
-            x1 = x0 - (fx0 / evalCubic(0, 3 * a, 2 * b, c, x0));
-            if (Math.abs(x1 - x0) < err * Math.abs(x1 + x0) || count == itlimit) {
-                break;
-            }
-            x0 = x1;
-            fx0 = evalCubic(a, b, c, d, x0);
-            count++;
+        for (int i = 0; i < num; ++i) {
+            pts[ off+i ] -= sub;
         }
-        return x1;
-    }
 
-    // fills the input array with numbers 0, INC, 2*INC, ...
-    static void fillWithIdxes(final float[] data, final int[] idxes) {
-        if (idxes.length > 0) {
-            idxes[0] = 0;
-            for (int i = 1; i < idxes.length; i++) {
-                idxes[i] = idxes[i-1] + (int)data[idxes[i-1]];
-            }
-        }
-    }
-
-    static void fillWithIdxes(final int[] idxes, final int inc) {
-        if (idxes.length > 0) {
-            idxes[0] = 0;
-            for (int i = 1; i < idxes.length; i++) {
-                idxes[i] = idxes[i-1] + inc;
-            }
-        }
+        return filterOutNotInAB(pts, off, num, A, B) - off;
     }
 
     // These use a hardcoded factor of 2 for increasing sizes. Perhaps this
@@ -182,6 +162,7 @@
         }
         return Arrays.copyOf(in, 2 * (cursize + numToAdd));
     }
+
     static int[] widenArray(int[] in, final int cursize, final int numToAdd) {
         if (in.length >= cursize + numToAdd) {
             return in;
@@ -208,7 +189,7 @@
     {
         int ret = off;
         for (int i = off; i < off + len; i++) {
-            if (nums[i] > a && nums[i] < b) {
+            if (nums[i] >= a && nums[i] < b) {
                 nums[ret++] = nums[i];
             }
         }
@@ -225,7 +206,9 @@
     }
 
     static float linelen(float x1, float y1, float x2, float y2) {
-        return (float)Math.hypot(x2 - x1, y2 - y1);
+        final float dx = x2 - x1;
+        final float dy = y2 - y1;
+        return (float)Math.sqrt(dx*dx + dy*dy);
     }
 
     static void subdivide(float[] src, int srcoff, float[] left, int leftoff,
--- a/jdk/src/share/classes/sun/java2d/pisces/PiscesCache.java	Thu Feb 03 19:15:30 2011 -0800
+++ b/jdk/src/share/classes/sun/java2d/pisces/PiscesCache.java	Tue Feb 08 09:22:49 2011 -0500
@@ -32,7 +32,7 @@
  *
  * @see PiscesRenderer#render
  */
-public final class PiscesCache {
+final class PiscesCache {
 
     final int bboxX0, bboxY0, bboxX1, bboxY1;
 
--- a/jdk/src/share/classes/sun/java2d/pisces/PiscesRenderingEngine.java	Thu Feb 03 19:15:30 2011 -0800
+++ b/jdk/src/share/classes/sun/java2d/pisces/PiscesRenderingEngine.java	Tue Feb 08 09:22:49 2011 -0500
@@ -27,7 +27,6 @@
 
 import java.awt.Shape;
 import java.awt.BasicStroke;
-import java.awt.geom.NoninvertibleTransformException;
 import java.awt.geom.Path2D;
 import java.awt.geom.AffineTransform;
 import java.awt.geom.PathIterator;
@@ -250,7 +249,7 @@
                   float dashphase,
                   PathConsumer2D pc2d)
     {
-        // We use inat and outat so that in Stroker and Dasher we can work only
+        // We use strokerat and outat so that in Stroker and Dasher we can work only
         // with the pre-transformation coordinates. This will repeat a lot of
         // computations done in the path iterator, but the alternative is to
         // work with transformed paths and compute untransformed coordinates
@@ -265,7 +264,7 @@
         // transformation after the path processing has been done.
         // We can't do this if normalization is on, because it isn't a good
         // idea to normalize before the transformation is applied.
-        AffineTransform inat = null;
+        AffineTransform strokerat = null;
         AffineTransform outat = null;
 
         PathIterator pi = null;
@@ -284,9 +283,9 @@
                 // again so, nothing can be drawn.
 
                 // Every path needs an initial moveTo and a pathDone. If these
-                // aren't there this causes a SIGSEV in libawt.so (at the time
+                // are not there this causes a SIGSEGV in libawt.so (at the time
                 // of writing of this comment (September 16, 2010)). Actually,
-                // I'm not sure if the moveTo is necessary to avoid the SIGSEV
+                // I am not sure if the moveTo is necessary to avoid the SIGSEGV
                 // but the pathDone is definitely needed.
                 pc2d.moveTo(0, 0);
                 pc2d.pathDone();
@@ -313,25 +312,32 @@
                 if (normalize != NormMode.OFF) {
                     pi = new NormalizingPathIterator(pi, normalize);
                 }
-                // leave inat and outat null.
+                // by now strokerat == null && outat == null. Input paths to
+                // stroker (and maybe dasher) will have the full transform at
+                // applied to them and nothing will happen to the output paths.
             } else {
-                // We only need the inverse if normalization is on. Otherwise
-                // we just don't transform the input paths, do all the stroking
-                // and then transform out output (instead of making PathIterator
-                // apply the transformation, us applying the inverse, and then
-                // us applying the transform again to our output).
-                outat = at;
                 if (normalize != NormMode.OFF) {
-                    try {
-                        inat = outat.createInverse();
-                    } catch (NoninvertibleTransformException e) {
-                        // we made sure this can't happen
-                        e.printStackTrace();
-                    }
+                    strokerat = at;
                     pi = src.getPathIterator(at);
                     pi = new NormalizingPathIterator(pi, normalize);
+                    // by now strokerat == at && outat == null. Input paths to
+                    // stroker (and maybe dasher) will have the full transform at
+                    // applied to them, then they will be normalized, and then
+                    // the inverse of *only the non translation part of at* will
+                    // be applied to the normalized paths. This won't cause problems
+                    // in stroker, because, suppose at = T*A, where T is just the
+                    // translation part of at, and A is the rest. T*A has already
+                    // been applied to Stroker/Dasher's input. Then Ainv will be
+                    // applied. Ainv*T*A is not equal to T, but it is a translation,
+                    // which means that none of stroker's assumptions about its
+                    // input will be violated. After all this, A will be applied
+                    // to stroker's output.
                 } else {
+                    outat = at;
                     pi = src.getPathIterator(null);
+                    // outat == at && strokerat == null. This is because if no
+                    // normalization is done, we can just apply all our
+                    // transformations to stroker's output.
                 }
             }
         } else {
@@ -343,13 +349,17 @@
             }
         }
 
+        // by now, at least one of outat and strokerat will be null. Unless at is not
+        // a constant multiple of an orthogonal transformation, they will both be
+        // null. In other cases, outat == at if normalization is off, and if
+        // normalization is on, strokerat == at.
         pc2d = TransformingPathConsumer2D.transformConsumer(pc2d, outat);
+        pc2d = TransformingPathConsumer2D.deltaTransformConsumer(pc2d, strokerat);
         pc2d = new Stroker(pc2d, width, caps, join, miterlimit);
         if (dashes != null) {
             pc2d = new Dasher(pc2d, dashes, dashphase);
         }
-        pc2d = TransformingPathConsumer2D.transformConsumer(pc2d, inat);
-
+        pc2d = TransformingPathConsumer2D.inverseDeltaTransformConsumer(pc2d, strokerat);
         pathTo(pi, pc2d);
     }
 
@@ -588,9 +598,9 @@
         }
 
         Renderer r = new Renderer(3, 3,
-                                  clip.getLoX(), clip.getLoY(),
-                                  clip.getWidth(), clip.getHeight(),
-                                  PathIterator.WIND_EVEN_ODD);
+                clip.getLoX(), clip.getLoY(),
+                clip.getWidth(), clip.getHeight(),
+                PathIterator.WIND_EVEN_ODD);
 
         r.moveTo((float) x, (float) y);
         r.lineTo((float) (x+dx1), (float) (y+dy1));
--- a/jdk/src/share/classes/sun/java2d/pisces/PiscesTileGenerator.java	Thu Feb 03 19:15:30 2011 -0800
+++ b/jdk/src/share/classes/sun/java2d/pisces/PiscesTileGenerator.java	Tue Feb 08 09:22:49 2011 -0500
@@ -30,7 +30,7 @@
 
 import sun.java2d.pipe.AATileGenerator;
 
-public final class PiscesTileGenerator implements AATileGenerator {
+final class PiscesTileGenerator implements AATileGenerator {
     public static final int TILE_SIZE = PiscesCache.TILE_SIZE;
 
     // perhaps we should be using weak references here, but right now
--- a/jdk/src/share/classes/sun/java2d/pisces/Renderer.java	Thu Feb 03 19:15:30 2011 -0800
+++ b/jdk/src/share/classes/sun/java2d/pisces/Renderer.java	Tue Feb 08 09:22:49 2011 -0500
@@ -25,12 +25,9 @@
 
 package sun.java2d.pisces;
 
-import java.util.Arrays;
-import java.util.Iterator;
-
 import sun.awt.geom.PathConsumer2D;
 
-public class Renderer implements PathConsumer2D {
+final class Renderer implements PathConsumer2D {
 
     private class ScanlineIterator {
 
@@ -39,115 +36,81 @@
         // crossing bounds. The bounds are not necessarily tight (the scan line
         // at minY, for example, might have no crossings). The x bounds will
         // be accumulated as crossings are computed.
-        private int minY, maxY;
+        private final int maxY;
         private int nextY;
 
         // indices into the segment pointer lists. They indicate the "active"
         // sublist in the segment lists (the portion of the list that contains
         // all the segments that cross the next scan line).
-        private int elo, ehi;
-        private final int[] edgePtrs;
-        private int qlo, qhi;
-        private final int[] quadPtrs;
-        private int clo, chi;
-        private final int[] curvePtrs;
+        private int edgeCount;
+        private int[] edgePtrs;
 
         private static final int INIT_CROSSINGS_SIZE = 10;
 
         private ScanlineIterator() {
             crossings = new int[INIT_CROSSINGS_SIZE];
-
-            edgePtrs = new int[numEdges];
-            Helpers.fillWithIdxes(edgePtrs, SIZEOF_EDGE);
-            qsort(edges, edgePtrs, YMIN, 0, numEdges - 1);
-
-            quadPtrs = new int[numQuads];
-            Helpers.fillWithIdxes(quadPtrs, SIZEOF_QUAD);
-            qsort(quads, quadPtrs, YMIN, 0, numQuads - 1);
-
-            curvePtrs = new int[numCurves];
-            Helpers.fillWithIdxes(curvePtrs, SIZEOF_CURVE);
-            qsort(curves, curvePtrs, YMIN, 0, numCurves - 1);
+            edgePtrs = new int[INIT_CROSSINGS_SIZE];
 
             // We don't care if we clip some of the line off with ceil, since
             // no scan line crossings will be eliminated (in fact, the ceil is
             // the y of the first scan line crossing).
-            nextY = minY = Math.max(boundsMinY, (int)Math.ceil(edgeMinY));
-            maxY = Math.min(boundsMaxY, (int)Math.ceil(edgeMaxY));
-
-            for (elo = 0; elo < numEdges && edges[edgePtrs[elo]+YMAX] <= minY; elo++)
-                ;
-            // the active list is *edgePtrs[lo] (inclusive) *edgePtrs[hi] (exclusive)
-            for (ehi = elo; ehi < numEdges && edges[edgePtrs[ehi]+YMIN] <= minY; ehi++)
-                edgeSetCurY(edgePtrs[ehi], minY);// TODO: make minY a float to avoid casts
-
-            for (qlo = 0; qlo < numQuads && quads[quadPtrs[qlo]+YMAX] <= minY; qlo++)
-                ;
-            for (qhi = qlo; qhi < numQuads && quads[quadPtrs[qhi]+YMIN] <= minY; qhi++)
-                quadSetCurY(quadPtrs[qhi], minY);
-
-            for (clo = 0; clo < numCurves && curves[curvePtrs[clo]+YMAX] <= minY; clo++)
-                ;
-            for (chi = clo; chi < numCurves && curves[curvePtrs[chi]+YMIN] <= minY; chi++)
-                curveSetCurY(curvePtrs[chi], minY);
+            final int minY = getFirstScanLineCrossing();
+            nextY = minY;
+            maxY = getScanLineCrossingEnd()-1;
+            edgeCount = 0;
         }
 
         private int next() {
-            // we go through the active lists and remove segments that don't cross
-            // the nextY scanline.
-            int crossingIdx = 0;
-            for (int i = elo; i < ehi; i++) {
-                if (edges[edgePtrs[i]+YMAX] <= nextY) {
-                    edgePtrs[i] = edgePtrs[elo++];
+            int cury = nextY++;
+            int bucket = cury - boundsMinY;
+            int count = this.edgeCount;
+            int ptrs[] = this.edgePtrs;
+            int bucketcount = edgeBucketCounts[bucket];
+            if ((bucketcount & 0x1) != 0) {
+                int newCount = 0;
+                for (int i = 0; i < count; i++) {
+                    int ecur = ptrs[i];
+                    if (edges[ecur+YMAX] > cury) {
+                        ptrs[newCount++] = ecur;
+                    }
                 }
+                count = newCount;
             }
-            for (int i = qlo; i < qhi; i++) {
-                if (quads[quadPtrs[i]+YMAX] <= nextY) {
-                    quadPtrs[i] = quadPtrs[qlo++];
-                }
-            }
-            for (int i = clo; i < chi; i++) {
-                if (curves[curvePtrs[i]+YMAX] <= nextY) {
-                    curvePtrs[i] = curvePtrs[clo++];
-                }
+            ptrs = Helpers.widenArray(ptrs, count, bucketcount >> 1);
+            for (int ecur = edgeBuckets[bucket]; ecur != NULL; ecur = (int)edges[ecur+NEXT]) {
+                ptrs[count++] = ecur;
+                // REMIND: Adjust start Y if necessary
             }
-
-            crossings = Helpers.widenArray(crossings, 0, ehi-elo+qhi-qlo+chi-clo);
-
-            // Now every edge between lo and hi crosses nextY. Compute it's
-            // crossing and put it in the crossings array.
-            for (int i = elo; i < ehi; i++) {
-                int ptr = edgePtrs[i];
-                addCrossing(nextY, (int)edges[ptr+CURX], edges[ptr+OR], crossingIdx);
-                edgeGoToNextY(ptr);
-                crossingIdx++;
-            }
-            for (int i = qlo; i < qhi; i++) {
-                int ptr = quadPtrs[i];
-                addCrossing(nextY, (int)quads[ptr+CURX], quads[ptr+OR], crossingIdx);
-                quadGoToNextY(ptr);
-                crossingIdx++;
+            this.edgePtrs = ptrs;
+            this.edgeCount = count;
+//            if ((count & 0x1) != 0) {
+//                System.out.println("ODD NUMBER OF EDGES!!!!");
+//            }
+            int xings[] = this.crossings;
+            if (xings.length < count) {
+                this.crossings = xings = new int[ptrs.length];
             }
-            for (int i = clo; i < chi; i++) {
-                int ptr = curvePtrs[i];
-                addCrossing(nextY, (int)curves[ptr+CURX], curves[ptr+OR], crossingIdx);
-                curveGoToNextY(ptr);
-                crossingIdx++;
+            for (int i = 0; i < count; i++) {
+                int ecur = ptrs[i];
+                float curx = edges[ecur+CURX];
+                int cross = ((int) curx) << 1;
+                edges[ecur+CURX] = curx + edges[ecur+SLOPE];
+                if (edges[ecur+OR] > 0) {
+                    cross |= 1;
+                }
+                int j = i;
+                while (--j >= 0) {
+                    int jcross = xings[j];
+                    if (jcross <= cross) {
+                        break;
+                    }
+                    xings[j+1] = jcross;
+                    ptrs[j+1] = ptrs[j];
+                }
+                xings[j+1] = cross;
+                ptrs[j+1] = ecur;
             }
-
-            nextY++;
-            // Expand active lists to include new edges.
-            for (; ehi < numEdges && edges[edgePtrs[ehi]+YMIN] <= nextY; ehi++) {
-                edgeSetCurY(edgePtrs[ehi], nextY);
-            }
-            for (; qhi < numQuads && quads[quadPtrs[qhi]+YMIN] <= nextY; qhi++) {
-                quadSetCurY(quadPtrs[qhi], nextY);
-            }
-            for (; chi < numCurves && curves[curvePtrs[chi]+YMIN] <= nextY; chi++) {
-                curveSetCurY(curvePtrs[chi], nextY);
-            }
-            Arrays.sort(crossings, 0, crossingIdx);
-            return crossingIdx;
+            return count;
         }
 
         private boolean hasNext() {
@@ -157,51 +120,7 @@
         private int curY() {
             return nextY - 1;
         }
-
-        private void addCrossing(int y, int x, float or, int idx) {
-            x <<= 1;
-            crossings[idx] = ((or > 0) ? (x | 0x1) : x);
-        }
     }
-    // quicksort implementation for sorting the edge indices ("pointers")
-    // by increasing y0. first, last are indices into the "pointer" array
-    // It sorts the pointer array from first (inclusive) to last (inclusive)
-    private static void qsort(final float[] data, final int[] ptrs,
-                              final int fieldForCmp, int first, int last)
-    {
-        if (last > first) {
-            int p = partition(data, ptrs, fieldForCmp, first, last);
-            if (first < p - 1) {
-                qsort(data, ptrs, fieldForCmp, first, p - 1);
-            }
-            if (p < last) {
-                qsort(data, ptrs, fieldForCmp, p, last);
-            }
-        }
-    }
-
-    // i, j are indices into edgePtrs.
-    private static int partition(final float[] data, final int[] ptrs,
-                                 final int fieldForCmp, int i, int j)
-    {
-        int pivotValFieldForCmp = ptrs[i]+fieldForCmp;
-        while (i <= j) {
-            // edges[edgePtrs[i]+1] is equivalent to (*(edgePtrs[i])).y0 in C
-            while (data[ptrs[i]+fieldForCmp] < data[pivotValFieldForCmp])
-                i++;
-            while (data[ptrs[j]+fieldForCmp] > data[pivotValFieldForCmp])
-                j--;
-            if (i <= j) {
-                int tmp = ptrs[i];
-                ptrs[i] = ptrs[j];
-                ptrs[j] = tmp;
-                i++;
-                j--;
-            }
-        }
-        return i;
-    }
-//============================================================================
 
 
 //////////////////////////////////////////////////////////////////////////////
@@ -209,269 +128,89 @@
 //////////////////////////////////////////////////////////////////////////////
 // TODO(maybe): very tempting to use fixed point here. A lot of opportunities
 // for shifts and just removing certain operations altogether.
-// TODO: it might be worth it to make an EdgeList class. It would probably
-// clean things up a bit and not impact performance much.
 
     // common to all types of input path segments.
-    private static final int YMIN = 0;
-    private static final int YMAX = 1;
-    private static final int CURX = 2;
-    // this and OR are meant to be indeces into "int" fields, but arrays must
+    private static final int YMAX = 0;
+    private static final int CURX = 1;
+    // NEXT and OR are meant to be indices into "int" fields, but arrays must
     // be homogenous, so every field is a float. However floats can represent
     // exactly up to 26 bit ints, so we're ok.
-    private static final int CURY = 3;
-    private static final int OR   = 4;
-
-    // for straight lines only:
-    private static final int SLOPE = 5;
-
-    // for quads and cubics:
-    private static final int X0 = 5;
-    private static final int Y0 = 6;
-    private static final int XL = 7;
-    private static final int COUNT = 8;
-    private static final int CURSLOPE = 9;
-    private static final int DX = 10;
-    private static final int DY = 11;
-    private static final int DDX = 12;
-    private static final int DDY = 13;
-
-    // for cubics only
-    private static final int DDDX = 14;
-    private static final int DDDY = 15;
+    private static final int OR   = 2;
+    private static final int SLOPE = 3;
+    private static final int NEXT = 4;
 
     private float edgeMinY = Float.POSITIVE_INFINITY;
     private float edgeMaxY = Float.NEGATIVE_INFINITY;
     private float edgeMinX = Float.POSITIVE_INFINITY;
     private float edgeMaxX = Float.NEGATIVE_INFINITY;
 
-    private static final int SIZEOF_EDGE = 6;
+    private static final int SIZEOF_EDGE = 5;
+    // don't just set NULL to -1, because we want NULL+NEXT to be negative.
+    private static final int NULL = -SIZEOF_EDGE;
     private float[] edges = null;
+    private int[] edgeBuckets = null;
+    private int[] edgeBucketCounts = null; // 2*newedges + (1 if pruning needed)
     private int numEdges;
-    // these are static because we need them to be usable from ScanlineIterator
-    private void edgeSetCurY(final int idx, int y) {
-        edges[idx+CURX] += (y - edges[idx+CURY]) * edges[idx+SLOPE];
-        edges[idx+CURY] = y;
-    }
-    private void edgeGoToNextY(final int idx) {
-        edges[idx+CURY] += 1;
-        edges[idx+CURX] += edges[idx+SLOPE];
-    }
-
-
-    private static final int SIZEOF_QUAD = 14;
-    private float[] quads = null;
-    private int numQuads;
-    // This function should be called exactly once, to set the first scanline
-    // of the curve. Before it is called, the curve should think its first
-    // scanline is CEIL(YMIN).
-    private void quadSetCurY(final int idx, final int y) {
-        assert y < quads[idx+YMAX];
-        assert (quads[idx+CURY] > y);
-        assert (quads[idx+CURY] == Math.ceil(quads[idx+CURY]));
-
-        while (quads[idx+CURY] < ((float)y)) {
-            quadGoToNextY(idx);
-        }
-    }
-    private void quadGoToNextY(final int idx) {
-        quads[idx+CURY] += 1;
-        // this will get overriden if the while executes.
-        quads[idx+CURX] += quads[idx+CURSLOPE];
-        int count = (int)quads[idx+COUNT];
-        // this loop should never execute more than once because our
-        // curve is monotonic in Y. Still we put it in because you can
-        // never be too sure when dealing with floating point.
-        while(quads[idx+CURY] >= quads[idx+Y0] && count > 0) {
-            float x0 = quads[idx+X0], y0 = quads[idx+Y0];
-            count = executeQuadAFDIteration(idx);
-            float x1 = quads[idx+X0], y1 = quads[idx+Y0];
-            // our quads are monotonic, so this shouldn't happen, but
-            // it is conceivable that for very flat quads with different
-            // y values at their endpoints AFD might give us a horizontal
-            // segment.
-            if (y1 == y0) {
-                continue;
-            }
-            quads[idx+CURSLOPE] = (x1 - x0) / (y1 - y0);
-            quads[idx+CURX] = x0 + (quads[idx+CURY] - y0) * quads[idx+CURSLOPE];
-        }
-    }
-
-
-    private static final int SIZEOF_CURVE = 16;
-    private float[] curves = null;
-    private int numCurves;
-    private void curveSetCurY(final int idx, final int y) {
-        assert y < curves[idx+YMAX];
-        assert (curves[idx+CURY] > y);
-        assert (curves[idx+CURY] == Math.ceil(curves[idx+CURY]));
-
-        while (curves[idx+CURY] < ((float)y)) {
-            curveGoToNextY(idx);
-        }
-    }
-    private void curveGoToNextY(final int idx) {
-        curves[idx+CURY] += 1;
-        // this will get overriden if the while executes.
-        curves[idx+CURX] += curves[idx+CURSLOPE];
-        int count = (int)curves[idx+COUNT];
-        // this loop should never execute more than once because our
-        // curve is monotonic in Y. Still we put it in because you can
-        // never be too sure when dealing with floating point.
-        while(curves[idx+CURY] >= curves[idx+Y0] && count > 0) {
-            float x0 = curves[idx+X0], y0 = curves[idx+Y0];
-            count = executeCurveAFDIteration(idx);
-            float x1 = curves[idx+X0], y1 = curves[idx+Y0];
-            // our curves are monotonic, so this shouldn't happen, but
-            // it is conceivable that for very flat curves with different
-            // y values at their endpoints AFD might give us a horizontal
-            // segment.
-            if (y1 == y0) {
-                continue;
-            }
-            curves[idx+CURSLOPE] = (x1 - x0) / (y1 - y0);
-            curves[idx+CURX] = x0 + (curves[idx+CURY] - y0) * curves[idx+CURSLOPE];
-        }
-    }
-
 
     private static final float DEC_BND = 20f;
     private static final float INC_BND = 8f;
+
+    // each bucket is a linked list. this method adds eptr to the
+    // start "bucket"th linked list.
+    private void addEdgeToBucket(final int eptr, final int bucket) {
+        edges[eptr+NEXT] = edgeBuckets[bucket];
+        edgeBuckets[bucket] = eptr;
+        edgeBucketCounts[bucket] += 2;
+    }
+
     // Flattens using adaptive forward differencing. This only carries out
     // one iteration of the AFD loop. All it does is update AFD variables (i.e.
     // X0, Y0, D*[X|Y], COUNT; not variables used for computing scanline crossings).
-    private int executeQuadAFDIteration(int idx) {
-        int count = (int)quads[idx+COUNT];
-        float ddx = quads[idx+DDX];
-        float ddy = quads[idx+DDY];
-        float dx = quads[idx+DX];
-        float dy = quads[idx+DY];
-
-        while (Math.abs(ddx) > DEC_BND || Math.abs(ddy) > DEC_BND) {
-            ddx = ddx / 4;
-            ddy = ddy / 4;
-            dx = (dx - ddx) / 2;
-            dy = (dy - ddy) / 2;
+    private void quadBreakIntoLinesAndAdd(float x0, float y0,
+                                          final Curve c,
+                                          final float x2, final float y2) {
+        final float QUAD_DEC_BND = 32;
+        final int countlg = 4;
+        int count = 1 << countlg;
+        int countsq = count * count;
+        float maxDD = Math.max(c.dbx / countsq, c.dby / countsq);
+        while (maxDD > QUAD_DEC_BND) {
+            maxDD /= 4;
             count <<= 1;
         }
-        // can only do this on even "count" values, because we must divide count by 2
-        while (count % 2 == 0 && Math.abs(dx) <= INC_BND && Math.abs(dy) <= INC_BND) {
-            dx = 2 * dx + ddx;
-            dy = 2 * dy + ddy;
-            ddx = 4 * ddx;
-            ddy = 4 * ddy;
-            count >>= 1;
-        }
-        count--;
-        if (count > 0) {
-            quads[idx+X0] += dx;
-            dx += ddx;
-            quads[idx+Y0] += dy;
-            dy += ddy;
-        } else {
-            quads[idx+X0] = quads[idx+XL];
-            quads[idx+Y0] = quads[idx+YMAX];
-        }
-        quads[idx+COUNT] = count;
-        quads[idx+DDX] = ddx;
-        quads[idx+DDY] = ddy;
-        quads[idx+DX] = dx;
-        quads[idx+DY] = dy;
-        return count;
-    }
-    private int executeCurveAFDIteration(int idx) {
-        int count = (int)curves[idx+COUNT];
-        float ddx = curves[idx+DDX];
-        float ddy = curves[idx+DDY];
-        float dx = curves[idx+DX];
-        float dy = curves[idx+DY];
-        float dddx = curves[idx+DDDX];
-        float dddy = curves[idx+DDDY];
+
+        countsq = count * count;
+        final float ddx = c.dbx / countsq;
+        final float ddy = c.dby / countsq;
+        float dx = c.bx / countsq + c.cx / count;
+        float dy = c.by / countsq + c.cy / count;
 
-        while (Math.abs(ddx) > DEC_BND || Math.abs(ddy) > DEC_BND) {
-            dddx /= 8;
-            dddy /= 8;
-            ddx = ddx/4 - dddx;
-            ddy = ddy/4 - dddy;
-            dx = (dx - ddx) / 2;
-            dy = (dy - ddy) / 2;
-            count <<= 1;
-        }
-        // can only do this on even "count" values, because we must divide count by 2
-        while (count % 2 == 0 && Math.abs(dx) <= INC_BND && Math.abs(dy) <= INC_BND) {
-            dx = 2 * dx + ddx;
-            dy = 2 * dy + ddy;
-            ddx = 4 * (ddx + dddx);
-            ddy = 4 * (ddy + dddy);
-            dddx = 8 * dddx;
-            dddy = 8 * dddy;
-            count >>= 1;
+        while (count-- > 1) {
+            float x1 = x0 + dx;
+            dx += ddx;
+            float y1 = y0 + dy;
+            dy += ddy;
+            addLine(x0, y0, x1, y1);
+            x0 = x1;
+            y0 = y1;
         }
-        count--;
-        if (count > 0) {
-            curves[idx+X0] += dx;
-            dx += ddx;
-            ddx += dddx;
-            curves[idx+Y0] += dy;
-            dy += ddy;
-            ddy += dddy;
-        } else {
-            curves[idx+X0] = curves[idx+XL];
-            curves[idx+Y0] = curves[idx+YMAX];
-        }
-        curves[idx+COUNT] = count;
-        curves[idx+DDDX] = dddx;
-        curves[idx+DDDY] = dddy;
-        curves[idx+DDX] = ddx;
-        curves[idx+DDY] = ddy;
-        curves[idx+DX] = dx;
-        curves[idx+DY] = dy;
-        return count;
+        addLine(x0, y0, x2, y2);
     }
 
-
-    private void initLine(final int idx, float[] pts, int or) {
-        edges[idx+SLOPE] = (pts[2] - pts[0]) / (pts[3] - pts[1]);
-        edges[idx+CURX] = pts[0] + (edges[idx+CURY] - pts[1]) * edges[idx+SLOPE];
-    }
-
-    private void initQuad(final int idx, float[] points, int or) {
+    // x0, y0 and x3,y3 are the endpoints of the curve. We could compute these
+    // using c.xat(0),c.yat(0) and c.xat(1),c.yat(1), but this might introduce
+    // numerical errors, and our callers already have the exact values.
+    // Another alternative would be to pass all the control points, and call c.set
+    // here, but then too many numbers are passed around.
+    private void curveBreakIntoLinesAndAdd(float x0, float y0,
+                                           final Curve c,
+                                           final float x3, final float y3) {
         final int countlg = 3;
-        final int count = 1 << countlg;
+        int count = 1 << countlg;
 
         // the dx and dy refer to forward differencing variables, not the last
         // coefficients of the "points" polynomial
-        final float ddx, ddy, dx, dy;
-        c.set(points, 6);
-
-        ddx = c.dbx / (1 << (2 * countlg));
-        ddy = c.dby / (1 << (2 * countlg));
-        dx = c.bx / (1 << (2 * countlg)) + c.cx / (1 << countlg);
-        dy = c.by / (1 << (2 * countlg)) + c.cy / (1 << countlg);
-
-        quads[idx+DDX] = ddx;
-        quads[idx+DDY] = ddy;
-        quads[idx+DX] = dx;
-        quads[idx+DY] = dy;
-        quads[idx+COUNT] = count;
-        quads[idx+XL] = points[4];
-        quads[idx+X0] = points[0];
-        quads[idx+Y0] = points[1];
-        executeQuadAFDIteration(idx);
-        float x1 = quads[idx+X0], y1 = quads[idx+Y0];
-        quads[idx+CURSLOPE] = (x1 - points[0]) / (y1 - points[1]);
-        quads[idx+CURX] = points[0] + (quads[idx+CURY] - points[1])*quads[idx+CURSLOPE];
-    }
-
-    private void initCurve(final int idx, float[] points, int or) {
-        final int countlg = 3;
-        final int count = 1 << countlg;
-
-        // the dx and dy refer to forward differencing variables, not the last
-        // coefficients of the "points" polynomial
-        final float dddx, dddy, ddx, ddy, dx, dy;
-        c.set(points, 8);
+        float dddx, dddy, ddx, ddy, dx, dy;
         dddx = 2f * c.dax / (1 << (3 * countlg));
         dddy = 2f * c.day / (1 << (3 * countlg));
 
@@ -480,93 +219,100 @@
         dx = c.ax / (1 << (3 * countlg)) + c.bx / (1 << (2 * countlg)) + c.cx / (1 << countlg);
         dy = c.ay / (1 << (3 * countlg)) + c.by / (1 << (2 * countlg)) + c.cy / (1 << countlg);
 
-        curves[idx+DDDX] = dddx;
-        curves[idx+DDDY] = dddy;
-        curves[idx+DDX] = ddx;
-        curves[idx+DDY] = ddy;
-        curves[idx+DX] = dx;
-        curves[idx+DY] = dy;
-        curves[idx+COUNT] = count;
-        curves[idx+XL] = points[6];
-        curves[idx+X0] = points[0];
-        curves[idx+Y0] = points[1];
-        executeCurveAFDIteration(idx);
-        float x1 = curves[idx+X0], y1 = curves[idx+Y0];
-        curves[idx+CURSLOPE] = (x1 - points[0]) / (y1 - points[1]);
-        curves[idx+CURX] = points[0] + (curves[idx+CURY] - points[1])*curves[idx+CURSLOPE];
-    }
-
-    private void addPathSegment(float[] pts, final int type, final int or) {
-        int idx;
-        float[] addTo;
-        switch (type) {
-        case 4:
-            idx = numEdges * SIZEOF_EDGE;
-            addTo = edges = Helpers.widenArray(edges, numEdges*SIZEOF_EDGE, SIZEOF_EDGE);
-            numEdges++;
-            break;
-        case 6:
-            idx = numQuads * SIZEOF_QUAD;
-            addTo = quads = Helpers.widenArray(quads, numQuads*SIZEOF_QUAD, SIZEOF_QUAD);
-            numQuads++;
-            break;
-        case 8:
-            idx = numCurves * SIZEOF_CURVE;
-            addTo = curves = Helpers.widenArray(curves, numCurves*SIZEOF_CURVE, SIZEOF_CURVE);
-            numCurves++;
-            break;
-        default:
-            throw new InternalError();
-        }
-        // set the common fields, except CURX, for which we must know the kind
-        // of curve. NOTE: this must be done before the type specific fields
-        // are initialized, because those depend on the common ones.
-        addTo[idx+YMIN] = pts[1];
-        addTo[idx+YMAX] = pts[type-1];
-        addTo[idx+OR] = or;
-        addTo[idx+CURY] = (float)Math.ceil(pts[1]);
-        switch (type) {
-        case 4:
-            initLine(idx, pts, or);
-            break;
-        case 6:
-            initQuad(idx, pts, or);
-            break;
-        case 8:
-            initCurve(idx, pts, or);
-            break;
-        default:
-            throw new InternalError();
+        // we use x0, y0 to walk the line
+        float x1 = x0, y1 = y0;
+        while (count > 0) {
+            while (Math.abs(ddx) > DEC_BND || Math.abs(ddy) > DEC_BND) {
+                dddx /= 8;
+                dddy /= 8;
+                ddx = ddx/4 - dddx;
+                ddy = ddy/4 - dddy;
+                dx = (dx - ddx) / 2;
+                dy = (dy - ddy) / 2;
+                count <<= 1;
+            }
+            // can only do this on even "count" values, because we must divide count by 2
+            while (count % 2 == 0 && Math.abs(dx) <= INC_BND && Math.abs(dy) <= INC_BND) {
+                dx = 2 * dx + ddx;
+                dy = 2 * dy + ddy;
+                ddx = 4 * (ddx + dddx);
+                ddy = 4 * (ddy + dddy);
+                dddx = 8 * dddx;
+                dddy = 8 * dddy;
+                count >>= 1;
+            }
+            count--;
+            if (count > 0) {
+                x1 += dx;
+                dx += ddx;
+                ddx += dddx;
+                y1 += dy;
+                dy += ddy;
+                ddy += dddy;
+            } else {
+                x1 = x3;
+                y1 = y3;
+            }
+            addLine(x0, y0, x1, y1);
+            x0 = x1;
+            y0 = y1;
         }
     }
 
-    // precondition: the curve in pts must be monotonic and increasing in y.
-    private void somethingTo(float[] pts, final int type, final int or) {
-        // NOTE: it's very important that we check for or >= 0 below (as
-        // opposed to or == 1, or or > 0, or anything else). That's
-        // because if we check for or==1, when the curve being added
-        // is a horizontal line, or will be 0 so or==1 will be false and
-        // x0 and y0 will be updated to pts[0] and pts[1] instead of pts[type-2]
-        // and pts[type-1], which is the correct thing to do.
-        this.x0 = or >= 0 ? pts[type - 2] : pts[0];
-        this.y0 = or >= 0 ? pts[type - 1] : pts[1];
-
-        float minY = pts[1], maxY = pts[type - 1];
-        if (Math.ceil(minY) >= Math.ceil(maxY) ||
-            Math.ceil(minY) >= boundsMaxY || maxY < boundsMinY)
-        {
+    // Preconditions: y2 > y1 and the curve must cross some scanline
+    // i.e.: y1 <= y < y2 for some y such that boundsMinY <= y < boundsMaxY
+    private void addLine(float x1, float y1, float x2, float y2) {
+        float or = 1; // orientation of the line. 1 if y increases, 0 otherwise.
+        if (y2 < y1) {
+            or = y2; // no need to declare a temp variable. We have or.
+            y2 = y1;
+            y1 = or;
+            or = x2;
+            x2 = x1;
+            x1 = or;
+            or = 0;
+        }
+        final int firstCrossing = Math.max((int) Math.ceil(y1), boundsMinY);
+        final int lastCrossing = Math.min((int)Math.ceil(y2), boundsMaxY);
+        if (firstCrossing >= lastCrossing) {
             return;
         }
 
-        if (minY < edgeMinY) { edgeMinY = minY; }
-        if (maxY > edgeMaxY) { edgeMaxY = maxY; }
+        if (y1 < edgeMinY) { edgeMinY = y1; }
+        if (y2 > edgeMaxY) { edgeMaxY = y2; }
+
+        final float slope = (x2 - x1) / (y2 - y1);
+
+        if (slope > 0) { // <==> x1 < x2
+            if (x1 < edgeMinX) { edgeMinX = x1; }
+            if (x2 > edgeMaxX) { edgeMaxX = x2; }
+        } else {
+            if (x2 < edgeMinX) { edgeMinX = x2; }
+            if (x1 > edgeMaxX) { edgeMaxX = x1; }
+        }
 
-        int minXidx = (pts[0] < pts[type-2] ? 0 : type - 2);
-        float minX = pts[minXidx];
-        float maxX = pts[type - 2 - minXidx];
-        if (minX < edgeMinX) { edgeMinX = minX; }
-        if (maxX > edgeMaxX) { edgeMaxX = maxX; }
-        addPathSegment(pts, type, or);
+        final int ptr = numEdges * SIZEOF_EDGE;
+        edges = Helpers.widenArray(edges, ptr, SIZEOF_EDGE);
+        numEdges++;
+        edges[ptr+OR] = or;
+        edges[ptr+CURX] = x1 + (firstCrossing - y1) * slope;
+        edges[ptr+SLOPE] = slope;
+        edges[ptr+YMAX] = y2;
+        final int bucketIdx = firstCrossing - boundsMinY;
+        addEdgeToBucket(ptr, bucketIdx);
+        if (lastCrossing < boundsMaxY) {
+            edgeBucketCounts[lastCrossing - boundsMinY] |= 1;
+        }
+    }
+
+    // preconditions: should not be called before the last line has been added
+    // to the edge list (even though it will return a correct answer at that
+    // point in time, it's not meant to be used that way).
+    private int getFirstScanLineCrossing() {
+        return Math.max(boundsMinY, (int)Math.ceil(edgeMinY));
+    }
+    private int getScanLineCrossingEnd() {
+        return Math.min(boundsMaxY, (int)Math.ceil(edgeMaxY));
     }
 
 // END EDGE LIST
@@ -619,6 +365,10 @@
         this.boundsMinY = pix_boundsY * SUBPIXEL_POSITIONS_Y;
         this.boundsMaxX = (pix_boundsX + pix_boundsWidth) * SUBPIXEL_POSITIONS_X;
         this.boundsMaxY = (pix_boundsY + pix_boundsHeight) * SUBPIXEL_POSITIONS_Y;
+
+        edgeBuckets = new int[boundsMaxY - boundsMinY];
+        java.util.Arrays.fill(edgeBuckets, NULL);
+        edgeBucketCounts = new int[edgeBuckets.length];
     }
 
     private float tosubpixx(float pix_x) {
@@ -636,74 +386,34 @@
         this.x0 = tosubpixx(pix_x0);
     }
 
-    public void lineJoin() { /* do nothing */ }
-
-    private final float[][] pts = new float[2][8];
-    private final float[] ts = new float[4];
-
-    private static void invertPolyPoints(float[] pts, int off, int type) {
-        for (int i = off, j = off + type - 2; i < j; i += 2, j -= 2) {
-            float tmp = pts[i];
-            pts[i] = pts[j];
-            pts[j] = tmp;
-            tmp = pts[i+1];
-            pts[i+1] = pts[j+1];
-            pts[j+1] = tmp;
-        }
-    }
-
-    // return orientation before making the curve upright.
-    private static int makeMonotonicCurveUpright(float[] pts, int off, int type) {
-        float y0 = pts[off + 1];
-        float y1 = pts[off + type - 1];
-        if (y0 > y1) {
-            invertPolyPoints(pts, off, type);
-            return -1;
-        } else if (y0 < y1) {
-            return 1;
-        }
-        return 0;
-    }
-
     public void lineTo(float pix_x1, float pix_y1) {
-        pts[0][0] = x0; pts[0][1] = y0;
-        pts[0][2] = tosubpixx(pix_x1); pts[0][3] = tosubpixy(pix_y1);
-        int or = makeMonotonicCurveUpright(pts[0], 0, 4);
-        somethingTo(pts[0], 4, or);
+        float x1 = tosubpixx(pix_x1);
+        float y1 = tosubpixy(pix_y1);
+        addLine(x0, y0, x1, y1);
+        x0 = x1;
+        y0 = y1;
     }
 
     Curve c = new Curve();
-    private void curveOrQuadTo(int type) {
-        c.set(pts[0], type);
-        int numTs = c.dxRoots(ts, 0);
-        numTs += c.dyRoots(ts, numTs);
-        numTs = Helpers.filterOutNotInAB(ts, 0, numTs, 0, 1);
-        Helpers.isort(ts, 0, numTs);
-
-        Iterator<float[]> it = Curve.breakPtsAtTs(pts, type, ts, numTs);
-        while(it.hasNext()) {
-            float[] curCurve = it.next();
-            int or = makeMonotonicCurveUpright(curCurve, 0, type);
-            somethingTo(curCurve, type, or);
-        }
-    }
-
     @Override public void curveTo(float x1, float y1,
                                   float x2, float y2,
                                   float x3, float y3)
     {
-        pts[0][0] = x0; pts[0][1] = y0;
-        pts[0][2] = tosubpixx(x1); pts[0][3] = tosubpixy(y1);
-        pts[0][4] = tosubpixx(x2); pts[0][5] = tosubpixy(y2);
-        pts[0][6] = tosubpixx(x3); pts[0][7] = tosubpixy(y3);
-        curveOrQuadTo(8);
+        final float xe = tosubpixx(x3);
+        final float ye = tosubpixy(y3);
+        c.set(x0, y0, tosubpixx(x1), tosubpixy(y1), tosubpixx(x2), tosubpixy(y2), xe, ye);
+        curveBreakIntoLinesAndAdd(x0, y0, c, xe, ye);
+        x0 = xe;
+        y0 = ye;
     }
 
     @Override public void quadTo(float x1, float y1, float x2, float y2) {
-        pts[0][0] = x0; pts[0][1] = y0;
-        pts[0][2] = tosubpixx(x1); pts[0][3] = tosubpixy(y1);
-        pts[0][4] = tosubpixx(x2); pts[0][5] = tosubpixy(y2);
-        curveOrQuadTo(6);
+        final float xe = tosubpixx(x2);
+        final float ye = tosubpixy(y2);
+        c.set(x0, y0, tosubpixx(x1), tosubpixy(y1), xe, ye);
+        quadBreakIntoLinesAndAdd(x0, y0, c, xe, ye);
+        x0 = xe;
+        y0 = ye;
     }
 
     public void closePath() {
@@ -728,9 +438,9 @@
         // 0x1 if EVEN_ODD, all bits if NON_ZERO
         int mask = (windingRule == WIND_EVEN_ODD) ? 0x1 : ~0x0;
 
-        // add 1 to better deal with the last pixel in a pixel row.
-        int width = pix_bboxx1 - pix_bboxx0 + 1;
-        int[] alpha = new int[width+1];
+        // add 2 to better deal with the last pixel in a pixel row.
+        int width = pix_bboxx1 - pix_bboxx0;
+        int[] alpha = new int[width+2];
 
         int bboxx0 = pix_bboxx0 << SUBPIXEL_LG_POSITIONS_X;
         int bboxx1 = pix_bboxx1 << SUBPIXEL_LG_POSITIONS_X;
@@ -766,7 +476,8 @@
             for (int i = 0; i < numCrossings; i++) {
                 int curxo = crossings[i];
                 int curx = curxo >> 1;
-                int crorientation = ((curxo & 0x1) == 0x1) ? 1 : -1;
+                // to turn {0, 1} into {-1, 1}, multiply by 2 and subtract 1.
+                int crorientation = ((curxo & 0x1) << 1) -1;
                 if ((sum & mask) != 0) {
                     int x0 = Math.max(prev, bboxx0);
                     int x1 = Math.min(curx, bboxx1);
@@ -811,26 +522,26 @@
     }
 
     public void endRendering() {
-        final int bminx = boundsMinX >> SUBPIXEL_LG_POSITIONS_X;
-        final int bmaxx = boundsMaxX >> SUBPIXEL_LG_POSITIONS_X;
-        final int bminy = boundsMinY >> SUBPIXEL_LG_POSITIONS_Y;
-        final int bmaxy = boundsMaxY >> SUBPIXEL_LG_POSITIONS_Y;
-        final int eminx = ((int)Math.floor(edgeMinX)) >> SUBPIXEL_LG_POSITIONS_X;
-        final int emaxx = ((int)Math.ceil(edgeMaxX)) >> SUBPIXEL_LG_POSITIONS_X;
-        final int eminy = ((int)Math.floor(edgeMinY)) >> SUBPIXEL_LG_POSITIONS_Y;
-        final int emaxy = ((int)Math.ceil(edgeMaxY)) >> SUBPIXEL_LG_POSITIONS_Y;
+        int spminX = Math.max((int)Math.ceil(edgeMinX), boundsMinX);
+        int spmaxX = Math.min((int)Math.ceil(edgeMaxX), boundsMaxX);
+        int spminY = Math.max((int)Math.ceil(edgeMinY), boundsMinY);
+        int spmaxY = Math.min((int)Math.ceil(edgeMaxY), boundsMaxY);
 
-        final int minX = Math.max(bminx, eminx);
-        final int maxX = Math.min(bmaxx, emaxx);
-        final int minY = Math.max(bminy, eminy);
-        final int maxY = Math.min(bmaxy, emaxy);
-        if (minX > maxX || minY > maxY) {
-            this.cache = new PiscesCache(bminx, bminy, bmaxx, bmaxy);
+        int pminX = spminX >> SUBPIXEL_LG_POSITIONS_X;
+        int pmaxX = (spmaxX + SUBPIXEL_MASK_X) >> SUBPIXEL_LG_POSITIONS_X;
+        int pminY = spminY >> SUBPIXEL_LG_POSITIONS_Y;
+        int pmaxY = (spmaxY + SUBPIXEL_MASK_Y) >> SUBPIXEL_LG_POSITIONS_Y;
+
+        if (pminX > pmaxX || pminY > pmaxY) {
+            this.cache = new PiscesCache(boundsMinX >> SUBPIXEL_LG_POSITIONS_X,
+                                         boundsMinY >> SUBPIXEL_LG_POSITIONS_Y,
+                                         boundsMaxX >> SUBPIXEL_LG_POSITIONS_X,
+                                         boundsMaxY >> SUBPIXEL_LG_POSITIONS_Y);
             return;
         }
 
-        this.cache = new PiscesCache(minX, minY, maxX, maxY);
-        _endRendering(minX, minY, maxX, maxY);
+        this.cache = new PiscesCache(pminX, pminY, pmaxX, pmaxY);
+        _endRendering(pminX, pminY, pmaxX, pmaxY);
     }
 
     public PiscesCache getCache() {
--- a/jdk/src/share/classes/sun/java2d/pisces/Stroker.java	Thu Feb 03 19:15:30 2011 -0800
+++ b/jdk/src/share/classes/sun/java2d/pisces/Stroker.java	Tue Feb 08 09:22:49 2011 -0500
@@ -33,7 +33,7 @@
 // TODO: some of the arithmetic here is too verbose and prone to hard to
 // debug typos. We should consider making a small Point/Vector class that
 // has methods like plus(Point), minus(Point), dot(Point), cross(Point)and such
-public class Stroker implements PathConsumer2D {
+final class Stroker implements PathConsumer2D {
 
     private static final int MOVE_TO = 0;
     private static final int DRAWING_OP_TO = 1; // ie. curve, line, or quad
@@ -130,7 +130,7 @@
     private static void computeOffset(final float lx, final float ly,
                                       final float w, final float[] m)
     {
-        final float len = (float)Math.hypot(lx, ly);
+        final float len = (float)Math.sqrt(lx*lx + ly*ly);
         if (len == 0) {
             m[0] = m[1] = 0;
         } else {
@@ -758,7 +758,7 @@
     // This is where the curve to be processed is put. We give it
     // enough room to store 2 curves: one for the current subdivision, the
     // other for the rest of the curve.
-    private float[][] middle = new float[2][8];
+    private float[] middle = new float[2*8];
     private float[] lp = new float[8];
     private float[] rp = new float[8];
     private static final int MAX_N_CURVES = 11;
@@ -766,55 +766,55 @@
 
     private void somethingTo(final int type) {
         // need these so we can update the state at the end of this method
-        final float xf = middle[0][type-2], yf = middle[0][type-1];
-        float dxs = middle[0][2] - middle[0][0];
-        float dys = middle[0][3] - middle[0][1];
-        float dxf = middle[0][type - 2] - middle[0][type - 4];
-        float dyf = middle[0][type - 1] - middle[0][type - 3];
+        final float xf = middle[type-2], yf = middle[type-1];
+        float dxs = middle[2] - middle[0];
+        float dys = middle[3] - middle[1];
+        float dxf = middle[type - 2] - middle[type - 4];
+        float dyf = middle[type - 1] - middle[type - 3];
         switch(type) {
         case 6:
             if ((dxs == 0f && dys == 0f) ||
                 (dxf == 0f && dyf == 0f)) {
-               dxs = dxf = middle[0][4] - middle[0][0];
-               dys = dyf = middle[0][5] - middle[0][1];
+               dxs = dxf = middle[4] - middle[0];
+               dys = dyf = middle[5] - middle[1];
             }
             break;
         case 8:
             boolean p1eqp2 = (dxs == 0f && dys == 0f);
             boolean p3eqp4 = (dxf == 0f && dyf == 0f);
             if (p1eqp2) {
-                dxs = middle[0][4] - middle[0][0];
-                dys = middle[0][5] - middle[0][1];
+                dxs = middle[4] - middle[0];
+                dys = middle[5] - middle[1];
                 if (dxs == 0f && dys == 0f) {
-                    dxs = middle[0][6] - middle[0][0];
-                    dys = middle[0][7] - middle[0][1];
+                    dxs = middle[6] - middle[0];
+                    dys = middle[7] - middle[1];
                 }
             }
             if (p3eqp4) {
-                dxf = middle[0][6] - middle[0][2];
-                dyf = middle[0][7] - middle[0][3];
+                dxf = middle[6] - middle[2];
+                dyf = middle[7] - middle[3];
                 if (dxf == 0f && dyf == 0f) {
-                    dxf = middle[0][6] - middle[0][0];
-                    dyf = middle[0][7] - middle[0][1];
+                    dxf = middle[6] - middle[0];
+                    dyf = middle[7] - middle[1];
                 }
             }
         }
         if (dxs == 0f && dys == 0f) {
             // this happens iff the "curve" is just a point
-            lineTo(middle[0][0], middle[0][1]);
+            lineTo(middle[0], middle[1]);
             return;
         }
         // if these vectors are too small, normalize them, to avoid future
         // precision problems.
         if (Math.abs(dxs) < 0.1f && Math.abs(dys) < 0.1f) {
-            double len = Math.hypot(dxs, dys);
-            dxs = (float)(dxs / len);
-            dys = (float)(dys / len);
+            float len = (float)Math.sqrt(dxs*dxs + dys*dys);
+            dxs /= len;
+            dys /= len;
         }
         if (Math.abs(dxf) < 0.1f && Math.abs(dyf) < 0.1f) {
-            double len = Math.hypot(dxf, dyf);
-            dxf = (float)(dxf / len);
-            dyf = (float)(dyf / len);
+            float len = (float)Math.sqrt(dxf*dxf + dyf*dyf);
+            dxf /= len;
+            dyf /= len;
         }
 
         computeOffset(dxs, dys, lineWidth2, offset[0]);
@@ -822,20 +822,20 @@
         final float my = offset[0][1];
         drawJoin(cdx, cdy, cx0, cy0, dxs, dys, cmx, cmy, mx, my);
 
-        int nSplits = findSubdivPoints(middle[0], subdivTs, type,lineWidth2);
+        int nSplits = findSubdivPoints(middle, subdivTs, type, lineWidth2);
 
         int kind = 0;
-        Iterator<float[]> it = Curve.breakPtsAtTs(middle, type, subdivTs, nSplits);
+        Iterator<Integer> it = Curve.breakPtsAtTs(middle, type, subdivTs, nSplits);
         while(it.hasNext()) {
-            float[] curCurve = it.next();
+            int curCurveOff = it.next();
 
             kind = 0;
             switch (type) {
             case 8:
-                kind = computeOffsetCubic(curCurve, 0, lp, rp);
+                kind = computeOffsetCubic(middle, curCurveOff, lp, rp);
                 break;
             case 6:
-                kind = computeOffsetQuad(curCurve, 0, lp, rp);
+                kind = computeOffsetQuad(middle, curCurveOff, lp, rp);
                 break;
             }
             if (kind != 0) {
@@ -871,8 +871,7 @@
     // to get good offset curves a distance of w away from the middle curve.
     // Stores the points in ts, and returns how many of them there were.
     private static Curve c = new Curve();
-    private static int findSubdivPoints(float[] pts, float[] ts,
-                                        final int type, final float w)
+    private static int findSubdivPoints(float[] pts, float[] ts, final int type, final float w)
     {
         final float x12 = pts[2] - pts[0];
         final float y12 = pts[3] - pts[1];
@@ -919,6 +918,7 @@
         // now we must subdivide at points where one of the offset curves will have
         // a cusp. This happens at ts where the radius of curvature is equal to w.
         ret += c.rootsOfROCMinusW(ts, ret, w, 0.0001f);
+
         ret = Helpers.filterOutNotInAB(ts, 0, ret, 0.0001f, 0.9999f);
         Helpers.isort(ts, 0, ret);
         return ret;
@@ -928,10 +928,10 @@
                                   float x2, float y2,
                                   float x3, float y3)
     {
-        middle[0][0] = cx0; middle[0][1] = cy0;
-        middle[0][2] = x1; middle[0][3] = y1;
-        middle[0][4] = x2; middle[0][5] = y2;
-        middle[0][6] = x3; middle[0][7] = y3;
+        middle[0] = cx0; middle[1] = cy0;
+        middle[2] = x1;  middle[3] = y1;
+        middle[4] = x2;  middle[5] = y2;
+        middle[6] = x3;  middle[7] = y3;
         somethingTo(8);
     }
 
@@ -940,9 +940,9 @@
     }
 
     @Override public void quadTo(float x1, float y1, float x2, float y2) {
-        middle[0][0] = cx0; middle[0][1] = cy0;
-        middle[0][2] = x1; middle[0][3] = y1;
-        middle[0][4] = x2; middle[0][5] = y2;
+        middle[0] = cx0; middle[1] = cy0;
+        middle[2] = x1;  middle[3] = y1;
+        middle[4] = x2;  middle[5] = y2;
         somethingTo(6);
     }
 
--- a/jdk/src/share/classes/sun/java2d/pisces/TransformingPathConsumer2D.java	Thu Feb 03 19:15:30 2011 -0800
+++ b/jdk/src/share/classes/sun/java2d/pisces/TransformingPathConsumer2D.java	Tue Feb 08 09:22:49 2011 -0500
@@ -28,7 +28,7 @@
 import sun.awt.geom.PathConsumer2D;
 import java.awt.geom.AffineTransform;
 
-public class TransformingPathConsumer2D {
+final class TransformingPathConsumer2D {
     public static PathConsumer2D
         transformConsumer(PathConsumer2D out,
                           AffineTransform at)
@@ -50,17 +50,72 @@
                     return new TranslateFilter(out, Mxt, Myt);
                 }
             } else {
-                return new ScaleFilter(out, Mxx, Myy, Mxt, Myt);
+                if (Mxt == 0f && Myt == 0f) {
+                    return new DeltaScaleFilter(out, Mxx, Myy);
+                } else {
+                    return new ScaleFilter(out, Mxx, Myy, Mxt, Myt);
+                }
             }
+        } else if (Mxt == 0f && Myt == 0f) {
+            return new DeltaTransformFilter(out, Mxx, Mxy, Myx, Myy);
         } else {
             return new TransformFilter(out, Mxx, Mxy, Mxt, Myx, Myy, Myt);
         }
     }
 
-    static class TranslateFilter implements PathConsumer2D {
-        PathConsumer2D out;
-        float tx;
-        float ty;
+    public static PathConsumer2D
+        deltaTransformConsumer(PathConsumer2D out,
+                               AffineTransform at)
+    {
+        if (at == null) {
+            return out;
+        }
+        float Mxx = (float) at.getScaleX();
+        float Mxy = (float) at.getShearX();
+        float Myx = (float) at.getShearY();
+        float Myy = (float) at.getScaleY();
+        if (Mxy == 0f && Myx == 0f) {
+            if (Mxx == 1f && Myy == 1f) {
+                return out;
+            } else {
+                return new DeltaScaleFilter(out, Mxx, Myy);
+            }
+        } else {
+            return new DeltaTransformFilter(out, Mxx, Mxy, Myx, Myy);
+        }
+    }
+
+    public static PathConsumer2D
+        inverseDeltaTransformConsumer(PathConsumer2D out,
+                                      AffineTransform at)
+    {
+        if (at == null) {
+            return out;
+        }
+        float Mxx = (float) at.getScaleX();
+        float Mxy = (float) at.getShearX();
+        float Myx = (float) at.getShearY();
+        float Myy = (float) at.getScaleY();
+        if (Mxy == 0f && Myx == 0f) {
+            if (Mxx == 1f && Myy == 1f) {
+                return out;
+            } else {
+                return new DeltaScaleFilter(out, 1.0f/Mxx, 1.0f/Myy);
+            }
+        } else {
+            float det = Mxx * Myy - Mxy * Myx;
+            return new DeltaTransformFilter(out,
+                                            Myy / det,
+                                            -Mxy / det,
+                                            -Myx / det,
+                                            Mxx / det);
+        }
+    }
+
+    static final class TranslateFilter implements PathConsumer2D {
+        private final PathConsumer2D out;
+        private final float tx;
+        private final float ty;
 
         TranslateFilter(PathConsumer2D out,
                         float tx, float ty)
@@ -107,12 +162,12 @@
         }
     }
 
-    static class ScaleFilter implements PathConsumer2D {
-        PathConsumer2D out;
-        float sx;
-        float sy;
-        float tx;
-        float ty;
+    static final class ScaleFilter implements PathConsumer2D {
+        private final PathConsumer2D out;
+        private final float sx;
+        private final float sy;
+        private final float tx;
+        private final float ty;
 
         ScaleFilter(PathConsumer2D out,
                     float sx, float sy, float tx, float ty)
@@ -161,14 +216,14 @@
         }
     }
 
-    static class TransformFilter implements PathConsumer2D {
-        PathConsumer2D out;
-        float Mxx;
-        float Mxy;
-        float Mxt;
-        float Myx;
-        float Myy;
-        float Myt;
+    static final class TransformFilter implements PathConsumer2D {
+        private final PathConsumer2D out;
+        private final float Mxx;
+        private final float Mxy;
+        private final float Mxt;
+        private final float Myx;
+        private final float Myy;
+        private final float Myt;
 
         TransformFilter(PathConsumer2D out,
                         float Mxx, float Mxy, float Mxt,
@@ -226,4 +281,113 @@
             return 0;
         }
     }
+
+    static final class DeltaScaleFilter implements PathConsumer2D {
+        private final float sx, sy;
+        private final PathConsumer2D out;
+
+        public DeltaScaleFilter(PathConsumer2D out, float Mxx, float Myy) {
+            sx = Mxx;
+            sy = Myy;
+            this.out = out;
+        }
+
+        public void moveTo(float x0, float y0) {
+            out.moveTo(x0 * sx, y0 * sy);
+        }
+
+        public void lineTo(float x1, float y1) {
+            out.lineTo(x1 * sx, y1 * sy);
+        }
+
+        public void quadTo(float x1, float y1,
+                           float x2, float y2)
+        {
+            out.quadTo(x1 * sx, y1 * sy,
+                       x2 * sx, y2 * sy);
+        }
+
+        public void curveTo(float x1, float y1,
+                            float x2, float y2,
+                            float x3, float y3)
+        {
+            out.curveTo(x1 * sx, y1 * sy,
+                        x2 * sx, y2 * sy,
+                        x3 * sx, y3 * sy);
+        }
+
+        public void closePath() {
+            out.closePath();
+        }
+
+        public void pathDone() {
+            out.pathDone();
+        }
+
+        public long getNativeConsumer() {
+            return 0;
+        }
+    }
+
+    static final class DeltaTransformFilter implements PathConsumer2D {
+        private PathConsumer2D out;
+        private final float Mxx;
+        private final float Mxy;
+        private final float Myx;
+        private final float Myy;
+
+        DeltaTransformFilter(PathConsumer2D out,
+                             float Mxx, float Mxy,
+                             float Myx, float Myy)
+        {
+            this.out = out;
+            this.Mxx = Mxx;
+            this.Mxy = Mxy;
+            this.Myx = Myx;
+            this.Myy = Myy;
+        }
+
+        public void moveTo(float x0, float y0) {
+            out.moveTo(x0 * Mxx + y0 * Mxy,
+                       x0 * Myx + y0 * Myy);
+        }
+
+        public void lineTo(float x1, float y1) {
+            out.lineTo(x1 * Mxx + y1 * Mxy,
+                       x1 * Myx + y1 * Myy);
+        }
+
+        public void quadTo(float x1, float y1,
+                           float x2, float y2)
+        {
+            out.quadTo(x1 * Mxx + y1 * Mxy,
+                       x1 * Myx + y1 * Myy,
+                       x2 * Mxx + y2 * Mxy,
+                       x2 * Myx + y2 * Myy);
+        }
+
+        public void curveTo(float x1, float y1,
+                            float x2, float y2,
+                            float x3, float y3)
+        {
+            out.curveTo(x1 * Mxx + y1 * Mxy,
+                        x1 * Myx + y1 * Myy,
+                        x2 * Mxx + y2 * Mxy,
+                        x2 * Myx + y2 * Myy,
+                        x3 * Mxx + y3 * Mxy,
+                        x3 * Myx + y3 * Myy);
+        }
+
+        public void closePath() {
+            out.closePath();
+        }
+
+        public void pathDone() {
+            out.pathDone();
+        }
+
+        public long getNativeConsumer() {
+            return 0;
+        }
+    }
 }