8006572: DoubleStream.sum() & DoubleSummaryStats implementations that reduce numerical errors
author darcy
Sun, 01 Dec 2013 23:35:28 -0800
changeset 21946 b4cb3bbeb52a
parent 21945 8dd75cf24379
child 21947 2818a9404890
8006572: DoubleStream.sum() & DoubleSummaryStats implementations that reduce numerical errors Reviewed-by: psandoz, mduigou
jdk/src/share/classes/java/util/DoubleSummaryStatistics.java
jdk/src/share/classes/java/util/stream/Collectors.java
jdk/src/share/classes/java/util/stream/DoublePipeline.java
jdk/test/java/util/stream/TestDoubleSumAverage.java
--- a/jdk/src/share/classes/java/util/DoubleSummaryStatistics.java	Fri Nov 29 09:29:25 2013 +0000
+++ b/jdk/src/share/classes/java/util/DoubleSummaryStatistics.java	Sun Dec 01 23:35:28 2013 -0800
@@ -63,6 +63,7 @@
 public class DoubleSummaryStatistics implements DoubleConsumer {
     private long count;
     private double sum;
+    private double sumCompensation; // Low order bits of sum
     private double min = Double.POSITIVE_INFINITY;
     private double max = Double.NEGATIVE_INFINITY;
 
@@ -81,7 +82,7 @@
     @Override
     public void accept(double value) {
         ++count;
-        sum += value;
+        sumWithCompensation(value);
         min = Math.min(min, value);
         max = Math.max(max, value);
     }
@@ -95,12 +96,24 @@
      */
     public void combine(DoubleSummaryStatistics other) {
         count += other.count;
-        sum += other.sum;
+        sumWithCompensation(other.sum);
+        sumWithCompensation(-other.sumCompensation); // Term holds the negated low-order bits
         min = Math.min(min, other.min);
         max = Math.max(max, other.max);
     }
 
     /**
+     * Incorporate a new double value using Kahan summation /
+     * compensated summation.
+     */
+    private void sumWithCompensation(double value) {
+        double tmp = value - sumCompensation;
+        double velvel = sum + tmp; // Little wolf of rounding error
+        sumCompensation = (velvel - sum) - tmp;
+        sum = velvel;
+    }
+
+    /**
      * Return the count of values recorded.
      *
      * @return the count of values
@@ -133,7 +146,8 @@
      * @return the sum of values, or zero if none
      */
     public final double getSum() {
-        return sum;
+        // sumCompensation holds the negated rounding error, so subtract it
+        return sum - sumCompensation;
     }
 
     /**
--- a/jdk/src/share/classes/java/util/stream/Collectors.java	Fri Nov 29 09:29:25 2013 +0000
+++ b/jdk/src/share/classes/java/util/stream/Collectors.java	Sun Dec 01 23:35:28 2013 -0800
@@ -505,14 +505,43 @@
      */
     public static <T> Collector<T, ?, Double>
     summingDouble(ToDoubleFunction<? super T> mapper) {
+        /*
+         * In the arrays allocated for the collect operation, index 0
+         * holds the high-order bits of the running sum and index 1
+         * holds the negated low-order bits (the Kahan compensation
+         * term), so it is subtracted when partial sums are merged.
+         */
         return new CollectorImpl<>(
-                () -> new double[1],
-                (a, t) -> { a[0] += mapper.applyAsDouble(t); },
-                (a, b) -> { a[0] += b[0]; return a; },
-                a -> a[0], CH_NOID);
+                () -> new double[2],
+                (a, t) -> { sumWithCompensation(a, mapper.applyAsDouble(t)); },
+                (a, b) -> { sumWithCompensation(a, b[0]); return sumWithCompensation(a, -b[1]); },
+                // The compensation term is the negated rounding error: subtract it
+                a -> a[0] - a[1],
+                CH_NOID);
     }
 
     /**
+     * Incorporate a new double value using Kahan summation /
+     * compensated summation.
+     *
+     * High-order bits of the sum are in intermediateSum[0], low-order
+     * bits of the sum are in intermediateSum[1], any additional
+     * elements are application-specific.
+     *
+     * @param intermediateSum the high-order and low-order words of the intermediate sum
+     * @param value the new value to be included in the running sum
+     */
+    static double[] sumWithCompensation(double[] intermediateSum, double value) {
+        double tmp = value - intermediateSum[1];
+        double sum = intermediateSum[0];
+        double velvel = sum + tmp; // Little wolf of rounding error
+        intermediateSum[1] = (velvel - sum) - tmp;
+        intermediateSum[0] = velvel;
+        return intermediateSum;
+    }
+
+
+    /**
      * Returns a {@code Collector} that produces the arithmetic mean of an integer-valued
      * function applied to the input elements.  If no elements are present,
      * the result is 0.
@@ -560,17 +589,31 @@
      * value is a {@code NaN} or the sum is at any point a {@code NaN} then the
      * average will be {@code NaN}.
      *
+     * @implNote The {@code double} format can represent all
+     * consecutive integers in the range -2<sup>53</sup> to
+     * 2<sup>53</sup>. If the pipeline has more than 2<sup>53</sup>
+     * values, the divisor in the average computation will saturate at
+     * 2<sup>53</sup>, leading to additional numerical errors.
+     *
      * @param <T> the type of the input elements
      * @param mapper a function extracting the property to be summed
      * @return a {@code Collector} that produces the sum of a derived property
      */
     public static <T> Collector<T, ?, Double>
     averagingDouble(ToDoubleFunction<? super T> mapper) {
+        /*
+         * In the arrays allocated for the collect operation, index 0
+         * holds the high-order bits of the running sum, index 1 holds
+         * the negated low-order bits (the Kahan compensation term,
+         * subtracted on merge), and index 2 holds the number of values.
+         */
         return new CollectorImpl<>(
-                () -> new double[2],
-                (a, t) -> { a[0] += mapper.applyAsDouble(t); a[1]++; },
-                (a, b) -> { a[0] += b[0]; a[1] += b[1]; return a; },
-                a -> (a[1] == 0) ? 0.0d : a[0] / a[1], CH_NOID);
+                () -> new double[3],
+                (a, t) -> { sumWithCompensation(a, mapper.applyAsDouble(t)); a[2]++; },
+                (a, b) -> { sumWithCompensation(a, b[0]); sumWithCompensation(a, -b[1]); a[2] += b[2]; return a; },
+                // The compensation term is the negated rounding error: subtract it before dividing
+                a -> (a[2] == 0) ? 0.0d : ((a[0] - a[1]) / a[2]),
+                CH_NOID);
     }
 
     /**
--- a/jdk/src/share/classes/java/util/stream/DoublePipeline.java	Fri Nov 29 09:29:25 2013 +0000
+++ b/jdk/src/share/classes/java/util/stream/DoublePipeline.java	Sun Dec 01 23:35:28 2013 -0800
@@ -377,8 +377,23 @@
 
     @Override
     public final double sum() {
-        // TODO: better algorithm to compensate for errors
-        return reduce(0.0, Double::sum);
+        /*
+         * In the arrays allocated for the collect operation, index 0
+         * holds the high-order bits of the running sum and index 1
+         * holds the negated low-order bits (the Kahan compensation
+         * term), so it is subtracted when partial sums are merged.
+         */
+        double[] summation = collect(() -> new double[2],
+                               (ll, d) -> {
+                                   Collectors.sumWithCompensation(ll, d);
+                               },
+                               (ll, rr) -> {
+                                   Collectors.sumWithCompensation(ll, rr[0]);
+                                   Collectors.sumWithCompensation(ll, -rr[1]);
+                               });
+
+        // The compensation term is the negated rounding error: subtract it
+        return summation[0] - summation[1];
     }
 
     @Override
@@ -391,20 +406,37 @@
         return reduce(Math::max);
     }
 
+    /**
+     * {@inheritDoc}
+     *
+     * @implNote The {@code double} format can represent all
+     * consecutive integers in the range -2<sup>53</sup> to
+     * 2<sup>53</sup>. If the pipeline has more than 2<sup>53</sup>
+     * values, the divisor in the average computation will saturate at
+     * 2<sup>53</sup>, leading to additional numerical errors.
+     */
     @Override
     public final OptionalDouble average() {
-        double[] avg = collect(() -> new double[2],
-                               (ll, i) -> {
-                                   ll[0]++;
-                                   ll[1] += i;
+        /*
+         * In the arrays allocated for the collect operation, index 0
+         * holds the high-order bits of the running sum, index 1 holds
+         * the negated low-order bits (the Kahan compensation term,
+         * subtracted on merge), and index 2 holds the number of values.
+         */
+        double[] avg = collect(() -> new double[3],
+                               (ll, d) -> {
+                                   ll[2]++;
+                                   Collectors.sumWithCompensation(ll, d);
                                 },
                                 (ll, rr) -> {
-                                   ll[0] += rr[0];
-                                   ll[1] += rr[1];
+                                   Collectors.sumWithCompensation(ll, rr[0]);
+                                   Collectors.sumWithCompensation(ll, -rr[1]);
+                                   ll[2] += rr[2];
                                 });
-        return avg[0] > 0
-               ? OptionalDouble.of(avg[1] / avg[0])
-               : OptionalDouble.empty();
+        return avg[2] > 0
+            // The compensation term is the negated rounding error: subtract it before dividing
+            ? OptionalDouble.of((avg[0] - avg[1]) / avg[2])
+            : OptionalDouble.empty();
     }
 
     @Override
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/util/stream/TestDoubleSumAverage.java	Sun Dec 01 23:35:28 2013 -0800
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.util.*;
+import java.util.function.*;
+import java.util.stream.*;
+
+/*
+ * @test
+ * @bug 8006572
+ * @summary Test for use of non-naive summation in stream-related sum and average operations.
+ */
+public class TestDoubleSumAverage {
+    public static void main(String... args) {
+        int failures = 0;
+
+        failures += testForCompenstation();
+        failures += testZeroAverageOfNonEmptyStream();
+
+        if (failures > 0) {
+            throw new RuntimeException("Found " + failures + " numerical failure(s).");
+        }
+    }
+
+    /**
+     * Compute the sum and average of a sequence of double values in
+     * various ways and report an error if naive summation is used.
+     */
+    private static int testForCompenstation() {
+        int failures = 0;
+
+        /*
+         * The exact sum of the test stream is 1 + 1e6*(ulp(1.0)/2) but
+         * a naive summation algorithm will return 1.0 since (1.0 +
+         * ulp(1.0)/2) will round to 1.0 again.
+         */
+        double base = 1.0;
+        double increment = Math.ulp(base)/2.0;
+        int count = 1_000_001;
+
+        double expectedSum = base + (increment * (count - 1));
+        double expectedAvg = expectedSum / count;
+
+        // Factory for a double stream of [base, increment, ..., increment] limited to a size of count
+        Supplier<DoubleStream> ds = () -> DoubleStream.iterate(base, e -> increment).limit(count);
+
+        DoubleSummaryStatistics stats = ds.get().collect(DoubleSummaryStatistics::new,
+                                                         DoubleSummaryStatistics::accept,
+                                                         DoubleSummaryStatistics::combine);
+
+        failures += compareUlpDifference(expectedSum, stats.getSum(), 3);
+        failures += compareUlpDifference(expectedAvg, stats.getAverage(), 3);
+
+        failures += compareUlpDifference(expectedSum,
+                                         ds.get().sum(), 3);
+        failures += compareUlpDifference(expectedAvg,
+                                         ds.get().average().getAsDouble(), 3);
+
+        failures += compareUlpDifference(expectedSum,
+                                         ds.get().boxed().collect(Collectors.summingDouble(d -> d)), 3);
+        failures += compareUlpDifference(expectedAvg,
+                                         ds.get().boxed().collect(Collectors.averagingDouble(d -> d)),3);
+        return failures;
+    }
+
+    /**
+     * Test to verify that the average of a non-empty stream of zeros is present (0.0) rather than empty.
+     */
+    private static int testZeroAverageOfNonEmptyStream() {
+        Supplier<DoubleStream> ds = () -> DoubleStream.iterate(0.0, e -> 0.0).limit(10);
+
+        return  compareUlpDifference(0.0, ds.get().average().getAsDouble(), 0);
+    }
+
+    /**
+     * Compute the ulp difference of two double values and compare against an error threshold.
+     */
+    private static int compareUlpDifference(double expected, double computed, double threshold) {
+        double ulpDifference = Math.abs(expected - computed) / Math.ulp(expected);
+
+        if (ulpDifference > threshold) {
+            System.err.printf("Numerical summation error too large, %g ulps rather than %g.%n",
+                              ulpDifference, threshold);
+            return 1;
+        } else
+            return 0;
+    }
+}