jdk/src/java.base/share/classes/java/util/stream/package-info.java
author chegar
Sun, 17 Aug 2014 15:54:13 +0100
changeset 25859 3317bb8137f4
parent 25224 jdk/src/share/classes/java/util/stream/package-info.java@7c7e029826e1
child 32017 f58d32041dd8
permissions -rw-r--r--
8054834: Modular Source Code Reviewed-by: alanb, chegar, ihse, mduigou Contributed-by: alan.bateman@oracle.com, alex.buckley@oracle.com, chris.hegarty@oracle.com, erik.joelsson@oracle.com, jonathan.gibbons@oracle.com, karen.kinnear@oracle.com, magnus.ihse.bursie@oracle.com, mandy.chung@oracle.com, mark.reinhold@oracle.com, paul.sandoz@oracle.com
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
     1
/*
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
     2
 * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
     4
 *
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    10
 *
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    15
 * accompanied this code).
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    16
 *
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    20
 *
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    23
 * questions.
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    24
 */
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    25
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    26
/**
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    27
 * Classes to support functional-style operations on streams of elements, such
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    28
 * as map-reduce transformations on collections.  For example:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    29
 *
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    30
 * <pre>{@code
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    31
 *     int sum = widgets.stream()
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    32
 *                      .filter(b -> b.getColor() == RED)
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    33
 *                      .mapToInt(b -> b.getWeight())
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    34
 *                      .sum();
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    35
 * }</pre>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    36
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    37
 * <p>Here we use {@code widgets}, a {@code Collection<Widget>},
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    38
 * as a source for a stream, and then perform a filter-map-reduce on the stream
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    39
 * to obtain the sum of the weights of the red widgets.  (Summation is an
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    40
 * example of a <a href="package-summary.html#Reduction">reduction</a>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    41
 * operation.)
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    42
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    43
 * <p>The key abstraction introduced in this package is <em>stream</em>.  The
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    44
 * classes {@link java.util.stream.Stream}, {@link java.util.stream.IntStream},
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    45
 * {@link java.util.stream.LongStream}, and {@link java.util.stream.DoubleStream}
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    46
 * are streams over objects and the primitive {@code int}, {@code long} and
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    47
 * {@code double} types.  Streams differ from collections in several ways:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    48
 *
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    49
 * <ul>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    50
 *     <li>No storage.  A stream is not a data structure that stores elements;
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    51
 *     instead, it conveys elements from a source such as a data structure,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    52
 *     an array, a generator function, or an I/O channel, through a pipeline of
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    53
 *     computational operations.</li>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    54
 *     <li>Functional in nature.  An operation on a stream produces a result,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    55
 *     but does not modify its source.  For example, filtering a {@code Stream}
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    56
 *     obtained from a collection produces a new {@code Stream} without the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    57
 *     filtered elements, rather than removing elements from the source
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    58
 *     collection.</li>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    59
 *     <li>Laziness-seeking.  Many stream operations, such as filtering, mapping,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    60
 *     or duplicate removal, can be implemented lazily, exposing opportunities
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    61
 *     for optimization.  For example, "find the first {@code String} with
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    62
 *     three consecutive vowels" need not examine all the input strings.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    63
 *     Stream operations are divided into intermediate ({@code Stream}-producing)
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    64
 *     operations and terminal (value- or side-effect-producing) operations.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    65
 *     Intermediate operations are always lazy.</li>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    66
 *     <li>Possibly unbounded.  While collections have a finite size, streams
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
    67
 *     need not.  Short-circuiting operations such as {@code limit(n)} or
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    68
 *     {@code findFirst()} can allow computations on infinite streams to
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    69
 *     complete in finite time.</li>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    70
 *     <li>Consumable. The elements of a stream are only visited once during
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    71
 *     the life of a stream. Like an {@link java.util.Iterator}, a new stream
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    72
 *     must be generated to revisit the same elements of the source.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    73
 *     </li>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    74
 * </ul>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    75
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    76
 * Streams can be obtained in a number of ways. Some examples include:
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    77
 * <ul>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    78
 *     <li>From a {@link java.util.Collection} via the {@code stream()} and
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    79
 *     {@code parallelStream()} methods;</li>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    80
 *     <li>From an array via {@link java.util.Arrays#stream(Object[])};</li>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    81
 *     <li>From static factory methods on the stream classes, such as
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    82
 *     {@link java.util.stream.Stream#of(Object[])},
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    83
 *     {@link java.util.stream.IntStream#range(int, int)}
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    84
 *     or {@link java.util.stream.Stream#iterate(Object, UnaryOperator)};</li>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    85
 *     <li>The lines of a file can be obtained from {@link java.io.BufferedReader#lines()};</li>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    86
 *     <li>Streams of file paths can be obtained from methods in {@link java.nio.file.Files};</li>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    87
 *     <li>Streams of random numbers can be obtained from {@link java.util.Random#ints()};</li>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    88
 *     <li>Numerous other stream-bearing methods in the JDK, including
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    89
 *     {@link java.util.BitSet#stream()},
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    90
 *     {@link java.util.regex.Pattern#splitAsStream(java.lang.CharSequence)},
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    91
 *     and {@link java.util.jar.JarFile#stream()}.</li>
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    92
 * </ul>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    93
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    94
 * <p>Additional stream sources can be provided by third-party libraries using
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    95
 * <a href="package-summary.html#StreamSources">these techniques</a>.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    96
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
    97
 * <h2><a name="StreamOps">Stream operations and pipelines</a></h2>
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
    98
 *
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
    99
 * <p>Stream operations are divided into <em>intermediate</em> and
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   100
 * <em>terminal</em> operations, and are combined to form <em>stream
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   101
 * pipelines</em>.  A stream pipeline consists of a source (such as a
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   102
 * {@code Collection}, an array, a generator function, or an I/O channel);
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   103
 * followed by zero or more intermediate operations such as
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   104
 * {@code Stream.filter} or {@code Stream.map}; and a terminal operation such
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   105
 * as {@code Stream.forEach} or {@code Stream.reduce}.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   106
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   107
 * <p>Intermediate operations return a new stream.  They are always
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   108
 * <em>lazy</em>; executing an intermediate operation such as
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   109
 * {@code filter()} does not actually perform any filtering, but instead
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   110
 * creates a new stream that, when traversed, contains the elements of
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   111
 * the initial stream that match the given predicate.  Traversal
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   112
 * of the pipeline source does not begin until the terminal operation of the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   113
 * pipeline is executed.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   114
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   115
 * <p>Terminal operations, such as {@code Stream.forEach} or
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   116
 * {@code IntStream.sum}, may traverse the stream to produce a result or a
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   117
 * side-effect. After the terminal operation is performed, the stream pipeline
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   118
 * is considered consumed, and can no longer be used; if you need to traverse
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   119
 * the same data source again, you must return to the data source to get a new
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   120
 * stream.  In almost all cases, terminal operations are <em>eager</em>,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   121
 * completing their traversal of the data source and processing of the pipeline
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   122
 * before returning.  Only the terminal operations {@code iterator()} and
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   123
 * {@code spliterator()} are not; these are provided as an "escape hatch" to enable
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   124
 * arbitrary client-controlled pipeline traversals in the event that the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   125
 * existing operations are not sufficient to the task.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   126
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   127
 * <p> Processing streams lazily allows for significant efficiencies; in a
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   128
 * pipeline such as the filter-map-sum example above, filtering, mapping, and
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   129
 * summing can be fused into a single pass on the data, with minimal
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   130
 * intermediate state. Laziness also allows avoiding examining all the data
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   131
 * when it is not necessary; for operations such as "find the first string
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   132
 * longer than 1000 characters", it is only necessary to examine just enough
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   133
 * strings to find one that has the desired characteristics without examining
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   134
 * all of the strings available from the source. (This behavior becomes even
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   135
 * more important when the input stream is infinite and not merely large.)
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   136
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   137
 * <p>Intermediate operations are further divided into <em>stateless</em>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   138
 * and <em>stateful</em> operations. Stateless operations, such as {@code filter}
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   139
 * and {@code map}, retain no state from previously seen element when processing
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   140
 * a new element -- each element can be processed
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   141
 * independently of operations on other elements.  Stateful operations, such as
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   142
 * {@code distinct} and {@code sorted}, may incorporate state from previously
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   143
 * seen elements when processing new elements.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   144
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   145
 * <p>Stateful operations may need to process the entire input
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   146
 * before producing a result.  For example, one cannot produce any results from
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   147
 * sorting a stream until one has seen all elements of the stream.  As a result,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   148
 * under parallel computation, some pipelines containing stateful intermediate
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   149
 * operations may require multiple passes on the data or may need to buffer
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   150
 * significant data.  Pipelines containing exclusively stateless intermediate
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   151
 * operations can be processed in a single pass, whether sequential or parallel,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   152
 * with minimal data buffering.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   153
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   154
 * <p>Further, some operations are deemed <em>short-circuiting</em> operations.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   155
 * An intermediate operation is short-circuiting if, when presented with
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   156
 * infinite input, it may produce a finite stream as a result.  A terminal
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   157
 * operation is short-circuiting if, when presented with infinite input, it may
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   158
 * terminate in finite time.  Having a short-circuiting operation in the pipeline
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   159
 * is a necessary, but not sufficient, condition for the processing of an infinite
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   160
 * stream to terminate normally in finite time.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   161
 *
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   162
 * <h3>Parallelism</h3>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   163
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   164
 * <p>Processing elements with an explicit {@code for-}loop is inherently serial.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   165
 * Streams facilitate parallel execution by reframing the computation as a pipeline of
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   166
 * aggregate operations, rather than as imperative operations on each individual
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   167
 * element.  All streams operations can execute either in serial or in parallel.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   168
 * The stream implementations in the JDK create serial streams unless parallelism is
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   169
 * explicitly requested.  For example, {@code Collection} has methods
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   170
 * {@link java.util.Collection#stream} and {@link java.util.Collection#parallelStream},
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   171
 * which produce sequential and parallel streams respectively; other
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   172
 * stream-bearing methods such as {@link java.util.stream.IntStream#range(int, int)}
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   173
 * produce sequential streams but these streams can be efficiently parallelized by
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   174
 * invoking their {@link java.util.stream.BaseStream#parallel()} method.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   175
 * To execute the prior "sum of weights of widgets" query in parallel, we would
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   176
 * do:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   177
 *
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   178
 * <pre>{@code
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   179
 *     int sumOfWeights = widgets.}<code><b>parallelStream()</b></code>{@code
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   180
 *                               .filter(b -> b.getColor() == RED)
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   181
 *                               .mapToInt(b -> b.getWeight())
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   182
 *                               .sum();
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   183
 * }</pre>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   184
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   185
 * <p>The only difference between the serial and parallel versions of this
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   186
 * example is the creation of the initial stream, using "{@code parallelStream()}"
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   187
 * instead of "{@code stream()}".  When the terminal operation is initiated,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   188
 * the stream pipeline is executed sequentially or in parallel depending on the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   189
 * orientation of the stream on which it is invoked.  Whether a stream will execute in serial or
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   190
 * parallel can be determined with the {@code isParallel()} method, and the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   191
 * orientation of a stream can be modified with the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   192
 * {@link java.util.stream.BaseStream#sequential()} and
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   193
 * {@link java.util.stream.BaseStream#parallel()} operations.  When the terminal
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   194
 * operation is initiated, the stream pipeline is executed sequentially or in
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   195
 * parallel depending on the mode of the stream on which it is invoked.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   196
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   197
 * <p>Except for operations identified as explicitly nondeterministic, such
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   198
 * as {@code findAny()}, whether a stream executes sequentially or in parallel
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   199
 * should not change the result of the computation.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   200
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   201
 * <p>Most stream operations accept parameters that describe user-specified
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   202
 * behavior, which are often lambda expressions.  To preserve correct behavior,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   203
 * these <em>behavioral parameters</em> must be <em>non-interfering</em>, and in
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   204
 * most cases must be <em>stateless</em>.  Such parameters are always instances
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   205
 * of a <a href="../function/package-summary.html">functional interface</a> such
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   206
 * as {@link java.util.function.Function}, and are often lambda expressions or
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   207
 * method references.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   208
 *
22291
6106c1f013f1 8029696: Broken doc links to package-summary.html#NonInterference in java.util.stream
michaelm
parents: 21339
diff changeset
   209
 * <h3><a name="NonInterference">Non-interference</a></h3>
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   210
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   211
 * Streams enable you to execute possibly-parallel aggregate operations over a
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   212
 * variety of data sources, including even non-thread-safe collections such as
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   213
 * {@code ArrayList}. This is possible only if we can prevent
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   214
 * <em>interference</em> with the data source during the execution of a stream
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   215
 * pipeline.  Except for the escape-hatch operations {@code iterator()} and
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   216
 * {@code spliterator()}, execution begins when the terminal operation is
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   217
 * invoked, and ends when the terminal operation completes.  For most data
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   218
 * sources, preventing interference means ensuring that the data source is
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   219
 * <em>not modified at all</em> during the execution of the stream pipeline.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   220
 * The notable exception to this are streams whose sources are concurrent
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   221
 * collections, which are specifically designed to handle concurrent modification.
21339
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   222
 * Concurrent stream sources are those whose {@code Spliterator} reports the
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   223
 * {@code CONCURRENT} characteristic.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   224
 *
21339
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   225
 * <p>Accordingly, behavioral parameters in stream pipelines whose source might
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   226
 * not be concurrent should never modify the stream's data source.
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   227
 * A behavioral parameter is said to <em>interfere</em> with a non-concurrent
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   228
 * data source if it modifies, or causes to be
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   229
 * modified, the stream's data source.  The need for non-interference applies
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   230
 * to all pipelines, not just parallel ones.  Unless the stream source is
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   231
 * concurrent, modifying a stream's data source during execution of a stream
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   232
 * pipeline can cause exceptions, incorrect answers, or nonconformant behavior.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   233
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   234
 * For well-behaved stream sources, the source can be modified before the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   235
 * terminal operation commences and those modifications will be reflected in
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   236
 * the covered elements.  For example, consider the following code:
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   237
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   238
 * <pre>{@code
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   239
 *     List<String> l = new ArrayList(Arrays.asList("one", "two"));
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   240
 *     Stream<String> sl = l.stream();
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   241
 *     l.add("three");
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   242
 *     String s = sl.collect(joining(" "));
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   243
 * }</pre>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   244
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   245
 * First a list is created consisting of two strings: "one"; and "two". Then a
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   246
 * stream is created from that list. Next the list is modified by adding a third
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   247
 * string: "three". Finally the elements of the stream are collected and joined
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   248
 * together. Since the list was modified before the terminal {@code collect}
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   249
 * operation commenced the result will be a string of "one two three". All the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   250
 * streams returned from JDK collections, and most other JDK classes,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   251
 * are well-behaved in this manner; for streams generated by other libraries, see
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   252
 * <a href="package-summary.html#StreamSources">Low-level stream
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   253
 * construction</a> for requirements for building well-behaved streams.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   254
 *
21339
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   255
 * <h3><a name="Statelessness">Stateless behaviors</a></h3>
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   256
 *
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   257
 * Stream pipeline results may be nondeterministic or incorrect if the behavioral
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   258
 * parameters to the stream operations are <em>stateful</em>.  A stateful lambda
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   259
 * (or other object implementing the appropriate functional interface) is one
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   260
 * whose result depends on any state which might change during the execution
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   261
 * of the stream pipeline.  An example of a stateful lambda is the parameter
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   262
 * to {@code map()} in:
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   263
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   264
 * <pre>{@code
21339
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   265
 *     Set<Integer> seen = Collections.synchronizedSet(new HashSet<>());
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   266
 *     stream.parallel().map(e -> { if (seen.add(e)) return 0; else return e; })...
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   267
 * }</pre>
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   268
 *
21339
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   269
 * Here, if the mapping operation is performed in parallel, the results for the
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   270
 * same input could vary from run to run, due to thread scheduling differences,
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   271
 * whereas, with a stateless lambda expression the results would always be the
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   272
 * same.
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   273
 *
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   274
 * <p>Note also that attempting to access mutable state from behavioral parameters
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   275
 * presents you with a bad choice with respect to safety and performance; if
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   276
 * you do not synchronize access to that state, you have a data race and
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   277
 * therefore your code is broken, but if you do synchronize access to that
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   278
 * state, you risk having contention undermine the parallelism you are seeking
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   279
 * to benefit from.  The best approach is to avoid stateful behavioral
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   280
 * parameters to stream operations entirely; there is usually a way to
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   281
 * restructure the stream pipeline to avoid statefulness.
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   282
 *
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   283
 * <h3>Side-effects</h3>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   284
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   285
 * Side-effects in behavioral parameters to stream operations are, in general,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   286
 * discouraged, as they can often lead to unwitting violations of the
21339
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   287
 * statelessness requirement, as well as other thread-safety hazards.
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   288
 *
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   289
 * <p>If the behavioral parameters do have side-effects, unless explicitly
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   290
 * stated, there are no guarantees as to the
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   291
 * <a href="../concurrent/package-summary.html#MemoryVisibility"><i>visibility</i></a>
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   292
 * of those side-effects to other threads, nor are there any guarantees that
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   293
 * different operations on the "same" element within the same stream pipeline
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   294
 * are executed in the same thread.  Further, the ordering of those effects
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   295
 * may be surprising.  Even when a pipeline is constrained to produce a
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   296
 * <em>result</em> that is consistent with the encounter order of the stream
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   297
 * source (for example, {@code IntStream.range(0,5).parallel().map(x -> x*2).toArray()}
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   298
 * must produce {@code [0, 2, 4, 6, 8]}), no guarantees are made as to the order
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   299
 * in which the mapper function is applied to individual elements, or in what
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   300
 * thread any behavioral parameter is executed for a given element.
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   301
 *
20e8b81964d5 8025909: Lambda Library Spec Updates
henryjen
parents: 19859
diff changeset
   302
 * <p>Many computations where one might be tempted to use side effects can be more
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   303
 * safely and efficiently expressed without side-effects, such as using
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   304
 * <a href="package-summary.html#Reduction">reduction</a> instead of mutable
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   305
 * accumulators. However, side-effects such as using {@code println()} for debugging
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   306
 * purposes are usually harmless.  A small number of stream operations, such as
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   307
 * {@code forEach()} and {@code peek()}, can operate only via side-effects;
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   308
 * these should be used with care.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   309
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   310
 * <p>As an example of how to transform a stream pipeline that inappropriately
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   311
 * uses side-effects to one that does not, the following code searches a stream
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   312
 * of strings for those matching a given regular expression, and puts the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   313
 * matches in a list.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   314
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   315
 * <pre>{@code
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   316
 *     ArrayList<String> results = new ArrayList<>();
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   317
 *     stream.filter(s -> pattern.matcher(s).matches())
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   318
 *           .forEach(s -> results.add(s));  // Unnecessary use of side-effects!
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   319
 * }</pre>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   320
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   321
 * This code unnecessarily uses side-effects.  If executed in parallel, the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   322
 * non-thread-safety of {@code ArrayList} would cause incorrect results, and
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   323
 * adding needed synchronization would cause contention, undermining the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   324
 * benefit of parallelism.  Furthermore, using side-effects here is completely
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   325
 * unnecessary; the {@code forEach()} can simply be replaced with a reduction
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   326
 * operation that is safer, more efficient, and more amenable to
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   327
 * parallelization:
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   328
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   329
 * <pre>{@code
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   330
 *     List<String>results =
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   331
 *         stream.filter(s -> pattern.matcher(s).matches())
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   332
 *               .collect(Collectors.toList());  // No side-effects!
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   333
 * }</pre>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   334
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   335
 * <h3><a name="Ordering">Ordering</a></h3>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   336
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   337
 * <p>Streams may or may not have a defined <em>encounter order</em>.  Whether
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   338
 * or not a stream has an encounter order depends on the source and the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   339
 * intermediate operations.  Certain stream sources (such as {@code List} or
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   340
 * arrays) are intrinsically ordered, whereas others (such as {@code HashSet})
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   341
 * are not.  Some intermediate operations, such as {@code sorted()}, may impose
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   342
 * an encounter order on an otherwise unordered stream, and others may render an
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   343
 * ordered stream unordered, such as {@link java.util.stream.BaseStream#unordered()}.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   344
 * Further, some terminal operations may ignore encounter order, such as
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   345
 * {@code forEach()}.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   346
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   347
 * <p>If a stream is ordered, most operations are constrained to operate on the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   348
 * elements in their encounter order; if the source of a stream is a {@code List}
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   349
 * containing {@code [1, 2, 3]}, then the result of executing {@code map(x -> x*2)}
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   350
 * must be {@code [2, 4, 6]}.  However, if the source has no defined encounter
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   351
 * order, then any permutation of the values {@code [2, 4, 6]} would be a valid
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   352
 * result.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   353
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   354
 * <p>For sequential streams, the presence or absence of an encounter order does
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   355
 * not affect performance, only determinism.  If a stream is ordered, repeated
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   356
 * execution of identical stream pipelines on an identical source will produce
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   357
 * an identical result; if it is not ordered, repeated execution might produce
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   358
 * different results.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   359
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   360
 * <p>For parallel streams, relaxing the ordering constraint can sometimes enable
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   361
 * more efficient execution.  Certain aggregate operations,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   362
 * such as filtering duplicates ({@code distinct()}) or grouped reductions
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   363
 * ({@code Collectors.groupingBy()}) can be implemented more efficiently if ordering of elements
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   364
 * is not relevant.  Similarly, operations that are intrinsically tied to encounter order,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   365
 * such as {@code limit()}, may require
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   366
 * buffering to ensure proper ordering, undermining the benefit of parallelism.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   367
 * In cases where the stream has an encounter order, but the user does not
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   368
 * particularly <em>care</em> about that encounter order, explicitly de-ordering
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   369
 * the stream with {@link java.util.stream.BaseStream#unordered() unordered()} may
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   370
 * improve parallel performance for some stateful or terminal operations.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   371
 * However, most stream pipelines, such as the "sum of weight of blocks" example
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   372
 * above, still parallelize efficiently even under ordering constraints.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   373
 *
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   374
 * <h2><a name="Reduction">Reduction operations</a></h2>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   375
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   376
 * A <em>reduction</em> operation (also called a <em>fold</em>) takes a sequence
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   377
 * of input elements and combines them into a single summary result by repeated
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   378
 * application of a combining operation, such as finding the sum or maximum of
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   379
 * a set of numbers, or accumulating elements into a list.  The streams classes have
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   380
 * multiple forms of general reduction operations, called
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   381
 * {@link java.util.stream.Stream#reduce(java.util.function.BinaryOperator) reduce()}
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   382
 * and {@link java.util.stream.Stream#collect(java.util.stream.Collector) collect()},
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   383
 * as well as multiple specialized reduction forms such as
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   384
 * {@link java.util.stream.IntStream#sum() sum()}, {@link java.util.stream.IntStream#max() max()},
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   385
 * or {@link java.util.stream.IntStream#count() count()}.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   386
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   387
 * <p>Of course, such operations can be readily implemented as simple sequential
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   388
 * loops, as in:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   389
 * <pre>{@code
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   390
 *    int sum = 0;
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   391
 *    for (int x : numbers) {
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   392
 *       sum += x;
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   393
 *    }
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   394
 * }</pre>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   395
 * However, there are good reasons to prefer a reduce operation
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   396
 * over a mutative accumulation such as the above.  Not only is a reduction
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   397
 * "more abstract" -- it operates on the stream as a whole rather than individual
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   398
 * elements -- but a properly constructed reduce operation is inherently
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   399
 * parallelizable, so long as the function(s) used to process the elements
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   400
 * are <a href="package-summary.html#Associativity">associative</a> and
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   401
 * <a href="package-summary.html#NonInterfering">stateless</a>.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   402
 * For example, given a stream of numbers for which we want to find the sum, we
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   403
 * can write:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   404
 * <pre>{@code
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   405
 *    int sum = numbers.stream().reduce(0, (x,y) -> x+y);
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   406
 * }</pre>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   407
 * or:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   408
 * <pre>{@code
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   409
 *    int sum = numbers.stream().reduce(0, Integer::sum);
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   410
 * }</pre>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   411
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   412
 * <p>These reduction operations can run safely in parallel with almost no
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   413
 * modification:
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   414
 * <pre>{@code
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   415
 *    int sum = numbers.parallelStream().reduce(0, Integer::sum);
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   416
 * }</pre>
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   417
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   418
 * <p>Reduction parallellizes well because the implementation
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   419
 * can operate on subsets of the data in parallel, and then combine the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   420
 * intermediate results to get the final correct answer.  (Even if the language
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   421
 * had a "parallel for-each" construct, the mutative accumulation approach would
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   422
 * still required the developer to provide
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   423
 * thread-safe updates to the shared accumulating variable {@code sum}, and
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   424
 * the required synchronization would then likely eliminate any performance gain from
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   425
 * parallelism.)  Using {@code reduce()} instead removes all of the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   426
 * burden of parallelizing the reduction operation, and the library can provide
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   427
 * an efficient parallel implementation with no additional synchronization
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   428
 * required.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   429
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   430
 * <p>The "widgets" examples shown earlier shows how reduction combines with
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   431
 * other operations to replace for loops with bulk operations.  If {@code widgets}
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   432
 * is a collection of {@code Widget} objects, which have a {@code getWeight} method,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   433
 * we can find the heaviest widget with:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   434
 * <pre>{@code
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   435
 *     OptionalInt heaviest = widgets.parallelStream()
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   436
 *                                   .mapToInt(Widget::getWeight)
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   437
 *                                   .max();
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   438
 * }</pre>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   439
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   440
 * <p>In its more general form, a {@code reduce} operation on elements of type
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   441
 * {@code <T>} yielding a result of type {@code <U>} requires three parameters:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   442
 * <pre>{@code
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   443
 * <U> U reduce(U identity,
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   444
 *              BiFunction<U, ? super T, U> accumulator,
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   445
 *              BinaryOperator<U> combiner);
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   446
 * }</pre>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   447
 * Here, the <em>identity</em> element is both an initial seed value for the reduction
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   448
 * and a default result if there are no input elements. The <em>accumulator</em>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   449
 * function takes a partial result and the next element, and produces a new
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   450
 * partial result. The <em>combiner</em> function combines two partial results
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   451
 * to produce a new partial result.  (The combiner is necessary in parallel
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   452
 * reductions, where the input is partitioned, a partial accumulation computed
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   453
 * for each partition, and then the partial results are combined to produce a
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   454
 * final result.)
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   455
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   456
 * <p>More formally, the {@code identity} value must be an <em>identity</em> for
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   457
 * the combiner function. This means that for all {@code u},
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   458
 * {@code combiner.apply(identity, u)} is equal to {@code u}. Additionally, the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   459
 * {@code combiner} function must be <a href="package-summary.html#Associativity">associative</a> and
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   460
 * must be compatible with the {@code accumulator} function: for all {@code u}
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   461
 * and {@code t}, {@code combiner.apply(u, accumulator.apply(identity, t))} must
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   462
 * be {@code equals()} to {@code accumulator.apply(u, t)}.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   463
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   464
 * <p>The three-argument form is a generalization of the two-argument form,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   465
 * incorporating a mapping step into the accumulation step.  We could
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   466
 * re-cast the simple sum-of-weights example using the more general form as
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   467
 * follows:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   468
 * <pre>{@code
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   469
 *     int sumOfWeights = widgets.stream()
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   470
 *                               .reduce(0,
25224
7c7e029826e1 8043327: Collectors.toMap studentToGPA example uses Functions.identity()
psandoz
parents: 22291
diff changeset
   471
 *                                       (sum, b) -> sum + b.getWeight(),
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   472
 *                                       Integer::sum);
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   473
 * }</pre>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   474
 * though the explicit map-reduce form is more readable and therefore should
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   475
 * usually be preferred. The generalized form is provided for cases where
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   476
 * significant work can be optimized away by combining mapping and reducing
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   477
 * into a single function.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   478
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   479
 * <h3><a name="MutableReduction">Mutable reduction</a></h3>
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   480
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   481
 * A <em>mutable reduction operation</em> accumulates input elements into a
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   482
 * mutable result container, such as a {@code Collection} or {@code StringBuilder},
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   483
 * as it processes the elements in the stream.
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   484
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   485
 * <p>If we wanted to take a stream of strings and concatenate them into a
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   486
 * single long string, we <em>could</em> achieve this with ordinary reduction:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   487
 * <pre>{@code
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   488
 *     String concatenated = strings.reduce("", String::concat)
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   489
 * }</pre>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   490
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   491
 * <p>We would get the desired result, and it would even work in parallel.  However,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   492
 * we might not be happy about the performance!  Such an implementation would do
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   493
 * a great deal of string copying, and the run time would be <em>O(n^2)</em> in
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   494
 * the number of characters.  A more performant approach would be to accumulate
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   495
 * the results into a {@link java.lang.StringBuilder}, which is a mutable
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   496
 * container for accumulating strings.  We can use the same technique to
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   497
 * parallelize mutable reduction as we do with ordinary reduction.
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   498
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   499
 * <p>The mutable reduction operation is called
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   500
 * {@link java.util.stream.Stream#collect(Collector) collect()},
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   501
 * as it collects together the desired results into a result container such
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   502
 * as a {@code Collection}.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   503
 * A {@code collect} operation requires three functions:
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   504
 * a supplier function to construct new instances of the result container, an
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   505
 * accumulator function to incorporate an input element into a result
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   506
 * container, and a combining function to merge the contents of one result
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   507
 * container into another.  The form of this is very similar to the general
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   508
 * form of ordinary reduction:
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   509
 * <pre>{@code
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   510
 * <R> R collect(Supplier<R> supplier,
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   511
 *               BiConsumer<R, ? super T> accumulator,
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   512
 *               BiConsumer<R, R> combiner);
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   513
 * }</pre>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   514
 * <p>As with {@code reduce()}, a benefit of expressing {@code collect} in this
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   515
 * abstract way is that it is directly amenable to parallelization: we can
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   516
 * accumulate partial results in parallel and then combine them, so long as the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   517
 * accumulation and combining functions satisfy the appropriate requirements.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   518
 * For example, to collect the String representations of the elements in a
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   519
 * stream into an {@code ArrayList}, we could write the obvious sequential
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   520
 * for-each form:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   521
 * <pre>{@code
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   522
 *     ArrayList<String> strings = new ArrayList<>();
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   523
 *     for (T element : stream) {
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   524
 *         strings.add(element.toString());
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   525
 *     }
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   526
 * }</pre>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   527
 * Or we could use a parallelizable collect form:
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   528
 * <pre>{@code
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   529
 *     ArrayList<String> strings = stream.collect(() -> new ArrayList<>(),
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   530
 *                                                (c, e) -> c.add(e.toString()),
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   531
 *                                                (c1, c2) -> c1.addAll(c2));
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   532
 * }</pre>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   533
 * or, pulling the mapping operation out of the accumulator function, we could
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   534
 * express it more succinctly as:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   535
 * <pre>{@code
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   536
 *     List<String> strings = stream.map(Object::toString)
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   537
 *                                  .collect(ArrayList::new, ArrayList::add, ArrayList::addAll);
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   538
 * }</pre>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   539
 * Here, our supplier is just the {@link java.util.ArrayList#ArrayList()
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   540
 * ArrayList constructor}, the accumulator adds the stringified element to an
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   541
 * {@code ArrayList}, and the combiner simply uses {@link java.util.ArrayList#addAll addAll}
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   542
 * to copy the strings from one container into the other.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   543
 *
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   544
 * <p>The three aspects of {@code collect} -- supplier, accumulator, and
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   545
 * combiner -- are tightly coupled.  We can use the abstraction of a
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   546
 * {@link java.util.stream.Collector} to capture all three aspects.  The
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   547
 * above example for collecting strings into a {@code List} can be rewritten
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   548
 * using a standard {@code Collector} as:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   549
 * <pre>{@code
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   550
 *     List<String> strings = stream.map(Object::toString)
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   551
 *                                  .collect(Collectors.toList());
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   552
 * }</pre>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   553
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   554
 * <p>Packaging mutable reductions into a Collector has another advantage:
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   555
 * composability.  The class {@link java.util.stream.Collectors} contains a
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   556
 * number of predefined factories for collectors, including combinators
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   557
 * that transform one collector into another.  For example, suppose we have a
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   558
 * collector that computes the sum of the salaries of a stream of
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   559
 * employees, as follows:
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   560
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   561
 * <pre>{@code
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   562
 *     Collector<Employee, ?, Integer> summingSalaries
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   563
 *         = Collectors.summingInt(Employee::getSalary);
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   564
 * }</pre>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   565
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   566
 * (The {@code ?} for the second type parameter merely indicates that we don't
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   567
 * care about the intermediate representation used by this collector.)
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   568
 * If we wanted to create a collector to tabulate the sum of salaries by
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   569
 * department, we could reuse {@code summingSalaries} using
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   570
 * {@link java.util.stream.Collectors#groupingBy(java.util.function.Function, java.util.stream.Collector) groupingBy}:
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   571
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   572
 * <pre>{@code
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   573
 *     Map<Department, Integer> salariesByDept
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   574
 *         = employees.stream().collect(Collectors.groupingBy(Employee::getDepartment,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   575
 *                                                            summingSalaries));
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   576
 * }</pre>
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   577
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   578
 * <p>As with the regular reduction operation, {@code collect()} operations can
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   579
 * only be parallelized if appropriate conditions are met.  For any partially
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   580
 * accumulated result, combining it with an empty result container must
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   581
 * produce an equivalent result.  That is, for a partially accumulated result
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   582
 * {@code p} that is the result of any series of accumulator and combiner
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   583
 * invocations, {@code p} must be equivalent to
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   584
 * {@code combiner.apply(p, supplier.get())}.
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   585
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   586
 * <p>Further, however the computation is split, it must produce an equivalent
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   587
 * result.  For any input elements {@code t1} and {@code t2}, the results
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   588
 * {@code r1} and {@code r2} in the computation below must be equivalent:
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   589
 * <pre>{@code
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   590
 *     A a1 = supplier.get();
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   591
 *     accumulator.accept(a1, t1);
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   592
 *     accumulator.accept(a1, t2);
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   593
 *     R r1 = finisher.apply(a1);  // result without splitting
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   594
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   595
 *     A a2 = supplier.get();
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   596
 *     accumulator.accept(a2, t1);
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   597
 *     A a3 = supplier.get();
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   598
 *     accumulator.accept(a3, t2);
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   599
 *     R r2 = finisher.apply(combiner.apply(a2, a3));  // result with splitting
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   600
 * }</pre>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   601
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   602
 * <p>Here, equivalence generally means according to {@link java.lang.Object#equals(Object)}.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   603
 * but in some cases equivalence may be relaxed to account for differences in
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   604
 * order.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   605
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   606
 * <h3><a name="ConcurrentReduction">Reduction, concurrency, and ordering</a></h3>
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   607
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   608
 * With some complex reduction operations, for example a {@code collect()} that
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   609
 * produces a {@code Map}, such as:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   610
 * <pre>{@code
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   611
 *     Map<Buyer, List<Transaction>> salesByBuyer
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   612
 *         = txns.parallelStream()
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   613
 *               .collect(Collectors.groupingBy(Transaction::getBuyer));
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   614
 * }</pre>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   615
 * it may actually be counterproductive to perform the operation in parallel.
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   616
 * This is because the combining step (merging one {@code Map} into another by
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   617
 * key) can be expensive for some {@code Map} implementations.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   618
 *
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   619
 * <p>Suppose, however, that the result container used in this reduction
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   620
 * was a concurrently modifiable collection -- such as a
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   621
 * {@link java.util.concurrent.ConcurrentHashMap}. In that case, the parallel
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   622
 * invocations of the accumulator could actually deposit their results
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   623
 * concurrently into the same shared result container, eliminating the need for
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   624
 * the combiner to merge distinct result containers. This potentially provides
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   625
 * a boost to the parallel execution performance. We call this a
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   626
 * <em>concurrent</em> reduction.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   627
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   628
 * <p>A {@link java.util.stream.Collector} that supports concurrent reduction is
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   629
 * marked with the {@link java.util.stream.Collector.Characteristics#CONCURRENT}
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   630
 * characteristic.  However, a concurrent collection also has a downside.  If
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   631
 * multiple threads are depositing results concurrently into a shared container,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   632
 * the order in which results are deposited is non-deterministic. Consequently,
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   633
 * a concurrent reduction is only possible if ordering is not important for the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   634
 * stream being processed. The {@link java.util.stream.Stream#collect(Collector)}
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   635
 * implementation will only perform a concurrent reduction if
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   636
 * <ul>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   637
 * <li>The stream is parallel;</li>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   638
 * <li>The collector has the
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   639
 * {@link java.util.stream.Collector.Characteristics#CONCURRENT} characteristic,
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   640
 * and;</li>
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   641
 * <li>Either the stream is unordered, or the collector has the
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   642
 * {@link java.util.stream.Collector.Characteristics#UNORDERED} characteristic.
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   643
 * </ul>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   644
 * You can ensure the stream is unordered by using the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   645
 * {@link java.util.stream.BaseStream#unordered()} method.  For example:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   646
 * <pre>{@code
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   647
 *     Map<Buyer, List<Transaction>> salesByBuyer
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   648
 *         = txns.parallelStream()
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   649
 *               .unordered()
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   650
 *               .collect(groupingByConcurrent(Transaction::getBuyer));
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   651
 * }</pre>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   652
 * (where {@link java.util.stream.Collectors#groupingByConcurrent} is the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   653
 * concurrent equivalent of {@code groupingBy}).
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   654
 *
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   655
 * <p>Note that if it is important that the elements for a given key appear in
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   656
 * the order they appear in the source, then we cannot use a concurrent
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   657
 * reduction, as ordering is one of the casualties of concurrent insertion.
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   658
 * We would then be constrained to implement either a sequential reduction or
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   659
 * a merge-based parallel reduction.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   660
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   661
 * <h3><a name="Associativity">Associativity</a></h3>
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   662
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   663
 * An operator or function {@code op} is <em>associative</em> if the following
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   664
 * holds:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   665
 * <pre>{@code
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   666
 *     (a op b) op c == a op (b op c)
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   667
 * }</pre>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   668
 * The importance of this to parallel evaluation can be seen if we expand this
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   669
 * to four terms:
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   670
 * <pre>{@code
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   671
 *     a op b op c op d == (a op b) op (c op d)
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   672
 * }</pre>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   673
 * So we can evaluate {@code (a op b)} in parallel with {@code (c op d)}, and
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   674
 * then invoke {@code op} on the results.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   675
 *
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   676
 * <p>Examples of associative operations include numeric addition, min, and
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   677
 * max, and string concatenation.
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   678
 *
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   679
 * <h2><a name="StreamSources">Low-level stream construction</a></h2>
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   680
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   681
 * So far, all the stream examples have used methods like
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   682
 * {@link java.util.Collection#stream()} or {@link java.util.Arrays#stream(Object[])}
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   683
 * to obtain a stream.  How are those stream-bearing methods implemented?
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   684
 *
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   685
 * <p>The class {@link java.util.stream.StreamSupport} has a number of
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   686
 * low-level methods for creating a stream, all using some form of a
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   687
 * {@link java.util.Spliterator}. A spliterator is the parallel analogue of an
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   688
 * {@link java.util.Iterator}; it describes a (possibly infinite) collection of
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   689
 * elements, with support for sequentially advancing, bulk traversal, and
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   690
 * splitting off some portion of the input into another spliterator which can
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   691
 * be processed in parallel.  At the lowest level, all streams are driven by a
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   692
 * spliterator.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   693
 *
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   694
 * <p>There are a number of implementation choices in implementing a
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   695
 * spliterator, nearly all of which are tradeoffs between simplicity of
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   696
 * implementation and runtime performance of streams using that spliterator.
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   697
 * The simplest, but least performant, way to create a spliterator is to
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   698
 * create one from an iterator using
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   699
 * {@link java.util.Spliterators#spliteratorUnknownSize(java.util.Iterator, int)}.
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   700
 * While such a spliterator will work, it will likely offer poor parallel
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   701
 * performance, since we have lost sizing information (how big is the
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   702
 * underlying data set), as well as being constrained to a simplistic
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   703
 * splitting algorithm.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   704
 *
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   705
 * <p>A higher-quality spliterator will provide balanced and known-size
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   706
 * splits, accurate sizing information, and a number of other
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   707
 * {@link java.util.Spliterator#characteristics() characteristics} of the
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   708
 * spliterator or data that can be used by implementations to optimize
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   709
 * execution.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   710
 *
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   711
 * <p>Spliterators for mutable data sources have an additional challenge;
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   712
 * timing of binding to the data, since the data could change between the time
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   713
 * the spliterator is created and the time the stream pipeline is executed.
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   714
 * Ideally, a spliterator for a stream would report a characteristic of
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   715
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   716
 * {@code IMMUTABLE} or {@code CONCURRENT}; if not it should be
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   717
 * <a href="../Spliterator.html#binding"><em>late-binding</em></a>. If a source
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   718
 * cannot directly supply a recommended spliterator, it may indirectly supply
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   719
 * a spliterator using a {@code Supplier}, and construct a stream via the
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   720
 * {@code Supplier}-accepting versions of
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   721
 * {@link java.util.stream.StreamSupport#stream(Supplier, int, boolean) stream()}.
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   722
 * The spliterator is obtained from the supplier only after the terminal
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   723
 * operation of the stream pipeline commences.
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   724
 *
19859
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   725
 * <p>These requirements significantly reduce the scope of potential
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   726
 * interference between mutations of the stream source and execution of stream
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   727
 * pipelines. Streams based on spliterators with the desired characteristics,
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   728
 * or those using the Supplier-based factory forms, are immune to
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   729
 * modifications of the data source prior to commencement of the terminal
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   730
 * operation (provided the behavioral parameters to the stream operations meet
ac48498acd3a 8024825: Some fixes are missing from java.util.stream spec update
henryjen
parents: 19850
diff changeset
   731
 * the required criteria for non-interference and statelessness).  See
22291
6106c1f013f1 8029696: Broken doc links to package-summary.html#NonInterference in java.util.stream
michaelm
parents: 21339
diff changeset
   732
 * <a href="package-summary.html#NonInterference">Non-Interference</a>
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   733
 * for more details.
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   734
 *
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   735
 * @since 1.8
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   736
 */
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   737
package java.util.stream;
17167
87067e3340d3 8008682: Inital Streams public API
briangoetz
parents:
diff changeset
   738
19850
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   739
import java.util.function.BinaryOperator;
93b368e54c1c 8011916: Spec update for java.util.stream
henryjen
parents: 19214
diff changeset
   740
import java.util.function.UnaryOperator;