jdk/src/share/classes/java/util/stream/Collector.java
changeset 17167 87067e3340d3
child 19214 e5901820c3c1
equal deleted inserted replaced
17166:c83a0fa44906 17167:87067e3340d3
       
     1 /*
       
     2  * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 package java.util.stream;
       
    26 
       
    27 import java.util.Collections;
       
    28 import java.util.Set;
       
    29 import java.util.function.BiFunction;
       
    30 import java.util.function.BinaryOperator;
       
    31 import java.util.function.Supplier;
       
    32 
       
    33 /**
       
    34  * A <a href="package-summary.html#Reduction">reduction operation</a> that
       
    35  * supports folding input elements into a cumulative result.  The result may be
       
    36  * a value or may be a mutable result container.  Examples of operations
       
    37  * accumulating results into a mutable result container include: accumulating
       
    38  * input elements into a {@code Collection}; concatenating strings into a
       
    39  * {@code StringBuilder}; computing summary information about elements such as
       
    40  * sum, min, max, or average; computing "pivot table" summaries such as "maximum
       
    41  * valued transaction by seller", etc.  Reduction operations can be performed
       
    42  * either sequentially or in parallel.
       
    43  *
       
    44  * <p>The following are examples of using the predefined {@code Collector}
       
    45  * implementations in {@link Collectors} with the {@code Stream} API to perform
       
    46  * mutable reduction tasks:
       
    47  * <pre>{@code
       
    48  *     // Accumulate elements into a List
       
    49  *     List<String> list = stream.collect(Collectors.toList());
       
    50  *
       
    51  *     // Accumulate elements into a TreeSet
       
    52  *     Set<String> list = stream.collect(Collectors.toCollection(TreeSet::new));
       
    53  *
       
    54  *     // Convert elements to strings and concatenate them, separated by commas
       
    55  *     String joined = stream.map(Object::toString)
       
    56  *                           .collect(Collectors.toStringJoiner(", "))
       
    57  *                           .toString();
       
    58  *
       
    59  *     // Find highest-paid employee
       
    60  *     Employee highestPaid = employees.stream()
       
    61  *                                     .collect(Collectors.maxBy(Comparators.comparing(Employee::getSalary)));
       
    62  *
       
    63  *     // Group employees by department
       
    64  *     Map<Department, List<Employee>> byDept
       
    65  *         = employees.stream()
       
    66  *                    .collect(Collectors.groupingBy(Employee::getDepartment));
       
    67  *
       
    68  *     // Find highest-paid employee by department
       
    69  *     Map<Department, Employee> highestPaidByDept
       
    70  *         = employees.stream()
       
    71  *                    .collect(Collectors.groupingBy(Employee::getDepartment,
       
    72  *                                                   Collectors.maxBy(Comparators.comparing(Employee::getSalary))));
       
    73  *
       
    74  *     // Partition students into passing and failing
       
    75  *     Map<Boolean, List<Student>> passingFailing =
       
    76  *         students.stream()
       
    77  *                 .collect(Collectors.partitioningBy(s -> s.getGrade() >= PASS_THRESHOLD);
       
    78  *
       
    79  * }</pre>
       
    80  *
       
    81  * <p>A {@code Collector} is specified by three functions that work together to
       
    82  * manage a result or result container.  They are: creation of an initial
       
    83  * result, incorporating a new data element into a result, and combining two
       
    84  * results into one. The last function -- combining two results into one -- is
       
    85  * used during parallel operations, where subsets of the input are accumulated
       
    86  * in parallel, and then the subresults merged into a combined result. The
       
    87  * result may be a mutable container or a value.  If the result is mutable, the
       
    88  * accumulation and combination functions may either mutate their left argument
       
    89  * and return that (such as adding elements to a collection), or return a new
       
    90  * result, in which case it should not perform any mutation.
       
    91  *
       
    92  * <p>Collectors also have a set of characteristics, including
       
    93  * {@link Characteristics#CONCURRENT} and
       
    94  * {@link Characteristics#STRICTLY_MUTATIVE}.  These characteristics provide
       
    95  * hints that can be used by a reduction implementation to provide better
       
    96  * performance.
       
    97  *
       
    98  * <p>Libraries that implement reduction based on {@code Collector}, such as
       
    99  * {@link Stream#collect(Collector)}, must adhere to the following constraints:
       
   100  * <ul>
       
   101  *     <li>The first argument passed to the accumulator function, and both
       
   102  *     arguments passed to the combiner function, must be the result of a
       
   103  *     previous invocation of {@link #resultSupplier()}, {@link #accumulator()},
       
   104  *     or {@link #combiner()}.</li>
       
   105  *     <li>The implementation should not do anything with the result of any of
       
   106  *     the result supplier, accumulator, or combiner functions other than to
       
   107  *     pass them again to the accumulator or combiner functions, or return them
       
   108  *     to the caller of the reduction operation.</li>
       
   109  *     <li>If a result is passed to the accumulator or combiner function, and
       
   110  *     the same object is not returned from that function, it is never used
       
   111  *     again.</li>
       
   112  *     <li>Once a result is passed to the combiner function, it is never passed
       
   113  *     to the accumulator function again.</li>
       
   114  *     <li>For non-concurrent collectors, any result returned from the result
       
   115  *     supplier, accumulator, or combiner functions must be serially
       
   116  *     thread-confined.  This enables collection to occur in parallel without
       
   117  *     the {@code Collector} needing to implement any additional synchronization.
       
   118  *     The reduction implementation must manage that the input is properly
       
   119  *     partitioned, that partitions are processed in isolation, and combining
       
   120  *     happens only after accumulation is complete.</li>
       
   121  *     <li>For concurrent collectors, an implementation is free to (but not
       
   122  *     required to) implement reduction concurrently.  A concurrent reduction
       
   123  *     is one where the accumulator function is called concurrently from
       
   124  *     multiple threads, using the same concurrently-modifiable result container,
       
   125  *     rather than keeping the result isolated during accumulation.
       
   126  *     A concurrent reduction should only be applied if the collector has the
       
   127  *     {@link Characteristics#UNORDERED} characteristics or if the
       
   128  *     originating data is unordered.</li>
       
   129  * </ul>
       
   130  *
       
   131  * @apiNote
       
   132  * Performing a reduction operation with a {@code Collector} should produce a
       
   133  * result equivalent to:
       
   134  * <pre>{@code
       
   135  *     BiFunction<R,T,R> accumulator = collector.accumulator();
       
   136  *     R result = collector.resultSupplier().get();
       
   137  *     for (T t : data)
       
   138  *         result = accumulator.apply(result, t);
       
   139  *     return result;
       
   140  * }</pre>
       
   141  *
       
   142  * <p>However, the library is free to partition the input, perform the reduction
       
   143  * on the partitions, and then use the combiner function to combine the partial
       
   144  * results to achieve a parallel reduction.  Depending on the specific reduction
       
   145  * operation, this may perform better or worse, depending on the relative cost
       
   146  * of the accumulator and combiner functions.
       
   147  *
       
   148  * <p>An example of an operation that can be easily modeled by {@code Collector}
       
   149  * is accumulating elements into a {@code TreeSet}. In this case, the {@code
       
   150  * resultSupplier()} function is {@code () -> new Treeset<T>()}, the
       
   151  * {@code accumulator} function is
       
   152  * {@code (set, element) -> { set.add(element); return set; }}, and the combiner
       
   153  * function is {@code (left, right) -> { left.addAll(right); return left; }}.
       
   154  * (This behavior is implemented by
       
   155  * {@code Collectors.toCollection(TreeSet::new)}).
       
   156  *
       
   157  * TODO Associativity and commutativity
       
   158  *
       
   159  * @see Stream#collect(Collector)
       
   160  * @see Collectors
       
   161  *
       
   162  * @param <T> the type of input element to the collect operation
       
   163  * @param <R> the result type of the collect operation
       
   164  * @since 1.8
       
   165  */
       
   166 public interface Collector<T, R> {
       
   167     /**
       
   168      * A function that creates and returns a new result that represents
       
   169      * "no values".  If the accumulator or combiner functions may mutate their
       
   170      * arguments, this must be a new, empty result container.
       
   171      *
       
   172      * @return a function which, when invoked, returns a result representing
       
   173      * "no values"
       
   174      */
       
   175     Supplier<R> resultSupplier();
       
   176 
       
   177     /**
       
   178      * A function that folds a new value into a cumulative result.  The result
       
   179      * may be a mutable result container or a value.  The accumulator function
       
   180      * may modify a mutable container and return it, or create a new result and
       
   181      * return that, but if it returns a new result object, it must not modify
       
   182      * any of its arguments.
       
   183      *
       
   184      * <p>If the collector has the {@link Characteristics#STRICTLY_MUTATIVE}
       
   185      * characteristic, then the accumulator function <em>must</em> always return
       
   186      * its first argument, after possibly mutating its state.
       
   187      *
       
   188      * @return a function which folds a new value into a cumulative result
       
   189      */
       
   190     BiFunction<R, T, R> accumulator();
       
   191 
       
   192     /**
       
   193      * A function that accepts two partial results and merges them.  The
       
   194      * combiner function may fold state from one argument into the other and
       
   195      * return that, or may return a new result object, but if it returns
       
   196      * a new result object, it must not modify the state of either of its
       
   197      * arguments.
       
   198      *
       
   199      * <p>If the collector has the {@link Characteristics#STRICTLY_MUTATIVE}
       
   200      * characteristic, then the combiner function <em>must</em> always return
       
   201      * its first argument, after possibly mutating its state.
       
   202      *
       
   203      * @return a function which combines two partial results into a cumulative
       
   204      * result
       
   205      */
       
   206     BinaryOperator<R> combiner();
       
   207 
       
   208     /**
       
   209      * Returns a {@code Set} of {@code Collector.Characteristics} indicating
       
   210      * the characteristics of this Collector.  This set should be immutable.
       
   211      *
       
   212      * @return an immutable set of collector characteristics
       
   213      */
       
   214     Set<Characteristics> characteristics();
       
   215 
       
   216     /**
       
   217      * Characteristics indicating properties of a {@code Collector}, which can
       
   218      * be used to optimize reduction implementations.
       
   219      */
       
   220     enum Characteristics {
       
   221         /**
       
   222          * Indicates that this collector is <em>concurrent</em>, meaning that
       
   223          * the result container can support the accumulator function being
       
   224          * called concurrently with the same result container from multiple
       
   225          * threads. Concurrent collectors must also always have the
       
   226          * {@code STRICTLY_MUTATIVE} characteristic.
       
   227          *
       
   228          * <p>If a {@code CONCURRENT} collector is not also {@code UNORDERED},
       
   229          * then it should only be evaluated concurrently if applied to an
       
   230          * unordered data source.
       
   231          */
       
   232         CONCURRENT,
       
   233 
       
   234         /**
       
   235          * Indicates that the result container has no intrinsic order, such as
       
   236          * a {@link Set}.
       
   237          */
       
   238         UNORDERED,
       
   239 
       
   240         /**
       
   241          * Indicates that this collector operates by strict mutation of its
       
   242          * result container. This means that the {@link #accumulator()} and
       
   243          * {@link #combiner()} functions will always modify the state of and
       
   244          * return their first argument, rather than returning a different result
       
   245          * container.
       
   246          */
       
   247         STRICTLY_MUTATIVE
       
   248     }
       
   249 }