jdk/src/share/classes/java/util/stream/Collector.java
changeset 19214 e5901820c3c1
parent 17167 87067e3340d3
child 19850 93b368e54c1c
equal deleted inserted replaced
19213:c360667a0da2 19214:e5901820c3c1
    23  * questions.
    23  * questions.
    24  */
    24  */
    25 package java.util.stream;
    25 package java.util.stream;
    26 
    26 
    27 import java.util.Collections;
    27 import java.util.Collections;
       
    28 import java.util.EnumSet;
    28 import java.util.Set;
    29 import java.util.Set;
    29 import java.util.function.BiFunction;
    30 import java.util.function.BiConsumer;
    30 import java.util.function.BinaryOperator;
    31 import java.util.function.BinaryOperator;
       
    32 import java.util.function.Function;
    31 import java.util.function.Supplier;
    33 import java.util.function.Supplier;
    32 
    34 
    33 /**
    35 /**
    34  * A <a href="package-summary.html#Reduction">reduction operation</a> that
    36  * A <a href="package-summary.html#Reduction">reduction operation</a> that
    35  * supports folding input elements into a cumulative result.  The result may be
    37  * folds input elements into a mutable result container, optionally transforming
    36  * a value or may be a mutable result container.  Examples of operations
    38  * the accumulated result into a final representation after all input elements
    37  * accumulating results into a mutable result container include: accumulating
    39  * have been processed.
    38  * input elements into a {@code Collection}; concatenating strings into a
    40  *
    39  * {@code StringBuilder}; computing summary information about elements such as
    41  * <p>Examples of mutable reduction operations include:
    40  * sum, min, max, or average; computing "pivot table" summaries such as "maximum
    42  * accumulating elements into a {@code Collection}; concatenating
    41  * valued transaction by seller", etc.  Reduction operations can be performed
    43  * strings using a {@code StringBuilder}; computing summary information about
    42  * either sequentially or in parallel.
    44  * elements such as sum, min, max, or average; computing "pivot table" summaries
       
    45  * such as "maximum valued transaction by seller", etc.  Reduction operations
       
    46  * can be performed either sequentially or in parallel.
    43  *
    47  *
    44  * <p>The following are examples of using the predefined {@code Collector}
    48  * <p>The following are examples of using the predefined {@code Collector}
    45  * implementations in {@link Collectors} with the {@code Stream} API to perform
    49  * implementations in {@link Collectors} with the {@code Stream} API to perform
    46  * mutable reduction tasks:
    50  * mutable reduction tasks:
    47  * <pre>{@code
    51  * <pre>{@code
    48  *     // Accumulate elements into a List
    52  *     // Accumulate names into a List
    49  *     List<String> list = stream.collect(Collectors.toList());
    53  *     List<String> list = people.stream().map(Person::getName).collect(Collectors.toList());
    50  *
    54  *
    51  *     // Accumulate elements into a TreeSet
    55  *     // Accumulate names into a TreeSet
    52  *     Set<String> list = stream.collect(Collectors.toCollection(TreeSet::new));
    56  *     Set<String> list = people.stream().map(Person::getName).collect(Collectors.toCollection(TreeSet::new));
    53  *
    57  *
    54  *     // Convert elements to strings and concatenate them, separated by commas
    58  *     // Convert elements to strings and concatenate them, separated by commas
    55  *     String joined = stream.map(Object::toString)
    59  *     String joined = things.stream()
    56  *                           .collect(Collectors.toStringJoiner(", "))
    60  *                           .map(Object::toString)
    57  *                           .toString();
    61  *                           .collect(Collectors.joining(", "));
    58  *
    62  *
    59  *     // Find highest-paid employee
    63  *     // Find highest-paid employee
    60  *     Employee highestPaid = employees.stream()
    64  *     Employee highestPaid = employees.stream()
    61  *                                     .collect(Collectors.maxBy(Comparators.comparing(Employee::getSalary)));
    65  *                                     .collect(Collectors.maxBy(Comparators.comparing(Employee::getSalary)))
       
    66  *                                     .get();
    62  *
    67  *
    63  *     // Group employees by department
    68  *     // Group employees by department
    64  *     Map<Department, List<Employee>> byDept
    69  *     Map<Department, List<Employee>> byDept
    65  *         = employees.stream()
    70  *         = employees.stream()
    66  *                    .collect(Collectors.groupingBy(Employee::getDepartment));
    71  *                    .collect(Collectors.groupingBy(Employee::getDepartment));
    67  *
    72  *
    68  *     // Find highest-paid employee by department
    73  *     // Find highest-paid employee by department
    69  *     Map<Department, Employee> highestPaidByDept
    74  *     Map<Department, Optional<Employee>> highestPaidByDept
    70  *         = employees.stream()
    75  *         = employees.stream()
    71  *                    .collect(Collectors.groupingBy(Employee::getDepartment,
    76  *                    .collect(Collectors.groupingBy(Employee::getDepartment,
    72  *                                                   Collectors.maxBy(Comparators.comparing(Employee::getSalary))));
    77  *                                                   Collectors.maxBy(Comparators.comparing(Employee::getSalary))));
    73  *
    78  *
    74  *     // Partition students into passing and failing
    79  *     // Partition students into passing and failing
    75  *     Map<Boolean, List<Student>> passingFailing =
    80  *     Map<Boolean, List<Student>> passingFailing =
    76  *         students.stream()
    81  *         students.stream()
    77  *                 .collect(Collectors.partitioningBy(s -> s.getGrade() >= PASS_THRESHOLD);
    82  *                 .collect(Collectors.partitioningBy(s -> s.getGrade() >= PASS_THRESHOLD));
    78  *
    83  *
    79  * }</pre>
    84  * }</pre>
    80  *
    85  *
    81  * <p>A {@code Collector} is specified by three functions that work together to
    86  * <p>A {@code Collector} is specified by four functions that work together to
    82  * manage a result or result container.  They are: creation of an initial
    87  * accumulate entries into a mutable result container, and optionally perform
    83  * result, incorporating a new data element into a result, and combining two
    88  * a final transform on the result.  They are: creation of a new result container,
    84  * results into one. The last function -- combining two results into one -- is
    89  * incorporating a new data element into a result container, combining two
    85  * used during parallel operations, where subsets of the input are accumulated
    90  * result containers into one, and performing a final transform on the container.
    86  * in parallel, and then the subresults merged into a combined result. The
    91  * The combiner function is used during parallel operations, where
    87  * result may be a mutable container or a value.  If the result is mutable, the
    92  * subsets of the input are accumulated into separate result
    88  * accumulation and combination functions may either mutate their left argument
    93  * containers, and then the subresults merged into a combined result.  The
    89  * and return that (such as adding elements to a collection), or return a new
    94  * combiner function may merge one set of subresults into the other and return
    90  * result, in which case it should not perform any mutation.
    95  * that, or it may return a new object to describe the combined results.
    91  *
    96  *
    92  * <p>Collectors also have a set of characteristics, including
    97  * <p>Collectors also have a set of characteristics, such as
    93  * {@link Characteristics#CONCURRENT} and
    98  * {@link Characteristics#CONCURRENT}.  These characteristics provide
    94  * {@link Characteristics#STRICTLY_MUTATIVE}.  These characteristics provide
       
    95  * hints that can be used by a reduction implementation to provide better
    99  * hints that can be used by a reduction implementation to provide better
    96  * performance.
   100  * performance.
    97  *
   101  *
    98  * <p>Libraries that implement reduction based on {@code Collector}, such as
   102  * <p>Libraries that implement reduction based on {@code Collector}, such as
    99  * {@link Stream#collect(Collector)}, must adhere to the following constraints:
   103  * {@link Stream#collect(Collector)}, must adhere to the following constraints:
   100  * <ul>
   104  * <ul>
   101  *     <li>The first argument passed to the accumulator function, and both
   105  *     <li>The first argument passed to the accumulator function, both
   102  *     arguments passed to the combiner function, must be the result of a
   106  *     arguments passed to the combiner function, and the argument passed to the
   103  *     previous invocation of {@link #resultSupplier()}, {@link #accumulator()},
   107  *     finisher function must be the result of a previous invocation of the
   104  *     or {@link #combiner()}.</li>
   108  *     result supplier, accumulator, or combiner functions.</li>
   105  *     <li>The implementation should not do anything with the result of any of
   109  *     <li>The implementation should not do anything with the result of any of
   106  *     the result supplier, accumulator, or combiner functions other than to
   110  *     the result supplier, accumulator, or combiner functions other than to
   107  *     pass them again to the accumulator or combiner functions, or return them
   111  *     pass them again to the accumulator, combiner, or finisher functions,
   108  *     to the caller of the reduction operation.</li>
   112  *     or return them to the caller of the reduction operation.</li>
   109  *     <li>If a result is passed to the accumulator or combiner function, and
   113  *     <li>If a result is passed to the combiner or finisher
   110  *     the same object is not returned from that function, it is never used
   114  *     function, and the same object is not returned from that function, it is
   111  *     again.</li>
   115  *     never used again.</li>
   112  *     <li>Once a result is passed to the combiner function, it is never passed
   116  *     <li>Once a result is passed to the combiner or finisher function, it
   113  *     to the accumulator function again.</li>
   117  *     is never passed to the accumulator function again.</li>
   114  *     <li>For non-concurrent collectors, any result returned from the result
   118  *     <li>For non-concurrent collectors, any result returned from the result
   115  *     supplier, accumulator, or combiner functions must be serially
   119  *     supplier, accumulator, or combiner functions must be serially
   116  *     thread-confined.  This enables collection to occur in parallel without
   120  *     thread-confined.  This enables collection to occur in parallel without
   117  *     the {@code Collector} needing to implement any additional synchronization.
   121  *     the {@code Collector} needing to implement any additional synchronization.
   118  *     The reduction implementation must manage that the input is properly
   122  *     The reduction implementation must manage that the input is properly
   130  *
   134  *
   131  * @apiNote
   135  * @apiNote
   132  * Performing a reduction operation with a {@code Collector} should produce a
   136  * Performing a reduction operation with a {@code Collector} should produce a
   133  * result equivalent to:
   137  * result equivalent to:
   134  * <pre>{@code
   138  * <pre>{@code
   135  *     BiFunction<R,T,R> accumulator = collector.accumulator();
   139  *     R container = collector.supplier().get();
   136  *     R result = collector.resultSupplier().get();
       
   137  *     for (T t : data)
   140  *     for (T t : data)
   138  *         result = accumulator.apply(result, t);
   141  *         collector.accumulator().accept(container, t);
   139  *     return result;
   142  *     return collector.finisher().apply(container);
   140  * }</pre>
   143  * }</pre>
   141  *
   144  *
   142  * <p>However, the library is free to partition the input, perform the reduction
   145  * <p>However, the library is free to partition the input, perform the reduction
   143  * on the partitions, and then use the combiner function to combine the partial
   146  * on the partitions, and then use the combiner function to combine the partial
   144  * results to achieve a parallel reduction.  Depending on the specific reduction
   147  * results to achieve a parallel reduction.  Depending on the specific reduction
   147  *
   150  *
   148  * <p>An example of an operation that can be easily modeled by {@code Collector}
   151  * <p>An example of an operation that can be easily modeled by {@code Collector}
   149  * is accumulating elements into a {@code TreeSet}. In this case, the {@code
   152  * is accumulating elements into a {@code TreeSet}. In this case, the {@code
   150  * resultSupplier()} function is {@code () -> new Treeset<T>()}, the
   153  * resultSupplier()} function is {@code () -> new Treeset<T>()}, the
   151  * {@code accumulator} function is
   154  * {@code accumulator} function is
   152  * {@code (set, element) -> { set.add(element); return set; }}, and the combiner
   155  * {@code (set, element) -> set.add(element) }, and the combiner
   153  * function is {@code (left, right) -> { left.addAll(right); return left; }}.
   156  * function is {@code (left, right) -> { left.addAll(right); return left; }}.
   154  * (This behavior is implemented by
   157  * (This behavior is implemented by
   155  * {@code Collectors.toCollection(TreeSet::new)}).
   158  * {@code Collectors.toCollection(TreeSet::new)}).
   156  *
   159  *
   157  * TODO Associativity and commutativity
   160  * TODO Associativity and commutativity
   158  *
   161  *
   159  * @see Stream#collect(Collector)
   162  * @see Stream#collect(Collector)
   160  * @see Collectors
   163  * @see Collectors
   161  *
   164  *
   162  * @param <T> the type of input element to the collect operation
   165  * @param <T> the type of input elements to the reduction operation
   163  * @param <R> the result type of the collect operation
   166  * @param <A> the mutable accumulation type of the reduction operation (often
       
   167  *           hidden as an implementation detail)
       
   168  * @param <R> the result type of the reduction operation
   164  * @since 1.8
   169  * @since 1.8
   165  */
   170  */
   166 public interface Collector<T, R> {
   171 public interface Collector<T, A, R> {
   167     /**
   172     /**
   168      * A function that creates and returns a new result that represents
   173      * A function that creates and returns a new mutable result container.
   169      * "no values".  If the accumulator or combiner functions may mutate their
   174      *
   170      * arguments, this must be a new, empty result container.
   175      * @return a function which returns a new, mutable result container
   171      *
   176      */
   172      * @return a function which, when invoked, returns a result representing
   177     Supplier<A> supplier();
   173      * "no values"
   178 
   174      */
   179     /**
   175     Supplier<R> resultSupplier();
   180      * A function that folds a new value into a mutable result container.
   176 
   181      *
   177     /**
   182      * @return a function which folds a new value into a mutable result container
   178      * A function that folds a new value into a cumulative result.  The result
   183      */
   179      * may be a mutable result container or a value.  The accumulator function
   184     BiConsumer<A, T> accumulator();
   180      * may modify a mutable container and return it, or create a new result and
       
   181      * return that, but if it returns a new result object, it must not modify
       
   182      * any of its arguments.
       
   183      *
       
   184      * <p>If the collector has the {@link Characteristics#STRICTLY_MUTATIVE}
       
   185      * characteristic, then the accumulator function <em>must</em> always return
       
   186      * its first argument, after possibly mutating its state.
       
   187      *
       
   188      * @return a function which folds a new value into a cumulative result
       
   189      */
       
   190     BiFunction<R, T, R> accumulator();
       
   191 
   185 
   192     /**
   186     /**
   193      * A function that accepts two partial results and merges them.  The
   187      * A function that accepts two partial results and merges them.  The
   194      * combiner function may fold state from one argument into the other and
   188      * combiner function may fold state from one argument into the other and
   195      * return that, or may return a new result object, but if it returns
   189      * return that, or may return a new result object.
   196      * a new result object, it must not modify the state of either of its
       
   197      * arguments.
       
   198      *
       
   199      * <p>If the collector has the {@link Characteristics#STRICTLY_MUTATIVE}
       
   200      * characteristic, then the combiner function <em>must</em> always return
       
   201      * its first argument, after possibly mutating its state.
       
   202      *
   190      *
   203      * @return a function which combines two partial results into a cumulative
   191      * @return a function which combines two partial results into a cumulative
   204      * result
   192      * result
   205      */
   193      */
   206     BinaryOperator<R> combiner();
   194     BinaryOperator<A> combiner();
       
   195 
       
   196     /**
       
   197      * Perform the final transformation from the intermediate accumulation type
       
   198      * {@code A} to the final result representation {@code R}.
       
   199      *
       
   200      * <p>If the characteristic {@code IDENTITY_TRANSFORM} is
       
   201      * set, this function may be presumed to be an identity transform with an
       
   202      * unchecked cast from {@code A} to {@code R}.
       
   203      *
       
   204      * @return a function which transforms the intermediate result to the final
       
   205      * result
       
   206      */
       
   207     Function<A, R> finisher();
   207 
   208 
   208     /**
   209     /**
   209      * Returns a {@code Set} of {@code Collector.Characteristics} indicating
   210      * Returns a {@code Set} of {@code Collector.Characteristics} indicating
   210      * the characteristics of this Collector.  This set should be immutable.
   211      * the characteristics of this Collector.  This set should be immutable.
   211      *
   212      *
   212      * @return an immutable set of collector characteristics
   213      * @return an immutable set of collector characteristics
   213      */
   214      */
   214     Set<Characteristics> characteristics();
   215     Set<Characteristics> characteristics();
       
   216 
       
   217     /**
       
   218      * Returns a new {@code Collector} described by the given {@code supplier},
       
   219      * {@code accumulator}, and {@code combiner} functions.  The resulting
       
   220      * {@code Collector} has the {@code Collector.Characteristics.IDENTITY_FINISH}
       
   221      * characteristic.
       
   222      *
       
   223      * @param supplier The supplier function for the new collector
       
   224      * @param accumulator The accumulator function for the new collector
       
   225      * @param combiner The combiner function for the new collector
       
   226      * @param characteristics The collector characteristics for the new
       
   227      *                        collector
       
   228      * @param <T> The type of input elements for the new collector
       
   229      * @param <R> The type of intermediate accumulation result, and final result,
       
   230      *           for the new collector
       
   231      * @return the new {@code Collector}
       
   232      */
       
   233     public static<T, R> Collector<T, R, R> of(Supplier<R> supplier,
       
   234                                               BiConsumer<R, T> accumulator,
       
   235                                               BinaryOperator<R> combiner,
       
   236                                               Characteristics... characteristics) {
       
   237         Set<Characteristics> cs = (characteristics.length == 0)
       
   238                                   ? Collectors.CH_ID
       
   239                                   : Collections.unmodifiableSet(EnumSet.of(Collector.Characteristics.IDENTITY_FINISH,
       
   240                                                                            characteristics));
       
   241         return new Collectors.CollectorImpl<>(supplier, accumulator, combiner, cs);
       
   242     }
       
   243 
       
   244     /**
       
   245      * Returns a new {@code Collector} described by the given {@code supplier},
       
   246      * {@code accumulator}, {@code combiner}, and {@code finisher} functions.
       
   247      *
       
   248      * @param supplier The supplier function for the new collector
       
   249      * @param accumulator The accumulator function for the new collector
       
   250      * @param combiner The combiner function for the new collector
       
   251      * @param finisher The finisher function for the new collector
       
   252      * @param characteristics The collector characteristics for the new
       
   253      *                        collector
       
   254      * @param <T> The type of input elements for the new collector
       
   255      * @param <A> The intermediate accumulation type of the new collector
       
   256      * @param <R> The final result type of the new collector
       
   257      * @return the new {@code Collector}
       
   258      */
       
   259     public static<T, A, R> Collector<T, A, R> of(Supplier<A> supplier,
       
   260                                                  BiConsumer<A, T> accumulator,
       
   261                                                  BinaryOperator<A> combiner,
       
   262                                                  Function<A, R> finisher,
       
   263                                                  Characteristics... characteristics) {
       
   264         Set<Characteristics> cs = Collectors.CH_NOID;
       
   265         if (characteristics.length > 0) {
       
   266             cs = EnumSet.noneOf(Characteristics.class);
       
   267             Collections.addAll(cs, characteristics);
       
   268             cs = Collections.unmodifiableSet(cs);
       
   269         }
       
   270         return new Collectors.CollectorImpl<>(supplier, accumulator, combiner, finisher, cs);
       
   271     }
   215 
   272 
   216     /**
   273     /**
   217      * Characteristics indicating properties of a {@code Collector}, which can
   274      * Characteristics indicating properties of a {@code Collector}, which can
   218      * be used to optimize reduction implementations.
   275      * be used to optimize reduction implementations.
   219      */
   276      */
   220     enum Characteristics {
   277     enum Characteristics {
   221         /**
   278         /**
   222          * Indicates that this collector is <em>concurrent</em>, meaning that
   279          * Indicates that this collector is <em>concurrent</em>, meaning that
   223          * the result container can support the accumulator function being
   280          * the result container can support the accumulator function being
   224          * called concurrently with the same result container from multiple
   281          * called concurrently with the same result container from multiple
   225          * threads. Concurrent collectors must also always have the
   282          * threads.
   226          * {@code STRICTLY_MUTATIVE} characteristic.
       
   227          *
   283          *
   228          * <p>If a {@code CONCURRENT} collector is not also {@code UNORDERED},
   284          * <p>If a {@code CONCURRENT} collector is not also {@code UNORDERED},
   229          * then it should only be evaluated concurrently if applied to an
   285          * then it should only be evaluated concurrently if applied to an
   230          * unordered data source.
   286          * unordered data source.
   231          */
   287          */
   236          * a {@link Set}.
   292          * a {@link Set}.
   237          */
   293          */
   238         UNORDERED,
   294         UNORDERED,
   239 
   295 
   240         /**
   296         /**
   241          * Indicates that this collector operates by strict mutation of its
   297          * Indicates that the finisher function is the identity function and
   242          * result container. This means that the {@link #accumulator()} and
   298          * can be elided.  If set, it must be the case that an unchecked cast
   243          * {@link #combiner()} functions will always modify the state of and
   299          * from A to R will succeed.
   244          * return their first argument, rather than returning a different result
       
   245          * container.
       
   246          */
   300          */
   247         STRICTLY_MUTATIVE
   301         IDENTITY_FINISH
   248     }
   302     }
   249 }
   303 }