|
1 /* |
|
2 * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. Oracle designates this |
|
8 * particular file as subject to the "Classpath" exception as provided |
|
9 * by Oracle in the LICENSE file that accompanied this code. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 */ |
|
25 package java.util.stream; |
|
26 |
|
27 import java.util.Collections; |
|
28 import java.util.Set; |
|
29 import java.util.function.BiFunction; |
|
30 import java.util.function.BinaryOperator; |
|
31 import java.util.function.Supplier; |
|
32 |
|
33 /** |
|
34 * A <a href="package-summary.html#Reduction">reduction operation</a> that |
|
35 * supports folding input elements into a cumulative result. The result may be |
|
36 * a value or may be a mutable result container. Examples of operations |
|
37 * accumulating results into a mutable result container include: accumulating |
|
38 * input elements into a {@code Collection}; concatenating strings into a |
|
39 * {@code StringBuilder}; computing summary information about elements such as |
|
40 * sum, min, max, or average; computing "pivot table" summaries such as "maximum |
|
41 * valued transaction by seller", etc. Reduction operations can be performed |
|
42 * either sequentially or in parallel. |
|
43 * |
|
44 * <p>The following are examples of using the predefined {@code Collector} |
|
45 * implementations in {@link Collectors} with the {@code Stream} API to perform |
|
46 * mutable reduction tasks: |
|
47 * <pre>{@code |
|
48 * // Accumulate elements into a List |
|
49 * List<String> list = stream.collect(Collectors.toList()); |
|
50 * |
|
51 * // Accumulate elements into a TreeSet |
|
52 * Set<String> list = stream.collect(Collectors.toCollection(TreeSet::new)); |
|
53 * |
|
54 * // Convert elements to strings and concatenate them, separated by commas |
|
55 * String joined = stream.map(Object::toString) |
|
56 * .collect(Collectors.toStringJoiner(", ")) |
|
57 * .toString(); |
|
58 * |
|
59 * // Find highest-paid employee |
|
60 * Employee highestPaid = employees.stream() |
|
61 * .collect(Collectors.maxBy(Comparators.comparing(Employee::getSalary))); |
|
62 * |
|
63 * // Group employees by department |
|
64 * Map<Department, List<Employee>> byDept |
|
65 * = employees.stream() |
|
66 * .collect(Collectors.groupingBy(Employee::getDepartment)); |
|
67 * |
|
68 * // Find highest-paid employee by department |
|
69 * Map<Department, Employee> highestPaidByDept |
|
70 * = employees.stream() |
|
71 * .collect(Collectors.groupingBy(Employee::getDepartment, |
|
72 * Collectors.maxBy(Comparators.comparing(Employee::getSalary)))); |
|
73 * |
|
74 * // Partition students into passing and failing |
|
75 * Map<Boolean, List<Student>> passingFailing = |
|
76 * students.stream() |
|
77 * .collect(Collectors.partitioningBy(s -> s.getGrade() >= PASS_THRESHOLD); |
|
78 * |
|
79 * }</pre> |
|
80 * |
|
81 * <p>A {@code Collector} is specified by three functions that work together to |
|
82 * manage a result or result container. They are: creation of an initial |
|
83 * result, incorporating a new data element into a result, and combining two |
|
84 * results into one. The last function -- combining two results into one -- is |
|
85 * used during parallel operations, where subsets of the input are accumulated |
|
86 * in parallel, and then the subresults merged into a combined result. The |
|
87 * result may be a mutable container or a value. If the result is mutable, the |
|
88 * accumulation and combination functions may either mutate their left argument |
|
89 * and return that (such as adding elements to a collection), or return a new |
|
90 * result, in which case it should not perform any mutation. |
|
91 * |
|
92 * <p>Collectors also have a set of characteristics, including |
|
93 * {@link Characteristics#CONCURRENT} and |
|
94 * {@link Characteristics#STRICTLY_MUTATIVE}. These characteristics provide |
|
95 * hints that can be used by a reduction implementation to provide better |
|
96 * performance. |
|
97 * |
|
98 * <p>Libraries that implement reduction based on {@code Collector}, such as |
|
99 * {@link Stream#collect(Collector)}, must adhere to the following constraints: |
|
100 * <ul> |
|
101 * <li>The first argument passed to the accumulator function, and both |
|
102 * arguments passed to the combiner function, must be the result of a |
|
103 * previous invocation of {@link #resultSupplier()}, {@link #accumulator()}, |
|
104 * or {@link #combiner()}.</li> |
|
105 * <li>The implementation should not do anything with the result of any of |
|
106 * the result supplier, accumulator, or combiner functions other than to |
|
107 * pass them again to the accumulator or combiner functions, or return them |
|
108 * to the caller of the reduction operation.</li> |
|
109 * <li>If a result is passed to the accumulator or combiner function, and |
|
110 * the same object is not returned from that function, it is never used |
|
111 * again.</li> |
|
112 * <li>Once a result is passed to the combiner function, it is never passed |
|
113 * to the accumulator function again.</li> |
|
114 * <li>For non-concurrent collectors, any result returned from the result |
|
115 * supplier, accumulator, or combiner functions must be serially |
|
116 * thread-confined. This enables collection to occur in parallel without |
|
117 * the {@code Collector} needing to implement any additional synchronization. |
|
118 * The reduction implementation must manage that the input is properly |
|
119 * partitioned, that partitions are processed in isolation, and combining |
|
120 * happens only after accumulation is complete.</li> |
|
121 * <li>For concurrent collectors, an implementation is free to (but not |
|
122 * required to) implement reduction concurrently. A concurrent reduction |
|
123 * is one where the accumulator function is called concurrently from |
|
124 * multiple threads, using the same concurrently-modifiable result container, |
|
125 * rather than keeping the result isolated during accumulation. |
|
126 * A concurrent reduction should only be applied if the collector has the |
|
127 * {@link Characteristics#UNORDERED} characteristics or if the |
|
128 * originating data is unordered.</li> |
|
129 * </ul> |
|
130 * |
|
131 * @apiNote |
|
132 * Performing a reduction operation with a {@code Collector} should produce a |
|
133 * result equivalent to: |
|
134 * <pre>{@code |
|
135 * BiFunction<R,T,R> accumulator = collector.accumulator(); |
|
136 * R result = collector.resultSupplier().get(); |
|
137 * for (T t : data) |
|
138 * result = accumulator.apply(result, t); |
|
139 * return result; |
|
140 * }</pre> |
|
141 * |
|
142 * <p>However, the library is free to partition the input, perform the reduction |
|
143 * on the partitions, and then use the combiner function to combine the partial |
|
144 * results to achieve a parallel reduction. Depending on the specific reduction |
|
145 * operation, this may perform better or worse, depending on the relative cost |
|
146 * of the accumulator and combiner functions. |
|
147 * |
|
148 * <p>An example of an operation that can be easily modeled by {@code Collector} |
|
149 * is accumulating elements into a {@code TreeSet}. In this case, the {@code |
|
150 * resultSupplier()} function is {@code () -> new Treeset<T>()}, the |
|
151 * {@code accumulator} function is |
|
152 * {@code (set, element) -> { set.add(element); return set; }}, and the combiner |
|
153 * function is {@code (left, right) -> { left.addAll(right); return left; }}. |
|
154 * (This behavior is implemented by |
|
155 * {@code Collectors.toCollection(TreeSet::new)}). |
|
156 * |
|
157 * TODO Associativity and commutativity |
|
158 * |
|
159 * @see Stream#collect(Collector) |
|
160 * @see Collectors |
|
161 * |
|
162 * @param <T> the type of input element to the collect operation |
|
163 * @param <R> the result type of the collect operation |
|
164 * @since 1.8 |
|
165 */ |
|
166 public interface Collector<T, R> { |
|
167 /** |
|
168 * A function that creates and returns a new result that represents |
|
169 * "no values". If the accumulator or combiner functions may mutate their |
|
170 * arguments, this must be a new, empty result container. |
|
171 * |
|
172 * @return a function which, when invoked, returns a result representing |
|
173 * "no values" |
|
174 */ |
|
175 Supplier<R> resultSupplier(); |
|
176 |
|
177 /** |
|
178 * A function that folds a new value into a cumulative result. The result |
|
179 * may be a mutable result container or a value. The accumulator function |
|
180 * may modify a mutable container and return it, or create a new result and |
|
181 * return that, but if it returns a new result object, it must not modify |
|
182 * any of its arguments. |
|
183 * |
|
184 * <p>If the collector has the {@link Characteristics#STRICTLY_MUTATIVE} |
|
185 * characteristic, then the accumulator function <em>must</em> always return |
|
186 * its first argument, after possibly mutating its state. |
|
187 * |
|
188 * @return a function which folds a new value into a cumulative result |
|
189 */ |
|
190 BiFunction<R, T, R> accumulator(); |
|
191 |
|
192 /** |
|
193 * A function that accepts two partial results and merges them. The |
|
194 * combiner function may fold state from one argument into the other and |
|
195 * return that, or may return a new result object, but if it returns |
|
196 * a new result object, it must not modify the state of either of its |
|
197 * arguments. |
|
198 * |
|
199 * <p>If the collector has the {@link Characteristics#STRICTLY_MUTATIVE} |
|
200 * characteristic, then the combiner function <em>must</em> always return |
|
201 * its first argument, after possibly mutating its state. |
|
202 * |
|
203 * @return a function which combines two partial results into a cumulative |
|
204 * result |
|
205 */ |
|
206 BinaryOperator<R> combiner(); |
|
207 |
|
208 /** |
|
209 * Returns a {@code Set} of {@code Collector.Characteristics} indicating |
|
210 * the characteristics of this Collector. This set should be immutable. |
|
211 * |
|
212 * @return an immutable set of collector characteristics |
|
213 */ |
|
214 Set<Characteristics> characteristics(); |
|
215 |
|
216 /** |
|
217 * Characteristics indicating properties of a {@code Collector}, which can |
|
218 * be used to optimize reduction implementations. |
|
219 */ |
|
220 enum Characteristics { |
|
221 /** |
|
222 * Indicates that this collector is <em>concurrent</em>, meaning that |
|
223 * the result container can support the accumulator function being |
|
224 * called concurrently with the same result container from multiple |
|
225 * threads. Concurrent collectors must also always have the |
|
226 * {@code STRICTLY_MUTATIVE} characteristic. |
|
227 * |
|
228 * <p>If a {@code CONCURRENT} collector is not also {@code UNORDERED}, |
|
229 * then it should only be evaluated concurrently if applied to an |
|
230 * unordered data source. |
|
231 */ |
|
232 CONCURRENT, |
|
233 |
|
234 /** |
|
235 * Indicates that the result container has no intrinsic order, such as |
|
236 * a {@link Set}. |
|
237 */ |
|
238 UNORDERED, |
|
239 |
|
240 /** |
|
241 * Indicates that this collector operates by strict mutation of its |
|
242 * result container. This means that the {@link #accumulator()} and |
|
243 * {@link #combiner()} functions will always modify the state of and |
|
244 * return their first argument, rather than returning a different result |
|
245 * container. |
|
246 */ |
|
247 STRICTLY_MUTATIVE |
|
248 } |
|
249 } |