23 * questions. |
23 * questions. |
24 */ |
24 */ |
25 package java.util.stream; |
25 package java.util.stream; |
26 |
26 |
27 import java.util.Collections; |
27 import java.util.Collections; |
|
28 import java.util.EnumSet; |
28 import java.util.Set; |
29 import java.util.Set; |
29 import java.util.function.BiFunction; |
30 import java.util.function.BiConsumer; |
30 import java.util.function.BinaryOperator; |
31 import java.util.function.BinaryOperator; |
|
32 import java.util.function.Function; |
31 import java.util.function.Supplier; |
33 import java.util.function.Supplier; |
32 |
34 |
33 /** |
35 /** |
34 * A <a href="package-summary.html#Reduction">reduction operation</a> that |
36 * A <a href="package-summary.html#Reduction">reduction operation</a> that |
35 * supports folding input elements into a cumulative result. The result may be |
37 * folds input elements into a mutable result container, optionally transforming |
36 * a value or may be a mutable result container. Examples of operations |
38 * the accumulated result into a final representation after all input elements |
37 * accumulating results into a mutable result container include: accumulating |
39 * have been processed. |
38 * input elements into a {@code Collection}; concatenating strings into a |
40 * |
39 * {@code StringBuilder}; computing summary information about elements such as |
41 * <p>Examples of mutable reduction operations include: |
40 * sum, min, max, or average; computing "pivot table" summaries such as "maximum |
42 * accumulating elements into a {@code Collection}; concatenating |
41 * valued transaction by seller", etc. Reduction operations can be performed |
43 * strings using a {@code StringBuilder}; computing summary information about |
42 * either sequentially or in parallel. |
44 * elements such as sum, min, max, or average; computing "pivot table" summaries |
|
45 * such as "maximum valued transaction by seller", etc. Reduction operations |
|
46 * can be performed either sequentially or in parallel. |
43 * |
47 * |
44 * <p>The following are examples of using the predefined {@code Collector} |
48 * <p>The following are examples of using the predefined {@code Collector} |
45 * implementations in {@link Collectors} with the {@code Stream} API to perform |
49 * implementations in {@link Collectors} with the {@code Stream} API to perform |
46 * mutable reduction tasks: |
50 * mutable reduction tasks: |
47 * <pre>{@code |
51 * <pre>{@code |
48 * // Accumulate elements into a List |
52 * // Accumulate names into a List |
49 * List<String> list = stream.collect(Collectors.toList()); |
53 * List<String> list = people.stream().map(Person::getName).collect(Collectors.toList()); |
50 * |
54 * |
51 * // Accumulate elements into a TreeSet |
55 * // Accumulate names into a TreeSet |
52 * Set<String> list = stream.collect(Collectors.toCollection(TreeSet::new)); |
56 * Set<String> list = people.stream().map(Person::getName).collect(Collectors.toCollection(TreeSet::new)); |
53 * |
57 * |
54 * // Convert elements to strings and concatenate them, separated by commas |
58 * // Convert elements to strings and concatenate them, separated by commas |
55 * String joined = stream.map(Object::toString) |
59 * String joined = things.stream() |
56 * .collect(Collectors.toStringJoiner(", ")) |
60 * .map(Object::toString) |
57 * .toString(); |
61 * .collect(Collectors.joining(", ")); |
58 * |
62 * |
59 * // Find highest-paid employee |
63 * // Find highest-paid employee |
60 * Employee highestPaid = employees.stream() |
64 * Employee highestPaid = employees.stream() |
61 * .collect(Collectors.maxBy(Comparators.comparing(Employee::getSalary))); |
65 * .collect(Collectors.maxBy(Comparators.comparing(Employee::getSalary))) |
|
66 * .get(); |
62 * |
67 * |
63 * // Group employees by department |
68 * // Group employees by department |
64 * Map<Department, List<Employee>> byDept |
69 * Map<Department, List<Employee>> byDept |
65 * = employees.stream() |
70 * = employees.stream() |
66 * .collect(Collectors.groupingBy(Employee::getDepartment)); |
71 * .collect(Collectors.groupingBy(Employee::getDepartment)); |
67 * |
72 * |
68 * // Find highest-paid employee by department |
73 * // Find highest-paid employee by department |
69 * Map<Department, Employee> highestPaidByDept |
74 * Map<Department, Optional<Employee>> highestPaidByDept |
70 * = employees.stream() |
75 * = employees.stream() |
71 * .collect(Collectors.groupingBy(Employee::getDepartment, |
76 * .collect(Collectors.groupingBy(Employee::getDepartment, |
72 * Collectors.maxBy(Comparators.comparing(Employee::getSalary)))); |
77 * Collectors.maxBy(Comparators.comparing(Employee::getSalary)))); |
73 * |
78 * |
74 * // Partition students into passing and failing |
79 * // Partition students into passing and failing |
75 * Map<Boolean, List<Student>> passingFailing = |
80 * Map<Boolean, List<Student>> passingFailing = |
76 * students.stream() |
81 * students.stream() |
77 * .collect(Collectors.partitioningBy(s -> s.getGrade() >= PASS_THRESHOLD); |
82 * .collect(Collectors.partitioningBy(s -> s.getGrade() >= PASS_THRESHOLD)); |
78 * |
83 * |
79 * }</pre> |
84 * }</pre> |
80 * |
85 * |
81 * <p>A {@code Collector} is specified by three functions that work together to |
86 * <p>A {@code Collector} is specified by four functions that work together to |
82 * manage a result or result container. They are: creation of an initial |
87 * accumulate entries into a mutable result container, and optionally perform |
83 * result, incorporating a new data element into a result, and combining two |
88 * a final transform on the result. They are: creation of a new result container, |
84 * results into one. The last function -- combining two results into one -- is |
89 * incorporating a new data element into a result container, combining two |
85 * used during parallel operations, where subsets of the input are accumulated |
90 * result containers into one, and performing a final transform on the container. |
86 * in parallel, and then the subresults merged into a combined result. The |
91 * The combiner function is used during parallel operations, where |
87 * result may be a mutable container or a value. If the result is mutable, the |
92 * subsets of the input are accumulated into separate result |
88 * accumulation and combination functions may either mutate their left argument |
93 * containers, and then the subresults merged into a combined result. The |
89 * and return that (such as adding elements to a collection), or return a new |
94 * combiner function may merge one set of subresults into the other and return |
90 * result, in which case it should not perform any mutation. |
95 * that, or it may return a new object to describe the combined results. |
91 * |
96 * |
92 * <p>Collectors also have a set of characteristics, including |
97 * <p>Collectors also have a set of characteristics, such as |
93 * {@link Characteristics#CONCURRENT} and |
98 * {@link Characteristics#CONCURRENT}. These characteristics provide |
94 * {@link Characteristics#STRICTLY_MUTATIVE}. These characteristics provide |
|
95 * hints that can be used by a reduction implementation to provide better |
99 * hints that can be used by a reduction implementation to provide better |
96 * performance. |
100 * performance. |
97 * |
101 * |
98 * <p>Libraries that implement reduction based on {@code Collector}, such as |
102 * <p>Libraries that implement reduction based on {@code Collector}, such as |
99 * {@link Stream#collect(Collector)}, must adhere to the following constraints: |
103 * {@link Stream#collect(Collector)}, must adhere to the following constraints: |
100 * <ul> |
104 * <ul> |
101 * <li>The first argument passed to the accumulator function, and both |
105 * <li>The first argument passed to the accumulator function, both |
102 * arguments passed to the combiner function, must be the result of a |
106 * arguments passed to the combiner function, and the argument passed to the |
103 * previous invocation of {@link #resultSupplier()}, {@link #accumulator()}, |
107 * finisher function must be the result of a previous invocation of the |
104 * or {@link #combiner()}.</li> |
108 * result supplier, accumulator, or combiner functions.</li> |
105 * <li>The implementation should not do anything with the result of any of |
109 * <li>The implementation should not do anything with the result of any of |
106 * the result supplier, accumulator, or combiner functions other than to |
110 * the result supplier, accumulator, or combiner functions other than to |
107 * pass them again to the accumulator or combiner functions, or return them |
111 * pass them again to the accumulator, combiner, or finisher functions, |
108 * to the caller of the reduction operation.</li> |
112 * or return them to the caller of the reduction operation.</li> |
109 * <li>If a result is passed to the accumulator or combiner function, and |
113 * <li>If a result is passed to the combiner or finisher |
110 * the same object is not returned from that function, it is never used |
114 * function, and the same object is not returned from that function, it is |
111 * again.</li> |
115 * never used again.</li> |
112 * <li>Once a result is passed to the combiner function, it is never passed |
116 * <li>Once a result is passed to the combiner or finisher function, it |
113 * to the accumulator function again.</li> |
117 * is never passed to the accumulator function again.</li> |
114 * <li>For non-concurrent collectors, any result returned from the result |
118 * <li>For non-concurrent collectors, any result returned from the result |
115 * supplier, accumulator, or combiner functions must be serially |
119 * supplier, accumulator, or combiner functions must be serially |
116 * thread-confined. This enables collection to occur in parallel without |
120 * thread-confined. This enables collection to occur in parallel without |
117 * the {@code Collector} needing to implement any additional synchronization. |
121 * the {@code Collector} needing to implement any additional synchronization. |
118 * The reduction implementation must manage that the input is properly |
122 * The reduction implementation must manage that the input is properly |
147 * |
150 * |
148 * <p>An example of an operation that can be easily modeled by {@code Collector} |
151 * <p>An example of an operation that can be easily modeled by {@code Collector} |
149 * is accumulating elements into a {@code TreeSet}. In this case, the {@code |
152 * is accumulating elements into a {@code TreeSet}. In this case, the {@code |
150 * resultSupplier()} function is {@code () -> new Treeset<T>()}, the |
153 * resultSupplier()} function is {@code () -> new Treeset<T>()}, the |
151 * {@code accumulator} function is |
154 * {@code accumulator} function is |
152 * {@code (set, element) -> { set.add(element); return set; }}, and the combiner |
155 * {@code (set, element) -> set.add(element) }, and the combiner |
153 * function is {@code (left, right) -> { left.addAll(right); return left; }}. |
156 * function is {@code (left, right) -> { left.addAll(right); return left; }}. |
154 * (This behavior is implemented by |
157 * (This behavior is implemented by |
155 * {@code Collectors.toCollection(TreeSet::new)}). |
158 * {@code Collectors.toCollection(TreeSet::new)}). |
156 * |
159 * |
157 * TODO Associativity and commutativity |
160 * TODO Associativity and commutativity |
158 * |
161 * |
159 * @see Stream#collect(Collector) |
162 * @see Stream#collect(Collector) |
160 * @see Collectors |
163 * @see Collectors |
161 * |
164 * |
162 * @param <T> the type of input element to the collect operation |
165 * @param <T> the type of input elements to the reduction operation |
163 * @param <R> the result type of the collect operation |
166 * @param <A> the mutable accumulation type of the reduction operation (often |
|
167 * hidden as an implementation detail) |
|
168 * @param <R> the result type of the reduction operation |
164 * @since 1.8 |
169 * @since 1.8 |
165 */ |
170 */ |
166 public interface Collector<T, R> { |
171 public interface Collector<T, A, R> { |
167 /** |
172 /** |
168 * A function that creates and returns a new result that represents |
173 * A function that creates and returns a new mutable result container. |
169 * "no values". If the accumulator or combiner functions may mutate their |
174 * |
170 * arguments, this must be a new, empty result container. |
175 * @return a function which returns a new, mutable result container |
171 * |
176 */ |
172 * @return a function which, when invoked, returns a result representing |
177 Supplier<A> supplier(); |
173 * "no values" |
178 |
174 */ |
179 /** |
175 Supplier<R> resultSupplier(); |
180 * A function that folds a new value into a mutable result container. |
176 |
181 * |
177 /** |
182 * @return a function which folds a new value into a mutable result container |
178 * A function that folds a new value into a cumulative result. The result |
183 */ |
179 * may be a mutable result container or a value. The accumulator function |
184 BiConsumer<A, T> accumulator(); |
180 * may modify a mutable container and return it, or create a new result and |
|
181 * return that, but if it returns a new result object, it must not modify |
|
182 * any of its arguments. |
|
183 * |
|
184 * <p>If the collector has the {@link Characteristics#STRICTLY_MUTATIVE} |
|
185 * characteristic, then the accumulator function <em>must</em> always return |
|
186 * its first argument, after possibly mutating its state. |
|
187 * |
|
188 * @return a function which folds a new value into a cumulative result |
|
189 */ |
|
190 BiFunction<R, T, R> accumulator(); |
|
191 |
185 |
192 /** |
186 /** |
193 * A function that accepts two partial results and merges them. The |
187 * A function that accepts two partial results and merges them. The |
194 * combiner function may fold state from one argument into the other and |
188 * combiner function may fold state from one argument into the other and |
195 * return that, or may return a new result object, but if it returns |
189 * return that, or may return a new result object. |
196 * a new result object, it must not modify the state of either of its |
|
197 * arguments. |
|
198 * |
|
199 * <p>If the collector has the {@link Characteristics#STRICTLY_MUTATIVE} |
|
200 * characteristic, then the combiner function <em>must</em> always return |
|
201 * its first argument, after possibly mutating its state. |
|
202 * |
190 * |
203 * @return a function which combines two partial results into a cumulative |
191 * @return a function which combines two partial results into a cumulative |
204 * result |
192 * result |
205 */ |
193 */ |
206 BinaryOperator<R> combiner(); |
194 BinaryOperator<A> combiner(); |
|
195 |
|
196 /** |
|
197 * Perform the final transformation from the intermediate accumulation type |
|
198 * {@code A} to the final result representation {@code R}. |
|
199 * |
|
200 * <p>If the characteristic {@code IDENTITY_TRANSFORM} is |
|
201 * set, this function may be presumed to be an identity transform with an |
|
202 * unchecked cast from {@code A} to {@code R}. |
|
203 * |
|
204 * @return a function which transforms the intermediate result to the final |
|
205 * result |
|
206 */ |
|
207 Function<A, R> finisher(); |
207 |
208 |
208 /** |
209 /** |
209 * Returns a {@code Set} of {@code Collector.Characteristics} indicating |
210 * Returns a {@code Set} of {@code Collector.Characteristics} indicating |
210 * the characteristics of this Collector. This set should be immutable. |
211 * the characteristics of this Collector. This set should be immutable. |
211 * |
212 * |
212 * @return an immutable set of collector characteristics |
213 * @return an immutable set of collector characteristics |
213 */ |
214 */ |
214 Set<Characteristics> characteristics(); |
215 Set<Characteristics> characteristics(); |
|
216 |
|
217 /** |
|
218 * Returns a new {@code Collector} described by the given {@code supplier}, |
|
219 * {@code accumulator}, and {@code combiner} functions. The resulting |
|
220 * {@code Collector} has the {@code Collector.Characteristics.IDENTITY_FINISH} |
|
221 * characteristic. |
|
222 * |
|
223 * @param supplier The supplier function for the new collector |
|
224 * @param accumulator The accumulator function for the new collector |
|
225 * @param combiner The combiner function for the new collector |
|
226 * @param characteristics The collector characteristics for the new |
|
227 * collector |
|
228 * @param <T> The type of input elements for the new collector |
|
229 * @param <R> The type of intermediate accumulation result, and final result, |
|
230 * for the new collector |
|
231 * @return the new {@code Collector} |
|
232 */ |
|
233 public static<T, R> Collector<T, R, R> of(Supplier<R> supplier, |
|
234 BiConsumer<R, T> accumulator, |
|
235 BinaryOperator<R> combiner, |
|
236 Characteristics... characteristics) { |
|
237 Set<Characteristics> cs = (characteristics.length == 0) |
|
238 ? Collectors.CH_ID |
|
239 : Collections.unmodifiableSet(EnumSet.of(Collector.Characteristics.IDENTITY_FINISH, |
|
240 characteristics)); |
|
241 return new Collectors.CollectorImpl<>(supplier, accumulator, combiner, cs); |
|
242 } |
|
243 |
|
244 /** |
|
245 * Returns a new {@code Collector} described by the given {@code supplier}, |
|
246 * {@code accumulator}, {@code combiner}, and {@code finisher} functions. |
|
247 * |
|
248 * @param supplier The supplier function for the new collector |
|
249 * @param accumulator The accumulator function for the new collector |
|
250 * @param combiner The combiner function for the new collector |
|
251 * @param finisher The finisher function for the new collector |
|
252 * @param characteristics The collector characteristics for the new |
|
253 * collector |
|
254 * @param <T> The type of input elements for the new collector |
|
255 * @param <A> The intermediate accumulation type of the new collector |
|
256 * @param <R> The final result type of the new collector |
|
257 * @return the new {@code Collector} |
|
258 */ |
|
259 public static<T, A, R> Collector<T, A, R> of(Supplier<A> supplier, |
|
260 BiConsumer<A, T> accumulator, |
|
261 BinaryOperator<A> combiner, |
|
262 Function<A, R> finisher, |
|
263 Characteristics... characteristics) { |
|
264 Set<Characteristics> cs = Collectors.CH_NOID; |
|
265 if (characteristics.length > 0) { |
|
266 cs = EnumSet.noneOf(Characteristics.class); |
|
267 Collections.addAll(cs, characteristics); |
|
268 cs = Collections.unmodifiableSet(cs); |
|
269 } |
|
270 return new Collectors.CollectorImpl<>(supplier, accumulator, combiner, finisher, cs); |
|
271 } |
215 |
272 |
216 /** |
273 /** |
217 * Characteristics indicating properties of a {@code Collector}, which can |
274 * Characteristics indicating properties of a {@code Collector}, which can |
218 * be used to optimize reduction implementations. |
275 * be used to optimize reduction implementations. |
219 */ |
276 */ |
220 enum Characteristics { |
277 enum Characteristics { |
221 /** |
278 /** |
222 * Indicates that this collector is <em>concurrent</em>, meaning that |
279 * Indicates that this collector is <em>concurrent</em>, meaning that |
223 * the result container can support the accumulator function being |
280 * the result container can support the accumulator function being |
224 * called concurrently with the same result container from multiple |
281 * called concurrently with the same result container from multiple |
225 * threads. Concurrent collectors must also always have the |
282 * threads. |
226 * {@code STRICTLY_MUTATIVE} characteristic. |
|
227 * |
283 * |
228 * <p>If a {@code CONCURRENT} collector is not also {@code UNORDERED}, |
284 * <p>If a {@code CONCURRENT} collector is not also {@code UNORDERED}, |
229 * then it should only be evaluated concurrently if applied to an |
285 * then it should only be evaluated concurrently if applied to an |
230 * unordered data source. |
286 * unordered data source. |
231 */ |
287 */ |