17167
|
1 |
/*
|
|
2 |
* Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
|
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
4 |
*
|
|
5 |
* This code is free software; you can redistribute it and/or modify it
|
|
6 |
* under the terms of the GNU General Public License version 2 only, as
|
|
7 |
* published by the Free Software Foundation. Oracle designates this
|
|
8 |
* particular file as subject to the "Classpath" exception as provided
|
|
9 |
* by Oracle in the LICENSE file that accompanied this code.
|
|
10 |
*
|
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that
|
|
15 |
* accompanied this code).
|
|
16 |
*
|
|
17 |
* You should have received a copy of the GNU General Public License version
|
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
20 |
*
|
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
22 |
* or visit www.oracle.com if you need additional information or have any
|
|
23 |
* questions.
|
|
24 |
*/
|
|
25 |
package java.util.stream;
|
|
26 |
|
|
27 |
import java.util.Collections;
|
|
28 |
import java.util.Set;
|
|
29 |
import java.util.function.BiFunction;
|
|
30 |
import java.util.function.BinaryOperator;
|
|
31 |
import java.util.function.Supplier;
|
|
32 |
|
|
33 |
/**
|
|
34 |
* A <a href="package-summary.html#Reduction">reduction operation</a> that
|
|
35 |
* supports folding input elements into a cumulative result. The result may be
|
|
36 |
* a value or may be a mutable result container. Examples of operations
|
|
37 |
* accumulating results into a mutable result container include: accumulating
|
|
38 |
* input elements into a {@code Collection}; concatenating strings into a
|
|
39 |
* {@code StringBuilder}; computing summary information about elements such as
|
|
40 |
* sum, min, max, or average; computing "pivot table" summaries such as "maximum
|
|
41 |
* valued transaction by seller", etc. Reduction operations can be performed
|
|
42 |
* either sequentially or in parallel.
|
|
43 |
*
|
|
44 |
* <p>The following are examples of using the predefined {@code Collector}
|
|
45 |
* implementations in {@link Collectors} with the {@code Stream} API to perform
|
|
46 |
* mutable reduction tasks:
|
|
47 |
* <pre>{@code
|
|
48 |
* // Accumulate elements into a List
|
|
49 |
* List<String> list = stream.collect(Collectors.toList());
|
|
50 |
*
|
|
51 |
* // Accumulate elements into a TreeSet
|
|
52 |
* Set<String> list = stream.collect(Collectors.toCollection(TreeSet::new));
|
|
53 |
*
|
|
54 |
* // Convert elements to strings and concatenate them, separated by commas
|
|
55 |
* String joined = stream.map(Object::toString)
|
|
56 |
* .collect(Collectors.toStringJoiner(", "))
|
|
57 |
* .toString();
|
|
58 |
*
|
|
59 |
* // Find highest-paid employee
|
|
60 |
* Employee highestPaid = employees.stream()
|
|
61 |
* .collect(Collectors.maxBy(Comparators.comparing(Employee::getSalary)));
|
|
62 |
*
|
|
63 |
* // Group employees by department
|
|
64 |
* Map<Department, List<Employee>> byDept
|
|
65 |
* = employees.stream()
|
|
66 |
* .collect(Collectors.groupingBy(Employee::getDepartment));
|
|
67 |
*
|
|
68 |
* // Find highest-paid employee by department
|
|
69 |
* Map<Department, Employee> highestPaidByDept
|
|
70 |
* = employees.stream()
|
|
71 |
* .collect(Collectors.groupingBy(Employee::getDepartment,
|
|
72 |
* Collectors.maxBy(Comparators.comparing(Employee::getSalary))));
|
|
73 |
*
|
|
74 |
* // Partition students into passing and failing
|
|
75 |
* Map<Boolean, List<Student>> passingFailing =
|
|
76 |
* students.stream()
|
|
77 |
* .collect(Collectors.partitioningBy(s -> s.getGrade() >= PASS_THRESHOLD);
|
|
78 |
*
|
|
79 |
* }</pre>
|
|
80 |
*
|
|
81 |
* <p>A {@code Collector} is specified by three functions that work together to
|
|
82 |
* manage a result or result container. They are: creation of an initial
|
|
83 |
* result, incorporating a new data element into a result, and combining two
|
|
84 |
* results into one. The last function -- combining two results into one -- is
|
|
85 |
* used during parallel operations, where subsets of the input are accumulated
|
|
86 |
* in parallel, and then the subresults merged into a combined result. The
|
|
87 |
* result may be a mutable container or a value. If the result is mutable, the
|
|
88 |
* accumulation and combination functions may either mutate their left argument
|
|
89 |
* and return that (such as adding elements to a collection), or return a new
|
|
90 |
* result, in which case it should not perform any mutation.
|
|
91 |
*
|
|
92 |
* <p>Collectors also have a set of characteristics, including
|
|
93 |
* {@link Characteristics#CONCURRENT} and
|
|
94 |
* {@link Characteristics#STRICTLY_MUTATIVE}. These characteristics provide
|
|
95 |
* hints that can be used by a reduction implementation to provide better
|
|
96 |
* performance.
|
|
97 |
*
|
|
98 |
* <p>Libraries that implement reduction based on {@code Collector}, such as
|
|
99 |
* {@link Stream#collect(Collector)}, must adhere to the following constraints:
|
|
100 |
* <ul>
|
|
101 |
* <li>The first argument passed to the accumulator function, and both
|
|
102 |
* arguments passed to the combiner function, must be the result of a
|
|
103 |
* previous invocation of {@link #resultSupplier()}, {@link #accumulator()},
|
|
104 |
* or {@link #combiner()}.</li>
|
|
105 |
* <li>The implementation should not do anything with the result of any of
|
|
106 |
* the result supplier, accumulator, or combiner functions other than to
|
|
107 |
* pass them again to the accumulator or combiner functions, or return them
|
|
108 |
* to the caller of the reduction operation.</li>
|
|
109 |
* <li>If a result is passed to the accumulator or combiner function, and
|
|
110 |
* the same object is not returned from that function, it is never used
|
|
111 |
* again.</li>
|
|
112 |
* <li>Once a result is passed to the combiner function, it is never passed
|
|
113 |
* to the accumulator function again.</li>
|
|
114 |
* <li>For non-concurrent collectors, any result returned from the result
|
|
115 |
* supplier, accumulator, or combiner functions must be serially
|
|
116 |
* thread-confined. This enables collection to occur in parallel without
|
|
117 |
* the {@code Collector} needing to implement any additional synchronization.
|
|
118 |
* The reduction implementation must manage that the input is properly
|
|
119 |
* partitioned, that partitions are processed in isolation, and combining
|
|
120 |
* happens only after accumulation is complete.</li>
|
|
121 |
* <li>For concurrent collectors, an implementation is free to (but not
|
|
122 |
* required to) implement reduction concurrently. A concurrent reduction
|
|
123 |
* is one where the accumulator function is called concurrently from
|
|
124 |
* multiple threads, using the same concurrently-modifiable result container,
|
|
125 |
* rather than keeping the result isolated during accumulation.
|
|
126 |
* A concurrent reduction should only be applied if the collector has the
|
|
127 |
* {@link Characteristics#UNORDERED} characteristics or if the
|
|
128 |
* originating data is unordered.</li>
|
|
129 |
* </ul>
|
|
130 |
*
|
|
131 |
* @apiNote
|
|
132 |
* Performing a reduction operation with a {@code Collector} should produce a
|
|
133 |
* result equivalent to:
|
|
134 |
* <pre>{@code
|
|
135 |
* BiFunction<R,T,R> accumulator = collector.accumulator();
|
|
136 |
* R result = collector.resultSupplier().get();
|
|
137 |
* for (T t : data)
|
|
138 |
* result = accumulator.apply(result, t);
|
|
139 |
* return result;
|
|
140 |
* }</pre>
|
|
141 |
*
|
|
142 |
* <p>However, the library is free to partition the input, perform the reduction
|
|
143 |
* on the partitions, and then use the combiner function to combine the partial
|
|
144 |
* results to achieve a parallel reduction. Depending on the specific reduction
|
|
145 |
* operation, this may perform better or worse, depending on the relative cost
|
|
146 |
* of the accumulator and combiner functions.
|
|
147 |
*
|
|
148 |
* <p>An example of an operation that can be easily modeled by {@code Collector}
|
|
149 |
* is accumulating elements into a {@code TreeSet}. In this case, the {@code
|
|
150 |
* resultSupplier()} function is {@code () -> new Treeset<T>()}, the
|
|
151 |
* {@code accumulator} function is
|
|
152 |
* {@code (set, element) -> { set.add(element); return set; }}, and the combiner
|
|
153 |
* function is {@code (left, right) -> { left.addAll(right); return left; }}.
|
|
154 |
* (This behavior is implemented by
|
|
155 |
* {@code Collectors.toCollection(TreeSet::new)}).
|
|
156 |
*
|
|
157 |
* TODO Associativity and commutativity
|
|
158 |
*
|
|
159 |
* @see Stream#collect(Collector)
|
|
160 |
* @see Collectors
|
|
161 |
*
|
|
162 |
* @param <T> the type of input element to the collect operation
|
|
163 |
* @param <R> the result type of the collect operation
|
|
164 |
* @since 1.8
|
|
165 |
*/
|
|
166 |
public interface Collector<T, R> {
|
|
167 |
/**
|
|
168 |
* A function that creates and returns a new result that represents
|
|
169 |
* "no values". If the accumulator or combiner functions may mutate their
|
|
170 |
* arguments, this must be a new, empty result container.
|
|
171 |
*
|
|
172 |
* @return a function which, when invoked, returns a result representing
|
|
173 |
* "no values"
|
|
174 |
*/
|
|
175 |
Supplier<R> resultSupplier();
|
|
176 |
|
|
177 |
/**
|
|
178 |
* A function that folds a new value into a cumulative result. The result
|
|
179 |
* may be a mutable result container or a value. The accumulator function
|
|
180 |
* may modify a mutable container and return it, or create a new result and
|
|
181 |
* return that, but if it returns a new result object, it must not modify
|
|
182 |
* any of its arguments.
|
|
183 |
*
|
|
184 |
* <p>If the collector has the {@link Characteristics#STRICTLY_MUTATIVE}
|
|
185 |
* characteristic, then the accumulator function <em>must</em> always return
|
|
186 |
* its first argument, after possibly mutating its state.
|
|
187 |
*
|
|
188 |
* @return a function which folds a new value into a cumulative result
|
|
189 |
*/
|
|
190 |
BiFunction<R, T, R> accumulator();
|
|
191 |
|
|
192 |
/**
|
|
193 |
* A function that accepts two partial results and merges them. The
|
|
194 |
* combiner function may fold state from one argument into the other and
|
|
195 |
* return that, or may return a new result object, but if it returns
|
|
196 |
* a new result object, it must not modify the state of either of its
|
|
197 |
* arguments.
|
|
198 |
*
|
|
199 |
* <p>If the collector has the {@link Characteristics#STRICTLY_MUTATIVE}
|
|
200 |
* characteristic, then the combiner function <em>must</em> always return
|
|
201 |
* its first argument, after possibly mutating its state.
|
|
202 |
*
|
|
203 |
* @return a function which combines two partial results into a cumulative
|
|
204 |
* result
|
|
205 |
*/
|
|
206 |
BinaryOperator<R> combiner();
|
|
207 |
|
|
208 |
/**
|
|
209 |
* Returns a {@code Set} of {@code Collector.Characteristics} indicating
|
|
210 |
* the characteristics of this Collector. This set should be immutable.
|
|
211 |
*
|
|
212 |
* @return an immutable set of collector characteristics
|
|
213 |
*/
|
|
214 |
Set<Characteristics> characteristics();
|
|
215 |
|
|
216 |
/**
|
|
217 |
* Characteristics indicating properties of a {@code Collector}, which can
|
|
218 |
* be used to optimize reduction implementations.
|
|
219 |
*/
|
|
220 |
enum Characteristics {
|
|
221 |
/**
|
|
222 |
* Indicates that this collector is <em>concurrent</em>, meaning that
|
|
223 |
* the result container can support the accumulator function being
|
|
224 |
* called concurrently with the same result container from multiple
|
|
225 |
* threads. Concurrent collectors must also always have the
|
|
226 |
* {@code STRICTLY_MUTATIVE} characteristic.
|
|
227 |
*
|
|
228 |
* <p>If a {@code CONCURRENT} collector is not also {@code UNORDERED},
|
|
229 |
* then it should only be evaluated concurrently if applied to an
|
|
230 |
* unordered data source.
|
|
231 |
*/
|
|
232 |
CONCURRENT,
|
|
233 |
|
|
234 |
/**
|
|
235 |
* Indicates that the result container has no intrinsic order, such as
|
|
236 |
* a {@link Set}.
|
|
237 |
*/
|
|
238 |
UNORDERED,
|
|
239 |
|
|
240 |
/**
|
|
241 |
* Indicates that this collector operates by strict mutation of its
|
|
242 |
* result container. This means that the {@link #accumulator()} and
|
|
243 |
* {@link #combiner()} functions will always modify the state of and
|
|
244 |
* return their first argument, rather than returning a different result
|
|
245 |
* container.
|
|
246 |
*/
|
|
247 |
STRICTLY_MUTATIVE
|
|
248 |
}
|
|
249 |
}
|