author | psandoz |
Wed, 24 Jun 2015 12:05:30 +0200 | |
changeset 31258 | 8e44e5e2563e |
permissions | -rw-r--r-- |
31258
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
1 |
/* |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
2 |
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
4 |
* |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
5 |
* This code is free software; you can redistribute it and/or modify it |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
7 |
* published by the Free Software Foundation. Oracle designates this |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
8 |
* particular file as subject to the "Classpath" exception as provided |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
9 |
* by Oracle in the LICENSE file that accompanied this code. |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
10 |
* |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
15 |
* accompanied this code). |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
16 |
* |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
17 |
* You should have received a copy of the GNU General Public License version |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
20 |
* |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
22 |
* or visit www.oracle.com if you need additional information or have any |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
23 |
* questions. |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
24 |
*/ |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
25 |
package java.nio.file; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
26 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
27 |
import java.io.BufferedReader; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
28 |
import java.io.IOException; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
29 |
import java.io.UncheckedIOException; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
30 |
import java.nio.ByteBuffer; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
31 |
import java.nio.channels.Channels; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
32 |
import java.nio.channels.FileChannel; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
33 |
import java.nio.channels.ReadableByteChannel; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
34 |
import java.nio.charset.Charset; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
35 |
import java.nio.charset.StandardCharsets; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
36 |
import java.util.HashSet; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
37 |
import java.util.Set; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
38 |
import java.util.Spliterator; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
39 |
import java.util.function.Consumer; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
40 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
41 |
/** |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
42 |
* A file-based lines spliterator, leveraging a shared mapped byte buffer and |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
43 |
* associated file channel, covering lines of a file for character encodings |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
44 |
* where line feed characters can be easily identified from character encoded |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
45 |
* bytes. |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
46 |
* |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
47 |
* <p> |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
48 |
* When the root spliterator is first split a mapped byte buffer will be created |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
49 |
* over the file for its size that was observed when the stream was created. |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
50 |
* Thus a mapped byte buffer is only required for parallel stream execution. |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
51 |
* Sub-spliterators will share that mapped byte buffer. Splitting will use the |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
52 |
* mapped byte buffer to find the closest line feed character(s) to the left or |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
53 |
* right of the mid-point of covered range of bytes of the file. If a line feed |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
54 |
* is found then the spliterator is split with returned spliterator containing |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
55 |
* the identified line feed character(s) at the end of its covered range of |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
56 |
* bytes. |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
57 |
* |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
58 |
* <p> |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
59 |
* Traversing will create a buffered reader, derived from the file channel, for |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
60 |
* the range of bytes of the file. The lines are then read from that buffered |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
61 |
* reader. Once traversing commences no further splitting can be performed and |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
62 |
* the reference to the mapped byte buffer will be set to null. |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
63 |
*/ |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
64 |
final class FileChannelLinesSpliterator implements Spliterator<String> { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
65 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
66 |
/**
 * Names of the charsets this spliterator is registered for: UTF-8,
 * ISO-8859-1 and US-ASCII.
 */
static final Set<String> SUPPORTED_CHARSET_NAMES;
static {
    // Populate a local set first, then publish it through the static field.
    Set<String> names = new HashSet<>();
    names.add(StandardCharsets.UTF_8.name());
    names.add(StandardCharsets.ISO_8859_1.name());
    names.add(StandardCharsets.US_ASCII.name());
    SUPPORTED_CHARSET_NAMES = names;
}
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
73 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
74 |
private final FileChannel fc; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
75 |
private final Charset cs; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
76 |
private int index; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
77 |
private final int fence; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
78 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
79 |
// Null before first split, non-null when splitting, null when traversing |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
80 |
private ByteBuffer buffer; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
81 |
// Non-null when traversing |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
82 |
private BufferedReader reader; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
83 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
84 |
/**
 * Creates a spliterator over the byte range {@code [index, fence)} of the
 * given file channel, with no mapped byte buffer assigned yet.
 *
 * @param fc    the file channel the lines are read from
 * @param cs    the charset used to decode the bytes into characters
 * @param index the file position of the first byte of the covered range
 * @param fence the file position one past the last byte of the covered range
 */
FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence) {
    // Delegate to the full constructor; the buffer stays null until it is
    // created on demand.
    this(fc, cs, index, fence, null);
}
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
90 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
91 |
/**
 * Creates a spliterator over the byte range {@code [index, fence)} of the
 * given file channel, using the supplied byte buffer.
 *
 * @param fc     the file channel the lines are read from
 * @param cs     the charset used to decode the bytes into characters
 * @param index  the file position of the first byte of the covered range
 * @param fence  the file position one past the last byte of the covered range
 * @param buffer the byte buffer to use for this spliterator
 */
private FileChannelLinesSpliterator(FileChannel fc, Charset cs, int index, int fence, ByteBuffer buffer) {
    this.fc = fc;
    this.cs = cs;
    this.index = index;
    this.fence = fence;
    this.buffer = buffer;
}
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
98 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
99 |
@Override |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
100 |
public boolean tryAdvance(Consumer<? super String> action) { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
101 |
String line = readLine(); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
102 |
if (line != null) { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
103 |
action.accept(line); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
104 |
return true; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
105 |
} else { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
106 |
return false; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
107 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
108 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
109 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
110 |
@Override |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
111 |
public void forEachRemaining(Consumer<? super String> action) { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
112 |
String line; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
113 |
while ((line = readLine()) != null) { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
114 |
action.accept(line); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
115 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
116 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
117 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
118 |
private BufferedReader getBufferedReader() { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
119 |
/** |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
120 |
* A readable byte channel that reads bytes from an underlying |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
121 |
* file channel over a specified range. |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
122 |
*/ |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
123 |
ReadableByteChannel rrbc = new ReadableByteChannel() { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
124 |
@Override |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
125 |
public int read(ByteBuffer dst) throws IOException { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
126 |
int bytesToRead = fence - index; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
127 |
if (bytesToRead == 0) |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
128 |
return -1; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
129 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
130 |
int bytesRead; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
131 |
if (bytesToRead < dst.remaining()) { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
132 |
// The number of bytes to read is less than remaining |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
133 |
// bytes in the buffer |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
134 |
// Snapshot the limit, reduce it, read, then restore |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
135 |
int oldLimit = dst.limit(); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
136 |
dst.limit(dst.position() + bytesToRead); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
137 |
bytesRead = fc.read(dst, index); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
138 |
dst.limit(oldLimit); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
139 |
} else { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
140 |
bytesRead = fc.read(dst, index); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
141 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
142 |
if (bytesRead == -1) { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
143 |
index = fence; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
144 |
return bytesRead; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
145 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
146 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
147 |
index += bytesRead; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
148 |
return bytesRead; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
149 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
150 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
151 |
@Override |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
152 |
public boolean isOpen() { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
153 |
return fc.isOpen(); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
154 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
155 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
156 |
@Override |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
157 |
public void close() throws IOException { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
158 |
fc.close(); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
159 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
160 |
}; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
161 |
return new BufferedReader(Channels.newReader(rrbc, cs.newDecoder(), -1)); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
162 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
163 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
164 |
private String readLine() { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
165 |
if (reader == null) { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
166 |
reader = getBufferedReader(); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
167 |
buffer = null; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
168 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
169 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
170 |
try { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
171 |
return reader.readLine(); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
172 |
} catch (IOException e) { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
173 |
throw new UncheckedIOException(e); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
174 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
175 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
176 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
177 |
private ByteBuffer getMappedByteBuffer() { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
178 |
// TODO can the mapped byte buffer be explicitly unmapped? |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
179 |
// It's possible, via a shared-secret mechanism, when either |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
180 |
// 1) the spliterator starts traversing, although traversal can |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
181 |
// happen concurrently for mulitple spliterators, so care is |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
182 |
// needed in this case; or |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
183 |
// 2) when the stream is closed using some shared holder to pass |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
184 |
// the mapped byte buffer when it is created. |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
185 |
try { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
186 |
return fc.map(FileChannel.MapMode.READ_ONLY, 0, fence); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
187 |
} catch (IOException e) { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
188 |
throw new UncheckedIOException(e); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
189 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
190 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
191 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
192 |
@Override |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
193 |
public Spliterator<String> trySplit() { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
194 |
// Cannot split after partial traverse |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
195 |
if (reader != null) |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
196 |
return null; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
197 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
198 |
ByteBuffer b; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
199 |
if ((b = buffer) == null) { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
200 |
b = buffer = getMappedByteBuffer(); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
201 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
202 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
203 |
final int hi = fence, lo = index; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
204 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
205 |
// Check if line separator hits the mid point |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
206 |
int mid = (lo + hi) >>> 1; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
207 |
int c = b.get(mid); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
208 |
if (c == '\n') { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
209 |
mid++; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
210 |
} else if (c == '\r') { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
211 |
// Check if a line separator of "\r\n" |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
212 |
if (++mid < hi && b.get(mid) == '\n') { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
213 |
mid++; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
214 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
215 |
} else { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
216 |
// TODO give up after a certain distance from the mid point? |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
217 |
// Scan to the left and right of the mid point |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
218 |
int midL = mid - 1; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
219 |
int midR = mid + 1; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
220 |
mid = 0; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
221 |
while (midL > lo && midR < hi) { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
222 |
// Sample to the left |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
223 |
c = b.get(midL--); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
224 |
if (c == '\n' || c == '\r') { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
225 |
// If c is "\r" then no need to check for "\r\n" |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
226 |
// since the subsequent value was previously checked |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
227 |
mid = midL + 2; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
228 |
break; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
229 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
230 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
231 |
// Sample to the right |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
232 |
c = b.get(midR++); |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
233 |
if (c == '\n' || c == '\r') { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
234 |
mid = midR; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
235 |
// Check if line-separator is "\r\n" |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
236 |
if (c == '\r' && mid < hi && b.get(mid) == '\n') { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
237 |
mid++; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
238 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
239 |
break; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
240 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
241 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
242 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
243 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
244 |
// The left spliterator will have the line-separator at the end |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
245 |
return (mid > lo && mid < hi) |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
246 |
? new FileChannelLinesSpliterator(fc, cs, lo, index = mid, b) |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
247 |
: null; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
248 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
249 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
250 |
@Override |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
251 |
public long estimateSize() { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
252 |
// Use the number of bytes as an estimate. |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
253 |
// We could divide by a constant that is the average number of |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
254 |
// characters per-line, but that constant will be factored out. |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
255 |
return fence - index; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
256 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
257 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
258 |
@Override |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
259 |
public long getExactSizeIfKnown() { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
260 |
return -1; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
261 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
262 |
|
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
263 |
@Override |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
264 |
public int characteristics() { |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
265 |
return Spliterator.ORDERED | Spliterator.NONNULL; |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
266 |
} |
8e44e5e2563e
8072773: (fs) Files.lines needs a better splitting implementation for stream source
psandoz
parents:
diff
changeset
|
267 |
} |