author | aleonard |
Thu, 10 Oct 2019 10:28:55 +0100 | |
changeset 58561 | 3968bf3673c5 |
parent 47216 | 71c04702a3d5 |
permissions | -rw-r--r-- |
1713 | 1 |
/* |
58561
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
2 |
* Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved. |
1713 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
1713 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
1713 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
1713 | 24 |
*/ |
25 |
||
26 |
package sun.nio.cs; |
|
27 |
||
28 |
import java.nio.Buffer; |
|
29 |
import java.nio.ByteBuffer; |
|
30 |
import java.nio.CharBuffer; |
|
31 |
import java.nio.charset.Charset; |
|
32 |
import java.nio.charset.CharsetDecoder; |
|
33 |
import java.nio.charset.CharsetEncoder; |
|
34 |
import java.nio.charset.CoderResult; |
|
2294 | 35 |
import java.util.Arrays; |
1713 | 36 |
import static sun.nio.cs.CharsetMapping.*; |
37 |
||
38 |
public class SingleByte |
|
39 |
{ |
|
40 |
private static final CoderResult withResult(CoderResult cr, |
|
41 |
Buffer src, int sp, |
|
42 |
Buffer dst, int dp) |
|
43 |
{ |
|
44 |
src.position(sp - src.arrayOffset()); |
|
45 |
dst.position(dp - dst.arrayOffset()); |
|
46 |
return cr; |
|
47 |
} |
|
48 |
||
32649
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
28850
diff
changeset
|
49 |
public static final class Decoder extends CharsetDecoder |
2294 | 50 |
implements ArrayDecoder { |
1713 | 51 |
private final char[] b2c; |
33663 | 52 |
private final boolean isASCIICompatible; |
58561
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
53 |
private final boolean isLatin1Decodable; |
1713 | 54 |
|
55 |
public Decoder(Charset cs, char[] b2c) { |
|
56 |
super(cs, 1.0f, 1.0f); |
|
57 |
this.b2c = b2c; |
|
33663 | 58 |
this.isASCIICompatible = false; |
58561
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
59 |
this.isLatin1Decodable = false; |
33663 | 60 |
} |
61 |
||
62 |
public Decoder(Charset cs, char[] b2c, boolean isASCIICompatible) { |
|
63 |
super(cs, 1.0f, 1.0f); |
|
64 |
this.b2c = b2c; |
|
65 |
this.isASCIICompatible = isASCIICompatible; |
|
58561
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
66 |
this.isLatin1Decodable = false; |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
67 |
} |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
68 |
|
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
69 |
public Decoder(Charset cs, char[] b2c, boolean isASCIICompatible, boolean isLatin1Decodable) { |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
70 |
super(cs, 1.0f, 1.0f); |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
71 |
this.b2c = b2c; |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
72 |
this.isASCIICompatible = isASCIICompatible; |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
73 |
this.isLatin1Decodable = isLatin1Decodable; |
1713 | 74 |
} |
75 |
||
76 |
private CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) { |
|
77 |
byte[] sa = src.array(); |
|
78 |
int sp = src.arrayOffset() + src.position(); |
|
79 |
int sl = src.arrayOffset() + src.limit(); |
|
80 |
||
81 |
char[] da = dst.array(); |
|
82 |
int dp = dst.arrayOffset() + dst.position(); |
|
83 |
int dl = dst.arrayOffset() + dst.limit(); |
|
84 |
||
85 |
CoderResult cr = CoderResult.UNDERFLOW; |
|
86 |
if ((dl - dp) < (sl - sp)) { |
|
87 |
sl = sp + (dl - dp); |
|
88 |
cr = CoderResult.OVERFLOW; |
|
89 |
} |
|
90 |
||
91 |
while (sp < sl) { |
|
92 |
char c = decode(sa[sp]); |
|
93 |
if (c == UNMAPPABLE_DECODING) { |
|
94 |
return withResult(CoderResult.unmappableForLength(1), |
|
95 |
src, sp, dst, dp); |
|
96 |
} |
|
97 |
da[dp++] = c; |
|
98 |
sp++; |
|
99 |
} |
|
100 |
return withResult(cr, src, sp, dst, dp); |
|
101 |
} |
|
102 |
||
103 |
private CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) { |
|
104 |
int mark = src.position(); |
|
105 |
try { |
|
106 |
while (src.hasRemaining()) { |
|
107 |
char c = decode(src.get()); |
|
108 |
if (c == UNMAPPABLE_DECODING) |
|
109 |
return CoderResult.unmappableForLength(1); |
|
110 |
if (!dst.hasRemaining()) |
|
111 |
return CoderResult.OVERFLOW; |
|
112 |
dst.put(c); |
|
113 |
mark++; |
|
114 |
} |
|
115 |
return CoderResult.UNDERFLOW; |
|
116 |
} finally { |
|
117 |
src.position(mark); |
|
118 |
} |
|
119 |
} |
|
120 |
||
121 |
protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) { |
|
122 |
if (src.hasArray() && dst.hasArray()) |
|
123 |
return decodeArrayLoop(src, dst); |
|
124 |
else |
|
125 |
return decodeBufferLoop(src, dst); |
|
126 |
} |
|
127 |
||
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
128 |
public final char decode(int b) { |
1713 | 129 |
return b2c[b + 128]; |
130 |
} |
|
2294 | 131 |
|
132 |
private char repl = '\uFFFD'; |
|
133 |
protected void implReplaceWith(String newReplacement) { |
|
134 |
repl = newReplacement.charAt(0); |
|
135 |
} |
|
136 |
||
33663 | 137 |
@Override |
58561
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
138 |
public int decodeToLatin1(byte[] src, int sp, int len, byte[] dst) { |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
139 |
if (len > dst.length) |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
140 |
len = dst.length; |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
141 |
|
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
142 |
int dp = 0; |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
143 |
while (dp < len) { |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
144 |
dst[dp++] = (byte)decode(src[sp++]); |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
145 |
} |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
146 |
return dp; |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
147 |
} |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
148 |
|
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
149 |
@Override |
2294 | 150 |
public int decode(byte[] src, int sp, int len, char[] dst) { |
151 |
if (len > dst.length) |
|
152 |
len = dst.length; |
|
153 |
int dp = 0; |
|
154 |
while (dp < len) { |
|
155 |
dst[dp] = decode(src[sp++]); |
|
156 |
if (dst[dp] == UNMAPPABLE_DECODING) { |
|
157 |
dst[dp] = repl; |
|
158 |
} |
|
159 |
dp++; |
|
160 |
} |
|
161 |
return dp; |
|
162 |
} |
|
33663 | 163 |
|
164 |
@Override |
|
165 |
public boolean isASCIICompatible() { |
|
166 |
return isASCIICompatible; |
|
167 |
} |
|
58561
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
168 |
|
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
169 |
@Override |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
170 |
public boolean isLatin1Decodable() { |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
171 |
return isLatin1Decodable; |
3968bf3673c5
8231717: Improve performance of charset decoding when charset is always compactable
aleonard
parents:
47216
diff
changeset
|
172 |
} |
1713 | 173 |
} |
174 |
||
32649
2ee9017c7597
8136583: Core libraries should use blessed modifier order
martin
parents:
28850
diff
changeset
|
175 |
public static final class Encoder extends CharsetEncoder |
2294 | 176 |
implements ArrayEncoder { |
1713 | 177 |
private Surrogate.Parser sgp; |
178 |
private final char[] c2b; |
|
179 |
private final char[] c2bIndex; |
|
33663 | 180 |
private final boolean isASCIICompatible; |
1713 | 181 |
|
33663 | 182 |
public Encoder(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible) { |
1713 | 183 |
super(cs, 1.0f, 1.0f); |
184 |
this.c2b = c2b; |
|
185 |
this.c2bIndex = c2bIndex; |
|
33663 | 186 |
this.isASCIICompatible = isASCIICompatible; |
1713 | 187 |
} |
188 |
||
189 |
public boolean canEncode(char c) { |
|
190 |
return encode(c) != UNMAPPABLE_ENCODING; |
|
191 |
} |
|
192 |
||
2294 | 193 |
public boolean isLegalReplacement(byte[] repl) { |
194 |
return ((repl.length == 1 && repl[0] == (byte)'?') || |
|
195 |
super.isLegalReplacement(repl)); |
|
196 |
} |
|
197 |
||
1713 | 198 |
private CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) { |
199 |
char[] sa = src.array(); |
|
200 |
int sp = src.arrayOffset() + src.position(); |
|
201 |
int sl = src.arrayOffset() + src.limit(); |
|
202 |
||
203 |
byte[] da = dst.array(); |
|
204 |
int dp = dst.arrayOffset() + dst.position(); |
|
205 |
int dl = dst.arrayOffset() + dst.limit(); |
|
28850
4996a75e8bfb
8030179: java/nio/Buffer/Chars.java, testcases seems all pass but jtreg/testng failed with java.lang.AssertionError
sherman
parents:
25859
diff
changeset
|
206 |
int len = Math.min(dl - dp, sl - sp); |
1713 | 207 |
|
28850
4996a75e8bfb
8030179: java/nio/Buffer/Chars.java, testcases seems all pass but jtreg/testng failed with java.lang.AssertionError
sherman
parents:
25859
diff
changeset
|
208 |
while (len-- > 0) { |
1713 | 209 |
char c = sa[sp]; |
210 |
int b = encode(c); |
|
211 |
if (b == UNMAPPABLE_ENCODING) { |
|
3714 | 212 |
if (Character.isSurrogate(c)) { |
1713 | 213 |
if (sgp == null) |
214 |
sgp = new Surrogate.Parser(); |
|
28850
4996a75e8bfb
8030179: java/nio/Buffer/Chars.java, testcases seems all pass but jtreg/testng failed with java.lang.AssertionError
sherman
parents:
25859
diff
changeset
|
215 |
if (sgp.parse(c, sa, sp, sl) < 0) { |
1713 | 216 |
return withResult(sgp.error(), src, sp, dst, dp); |
28850
4996a75e8bfb
8030179: java/nio/Buffer/Chars.java, testcases seems all pass but jtreg/testng failed with java.lang.AssertionError
sherman
parents:
25859
diff
changeset
|
217 |
} |
1713 | 218 |
return withResult(sgp.unmappableResult(), src, sp, dst, dp); |
219 |
} |
|
220 |
return withResult(CoderResult.unmappableForLength(1), |
|
221 |
src, sp, dst, dp); |
|
222 |
} |
|
223 |
da[dp++] = (byte)b; |
|
224 |
sp++; |
|
225 |
} |
|
28850
4996a75e8bfb
8030179: java/nio/Buffer/Chars.java, testcases seems all pass but jtreg/testng failed with java.lang.AssertionError
sherman
parents:
25859
diff
changeset
|
226 |
return withResult(sp < sl ? CoderResult.OVERFLOW : CoderResult.UNDERFLOW, |
4996a75e8bfb
8030179: java/nio/Buffer/Chars.java, testcases seems all pass but jtreg/testng failed with java.lang.AssertionError
sherman
parents:
25859
diff
changeset
|
227 |
src, sp, dst, dp); |
1713 | 228 |
} |
229 |
||
230 |
private CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) { |
|
231 |
int mark = src.position(); |
|
232 |
try { |
|
233 |
while (src.hasRemaining()) { |
|
234 |
char c = src.get(); |
|
235 |
int b = encode(c); |
|
236 |
if (b == UNMAPPABLE_ENCODING) { |
|
3714 | 237 |
if (Character.isSurrogate(c)) { |
1713 | 238 |
if (sgp == null) |
239 |
sgp = new Surrogate.Parser(); |
|
240 |
if (sgp.parse(c, src) < 0) |
|
241 |
return sgp.error(); |
|
242 |
return sgp.unmappableResult(); |
|
243 |
} |
|
244 |
return CoderResult.unmappableForLength(1); |
|
245 |
} |
|
246 |
if (!dst.hasRemaining()) |
|
247 |
return CoderResult.OVERFLOW; |
|
248 |
dst.put((byte)b); |
|
249 |
mark++; |
|
250 |
} |
|
251 |
return CoderResult.UNDERFLOW; |
|
252 |
} finally { |
|
253 |
src.position(mark); |
|
254 |
} |
|
255 |
} |
|
256 |
||
257 |
protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) { |
|
258 |
if (src.hasArray() && dst.hasArray()) |
|
259 |
return encodeArrayLoop(src, dst); |
|
260 |
else |
|
261 |
return encodeBufferLoop(src, dst); |
|
262 |
} |
|
263 |
||
13366
2f5fdf6d8c22
6653797: Reimplement JDK charset repository charsets.jar
sherman
parents:
5506
diff
changeset
|
264 |
public final int encode(char ch) { |
1713 | 265 |
char index = c2bIndex[ch >> 8]; |
266 |
if (index == UNMAPPABLE_ENCODING) |
|
267 |
return UNMAPPABLE_ENCODING; |
|
268 |
return c2b[index + (ch & 0xff)]; |
|
269 |
} |
|
2294 | 270 |
|
271 |
private byte repl = (byte)'?'; |
|
272 |
protected void implReplaceWith(byte[] newReplacement) { |
|
273 |
repl = newReplacement[0]; |
|
274 |
} |
|
275 |
||
276 |
public int encode(char[] src, int sp, int len, byte[] dst) { |
|
277 |
int dp = 0; |
|
278 |
int sl = sp + Math.min(len, dst.length); |
|
279 |
while (sp < sl) { |
|
280 |
char c = src[sp++]; |
|
281 |
int b = encode(c); |
|
282 |
if (b != UNMAPPABLE_ENCODING) { |
|
283 |
dst[dp++] = (byte)b; |
|
284 |
continue; |
|
285 |
} |
|
3714 | 286 |
if (Character.isHighSurrogate(c) && sp < sl && |
287 |
Character.isLowSurrogate(src[sp])) { |
|
2294 | 288 |
if (len > dst.length) { |
289 |
sl++; |
|
290 |
len--; |
|
291 |
} |
|
292 |
sp++; |
|
293 |
} |
|
294 |
dst[dp++] = repl; |
|
295 |
} |
|
296 |
return dp; |
|
297 |
} |
|
33663 | 298 |
|
299 |
@Override |
|
300 |
public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) { |
|
301 |
int dp = 0; |
|
302 |
int sl = sp + Math.min(len, dst.length); |
|
303 |
while (sp < sl) { |
|
304 |
char c = (char)(src[sp++] & 0xff); |
|
305 |
int b = encode(c); |
|
306 |
if (b == UNMAPPABLE_ENCODING) { |
|
307 |
dst[dp++] = repl; |
|
308 |
} else { |
|
309 |
dst[dp++] = (byte)b; |
|
310 |
} |
|
311 |
} |
|
312 |
return dp; |
|
313 |
} |
|
314 |
||
315 |
@Override |
|
316 |
public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) { |
|
317 |
int dp = 0; |
|
318 |
int sl = sp + Math.min(len, dst.length); |
|
319 |
while (sp < sl) { |
|
320 |
char c = StringUTF16.getChar(src, sp++); |
|
321 |
int b = encode(c); |
|
322 |
if (b != UNMAPPABLE_ENCODING) { |
|
323 |
dst[dp++] = (byte)b; |
|
324 |
continue; |
|
325 |
} |
|
326 |
if (Character.isHighSurrogate(c) && sp < sl && |
|
327 |
Character.isLowSurrogate(StringUTF16.getChar(src, sp))) { |
|
328 |
if (len > dst.length) { |
|
329 |
sl++; |
|
330 |
len--; |
|
331 |
} |
|
332 |
sp++; |
|
333 |
} |
|
334 |
dst[dp++] = repl; |
|
335 |
} |
|
336 |
return dp; |
|
337 |
} |
|
338 |
||
339 |
@Override |
|
340 |
public boolean isASCIICompatible() { |
|
341 |
return isASCIICompatible; |
|
342 |
} |
|
1713 | 343 |
} |
344 |
||
345 |
// init the c2b and c2bIndex tables from b2c. |
|
346 |
public static void initC2B(char[] b2c, char[] c2bNR, |
|
347 |
char[] c2b, char[] c2bIndex) { |
|
348 |
for (int i = 0; i < c2bIndex.length; i++) |
|
349 |
c2bIndex[i] = UNMAPPABLE_ENCODING; |
|
350 |
for (int i = 0; i < c2b.length; i++) |
|
351 |
c2b[i] = UNMAPPABLE_ENCODING; |
|
352 |
int off = 0; |
|
353 |
for (int i = 0; i < b2c.length; i++) { |
|
354 |
char c = b2c[i]; |
|
355 |
if (c == UNMAPPABLE_DECODING) |
|
356 |
continue; |
|
357 |
int index = (c >> 8); |
|
358 |
if (c2bIndex[index] == UNMAPPABLE_ENCODING) { |
|
359 |
c2bIndex[index] = (char)off; |
|
360 |
off += 0x100; |
|
361 |
} |
|
362 |
index = c2bIndex[index] + (c & 0xff); |
|
363 |
c2b[index] = (char)((i>=0x80)?(i-0x80):(i+0x80)); |
|
364 |
} |
|
365 |
if (c2bNR != null) { |
|
366 |
// c-->b nr entries |
|
367 |
int i = 0; |
|
368 |
while (i < c2bNR.length) { |
|
369 |
char b = c2bNR[i++]; |
|
370 |
char c = c2bNR[i++]; |
|
371 |
int index = (c >> 8); |
|
372 |
if (c2bIndex[index] == UNMAPPABLE_ENCODING) { |
|
373 |
c2bIndex[index] = (char)off; |
|
374 |
off += 0x100; |
|
375 |
} |
|
376 |
index = c2bIndex[index] + (c & 0xff); |
|
377 |
c2b[index] = b; |
|
378 |
} |
|
379 |
} |
|
380 |
} |
|
381 |
} |