author | dholmes |
Wed, 06 Nov 2019 21:18:42 -0500 | |
changeset 58956 | 9a0a5e70eeb2 |
parent 47216 | 71c04702a3d5 |
permissions | -rw-r--r-- |
9547
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
1 |
/* |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
2 |
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
4 |
* |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
5 |
* This code is free software; you can redistribute it and/or modify it |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
7 |
* published by the Free Software Foundation. |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
8 |
* |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
9 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
10 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
11 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
12 |
* version 2 for more details (a copy is included in the LICENSE file that |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
13 |
* accompanied this code). |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
14 |
* |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
15 |
* You should have received a copy of the GNU General Public License version |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
16 |
* 2 along with this work; if not, write to the Free Software Foundation, |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
17 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
18 |
* |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
19 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
20 |
* or visit www.oracle.com if you need additional information or have any |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
21 |
* questions. |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
22 |
*/ |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
23 |
|
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
24 |
import java.util.*; |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
25 |
import java.nio.*; |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
26 |
import java.nio.charset.*; |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
27 |
|
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
28 |
public class StrCodingBenchmarkUTF8 { |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
29 |
|
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
30 |
public static void main(String[] args) throws Throwable { |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
31 |
|
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
32 |
final int itrs = Integer.getInteger("iterations", 100000); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
33 |
final int size = 2048; |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
34 |
final int subsize = Integer.getInteger("subsize", 128); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
35 |
final Random rnd = new Random(); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
36 |
final int maxchar = 0x7f; |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
37 |
|
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
38 |
Charset charset = Charset.forName("UTF-8"); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
39 |
final String csn = charset.name(); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
40 |
final Charset cs = charset; |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
41 |
|
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
42 |
int[] starts = new int[] { 0, 0x80, 0x800, 0x10000}; |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
43 |
for (int nb = 1; nb <= 4; nb++) { |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
44 |
|
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
45 |
final CharsetEncoder enc = cs.newEncoder(); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
46 |
|
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
47 |
char[] cc = new char[size]; |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
48 |
int i = 0; |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
49 |
while (i < size - 3) { |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
50 |
i += Character.toChars(starts[nb - 1] + rnd.nextInt(maxchar), cc, i); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
51 |
} |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
52 |
|
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
53 |
final String string = new String(cc); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
54 |
final byte[] bytes = string.getBytes(cs); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
55 |
|
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
56 |
System.out.printf("%n--------%s[nb=%d]---------%n", csn, nb); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
57 |
int sz = 12; |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
58 |
while (sz < size) { |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
59 |
System.out.printf(" [len=%d]%n", sz); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
60 |
final byte[] bs = Arrays.copyOf(bytes, sz); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
61 |
final String str = new String(bs, csn); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
62 |
StrCodingBenchmark.Job[] jobs = { |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
63 |
new StrCodingBenchmark.Job("String decode: csn") { |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
64 |
public void work() throws Throwable { |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
65 |
for (int i = 0; i < itrs; i++) |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
66 |
new String(bs, csn); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
67 |
}}, |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
68 |
|
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
69 |
new StrCodingBenchmark.Job("String decode: cs") { |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
70 |
public void work() throws Throwable { |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
71 |
for (int i = 0; i < itrs; i++) |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
72 |
new String(bs, cs); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
73 |
}}, |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
74 |
|
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
75 |
new StrCodingBenchmark.Job("String encode: csn") { |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
76 |
public void work() throws Throwable { |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
77 |
for (int i = 0; i < itrs; i++) |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
78 |
str.getBytes(csn); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
79 |
}}, |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
80 |
|
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
81 |
new StrCodingBenchmark.Job("String encode: cs") { |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
82 |
public void work() throws Throwable { |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
83 |
for (int i = 0; i < itrs; i++) |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
84 |
str.getBytes(cs); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
85 |
}}, |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
86 |
}; |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
87 |
StrCodingBenchmark.time(StrCodingBenchmark.filter(null, jobs)); |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
88 |
sz <<= 1; |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
89 |
} |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
90 |
} |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
91 |
} |
454881baaca0
7040220: java/char_encodin Optimize UTF-8 charset for String.getBytes()/new String(byte[])
sherman
parents:
diff
changeset
|
92 |
} |