author | jjg |
Wed, 12 Jul 2017 12:48:01 -0700 | |
changeset 45881 | aaec0fbe17ae |
parent 45718 | ba97c984166b |
child 45894 | 995421c69f66 |
permissions | -rw-r--r-- |
2 | 1 |
/* |
45124
144479e89cdb
8179592: Update tables in java.base to be HTML 5-friendly.
jjg
parents:
44851
diff
changeset
|
2 |
* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
||
26 |
package java.nio.charset; |
|
27 |
||
28 |
import java.nio.ByteBuffer; |
|
29 |
import java.nio.CharBuffer; |
|
30 |
import java.nio.charset.spi.CharsetProvider; |
|
31 |
import java.security.AccessController; |
|
32 |
import java.security.PrivilegedAction; |
|
30818
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
33 |
import java.util.Arrays; |
2 | 34 |
import java.util.Collections; |
35 |
import java.util.HashSet; |
|
36 |
import java.util.Iterator; |
|
37 |
import java.util.Locale; |
|
38 |
import java.util.Map; |
|
39 |
import java.util.NoSuchElementException; |
|
33675
7d9d372a41df
8141652: Rename methods Objects.nonNullElse* to requireNonNullElse*
rriggs
parents:
32143
diff
changeset
|
40 |
import java.util.Objects; |
2 | 41 |
import java.util.Set; |
42 |
import java.util.ServiceLoader; |
|
43 |
import java.util.ServiceConfigurationError; |
|
44 |
import java.util.SortedMap; |
|
45 |
import java.util.TreeMap; |
|
34882 | 46 |
import jdk.internal.misc.VM; |
2 | 47 |
import sun.nio.cs.StandardCharsets; |
48 |
import sun.nio.cs.ThreadLocalCoders; |
|
49 |
import sun.security.action.GetPropertyAction; |
|
50 |
||
51 |
||
52 |
/** |
|
53 |
* A named mapping between sequences of sixteen-bit Unicode <a |
|
54 |
* href="../../lang/Character.html#unicode">code units</a> and sequences of |
|
55 |
* bytes. This class defines methods for creating decoders and encoders and |
|
56 |
* for retrieving the various names associated with a charset. Instances of |
|
57 |
* this class are immutable. |
|
58 |
* |
|
59 |
* <p> This class also defines static methods for testing whether a particular |
|
60 |
* charset is supported, for locating charset instances by name, and for |
|
61 |
* constructing a map that contains every charset for which support is |
|
62 |
* available in the current Java virtual machine. Support for new charsets can |
|
63 |
* be added via the service-provider interface defined in the {@link |
|
64 |
* java.nio.charset.spi.CharsetProvider} class. |
|
65 |
* |
|
66 |
* <p> All of the methods defined in this class are safe for use by multiple |
|
67 |
* concurrent threads. |
|
68 |
* |
|
69 |
* |
|
44844
b2b4d98404ba
8179364: update "<a name=" in java.base module to use id attribute
jjg
parents:
43790
diff
changeset
|
70 |
* <a id="names"></a><a id="charenc"></a> |
18574
4aeaeb541678
8019380: doclint warnings in java.nio, java.nio.file.**, java.nio.channels.**
alanb
parents:
18164
diff
changeset
|
71 |
* <h2>Charset names</h2> |
2 | 72 |
* |
73 |
* <p> Charsets are named by strings composed of the following characters: |
|
74 |
* |
|
75 |
* <ul> |
|
76 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
77 |
* <li> The uppercase letters {@code 'A'} through {@code 'Z'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
78 |
* (<code>'\u0041'</code> through <code>'\u005a'</code>), |
2 | 79 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
80 |
* <li> The lowercase letters {@code 'a'} through {@code 'z'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
81 |
* (<code>'\u0061'</code> through <code>'\u007a'</code>), |
2 | 82 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
83 |
* <li> The digits {@code '0'} through {@code '9'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
84 |
* (<code>'\u0030'</code> through <code>'\u0039'</code>), |
2 | 85 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
86 |
* <li> The dash character {@code '-'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
87 |
* (<code>'\u002d'</code>, <small>HYPHEN-MINUS</small>), |
2 | 88 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
89 |
* <li> The plus character {@code '+'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
90 |
* (<code>'\u002b'</code>, <small>PLUS SIGN</small>), |
1146
bbcbf6618d8c
4849617: (cs)Revise Charset spec to allow '+' in names
sherman
parents:
895
diff
changeset
|
91 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
92 |
* <li> The period character {@code '.'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
93 |
* (<code>'\u002e'</code>, <small>FULL STOP</small>), |
2 | 94 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
95 |
* <li> The colon character {@code ':'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
96 |
* (<code>'\u003a'</code>, <small>COLON</small>), and |
2 | 97 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
98 |
* <li> The underscore character {@code '_'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
99 |
* (<code>'\u005f'</code>, <small>LOW LINE</small>). |
2 | 100 |
* |
101 |
* </ul> |
|
102 |
* |
|
103 |
* A charset name must begin with either a letter or a digit. The empty string |
|
104 |
* is not a legal charset name. Charset names are not case-sensitive; that is, |
|
105 |
* case is always ignored when comparing charset names. Charset names |
|
106 |
* generally follow the conventions documented in <a |
|
107 |
* href="http://www.ietf.org/rfc/rfc2278.txt"><i>RFC 2278: IANA Charset |
|
108 |
* Registration Procedures</i></a>. |
|
109 |
* |
|
110 |
* <p> Every charset has a <i>canonical name</i> and may also have one or more |
|
111 |
* <i>aliases</i>. The canonical name is returned by the {@link #name() name} method |
|
112 |
* of this class. Canonical names are, by convention, usually in upper case. |
|
113 |
* The aliases of a charset are returned by the {@link #aliases() aliases} |
|
114 |
* method. |
|
115 |
* |
|
44844
b2b4d98404ba
8179364: update "<a name=" in java.base module to use id attribute
jjg
parents:
43790
diff
changeset
|
116 |
* <p><a id="hn">Some charsets have an <i>historical name</i> that is defined for |
19034 | 117 |
* compatibility with previous versions of the Java platform.</a> A charset's |
2 | 118 |
* historical name is either its canonical name or one of its aliases. The |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
119 |
* historical name is returned by the {@code getEncoding()} methods of the |
2 | 120 |
* {@link java.io.InputStreamReader#getEncoding InputStreamReader} and {@link |
121 |
* java.io.OutputStreamWriter#getEncoding OutputStreamWriter} classes. |
|
122 |
* |
|
44844
b2b4d98404ba
8179364: update "<a name=" in java.base module to use id attribute
jjg
parents:
43790
diff
changeset
|
123 |
* <p><a id="iana"> </a>If a charset listed in the <a |
2 | 124 |
* href="http://www.iana.org/assignments/character-sets"><i>IANA Charset |
125 |
* Registry</i></a> is supported by an implementation of the Java platform then |
|
21801
b8a5ff5f0c2a
8028049: Tidy warnings cleanup for packages java.nio/java.io
yan
parents:
21278
diff
changeset
|
126 |
* its canonical name must be the name listed in the registry. Many charsets |
2 | 127 |
* are given more than one name in the registry, in which case the registry |
128 |
* identifies one of the names as <i>MIME-preferred</i>. If a charset has more |
|
129 |
* than one registry name then its canonical name must be the MIME-preferred |
|
130 |
* name and the other names in the registry must be valid aliases. If a |
|
131 |
* supported charset is not listed in the IANA registry then its canonical name |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
132 |
* must begin with one of the strings {@code "X-"} or {@code "x-"}. |
2 | 133 |
* |
134 |
* <p> The IANA charset registry does change over time, and so the canonical |
|
135 |
* name and the aliases of a particular charset may also change over time. To |
|
136 |
* ensure compatibility it is recommended that no alias ever be removed from a |
|
137 |
* charset, and that if the canonical name of a charset is changed then its |
|
138 |
* previous canonical name be made into an alias. |
|
139 |
* |
|
140 |
* |
|
18574
4aeaeb541678
8019380: doclint warnings in java.nio, java.nio.file.**, java.nio.channels.**
alanb
parents:
18164
diff
changeset
|
141 |
* <h2>Standard charsets</h2> |
2 | 142 |
* |
19034 | 143 |
* |
9526
a80328f995f1
4884238: Adds java.nio.charset.StandardCharset to provide static final constants for the standard charsets.
mduigou
parents:
7668
diff
changeset
|
144 |
* |
44844
b2b4d98404ba
8179364: update "<a name=" in java.base module to use id attribute
jjg
parents:
43790
diff
changeset
|
145 |
* <p><a id="standard">Every implementation of the Java platform is required to support the |
19034 | 146 |
* following standard charsets.</a> Consult the release documentation for your |
2 | 147 |
* implementation to see if any other charsets are supported. The behavior |
148 |
* of such optional charsets may differ between implementations. |
|
149 |
* |
|
45124
144479e89cdb
8179592: Update tables in java.base to be HTML 5-friendly.
jjg
parents:
44851
diff
changeset
|
150 |
* <blockquote><table class="striped" style="width:80%"> |
144479e89cdb
8179592: Update tables in java.base to be HTML 5-friendly.
jjg
parents:
44851
diff
changeset
|
151 |
* <caption style="display:none">Description of standard charsets</caption> |
144479e89cdb
8179592: Update tables in java.base to be HTML 5-friendly.
jjg
parents:
44851
diff
changeset
|
152 |
* <thead> |
45881
aaec0fbe17ae
8184208: update class="striped" tables for accessibility
jjg
parents:
45718
diff
changeset
|
153 |
* <tr><th scope="col" style="text-align:left">Charset</th><th scope="col" style="text-align:left">Description</th></tr> |
45124
144479e89cdb
8179592: Update tables in java.base to be HTML 5-friendly.
jjg
parents:
44851
diff
changeset
|
154 |
* </thead> |
144479e89cdb
8179592: Update tables in java.base to be HTML 5-friendly.
jjg
parents:
44851
diff
changeset
|
155 |
* <tbody> |
45881
aaec0fbe17ae
8184208: update class="striped" tables for accessibility
jjg
parents:
45718
diff
changeset
|
156 |
* <tr><th scope="row" style="vertical-align:top">{@code US-ASCII}</th> |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
157 |
* <td>Seven-bit ASCII, a.k.a. {@code ISO646-US}, |
2 | 158 |
* a.k.a. the Basic Latin block of the Unicode character set</td></tr> |
45881
aaec0fbe17ae
8184208: update class="striped" tables for accessibility
jjg
parents:
45718
diff
changeset
|
159 |
* <tr><th scope="row" style="vertical-align:top"><code>ISO-8859-1 </code></th> |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
160 |
* <td>ISO Latin Alphabet No. 1, a.k.a. {@code ISO-LATIN-1}</td></tr> |
45881
aaec0fbe17ae
8184208: update class="striped" tables for accessibility
jjg
parents:
45718
diff
changeset
|
161 |
* <tr><th scope="row" style="vertical-align:top">{@code UTF-8}</th> |
2 | 162 |
* <td>Eight-bit UCS Transformation Format</td></tr> |
45881
aaec0fbe17ae
8184208: update class="striped" tables for accessibility
jjg
parents:
45718
diff
changeset
|
163 |
* <tr><th scope="row" style="vertical-align:top">{@code UTF-16BE}</th> |
2 | 164 |
* <td>Sixteen-bit UCS Transformation Format, |
165 |
* big-endian byte order</td></tr> |
|
45881
aaec0fbe17ae
8184208: update class="striped" tables for accessibility
jjg
parents:
45718
diff
changeset
|
166 |
* <tr><th scope="row" style="vertical-align:top">{@code UTF-16LE}</th> |
2 | 167 |
* <td>Sixteen-bit UCS Transformation Format, |
168 |
* little-endian byte order</td></tr> |
|
45881
aaec0fbe17ae
8184208: update class="striped" tables for accessibility
jjg
parents:
45718
diff
changeset
|
169 |
* <tr><th scope="row" style="vertical-align:top">{@code UTF-16}</th> |
2 | 170 |
* <td>Sixteen-bit UCS Transformation Format, |
171 |
* byte order identified by an optional byte-order mark</td></tr> |
|
45124
144479e89cdb
8179592: Update tables in java.base to be HTML 5-friendly.
jjg
parents:
44851
diff
changeset
|
172 |
* </tbody> |
2 | 173 |
* </table></blockquote> |
174 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
175 |
* <p> The {@code UTF-8} charset is specified by <a |
2 | 176 |
* href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC 2279</i></a>; the |
177 |
* transformation format upon which it is based is specified in |
|
178 |
* Amendment 2 of ISO 10646-1 and is also described in the <a |
|
179 |
* href="http://www.unicode.org/unicode/standard/standard.html"><i>Unicode |
|
180 |
* Standard</i></a>. |
|
181 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
182 |
* <p> The {@code UTF-16} charsets are specified by <a |
2 | 183 |
* href="http://www.ietf.org/rfc/rfc2781.txt"><i>RFC 2781</i></a>; the |
184 |
* transformation formats upon which they are based are specified in |
|
185 |
* Amendment 1 of ISO 10646-1 and are also described in the <a |
|
186 |
* href="http://www.unicode.org/unicode/standard/standard.html"><i>Unicode |
|
187 |
* Standard</i></a>. |
|
188 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
189 |
* <p> The {@code UTF-16} charsets use sixteen-bit quantities and are |
2 | 190 |
* therefore sensitive to byte order. In these encodings the byte order of a |
191 |
* stream may be indicated by an initial <i>byte-order mark</i> represented by |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
192 |
* the Unicode character <code>'\uFEFF'</code>. Byte-order marks are handled |
2 | 193 |
* as follows: |
194 |
* |
|
195 |
* <ul> |
|
196 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
197 |
* <li><p> When decoding, the {@code UTF-16BE} and {@code UTF-16LE} |
784
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
198 |
* charsets interpret the initial byte-order marks as a <small>ZERO-WIDTH |
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
199 |
* NON-BREAKING SPACE</small>; when encoding, they do not write |
2 | 200 |
* byte-order marks. </p></li> |
784
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
201 |
|
2 | 202 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
203 |
* <li><p> When decoding, the {@code UTF-16} charset interprets the |
784
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
204 |
* byte-order mark at the beginning of the input stream to indicate the |
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
205 |
* byte-order of the stream but defaults to big-endian if there is no |
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
206 |
* byte-order mark; when encoding, it uses big-endian byte order and writes |
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
207 |
* a big-endian byte-order mark. </p></li> |
2 | 208 |
* |
209 |
* </ul> |
|
210 |
* |
|
21278 | 211 |
* In any case, byte order marks occurring after the first element of an |
784
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
212 |
* input sequence are not omitted since the same code is used to represent |
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
213 |
* <small>ZERO-WIDTH NON-BREAKING SPACE</small>. |
2 | 214 |
* |
215 |
* <p> Every instance of the Java virtual machine has a default charset, which |
|
216 |
* may or may not be one of the standard charsets. The default charset is |
|
217 |
* determined during virtual-machine startup and typically depends upon the |
|
218 |
* locale and charset being used by the underlying operating system. </p> |
|
219 |
* |
|
9676
5663e62f8d7e
7041612: Rename StandardCharset to StandardCharsets
mduigou
parents:
9526
diff
changeset
|
220 |
* <p>The {@link StandardCharsets} class defines constants for each of the |
9526
a80328f995f1
4884238: Adds java.nio.charset.StandardCharset to provide static final constants for the standard charsets.
mduigou
parents:
7668
diff
changeset
|
221 |
* standard charsets. |
2 | 222 |
* |
18574
4aeaeb541678
8019380: doclint warnings in java.nio, java.nio.file.**, java.nio.channels.**
alanb
parents:
18164
diff
changeset
|
223 |
* <h2>Terminology</h2> |
2 | 224 |
* |
40 | 225 |
* <p> The name of this class is taken from the terms used in |
226 |
* <a href="http://www.ietf.org/rfc/rfc2278.txt"><i>RFC 2278</i></a>. |
|
227 |
* In that document a <i>charset</i> is defined as the combination of |
|
228 |
* one or more coded character sets and a character-encoding scheme. |
|
229 |
* (This definition is confusing; some other software systems define |
|
230 |
* <i>charset</i> as a synonym for <i>coded character set</i>.) |
|
2 | 231 |
* |
232 |
* <p> A <i>coded character set</i> is a mapping between a set of abstract |
|
233 |
* characters and a set of integers. US-ASCII, ISO 8859-1, |
|
40 | 234 |
* JIS X 0201, and Unicode are examples of coded character sets. |
235 |
* |
|
236 |
* <p> Some standards have defined a <i>character set</i> to be simply a |
|
237 |
* set of abstract characters without an associated assigned numbering. |
|
238 |
* An alphabet is an example of such a character set. However, the subtle |
|
239 |
* distinction between <i>character set</i> and <i>coded character set</i> |
|
240 |
* is rarely used in practice; the former has become a short form for the |
|
241 |
* latter, including in the Java API specification. |
|
2 | 242 |
* |
40 | 243 |
* <p> A <i>character-encoding scheme</i> is a mapping between one or more |
244 |
* coded character sets and a set of octet (eight-bit byte) sequences. |
|
245 |
* UTF-8, UTF-16, ISO 2022, and EUC are examples of |
|
246 |
* character-encoding schemes. Encoding schemes are often associated with |
|
247 |
* a particular coded character set; UTF-8, for example, is used only to |
|
248 |
* encode Unicode. Some schemes, however, are associated with multiple |
|
249 |
* coded character sets; EUC, for example, can be used to encode |
|
250 |
* characters in a variety of Asian coded character sets. |
|
2 | 251 |
* |
252 |
* <p> When a coded character set is used exclusively with a single |
|
40 | 253 |
* character-encoding scheme then the corresponding charset is usually |
254 |
* named for the coded character set; otherwise a charset is usually named |
|
255 |
* for the encoding scheme and, possibly, the locale of the coded |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
256 |
* character sets that it supports. Hence {@code US-ASCII} is both the |
40 | 257 |
* name of a coded character set and of the charset that encodes it, while |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
258 |
* {@code EUC-JP} is the name of the charset that encodes the |
2 | 259 |
* JIS X 0201, JIS X 0208, and JIS X 0212 |
40 | 260 |
* coded character sets for the Japanese language. |
2 | 261 |
* |
262 |
* <p> The native character encoding of the Java programming language is |
|
40 | 263 |
* UTF-16. A charset in the Java platform therefore defines a mapping |
264 |
* between sequences of sixteen-bit UTF-16 code units (that is, sequences |
|
265 |
* of chars) and sequences of bytes. </p> |
|
2 | 266 |
* |
267 |
* |
|
268 |
* @author Mark Reinhold |
|
269 |
* @author JSR-51 Expert Group |
|
270 |
* @since 1.4 |
|
271 |
* |
|
272 |
* @see CharsetDecoder |
|
273 |
* @see CharsetEncoder |
|
274 |
* @see java.nio.charset.spi.CharsetProvider |
|
275 |
* @see java.lang.Character |
|
276 |
*/ |
|
277 |
||
278 |
public abstract class Charset |
|
279 |
implements Comparable<Charset> |
|
280 |
{ |
|
281 |
||
282 |
/* -- Static methods -- */ |
|
283 |
||
284 |
/** |
|
285 |
* Checks that the given string is a legal charset name. </p> |
|
286 |
* |
|
287 |
* @param s |
|
288 |
* A purported charset name |
|
289 |
* |
|
290 |
* @throws IllegalCharsetNameException |
|
291 |
* If the given name is not a legal charset name |
|
292 |
*/ |
|
293 |
private static void checkName(String s) { |
|
294 |
int n = s.length(); |
|
45718
ba97c984166b
8182743: Ineffective use of volatile hurts performance of Charset.atBugLevel()
clanger
parents:
45124
diff
changeset
|
295 |
if (n == 0) { |
43790
b9e56c7fba7e
8174831: Reduce number of Charset classes loaded on bootstrap
redestad
parents:
37781
diff
changeset
|
296 |
throw new IllegalCharsetNameException(s); |
2 | 297 |
} |
298 |
for (int i = 0; i < n; i++) { |
|
299 |
char c = s.charAt(i); |
|
300 |
if (c >= 'A' && c <= 'Z') continue; |
|
301 |
if (c >= 'a' && c <= 'z') continue; |
|
302 |
if (c >= '0' && c <= '9') continue; |
|
303 |
if (c == '-' && i != 0) continue; |
|
1146
bbcbf6618d8c
4849617: (cs)Revise Charset spec to allow '+' in names
sherman
parents:
895
diff
changeset
|
304 |
if (c == '+' && i != 0) continue; |
2 | 305 |
if (c == ':' && i != 0) continue; |
306 |
if (c == '_' && i != 0) continue; |
|
307 |
if (c == '.' && i != 0) continue; |
|
308 |
throw new IllegalCharsetNameException(s); |
|
309 |
} |
|
310 |
} |
|
311 |
||
312 |
/* The standard set of charsets */ |
|
43790
b9e56c7fba7e
8174831: Reduce number of Charset classes loaded on bootstrap
redestad
parents:
37781
diff
changeset
|
313 |
private static final CharsetProvider standardProvider = new StandardCharsets(); |
b9e56c7fba7e
8174831: Reduce number of Charset classes loaded on bootstrap
redestad
parents:
37781
diff
changeset
|
314 |
|
b9e56c7fba7e
8174831: Reduce number of Charset classes loaded on bootstrap
redestad
parents:
37781
diff
changeset
|
315 |
private static final String[] zeroAliases = new String[0]; |
2 | 316 |
|
317 |
// Cache of the most-recently-returned charsets, |
|
318 |
// along with the names that were used to find them |
|
319 |
// |
|
34774
03b4e6dc367b
8145680: Remove unnecessary explicit initialization of volatile variables in java.base
redestad
parents:
33675
diff
changeset
|
320 |
private static volatile Object[] cache1; // "Level 1" cache |
03b4e6dc367b
8145680: Remove unnecessary explicit initialization of volatile variables in java.base
redestad
parents:
33675
diff
changeset
|
321 |
private static volatile Object[] cache2; // "Level 2" cache |
2 | 322 |
|
323 |
private static void cache(String charsetName, Charset cs) { |
|
324 |
cache2 = cache1; |
|
325 |
cache1 = new Object[] { charsetName, cs }; |
|
326 |
} |
|
327 |
||
328 |
// Creates an iterator that walks over the available providers, ignoring |
|
329 |
// those whose lookup or instantiation causes a security exception to be |
|
330 |
// thrown. Should be invoked with full privileges. |
|
331 |
// |
|
10137
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
332 |
private static Iterator<CharsetProvider> providers() { |
29986
97167d851fc4
8078467: Update core libraries to use diamond with anonymous classes
darcy
parents:
25859
diff
changeset
|
333 |
return new Iterator<>() { |
2 | 334 |
ClassLoader cl = ClassLoader.getSystemClassLoader(); |
335 |
ServiceLoader<CharsetProvider> sl = |
|
336 |
ServiceLoader.load(CharsetProvider.class, cl); |
|
337 |
Iterator<CharsetProvider> i = sl.iterator(); |
|
10137
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
338 |
CharsetProvider next = null; |
2 | 339 |
|
340 |
private boolean getNext() { |
|
341 |
while (next == null) { |
|
342 |
try { |
|
343 |
if (!i.hasNext()) |
|
344 |
return false; |
|
345 |
next = i.next(); |
|
346 |
} catch (ServiceConfigurationError sce) { |
|
347 |
if (sce.getCause() instanceof SecurityException) { |
|
348 |
// Ignore security exceptions |
|
349 |
continue; |
|
350 |
} |
|
351 |
throw sce; |
|
352 |
} |
|
353 |
} |
|
354 |
return true; |
|
355 |
} |
|
356 |
||
357 |
public boolean hasNext() { |
|
358 |
return getNext(); |
|
359 |
} |
|
360 |
||
10137
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
361 |
public CharsetProvider next() { |
2 | 362 |
if (!getNext()) |
363 |
throw new NoSuchElementException(); |
|
10137
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
364 |
CharsetProvider n = next; |
2 | 365 |
next = null; |
366 |
return n; |
|
367 |
} |
|
368 |
||
369 |
public void remove() { |
|
370 |
throw new UnsupportedOperationException(); |
|
371 |
} |
|
372 |
||
373 |
}; |
|
374 |
} |
|
375 |
||
376 |
// Thread-local gate to prevent recursive provider lookups |
|
10137
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
377 |
private static ThreadLocal<ThreadLocal<?>> gate = |
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
378 |
new ThreadLocal<ThreadLocal<?>>(); |
2 | 379 |
|
380 |
private static Charset lookupViaProviders(final String charsetName) { |
|
381 |
||
382 |
// The runtime startup sequence looks up standard charsets as a |
|
383 |
// consequence of the VM's invocation of System.initializeSystemClass |
|
384 |
// in order to, e.g., set system properties and encode filenames. At |
|
385 |
// that point the application class loader has not been initialized, |
|
386 |
// however, so we can't look for providers because doing so will cause |
|
387 |
// that loader to be prematurely initialized with incomplete |
|
388 |
// information. |
|
389 |
// |
|
34882 | 390 |
if (!VM.isBooted()) |
2 | 391 |
return null; |
392 |
||
393 |
if (gate.get() != null) |
|
394 |
// Avoid recursive provider lookups |
|
395 |
return null; |
|
396 |
try { |
|
397 |
gate.set(gate); |
|
398 |
||
399 |
return AccessController.doPrivileged( |
|
29986
97167d851fc4
8078467: Update core libraries to use diamond with anonymous classes
darcy
parents:
25859
diff
changeset
|
400 |
new PrivilegedAction<>() { |
2 | 401 |
public Charset run() { |
10137
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
402 |
for (Iterator<CharsetProvider> i = providers(); |
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
403 |
i.hasNext();) { |
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
404 |
CharsetProvider cp = i.next(); |
2 | 405 |
Charset cs = cp.charsetForName(charsetName); |
406 |
if (cs != null) |
|
407 |
return cs; |
|
408 |
} |
|
409 |
return null; |
|
410 |
} |
|
411 |
}); |
|
412 |
||
413 |
} finally { |
|
414 |
gate.set(null); |
|
415 |
} |
|
416 |
} |
|
417 |
||
418 |
/* The extended set of charsets */ |
|
17470
b65cf2b5983b
8012326: Deadlock occurs when Charset.availableCharsets() is called by several threads at the same time
sherman
parents:
10882
diff
changeset
|
419 |
private static class ExtendedProviderHolder { |
30818
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
420 |
static final CharsetProvider[] extendedProviders = extendedProviders(); |
17470
b65cf2b5983b
8012326: Deadlock occurs when Charset.availableCharsets() is called by several threads at the same time
sherman
parents:
10882
diff
changeset
|
421 |
// returns ExtendedProvider, if installed |
30818
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
422 |
private static CharsetProvider[] extendedProviders() { |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
423 |
return AccessController.doPrivileged(new PrivilegedAction<>() { |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
424 |
public CharsetProvider[] run() { |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
425 |
CharsetProvider[] cps = new CharsetProvider[1]; |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
426 |
int n = 0; |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
427 |
ServiceLoader<CharsetProvider> sl = |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
428 |
ServiceLoader.loadInstalled(CharsetProvider.class); |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
429 |
for (CharsetProvider cp : sl) { |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
430 |
if (n + 1 > cps.length) { |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
431 |
cps = Arrays.copyOf(cps, cps.length << 1); |
17470
b65cf2b5983b
8012326: Deadlock occurs when Charset.availableCharsets() is called by several threads at the same time
sherman
parents:
10882
diff
changeset
|
432 |
} |
30818
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
433 |
cps[n++] = cp; |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
434 |
} |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
435 |
return n == cps.length ? cps : Arrays.copyOf(cps, n); |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
436 |
}}); |
17470
b65cf2b5983b
8012326: Deadlock occurs when Charset.availableCharsets() is called by several threads at the same time
sherman
parents:
10882
diff
changeset
|
437 |
} |
2 | 438 |
} |
439 |
||
440 |
private static Charset lookupExtendedCharset(String charsetName) { |
|
34882 | 441 |
if (!VM.isBooted()) // see lookupViaProviders() |
30818
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
442 |
return null; |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
443 |
CharsetProvider[] ecps = ExtendedProviderHolder.extendedProviders; |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
444 |
for (CharsetProvider cp : ecps) { |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
445 |
Charset cs = cp.charsetForName(charsetName); |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
446 |
if (cs != null) |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
447 |
return cs; |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
448 |
} |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
449 |
return null; |
2 | 450 |
} |
451 |
||
452 |
private static Charset lookup(String charsetName) { |
|
453 |
if (charsetName == null) |
|
454 |
throw new IllegalArgumentException("Null charset name"); |
|
455 |
Object[] a; |
|
456 |
if ((a = cache1) != null && charsetName.equals(a[0])) |
|
457 |
return (Charset)a[1]; |
|
458 |
// We expect most programs to use one Charset repeatedly. |
|
459 |
// We convey a hint to this effect to the VM by putting the |
|
460 |
// level 1 cache miss code in a separate method. |
|
461 |
return lookup2(charsetName); |
|
462 |
} |
|
463 |
||
464 |
private static Charset lookup2(String charsetName) { |
|
465 |
Object[] a; |
|
466 |
if ((a = cache2) != null && charsetName.equals(a[0])) { |
|
467 |
cache2 = cache1; |
|
468 |
cache1 = a; |
|
469 |
return (Charset)a[1]; |
|
470 |
} |
|
471 |
Charset cs; |
|
472 |
if ((cs = standardProvider.charsetForName(charsetName)) != null || |
|
473 |
(cs = lookupExtendedCharset(charsetName)) != null || |
|
474 |
(cs = lookupViaProviders(charsetName)) != null) |
|
475 |
{ |
|
476 |
cache(charsetName, cs); |
|
477 |
return cs; |
|
478 |
} |
|
479 |
||
480 |
/* Only need to check the name if we didn't find a charset for it */ |
|
481 |
checkName(charsetName); |
|
482 |
return null; |
|
483 |
} |
|
484 |
||
485 |
/** |
|
18164 | 486 |
* Tells whether the named charset is supported. |
2 | 487 |
* |
488 |
* @param charsetName |
|
489 |
* The name of the requested charset; may be either |
|
490 |
* a canonical name or an alias |
|
491 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
492 |
* @return {@code true} if, and only if, support for the named charset |
2 | 493 |
* is available in the current Java virtual machine |
494 |
* |
|
495 |
* @throws IllegalCharsetNameException |
|
496 |
* If the given charset name is illegal |
|
497 |
* |
|
498 |
* @throws IllegalArgumentException |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
499 |
* If the given {@code charsetName} is null |
2 | 500 |
*/ |
501 |
public static boolean isSupported(String charsetName) { |
|
502 |
return (lookup(charsetName) != null); |
|
503 |
} |
|
504 |
||
505 |
/** |
|
18164 | 506 |
* Returns a charset object for the named charset. |
2 | 507 |
* |
508 |
* @param charsetName |
|
509 |
* The name of the requested charset; may be either |
|
510 |
* a canonical name or an alias |
|
511 |
* |
|
512 |
* @return A charset object for the named charset |
|
513 |
* |
|
514 |
* @throws IllegalCharsetNameException |
|
515 |
* If the given charset name is illegal |
|
516 |
* |
|
517 |
* @throws IllegalArgumentException |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
518 |
* If the given {@code charsetName} is null |
2 | 519 |
* |
520 |
* @throws UnsupportedCharsetException |
|
521 |
* If no support for the named charset is available |
|
522 |
* in this instance of the Java virtual machine |
|
523 |
*/ |
|
524 |
public static Charset forName(String charsetName) { |
|
525 |
Charset cs = lookup(charsetName); |
|
526 |
if (cs != null) |
|
527 |
return cs; |
|
528 |
throw new UnsupportedCharsetException(charsetName); |
|
529 |
} |
|
530 |
||
531 |
// Fold charsets from the given iterator into the given map, ignoring |
|
532 |
// charsets whose names already have entries in the map. |
|
533 |
// |
|
895 | 534 |
private static void put(Iterator<Charset> i, Map<String,Charset> m) { |
2 | 535 |
while (i.hasNext()) { |
895 | 536 |
Charset cs = i.next(); |
2 | 537 |
if (!m.containsKey(cs.name())) |
538 |
m.put(cs.name(), cs); |
|
539 |
} |
|
540 |
} |
|
541 |
||
542 |
/** |
|
543 |
* Constructs a sorted map from canonical charset names to charset objects. |
|
544 |
* |
|
545 |
* <p> The map returned by this method will have one entry for each charset |
|
546 |
* for which support is available in the current Java virtual machine. If |
|
547 |
* two or more supported charsets have the same canonical name then the |
|
548 |
* resulting map will contain just one of them; which one it will contain |
|
549 |
* is not specified. </p> |
|
550 |
* |
|
551 |
* <p> The invocation of this method, and the subsequent use of the |
|
552 |
* resulting map, may cause time-consuming disk or network I/O operations |
|
553 |
* to occur. This method is provided for applications that need to |
|
554 |
* enumerate all of the available charsets, for example to allow user |
|
555 |
* charset selection. This method is not used by the {@link #forName |
|
556 |
* forName} method, which instead employs an efficient incremental lookup |
|
557 |
* algorithm. |
|
558 |
* |
|
559 |
* <p> This method may return different results at different times if new |
|
560 |
* charset providers are dynamically made available to the current Java |
|
561 |
* virtual machine. In the absence of such changes, the charsets returned |
|
562 |
* by this method are exactly those that can be retrieved via the {@link |
|
563 |
* #forName forName} method. </p> |
|
564 |
* |
|
565 |
* @return An immutable, case-insensitive map from canonical charset names |
|
566 |
* to charset objects |
|
567 |
*/ |
|
568 |
public static SortedMap<String,Charset> availableCharsets() { |
|
569 |
return AccessController.doPrivileged( |
|
29986
97167d851fc4
8078467: Update core libraries to use diamond with anonymous classes
darcy
parents:
25859
diff
changeset
|
570 |
new PrivilegedAction<>() { |
2 | 571 |
public SortedMap<String,Charset> run() { |
572 |
TreeMap<String,Charset> m = |
|
29986
97167d851fc4
8078467: Update core libraries to use diamond with anonymous classes
darcy
parents:
25859
diff
changeset
|
573 |
new TreeMap<>( |
36411
f0cd8358b5ea
8151384: Improve String.CASE_INSENSITIVE_ORDER and remove sun.misc.ASCIICaseInsensitiveComparator
chegar
parents:
34882
diff
changeset
|
574 |
String.CASE_INSENSITIVE_ORDER); |
2 | 575 |
put(standardProvider.charsets(), m); |
30818
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
576 |
CharsetProvider[] ecps = ExtendedProviderHolder.extendedProviders; |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
577 |
for (CharsetProvider ecp :ecps) { |
17470
b65cf2b5983b
8012326: Deadlock occurs when Charset.availableCharsets() is called by several threads at the same time
sherman
parents:
10882
diff
changeset
|
578 |
put(ecp.charsets(), m); |
30818
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
579 |
} |
10137
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
580 |
for (Iterator<CharsetProvider> i = providers(); i.hasNext();) { |
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
581 |
CharsetProvider cp = i.next(); |
2 | 582 |
put(cp.charsets(), m); |
583 |
} |
|
584 |
return Collections.unmodifiableSortedMap(m); |
|
585 |
} |
|
586 |
}); |
|
587 |
} |
|
588 |
||
589 |
private static volatile Charset defaultCharset; |
|
590 |
||
591 |
/** |
|
592 |
* Returns the default charset of this Java virtual machine. |
|
593 |
* |
|
594 |
* <p> The default charset is determined during virtual-machine startup and |
|
595 |
* typically depends upon the locale and charset of the underlying |
|
596 |
* operating system. |
|
597 |
* |
|
598 |
* @return A charset object for the default charset |
|
599 |
* |
|
600 |
* @since 1.5 |
|
601 |
*/ |
|
602 |
public static Charset defaultCharset() { |
|
603 |
if (defaultCharset == null) { |
|
604 |
synchronized (Charset.class) { |
|
37781
71ed5645f17c
8155775: Re-examine naming of privileged methods to access System properties
redestad
parents:
37593
diff
changeset
|
605 |
String csn = GetPropertyAction |
71ed5645f17c
8155775: Re-examine naming of privileged methods to access System properties
redestad
parents:
37593
diff
changeset
|
606 |
.privilegedGetProperty("file.encoding"); |
2 | 607 |
Charset cs = lookup(csn); |
608 |
if (cs != null) |
|
609 |
defaultCharset = cs; |
|
610 |
else |
|
611 |
defaultCharset = forName("UTF-8"); |
|
612 |
} |
|
613 |
} |
|
614 |
return defaultCharset; |
|
615 |
} |
|
616 |
||
617 |
||
618 |
/* -- Instance fields and methods -- */ |
|
619 |
||
620 |
private final String name; // tickles a bug in oldjavac |
|
621 |
private final String[] aliases; // tickles a bug in oldjavac |
|
895 | 622 |
private Set<String> aliasSet = null; |
2 | 623 |
|
624 |
/** |
|
625 |
* Initializes a new charset with the given canonical name and alias |
|
18164 | 626 |
* set. |
2 | 627 |
* |
628 |
* @param canonicalName |
|
629 |
* The canonical name of this charset |
|
630 |
* |
|
631 |
* @param aliases |
|
632 |
* An array of this charset's aliases, or null if it has no aliases |
|
633 |
* |
|
634 |
* @throws IllegalCharsetNameException |
|
635 |
* If the canonical name or any of the aliases are illegal |
|
636 |
*/ |
|
637 |
protected Charset(String canonicalName, String[] aliases) { |
|
638 |
checkName(canonicalName); |
|
33675
7d9d372a41df
8141652: Rename methods Objects.nonNullElse* to requireNonNullElse*
rriggs
parents:
32143
diff
changeset
|
639 |
String[] as = Objects.requireNonNullElse(aliases, zeroAliases); |
2 | 640 |
for (int i = 0; i < as.length; i++) |
641 |
checkName(as[i]); |
|
642 |
this.name = canonicalName; |
|
643 |
this.aliases = as; |
|
644 |
} |
|
645 |
||
646 |
/** |
|
18164 | 647 |
* Returns this charset's canonical name. |
2 | 648 |
* |
649 |
* @return The canonical name of this charset |
|
650 |
*/ |
|
651 |
public final String name() { |
|
652 |
return name; |
|
653 |
} |
|
654 |
||
655 |
/** |
|
18164 | 656 |
* Returns a set containing this charset's aliases. |
2 | 657 |
* |
658 |
* @return An immutable set of this charset's aliases |
|
659 |
*/ |
|
660 |
public final Set<String> aliases() { |
|
661 |
if (aliasSet != null) |
|
662 |
return aliasSet; |
|
663 |
int n = aliases.length; |
|
29986
97167d851fc4
8078467: Update core libraries to use diamond with anonymous classes
darcy
parents:
25859
diff
changeset
|
664 |
HashSet<String> hs = new HashSet<>(n); |
2 | 665 |
for (int i = 0; i < n; i++) |
666 |
hs.add(aliases[i]); |
|
667 |
aliasSet = Collections.unmodifiableSet(hs); |
|
668 |
return aliasSet; |
|
669 |
} |
|
670 |
||
671 |
/** |
|
672 |
* Returns this charset's human-readable name for the default locale. |
|
673 |
* |
|
674 |
* <p> The default implementation of this method simply returns this |
|
675 |
* charset's canonical name. Concrete subclasses of this class may |
|
676 |
* override this method in order to provide a localized display name. </p> |
|
677 |
* |
|
678 |
* @return The display name of this charset in the default locale |
|
679 |
*/ |
|
680 |
public String displayName() { |
|
681 |
return name; |
|
682 |
} |
|
683 |
||
684 |
/** |
|
685 |
* Tells whether or not this charset is registered in the <a |
|
686 |
* href="http://www.iana.org/assignments/character-sets">IANA Charset |
|
18164 | 687 |
* Registry</a>. |
2 | 688 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
689 |
* @return {@code true} if, and only if, this charset is known by its |
2 | 690 |
* implementor to be registered with the IANA |
691 |
*/ |
|
692 |
public final boolean isRegistered() { |
|
693 |
return !name.startsWith("X-") && !name.startsWith("x-"); |
|
694 |
} |
|
695 |
||
696 |
/** |
|
697 |
* Returns this charset's human-readable name for the given locale. |
|
698 |
* |
|
699 |
* <p> The default implementation of this method simply returns this |
|
700 |
* charset's canonical name. Concrete subclasses of this class may |
|
701 |
* override this method in order to provide a localized display name. </p> |
|
702 |
* |
|
703 |
* @param locale |
|
704 |
* The locale for which the display name is to be retrieved |
|
705 |
* |
|
706 |
* @return The display name of this charset in the given locale |
|
707 |
*/ |
|
708 |
public String displayName(Locale locale) { |
|
709 |
return name; |
|
710 |
} |
|
711 |
||
712 |
/** |
|
713 |
* Tells whether or not this charset contains the given charset. |
|
714 |
* |
|
715 |
* <p> A charset <i>C</i> is said to <i>contain</i> a charset <i>D</i> if, |
|
716 |
* and only if, every character representable in <i>D</i> is also |
|
717 |
* representable in <i>C</i>. If this relationship holds then it is |
|
718 |
* guaranteed that every string that can be encoded in <i>D</i> can also be |
|
719 |
* encoded in <i>C</i> without performing any replacements. |
|
720 |
* |
|
721 |
* <p> That <i>C</i> contains <i>D</i> does not imply that each character |
|
722 |
* representable in <i>C</i> by a particular byte sequence is represented |
|
723 |
* in <i>D</i> by the same byte sequence, although sometimes this is the |
|
724 |
* case. |
|
725 |
* |
|
726 |
* <p> Every charset contains itself. |
|
727 |
* |
|
728 |
* <p> This method computes an approximation of the containment relation: |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
729 |
* If it returns {@code true} then the given charset is known to be |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
730 |
* contained by this charset; if it returns {@code false}, however, then |
2 | 731 |
* it is not necessarily the case that the given charset is not contained |
732 |
* in this charset. |
|
733 |
* |
|
18574
4aeaeb541678
8019380: doclint warnings in java.nio, java.nio.file.**, java.nio.channels.**
alanb
parents:
18164
diff
changeset
|
734 |
* @param cs |
4aeaeb541678
8019380: doclint warnings in java.nio, java.nio.file.**, java.nio.channels.**
alanb
parents:
18164
diff
changeset
|
735 |
* The given charset |
4aeaeb541678
8019380: doclint warnings in java.nio, java.nio.file.**, java.nio.channels.**
alanb
parents:
18164
diff
changeset
|
736 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
737 |
* @return {@code true} if the given charset is contained in this charset |
2 | 738 |
*/ |
739 |
public abstract boolean contains(Charset cs); |
|
740 |
||
741 |
/** |
|
18164 | 742 |
* Constructs a new decoder for this charset. |
2 | 743 |
* |
744 |
* @return A new decoder for this charset |
|
745 |
*/ |
|
746 |
public abstract CharsetDecoder newDecoder(); |
|
747 |
||
748 |
/** |
|
18164 | 749 |
* Constructs a new encoder for this charset. |
2 | 750 |
* |
751 |
* @return A new encoder for this charset |
|
752 |
* |
|
753 |
* @throws UnsupportedOperationException |
|
754 |
* If this charset does not support encoding |
|
755 |
*/ |
|
756 |
public abstract CharsetEncoder newEncoder(); |
|
757 |
||
758 |
/** |
|
759 |
* Tells whether or not this charset supports encoding. |
|
760 |
* |
|
761 |
* <p> Nearly all charsets support encoding. The primary exceptions are |
|
762 |
* special-purpose <i>auto-detect</i> charsets whose decoders can determine |
|
763 |
* which of several possible encoding schemes is in use by examining the |
|
764 |
* input byte sequence. Such charsets do not support encoding because |
|
765 |
* there is no way to determine which encoding should be used on output. |
|
766 |
* Implementations of such charsets should override this method to return |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
767 |
* {@code false}. </p> |
2 | 768 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
769 |
* @return {@code true} if, and only if, this charset supports encoding |
2 | 770 |
*/ |
771 |
public boolean canEncode() { |
|
772 |
return true; |
|
773 |
} |
|
774 |
||
775 |
/** |
|
776 |
* Convenience method that decodes bytes in this charset into Unicode |
|
777 |
* characters. |
|
778 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
779 |
* <p> An invocation of this method upon a charset {@code cs} returns the |
2 | 780 |
* same result as the expression |
781 |
* |
|
782 |
* <pre> |
|
783 |
* cs.newDecoder() |
|
784 |
* .onMalformedInput(CodingErrorAction.REPLACE) |
|
785 |
* .onUnmappableCharacter(CodingErrorAction.REPLACE) |
|
786 |
* .decode(bb); </pre> |
|
787 |
* |
|
788 |
* except that it is potentially more efficient because it can cache |
|
789 |
* decoders between successive invocations. |
|
790 |
* |
|
791 |
* <p> This method always replaces malformed-input and unmappable-character |
|
792 |
* sequences with this charset's default replacement byte array. In order |
|
793 |
* to detect such sequences, use the {@link |
|
794 |
* CharsetDecoder#decode(java.nio.ByteBuffer)} method directly. </p> |
|
795 |
* |
|
796 |
* @param bb The byte buffer to be decoded |
|
797 |
* |
|
798 |
* @return A char buffer containing the decoded characters |
|
799 |
*/ |
|
800 |
public final CharBuffer decode(ByteBuffer bb) { |
|
801 |
try { |
|
802 |
return ThreadLocalCoders.decoderFor(this) |
|
803 |
.onMalformedInput(CodingErrorAction.REPLACE) |
|
804 |
.onUnmappableCharacter(CodingErrorAction.REPLACE) |
|
805 |
.decode(bb); |
|
806 |
} catch (CharacterCodingException x) { |
|
807 |
throw new Error(x); // Can't happen |
|
808 |
} |
|
809 |
} |
|
810 |
||
811 |
/** |
|
812 |
* Convenience method that encodes Unicode characters into bytes in this |
|
813 |
* charset. |
|
814 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
815 |
* <p> An invocation of this method upon a charset {@code cs} returns the |
2 | 816 |
* same result as the expression |
817 |
* |
|
818 |
* <pre> |
|
819 |
* cs.newEncoder() |
|
820 |
* .onMalformedInput(CodingErrorAction.REPLACE) |
|
821 |
* .onUnmappableCharacter(CodingErrorAction.REPLACE) |
|
822 |
* .encode(bb); </pre> |
|
823 |
* |
|
824 |
* except that it is potentially more efficient because it can cache |
|
825 |
* encoders between successive invocations. |
|
826 |
* |
|
827 |
* <p> This method always replaces malformed-input and unmappable-character |
|
828 |
* sequences with this charset's default replacement string. In order to |
|
829 |
* detect such sequences, use the {@link |
|
830 |
* CharsetEncoder#encode(java.nio.CharBuffer)} method directly. </p> |
|
831 |
* |
|
832 |
* @param cb The char buffer to be encoded |
|
833 |
* |
|
834 |
* @return A byte buffer containing the encoded characters |
|
835 |
*/ |
|
836 |
public final ByteBuffer encode(CharBuffer cb) { |
|
837 |
try { |
|
838 |
return ThreadLocalCoders.encoderFor(this) |
|
839 |
.onMalformedInput(CodingErrorAction.REPLACE) |
|
840 |
.onUnmappableCharacter(CodingErrorAction.REPLACE) |
|
841 |
.encode(cb); |
|
842 |
} catch (CharacterCodingException x) { |
|
843 |
throw new Error(x); // Can't happen |
|
844 |
} |
|
845 |
} |
|
846 |
||
847 |
/** |
|
848 |
* Convenience method that encodes a string into bytes in this charset. |
|
849 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
850 |
* <p> An invocation of this method upon a charset {@code cs} returns the |
2 | 851 |
* same result as the expression |
852 |
* |
|
853 |
* <pre> |
|
854 |
* cs.encode(CharBuffer.wrap(s)); </pre> |
|
855 |
* |
|
856 |
* @param str The string to be encoded |
|
857 |
* |
|
858 |
* @return A byte buffer containing the encoded characters |
|
859 |
*/ |
|
860 |
public final ByteBuffer encode(String str) { |
|
861 |
return encode(CharBuffer.wrap(str)); |
|
862 |
} |
|
863 |
||
864 |
/** |
|
865 |
* Compares this charset to another. |
|
866 |
* |
|
867 |
* <p> Charsets are ordered by their canonical names, without regard to |
|
868 |
* case. </p> |
|
869 |
* |
|
870 |
* @param that |
|
871 |
* The charset to which this charset is to be compared |
|
872 |
* |
|
873 |
* @return A negative integer, zero, or a positive integer as this charset |
|
874 |
* is less than, equal to, or greater than the specified charset |
|
875 |
*/ |
|
876 |
public final int compareTo(Charset that) { |
|
877 |
return (name().compareToIgnoreCase(that.name())); |
|
878 |
} |
|
879 |
||
880 |
/** |
|
18164 | 881 |
* Computes a hashcode for this charset. |
2 | 882 |
* |
883 |
* @return An integer hashcode |
|
884 |
*/ |
|
885 |
public final int hashCode() { |
|
886 |
return name().hashCode(); |
|
887 |
} |
|
888 |
||
889 |
/** |
|
890 |
* Tells whether or not this object is equal to another. |
|
891 |
* |
|
892 |
* <p> Two charsets are equal if, and only if, they have the same canonical |
|
893 |
* names. A charset is never equal to any other type of object. </p> |
|
894 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
895 |
* @return {@code true} if, and only if, this charset is equal to the |
2 | 896 |
* given object |
897 |
*/ |
|
898 |
public final boolean equals(Object ob) { |
|
899 |
if (!(ob instanceof Charset)) |
|
900 |
return false; |
|
901 |
if (this == ob) |
|
902 |
return true; |
|
903 |
return name.equals(((Charset)ob).name()); |
|
904 |
} |
|
905 |
||
906 |
/** |
|
18164 | 907 |
* Returns a string describing this charset. |
2 | 908 |
* |
909 |
* @return A string describing this charset |
|
910 |
*/ |
|
911 |
public final String toString() { |
|
912 |
return name(); |
|
913 |
} |
|
914 |
||
915 |
} |