author | redestad |
Mon, 21 Dec 2015 20:54:00 +0100 | |
changeset 34774 | 03b4e6dc367b |
parent 33675 | 7d9d372a41df |
child 34882 | ce2a8ec851c1 |
permissions | -rw-r--r-- |
2 | 1 |
/* |
19034 | 2 |
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
||
26 |
package java.nio.charset; |
|
27 |
||
28 |
import java.nio.ByteBuffer; |
|
29 |
import java.nio.CharBuffer; |
|
30 |
import java.nio.charset.spi.CharsetProvider; |
|
31 |
import java.security.AccessController; |
|
32 |
import java.security.PrivilegedAction; |
|
30818
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
33 |
import java.util.Arrays; |
2 | 34 |
import java.util.Collections; |
35 |
import java.util.HashSet; |
|
36 |
import java.util.Iterator; |
|
37 |
import java.util.Locale; |
|
38 |
import java.util.Map; |
|
39 |
import java.util.NoSuchElementException; |
|
33675
7d9d372a41df
8141652: Rename methods Objects.nonNullElse* to requireNonNullElse*
rriggs
parents:
32143
diff
changeset
|
40 |
import java.util.Objects; |
2 | 41 |
import java.util.Set; |
42 |
import java.util.ServiceLoader; |
|
43 |
import java.util.ServiceConfigurationError; |
|
44 |
import java.util.SortedMap; |
|
45 |
import java.util.TreeMap; |
|
46 |
import sun.misc.ASCIICaseInsensitiveComparator; |
|
47 |
import sun.nio.cs.StandardCharsets; |
|
48 |
import sun.nio.cs.ThreadLocalCoders; |
|
49 |
import sun.security.action.GetPropertyAction; |
|
50 |
||
51 |
||
52 |
/** |
|
53 |
* A named mapping between sequences of sixteen-bit Unicode <a |
|
54 |
* href="../../lang/Character.html#unicode">code units</a> and sequences of |
|
55 |
* bytes. This class defines methods for creating decoders and encoders and |
|
56 |
* for retrieving the various names associated with a charset. Instances of |
|
57 |
* this class are immutable. |
|
58 |
* |
|
59 |
* <p> This class also defines static methods for testing whether a particular |
|
60 |
* charset is supported, for locating charset instances by name, and for |
|
61 |
* constructing a map that contains every charset for which support is |
|
62 |
* available in the current Java virtual machine. Support for new charsets can |
|
63 |
* be added via the service-provider interface defined in the {@link |
|
64 |
* java.nio.charset.spi.CharsetProvider} class. |
|
65 |
* |
|
66 |
* <p> All of the methods defined in this class are safe for use by multiple |
|
67 |
* concurrent threads. |
|
68 |
* |
|
69 |
* |
|
19034 | 70 |
* <a name="names"></a><a name="charenc"></a> |
18574
4aeaeb541678
8019380: doclint warnings in java.nio, java.nio.file.**, java.nio.channels.**
alanb
parents:
18164
diff
changeset
|
71 |
* <h2>Charset names</h2> |
2 | 72 |
* |
73 |
* <p> Charsets are named by strings composed of the following characters: |
|
74 |
* |
|
75 |
* <ul> |
|
76 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
77 |
* <li> The uppercase letters {@code 'A'} through {@code 'Z'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
78 |
* (<code>'\u0041'</code> through <code>'\u005a'</code>), |
2 | 79 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
80 |
* <li> The lowercase letters {@code 'a'} through {@code 'z'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
81 |
* (<code>'\u0061'</code> through <code>'\u007a'</code>), |
2 | 82 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
83 |
* <li> The digits {@code '0'} through {@code '9'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
84 |
* (<code>'\u0030'</code> through <code>'\u0039'</code>), |
2 | 85 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
86 |
* <li> The dash character {@code '-'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
87 |
* (<code>'\u002d'</code>, <small>HYPHEN-MINUS</small>), |
2 | 88 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
89 |
* <li> The plus character {@code '+'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
90 |
* (<code>'\u002b'</code>, <small>PLUS SIGN</small>), |
1146
bbcbf6618d8c
4849617: (cs)Revise Charset spec to allow '+' in names
sherman
parents:
895
diff
changeset
|
91 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
92 |
* <li> The period character {@code '.'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
93 |
* (<code>'\u002e'</code>, <small>FULL STOP</small>), |
2 | 94 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
95 |
* <li> The colon character {@code ':'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
96 |
* (<code>'\u003a'</code>, <small>COLON</small>), and |
2 | 97 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
98 |
* <li> The underscore character {@code '_'} |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
99 |
* (<code>'\u005f'</code>, <small>LOW LINE</small>). |
2 | 100 |
* |
101 |
* </ul> |
|
102 |
* |
|
103 |
* A charset name must begin with either a letter or a digit. The empty string |
|
104 |
* is not a legal charset name. Charset names are not case-sensitive; that is, |
|
105 |
* case is always ignored when comparing charset names. Charset names |
|
106 |
* generally follow the conventions documented in <a |
|
107 |
* href="http://www.ietf.org/rfc/rfc2278.txt"><i>RFC 2278: IANA Charset |
|
108 |
* Registration Procedures</i></a>. |
|
109 |
* |
|
110 |
* <p> Every charset has a <i>canonical name</i> and may also have one or more |
|
111 |
* <i>aliases</i>. The canonical name is returned by the {@link #name() name} method |
|
112 |
* of this class. Canonical names are, by convention, usually in upper case. |
|
113 |
* The aliases of a charset are returned by the {@link #aliases() aliases} |
|
114 |
* method. |
|
115 |
* |
|
19034 | 116 |
* <p><a name="hn">Some charsets have an <i>historical name</i> that is defined for |
117 |
* compatibility with previous versions of the Java platform.</a> A charset's |
|
2 | 118 |
* historical name is either its canonical name or one of its aliases. The |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
119 |
* historical name is returned by the {@code getEncoding()} methods of the |
2 | 120 |
* {@link java.io.InputStreamReader#getEncoding InputStreamReader} and {@link |
121 |
* java.io.OutputStreamWriter#getEncoding OutputStreamWriter} classes. |
|
122 |
* |
|
21801
b8a5ff5f0c2a
8028049: Tidy warnings cleanup for packages java.nio/java.io
yan
parents:
21278
diff
changeset
|
123 |
* <p><a name="iana"> </a>If a charset listed in the <a |
2 | 124 |
* href="http://www.iana.org/assignments/character-sets"><i>IANA Charset |
125 |
* Registry</i></a> is supported by an implementation of the Java platform then |
|
21801
b8a5ff5f0c2a
8028049: Tidy warnings cleanup for packages java.nio/java.io
yan
parents:
21278
diff
changeset
|
126 |
* its canonical name must be the name listed in the registry. Many charsets |
2 | 127 |
* are given more than one name in the registry, in which case the registry |
128 |
* identifies one of the names as <i>MIME-preferred</i>. If a charset has more |
|
129 |
* than one registry name then its canonical name must be the MIME-preferred |
|
130 |
* name and the other names in the registry must be valid aliases. If a |
|
131 |
* supported charset is not listed in the IANA registry then its canonical name |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
132 |
* must begin with one of the strings {@code "X-"} or {@code "x-"}. |
2 | 133 |
* |
134 |
* <p> The IANA charset registry does change over time, and so the canonical |
|
135 |
* name and the aliases of a particular charset may also change over time. To |
|
136 |
* ensure compatibility it is recommended that no alias ever be removed from a |
|
137 |
* charset, and that if the canonical name of a charset is changed then its |
|
138 |
* previous canonical name be made into an alias. |
|
139 |
* |
|
140 |
* |
|
18574
4aeaeb541678
8019380: doclint warnings in java.nio, java.nio.file.**, java.nio.channels.**
alanb
parents:
18164
diff
changeset
|
141 |
* <h2>Standard charsets</h2> |
2 | 142 |
* |
19034 | 143 |
* |
9526
a80328f995f1
4884238: Adds java.nio.charset.StandardCharset to provide static final constants for the standard charsets.
mduigou
parents:
7668
diff
changeset
|
144 |
* |
19034 | 145 |
* <p><a name="standard">Every implementation of the Java platform is required to support the |
146 |
* following standard charsets.</a> Consult the release documentation for your |
|
2 | 147 |
* implementation to see if any other charsets are supported. The behavior |
148 |
* of such optional charsets may differ between implementations. |
|
149 |
* |
|
150 |
* <blockquote><table width="80%" summary="Description of standard charsets"> |
|
19034 | 151 |
* <tr><th align="left">Charset</th><th align="left">Description</th></tr> |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
152 |
* <tr><td valign=top>{@code US-ASCII}</td> |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
153 |
* <td>Seven-bit ASCII, a.k.a. {@code ISO646-US}, |
2 | 154 |
* a.k.a. the Basic Latin block of the Unicode character set</td></tr> |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
155 |
* <tr><td valign=top><code>ISO-8859-1 </code></td> |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
156 |
* <td>ISO Latin Alphabet No. 1, a.k.a. {@code ISO-LATIN-1}</td></tr> |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
157 |
* <tr><td valign=top>{@code UTF-8}</td> |
2 | 158 |
* <td>Eight-bit UCS Transformation Format</td></tr> |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
159 |
* <tr><td valign=top>{@code UTF-16BE}</td> |
2 | 160 |
* <td>Sixteen-bit UCS Transformation Format, |
161 |
* big-endian byte order</td></tr> |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
162 |
* <tr><td valign=top>{@code UTF-16LE}</td> |
2 | 163 |
* <td>Sixteen-bit UCS Transformation Format, |
164 |
* little-endian byte order</td></tr> |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
165 |
* <tr><td valign=top>{@code UTF-16}</td> |
2 | 166 |
* <td>Sixteen-bit UCS Transformation Format, |
167 |
* byte order identified by an optional byte-order mark</td></tr> |
|
168 |
* </table></blockquote> |
|
169 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
170 |
* <p> The {@code UTF-8} charset is specified by <a |
2 | 171 |
* href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC 2279</i></a>; the |
172 |
* transformation format upon which it is based is specified in |
|
173 |
* Amendment 2 of ISO 10646-1 and is also described in the <a |
|
174 |
* href="http://www.unicode.org/unicode/standard/standard.html"><i>Unicode |
|
175 |
* Standard</i></a>. |
|
176 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
177 |
* <p> The {@code UTF-16} charsets are specified by <a |
2 | 178 |
* href="http://www.ietf.org/rfc/rfc2781.txt"><i>RFC 2781</i></a>; the |
179 |
* transformation formats upon which they are based are specified in |
|
180 |
* Amendment 1 of ISO 10646-1 and are also described in the <a |
|
181 |
* href="http://www.unicode.org/unicode/standard/standard.html"><i>Unicode |
|
182 |
* Standard</i></a>. |
|
183 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
184 |
* <p> The {@code UTF-16} charsets use sixteen-bit quantities and are |
2 | 185 |
* therefore sensitive to byte order. In these encodings the byte order of a |
186 |
* stream may be indicated by an initial <i>byte-order mark</i> represented by |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
187 |
* the Unicode character <code>'\uFEFF'</code>. Byte-order marks are handled |
2 | 188 |
* as follows: |
189 |
* |
|
190 |
* <ul> |
|
191 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
192 |
* <li><p> When decoding, the {@code UTF-16BE} and {@code UTF-16LE} |
784
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
193 |
* charsets interpret the initial byte-order marks as a <small>ZERO-WIDTH |
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
194 |
* NON-BREAKING SPACE</small>; when encoding, they do not write |
2 | 195 |
* byte-order marks. </p></li> |
784
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
196 |
|
2 | 197 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
198 |
* <li><p> When decoding, the {@code UTF-16} charset interprets the |
784
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
199 |
* byte-order mark at the beginning of the input stream to indicate the |
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
200 |
* byte-order of the stream but defaults to big-endian if there is no |
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
201 |
* byte-order mark; when encoding, it uses big-endian byte order and writes |
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
202 |
* a big-endian byte-order mark. </p></li> |
2 | 203 |
* |
204 |
* </ul> |
|
205 |
* |
|
21278 | 206 |
* In any case, byte order marks occurring after the first element of an |
784
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
207 |
* input sequence are not omitted since the same code is used to represent |
b42ef9406aae
4752069: (cs spec) BOM should not be ignored in UTF-16 charsets
sherman
parents:
40
diff
changeset
|
208 |
* <small>ZERO-WIDTH NON-BREAKING SPACE</small>. |
2 | 209 |
* |
210 |
* <p> Every instance of the Java virtual machine has a default charset, which |
|
211 |
* may or may not be one of the standard charsets. The default charset is |
|
212 |
* determined during virtual-machine startup and typically depends upon the |
|
213 |
* locale and charset being used by the underlying operating system. </p> |
|
214 |
* |
|
9676
5663e62f8d7e
7041612: Rename StandardCharset to StandardCharsets
mduigou
parents:
9526
diff
changeset
|
215 |
* <p>The {@link StandardCharsets} class defines constants for each of the |
9526
a80328f995f1
4884238: Adds java.nio.charset.StandardCharset to provide static final constants for the standard charsets.
mduigou
parents:
7668
diff
changeset
|
216 |
* standard charsets. |
2 | 217 |
* |
18574
4aeaeb541678
8019380: doclint warnings in java.nio, java.nio.file.**, java.nio.channels.**
alanb
parents:
18164
diff
changeset
|
218 |
* <h2>Terminology</h2> |
2 | 219 |
* |
40 | 220 |
* <p> The name of this class is taken from the terms used in |
221 |
* <a href="http://www.ietf.org/rfc/rfc2278.txt"><i>RFC 2278</i></a>. |
|
222 |
* In that document a <i>charset</i> is defined as the combination of |
|
223 |
* one or more coded character sets and a character-encoding scheme. |
|
224 |
* (This definition is confusing; some other software systems define |
|
225 |
* <i>charset</i> as a synonym for <i>coded character set</i>.) |
|
2 | 226 |
* |
227 |
* <p> A <i>coded character set</i> is a mapping between a set of abstract |
|
228 |
* characters and a set of integers. US-ASCII, ISO 8859-1, |
|
40 | 229 |
* JIS X 0201, and Unicode are examples of coded character sets. |
230 |
* |
|
231 |
* <p> Some standards have defined a <i>character set</i> to be simply a |
|
232 |
* set of abstract characters without an associated assigned numbering. |
|
233 |
* An alphabet is an example of such a character set. However, the subtle |
|
234 |
* distinction between <i>character set</i> and <i>coded character set</i> |
|
235 |
* is rarely used in practice; the former has become a short form for the |
|
236 |
* latter, including in the Java API specification. |
|
2 | 237 |
* |
40 | 238 |
* <p> A <i>character-encoding scheme</i> is a mapping between one or more |
239 |
* coded character sets and a set of octet (eight-bit byte) sequences. |
|
240 |
* UTF-8, UTF-16, ISO 2022, and EUC are examples of |
|
241 |
* character-encoding schemes. Encoding schemes are often associated with |
|
242 |
* a particular coded character set; UTF-8, for example, is used only to |
|
243 |
* encode Unicode. Some schemes, however, are associated with multiple |
|
244 |
* coded character sets; EUC, for example, can be used to encode |
|
245 |
* characters in a variety of Asian coded character sets. |
|
2 | 246 |
* |
247 |
* <p> When a coded character set is used exclusively with a single |
|
40 | 248 |
* character-encoding scheme then the corresponding charset is usually |
249 |
* named for the coded character set; otherwise a charset is usually named |
|
250 |
* for the encoding scheme and, possibly, the locale of the coded |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
251 |
* character sets that it supports. Hence {@code US-ASCII} is both the |
40 | 252 |
* name of a coded character set and of the charset that encodes it, while |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
253 |
* {@code EUC-JP} is the name of the charset that encodes the |
2 | 254 |
* JIS X 0201, JIS X 0208, and JIS X 0212 |
40 | 255 |
* coded character sets for the Japanese language. |
2 | 256 |
* |
257 |
* <p> The native character encoding of the Java programming language is |
|
40 | 258 |
* UTF-16. A charset in the Java platform therefore defines a mapping |
259 |
* between sequences of sixteen-bit UTF-16 code units (that is, sequences |
|
260 |
* of chars) and sequences of bytes. </p> |
|
2 | 261 |
* |
262 |
* |
|
263 |
* @author Mark Reinhold |
|
264 |
* @author JSR-51 Expert Group |
|
265 |
* @since 1.4 |
|
266 |
* |
|
267 |
* @see CharsetDecoder |
|
268 |
* @see CharsetEncoder |
|
269 |
* @see java.nio.charset.spi.CharsetProvider |
|
270 |
* @see java.lang.Character |
|
271 |
*/ |
|
272 |
||
273 |
public abstract class Charset |
|
274 |
implements Comparable<Charset> |
|
275 |
{ |
|
276 |
||
277 |
/* -- Static methods -- */ |
|
278 |
||
34774
03b4e6dc367b
8145680: Remove unnecessary explicit initialization of volatile variables in java.base
redestad
parents:
33675
diff
changeset
|
279 |
private static volatile String bugLevel; |
2 | 280 |
|
281 |
static boolean atBugLevel(String bl) { // package-private |
|
7541
f7a5cde8d213
6527572: (cs) Charset.forName can throw NullPointerException when testing bug level
sherman
parents:
5506
diff
changeset
|
282 |
String level = bugLevel; |
f7a5cde8d213
6527572: (cs) Charset.forName can throw NullPointerException when testing bug level
sherman
parents:
5506
diff
changeset
|
283 |
if (level == null) { |
2 | 284 |
if (!sun.misc.VM.isBooted()) |
285 |
return false; |
|
7541
f7a5cde8d213
6527572: (cs) Charset.forName can throw NullPointerException when testing bug level
sherman
parents:
5506
diff
changeset
|
286 |
bugLevel = level = AccessController.doPrivileged( |
f7a5cde8d213
6527572: (cs) Charset.forName can throw NullPointerException when testing bug level
sherman
parents:
5506
diff
changeset
|
287 |
new GetPropertyAction("sun.nio.cs.bugLevel", "")); |
2 | 288 |
} |
7541
f7a5cde8d213
6527572: (cs) Charset.forName can throw NullPointerException when testing bug level
sherman
parents:
5506
diff
changeset
|
289 |
return level.equals(bl); |
2 | 290 |
} |
291 |
||
292 |
/** |
|
293 |
* Checks that the given string is a legal charset name. </p> |
|
294 |
* |
|
295 |
* @param s |
|
296 |
* A purported charset name |
|
297 |
* |
|
298 |
* @throws IllegalCharsetNameException |
|
299 |
* If the given name is not a legal charset name |
|
300 |
*/ |
|
301 |
private static void checkName(String s) { |
|
302 |
int n = s.length(); |
|
303 |
if (!atBugLevel("1.4")) { |
|
304 |
if (n == 0) |
|
305 |
throw new IllegalCharsetNameException(s); |
|
306 |
} |
|
307 |
for (int i = 0; i < n; i++) { |
|
308 |
char c = s.charAt(i); |
|
309 |
if (c >= 'A' && c <= 'Z') continue; |
|
310 |
if (c >= 'a' && c <= 'z') continue; |
|
311 |
if (c >= '0' && c <= '9') continue; |
|
312 |
if (c == '-' && i != 0) continue; |
|
1146
bbcbf6618d8c
4849617: (cs)Revise Charset spec to allow '+' in names
sherman
parents:
895
diff
changeset
|
313 |
if (c == '+' && i != 0) continue; |
2 | 314 |
if (c == ':' && i != 0) continue; |
315 |
if (c == '_' && i != 0) continue; |
|
316 |
if (c == '.' && i != 0) continue; |
|
317 |
throw new IllegalCharsetNameException(s); |
|
318 |
} |
|
319 |
} |
|
320 |
||
321 |
/* The standard set of charsets */ |
|
322 |
private static CharsetProvider standardProvider = new StandardCharsets(); |
|
323 |
||
324 |
// Cache of the most-recently-returned charsets, |
|
325 |
// along with the names that were used to find them |
|
326 |
// |
|
34774
03b4e6dc367b
8145680: Remove unnecessary explicit initialization of volatile variables in java.base
redestad
parents:
33675
diff
changeset
|
327 |
private static volatile Object[] cache1; // "Level 1" cache |
03b4e6dc367b
8145680: Remove unnecessary explicit initialization of volatile variables in java.base
redestad
parents:
33675
diff
changeset
|
328 |
private static volatile Object[] cache2; // "Level 2" cache |
2 | 329 |
|
330 |
private static void cache(String charsetName, Charset cs) { |
|
331 |
cache2 = cache1; |
|
332 |
cache1 = new Object[] { charsetName, cs }; |
|
333 |
} |
|
334 |
||
335 |
// Creates an iterator that walks over the available providers, ignoring |
|
336 |
// those whose lookup or instantiation causes a security exception to be |
|
337 |
// thrown. Should be invoked with full privileges. |
|
338 |
// |
|
10137
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
339 |
private static Iterator<CharsetProvider> providers() { |
29986
97167d851fc4
8078467: Update core libraries to use diamond with anonymous classes
darcy
parents:
25859
diff
changeset
|
340 |
return new Iterator<>() { |
2 | 341 |
ClassLoader cl = ClassLoader.getSystemClassLoader(); |
342 |
ServiceLoader<CharsetProvider> sl = |
|
343 |
ServiceLoader.load(CharsetProvider.class, cl); |
|
344 |
Iterator<CharsetProvider> i = sl.iterator(); |
|
10137
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
345 |
CharsetProvider next = null; |
2 | 346 |
|
347 |
private boolean getNext() { |
|
348 |
while (next == null) { |
|
349 |
try { |
|
350 |
if (!i.hasNext()) |
|
351 |
return false; |
|
352 |
next = i.next(); |
|
353 |
} catch (ServiceConfigurationError sce) { |
|
354 |
if (sce.getCause() instanceof SecurityException) { |
|
355 |
// Ignore security exceptions |
|
356 |
continue; |
|
357 |
} |
|
358 |
throw sce; |
|
359 |
} |
|
360 |
} |
|
361 |
return true; |
|
362 |
} |
|
363 |
||
364 |
public boolean hasNext() { |
|
365 |
return getNext(); |
|
366 |
} |
|
367 |
||
10137
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
368 |
public CharsetProvider next() { |
2 | 369 |
if (!getNext()) |
370 |
throw new NoSuchElementException(); |
|
10137
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
371 |
CharsetProvider n = next; |
2 | 372 |
next = null; |
373 |
return n; |
|
374 |
} |
|
375 |
||
376 |
public void remove() { |
|
377 |
throw new UnsupportedOperationException(); |
|
378 |
} |
|
379 |
||
380 |
}; |
|
381 |
} |
|
382 |
||
383 |
// Thread-local gate to prevent recursive provider lookups |
|
10137
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
384 |
private static ThreadLocal<ThreadLocal<?>> gate = |
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
385 |
new ThreadLocal<ThreadLocal<?>>(); |
2 | 386 |
|
387 |
private static Charset lookupViaProviders(final String charsetName) { |
|
388 |
||
389 |
// The runtime startup sequence looks up standard charsets as a |
|
390 |
// consequence of the VM's invocation of System.initializeSystemClass |
|
391 |
// in order to, e.g., set system properties and encode filenames. At |
|
392 |
// that point the application class loader has not been initialized, |
|
393 |
// however, so we can't look for providers because doing so will cause |
|
394 |
// that loader to be prematurely initialized with incomplete |
|
395 |
// information. |
|
396 |
// |
|
397 |
if (!sun.misc.VM.isBooted()) |
|
398 |
return null; |
|
399 |
||
400 |
if (gate.get() != null) |
|
401 |
// Avoid recursive provider lookups |
|
402 |
return null; |
|
403 |
try { |
|
404 |
gate.set(gate); |
|
405 |
||
406 |
return AccessController.doPrivileged( |
|
29986
97167d851fc4
8078467: Update core libraries to use diamond with anonymous classes
darcy
parents:
25859
diff
changeset
|
407 |
new PrivilegedAction<>() { |
2 | 408 |
public Charset run() { |
10137
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
409 |
for (Iterator<CharsetProvider> i = providers(); |
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
410 |
i.hasNext();) { |
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
411 |
CharsetProvider cp = i.next(); |
2 | 412 |
Charset cs = cp.charsetForName(charsetName); |
413 |
if (cs != null) |
|
414 |
return cs; |
|
415 |
} |
|
416 |
return null; |
|
417 |
} |
|
418 |
}); |
|
419 |
||
420 |
} finally { |
|
421 |
gate.set(null); |
|
422 |
} |
|
423 |
} |
|
424 |
||
425 |
/* The extended set of charsets */ |
|
17470
b65cf2b5983b
8012326: Deadlock occurs when Charset.availableCharsets() is called by several threads at the same time
sherman
parents:
10882
diff
changeset
|
426 |
private static class ExtendedProviderHolder { |
30818
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
427 |
static final CharsetProvider[] extendedProviders = extendedProviders(); |
17470
b65cf2b5983b
8012326: Deadlock occurs when Charset.availableCharsets() is called by several threads at the same time
sherman
parents:
10882
diff
changeset
|
428 |
// returns ExtendedProvider, if installed |
30818
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
429 |
private static CharsetProvider[] extendedProviders() { |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
430 |
return AccessController.doPrivileged(new PrivilegedAction<>() { |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
431 |
public CharsetProvider[] run() { |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
432 |
CharsetProvider[] cps = new CharsetProvider[1]; |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
433 |
int n = 0; |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
434 |
ServiceLoader<CharsetProvider> sl = |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
435 |
ServiceLoader.loadInstalled(CharsetProvider.class); |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
436 |
for (CharsetProvider cp : sl) { |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
437 |
if (n + 1 > cps.length) { |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
438 |
cps = Arrays.copyOf(cps, cps.length << 1); |
17470
b65cf2b5983b
8012326: Deadlock occurs when Charset.availableCharsets() is called by several threads at the same time
sherman
parents:
10882
diff
changeset
|
439 |
} |
30818
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
440 |
cps[n++] = cp; |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
441 |
} |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
442 |
return n == cps.length ? cps : Arrays.copyOf(cps, n); |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
443 |
}}); |
17470
b65cf2b5983b
8012326: Deadlock occurs when Charset.availableCharsets() is called by several threads at the same time
sherman
parents:
10882
diff
changeset
|
444 |
} |
2 | 445 |
} |
446 |
||
447 |
private static Charset lookupExtendedCharset(String charsetName) { |
|
30818
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
448 |
if (!sun.misc.VM.isBooted()) // see lookupViaProviders() |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
449 |
return null; |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
450 |
CharsetProvider[] ecps = ExtendedProviderHolder.extendedProviders; |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
451 |
for (CharsetProvider cp : ecps) { |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
452 |
Charset cs = cp.charsetForName(charsetName); |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
453 |
if (cs != null) |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
454 |
return cs; |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
455 |
} |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
456 |
return null; |
2 | 457 |
} |
458 |
||
459 |
private static Charset lookup(String charsetName) { |
|
460 |
if (charsetName == null) |
|
461 |
throw new IllegalArgumentException("Null charset name"); |
|
462 |
Object[] a; |
|
463 |
if ((a = cache1) != null && charsetName.equals(a[0])) |
|
464 |
return (Charset)a[1]; |
|
465 |
// We expect most programs to use one Charset repeatedly. |
|
466 |
// We convey a hint to this effect to the VM by putting the |
|
467 |
// level 1 cache miss code in a separate method. |
|
468 |
return lookup2(charsetName); |
|
469 |
} |
|
470 |
||
471 |
private static Charset lookup2(String charsetName) { |
|
472 |
Object[] a; |
|
473 |
if ((a = cache2) != null && charsetName.equals(a[0])) { |
|
474 |
cache2 = cache1; |
|
475 |
cache1 = a; |
|
476 |
return (Charset)a[1]; |
|
477 |
} |
|
478 |
Charset cs; |
|
479 |
if ((cs = standardProvider.charsetForName(charsetName)) != null || |
|
480 |
(cs = lookupExtendedCharset(charsetName)) != null || |
|
481 |
(cs = lookupViaProviders(charsetName)) != null) |
|
482 |
{ |
|
483 |
cache(charsetName, cs); |
|
484 |
return cs; |
|
485 |
} |
|
486 |
||
487 |
/* Only need to check the name if we didn't find a charset for it */ |
|
488 |
checkName(charsetName); |
|
489 |
return null; |
|
490 |
} |
|
491 |
||
492 |
/** |
|
18164 | 493 |
* Tells whether the named charset is supported. |
2 | 494 |
* |
495 |
* @param charsetName |
|
496 |
* The name of the requested charset; may be either |
|
497 |
* a canonical name or an alias |
|
498 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
499 |
* @return {@code true} if, and only if, support for the named charset |
2 | 500 |
* is available in the current Java virtual machine |
501 |
* |
|
502 |
* @throws IllegalCharsetNameException |
|
503 |
* If the given charset name is illegal |
|
504 |
* |
|
505 |
* @throws IllegalArgumentException |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
506 |
* If the given {@code charsetName} is null |
2 | 507 |
*/ |
508 |
public static boolean isSupported(String charsetName) { |
|
509 |
return (lookup(charsetName) != null); |
|
510 |
} |
|
511 |
||
512 |
/** |
|
18164 | 513 |
* Returns a charset object for the named charset. |
2 | 514 |
* |
515 |
* @param charsetName |
|
516 |
* The name of the requested charset; may be either |
|
517 |
* a canonical name or an alias |
|
518 |
* |
|
519 |
* @return A charset object for the named charset |
|
520 |
* |
|
521 |
* @throws IllegalCharsetNameException |
|
522 |
* If the given charset name is illegal |
|
523 |
* |
|
524 |
* @throws IllegalArgumentException |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
525 |
* If the given {@code charsetName} is null |
2 | 526 |
* |
527 |
* @throws UnsupportedCharsetException |
|
528 |
* If no support for the named charset is available |
|
529 |
* in this instance of the Java virtual machine |
|
530 |
*/ |
|
531 |
public static Charset forName(String charsetName) { |
|
532 |
Charset cs = lookup(charsetName); |
|
533 |
if (cs != null) |
|
534 |
return cs; |
|
535 |
throw new UnsupportedCharsetException(charsetName); |
|
536 |
} |
|
537 |
||
538 |
// Fold charsets from the given iterator into the given map, ignoring |
|
539 |
// charsets whose names already have entries in the map. |
|
540 |
// |
|
895 | 541 |
private static void put(Iterator<Charset> i, Map<String,Charset> m) { |
2 | 542 |
while (i.hasNext()) { |
895 | 543 |
Charset cs = i.next(); |
2 | 544 |
if (!m.containsKey(cs.name())) |
545 |
m.put(cs.name(), cs); |
|
546 |
} |
|
547 |
} |
|
548 |
||
549 |
/** |
|
550 |
* Constructs a sorted map from canonical charset names to charset objects. |
|
551 |
* |
|
552 |
* <p> The map returned by this method will have one entry for each charset |
|
553 |
* for which support is available in the current Java virtual machine. If |
|
554 |
* two or more supported charsets have the same canonical name then the |
|
555 |
* resulting map will contain just one of them; which one it will contain |
|
556 |
* is not specified. </p> |
|
557 |
* |
|
558 |
* <p> The invocation of this method, and the subsequent use of the |
|
559 |
* resulting map, may cause time-consuming disk or network I/O operations |
|
560 |
* to occur. This method is provided for applications that need to |
|
561 |
* enumerate all of the available charsets, for example to allow user |
|
562 |
* charset selection. This method is not used by the {@link #forName |
|
563 |
* forName} method, which instead employs an efficient incremental lookup |
|
564 |
* algorithm. |
|
565 |
* |
|
566 |
* <p> This method may return different results at different times if new |
|
567 |
* charset providers are dynamically made available to the current Java |
|
568 |
* virtual machine. In the absence of such changes, the charsets returned |
|
569 |
* by this method are exactly those that can be retrieved via the {@link |
|
570 |
* #forName forName} method. </p> |
|
571 |
* |
|
572 |
* @return An immutable, case-insensitive map from canonical charset names |
|
573 |
* to charset objects |
|
574 |
*/ |
|
575 |
public static SortedMap<String,Charset> availableCharsets() { |
|
576 |
return AccessController.doPrivileged( |
|
29986
97167d851fc4
8078467: Update core libraries to use diamond with anonymous classes
darcy
parents:
25859
diff
changeset
|
577 |
new PrivilegedAction<>() { |
2 | 578 |
public SortedMap<String,Charset> run() { |
579 |
TreeMap<String,Charset> m = |
|
29986
97167d851fc4
8078467: Update core libraries to use diamond with anonymous classes
darcy
parents:
25859
diff
changeset
|
580 |
new TreeMap<>( |
2 | 581 |
ASCIICaseInsensitiveComparator.CASE_INSENSITIVE_ORDER); |
582 |
put(standardProvider.charsets(), m); |
|
30818
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
583 |
CharsetProvider[] ecps = ExtendedProviderHolder.extendedProviders; |
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
584 |
for (CharsetProvider ecp :ecps) { |
17470
b65cf2b5983b
8012326: Deadlock occurs when Charset.availableCharsets() is called by several threads at the same time
sherman
parents:
10882
diff
changeset
|
585 |
put(ecp.charsets(), m); |
30818
56133cf1bf00
8038310: Re-examine integration of extended Charsets
sherman
parents:
29986
diff
changeset
|
586 |
} |
10137
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
587 |
for (Iterator<CharsetProvider> i = providers(); i.hasNext();) { |
d92637d3d673
7068616: NIO libraries do not build with javac -Xlint:all,-deprecation -Werror
jjg
parents:
9676
diff
changeset
|
588 |
CharsetProvider cp = i.next(); |
2 | 589 |
put(cp.charsets(), m); |
590 |
} |
|
591 |
return Collections.unmodifiableSortedMap(m); |
|
592 |
} |
|
593 |
}); |
|
594 |
} |
|
595 |
||
596 |
private static volatile Charset defaultCharset; |
|
597 |
||
598 |
/** |
|
599 |
* Returns the default charset of this Java virtual machine. |
|
600 |
* |
|
601 |
* <p> The default charset is determined during virtual-machine startup and |
|
602 |
* typically depends upon the locale and charset of the underlying |
|
603 |
* operating system. |
|
604 |
* |
|
605 |
* @return A charset object for the default charset |
|
606 |
* |
|
607 |
* @since 1.5 |
|
608 |
*/ |
|
609 |
public static Charset defaultCharset() { |
|
610 |
if (defaultCharset == null) { |
|
611 |
synchronized (Charset.class) { |
|
612 |
String csn = AccessController.doPrivileged( |
|
613 |
new GetPropertyAction("file.encoding")); |
|
614 |
Charset cs = lookup(csn); |
|
615 |
if (cs != null) |
|
616 |
defaultCharset = cs; |
|
617 |
else |
|
618 |
defaultCharset = forName("UTF-8"); |
|
619 |
} |
|
620 |
} |
|
621 |
return defaultCharset; |
|
622 |
} |
|
623 |
||
624 |
||
625 |
/* -- Instance fields and methods -- */ |
|
626 |
||
627 |
private final String name; // tickles a bug in oldjavac |
|
628 |
private final String[] aliases; // tickles a bug in oldjavac |
|
33675
7d9d372a41df
8141652: Rename methods Objects.nonNullElse* to requireNonNullElse*
rriggs
parents:
32143
diff
changeset
|
629 |
private final String[] zeroAliases = new String[0]; |
895 | 630 |
private Set<String> aliasSet = null; |
2 | 631 |
|
632 |
/** |
|
633 |
* Initializes a new charset with the given canonical name and alias |
|
18164 | 634 |
* set. |
2 | 635 |
* |
636 |
* @param canonicalName |
|
637 |
* The canonical name of this charset |
|
638 |
* |
|
639 |
* @param aliases |
|
640 |
* An array of this charset's aliases, or null if it has no aliases |
|
641 |
* |
|
642 |
* @throws IllegalCharsetNameException |
|
643 |
* If the canonical name or any of the aliases are illegal |
|
644 |
*/ |
|
645 |
protected Charset(String canonicalName, String[] aliases) { |
|
646 |
checkName(canonicalName); |
|
33675
7d9d372a41df
8141652: Rename methods Objects.nonNullElse* to requireNonNullElse*
rriggs
parents:
32143
diff
changeset
|
647 |
String[] as = Objects.requireNonNullElse(aliases, zeroAliases); |
2 | 648 |
for (int i = 0; i < as.length; i++) |
649 |
checkName(as[i]); |
|
650 |
this.name = canonicalName; |
|
651 |
this.aliases = as; |
|
652 |
} |
|
653 |
||
654 |
/** |
|
18164 | 655 |
* Returns this charset's canonical name. |
2 | 656 |
* |
657 |
* @return The canonical name of this charset |
|
658 |
*/ |
|
659 |
public final String name() { |
|
660 |
return name; |
|
661 |
} |
|
662 |
||
663 |
/** |
|
18164 | 664 |
* Returns a set containing this charset's aliases. |
2 | 665 |
* |
666 |
* @return An immutable set of this charset's aliases |
|
667 |
*/ |
|
668 |
public final Set<String> aliases() { |
|
669 |
if (aliasSet != null) |
|
670 |
return aliasSet; |
|
671 |
int n = aliases.length; |
|
29986
97167d851fc4
8078467: Update core libraries to use diamond with anonymous classes
darcy
parents:
25859
diff
changeset
|
672 |
HashSet<String> hs = new HashSet<>(n); |
2 | 673 |
for (int i = 0; i < n; i++) |
674 |
hs.add(aliases[i]); |
|
675 |
aliasSet = Collections.unmodifiableSet(hs); |
|
676 |
return aliasSet; |
|
677 |
} |
|
678 |
||
679 |
/** |
|
680 |
* Returns this charset's human-readable name for the default locale. |
|
681 |
* |
|
682 |
* <p> The default implementation of this method simply returns this |
|
683 |
* charset's canonical name. Concrete subclasses of this class may |
|
684 |
* override this method in order to provide a localized display name. </p> |
|
685 |
* |
|
686 |
* @return The display name of this charset in the default locale |
|
687 |
*/ |
|
688 |
public String displayName() { |
|
689 |
return name; |
|
690 |
} |
|
691 |
||
692 |
/** |
|
693 |
* Tells whether or not this charset is registered in the <a |
|
694 |
* href="http://www.iana.org/assignments/character-sets">IANA Charset |
|
18164 | 695 |
* Registry</a>. |
2 | 696 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
697 |
* @return {@code true} if, and only if, this charset is known by its |
2 | 698 |
* implementor to be registered with the IANA |
699 |
*/ |
|
700 |
public final boolean isRegistered() { |
|
701 |
return !name.startsWith("X-") && !name.startsWith("x-"); |
|
702 |
} |
|
703 |
||
704 |
/** |
|
705 |
* Returns this charset's human-readable name for the given locale. |
|
706 |
* |
|
707 |
* <p> The default implementation of this method simply returns this |
|
708 |
* charset's canonical name. Concrete subclasses of this class may |
|
709 |
* override this method in order to provide a localized display name. </p> |
|
710 |
* |
|
711 |
* @param locale |
|
712 |
* The locale for which the display name is to be retrieved |
|
713 |
* |
|
714 |
* @return The display name of this charset in the given locale |
|
715 |
*/ |
|
716 |
public String displayName(Locale locale) { |
|
717 |
return name; |
|
718 |
} |
|
719 |
||
720 |
/** |
|
721 |
* Tells whether or not this charset contains the given charset. |
|
722 |
* |
|
723 |
* <p> A charset <i>C</i> is said to <i>contain</i> a charset <i>D</i> if, |
|
724 |
* and only if, every character representable in <i>D</i> is also |
|
725 |
* representable in <i>C</i>. If this relationship holds then it is |
|
726 |
* guaranteed that every string that can be encoded in <i>D</i> can also be |
|
727 |
* encoded in <i>C</i> without performing any replacements. |
|
728 |
* |
|
729 |
* <p> That <i>C</i> contains <i>D</i> does not imply that each character |
|
730 |
* representable in <i>C</i> by a particular byte sequence is represented |
|
731 |
* in <i>D</i> by the same byte sequence, although sometimes this is the |
|
732 |
* case. |
|
733 |
* |
|
734 |
* <p> Every charset contains itself. |
|
735 |
* |
|
736 |
* <p> This method computes an approximation of the containment relation: |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
737 |
* If it returns {@code true} then the given charset is known to be |
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
738 |
* contained by this charset; if it returns {@code false}, however, then |
2 | 739 |
* it is not necessarily the case that the given charset is not contained |
740 |
* in this charset. |
|
741 |
* |
|
18574
4aeaeb541678
8019380: doclint warnings in java.nio, java.nio.file.**, java.nio.channels.**
alanb
parents:
18164
diff
changeset
|
742 |
* @param cs |
4aeaeb541678
8019380: doclint warnings in java.nio, java.nio.file.**, java.nio.channels.**
alanb
parents:
18164
diff
changeset
|
743 |
* The given charset |
4aeaeb541678
8019380: doclint warnings in java.nio, java.nio.file.**, java.nio.channels.**
alanb
parents:
18164
diff
changeset
|
744 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
745 |
* @return {@code true} if the given charset is contained in this charset |
2 | 746 |
*/ |
747 |
public abstract boolean contains(Charset cs); |
|
748 |
||
749 |
/** |
|
18164 | 750 |
* Constructs a new decoder for this charset. |
2 | 751 |
* |
752 |
* @return A new decoder for this charset |
|
753 |
*/ |
|
754 |
public abstract CharsetDecoder newDecoder(); |
|
755 |
||
756 |
/** |
|
18164 | 757 |
* Constructs a new encoder for this charset. |
2 | 758 |
* |
759 |
* @return A new encoder for this charset |
|
760 |
* |
|
761 |
* @throws UnsupportedOperationException |
|
762 |
* If this charset does not support encoding |
|
763 |
*/ |
|
764 |
public abstract CharsetEncoder newEncoder(); |
|
765 |
||
766 |
/** |
|
767 |
* Tells whether or not this charset supports encoding. |
|
768 |
* |
|
769 |
* <p> Nearly all charsets support encoding. The primary exceptions are |
|
770 |
* special-purpose <i>auto-detect</i> charsets whose decoders can determine |
|
771 |
* which of several possible encoding schemes is in use by examining the |
|
772 |
* input byte sequence. Such charsets do not support encoding because |
|
773 |
* there is no way to determine which encoding should be used on output. |
|
774 |
* Implementations of such charsets should override this method to return |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
775 |
* {@code false}. </p> |
2 | 776 |
* |
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
777 |
* @return {@code true} if, and only if, this charset supports encoding |
2 | 778 |
*/ |
779 |
public boolean canEncode() { |
|
780 |
return true; |
|
781 |
} |
|
782 |
||
783 |
/** |
|
784 |
* Convenience method that decodes bytes in this charset into Unicode |
|
785 |
* characters. |
|
786 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
787 |
* <p> An invocation of this method upon a charset {@code cs} returns the |
2 | 788 |
* same result as the expression |
789 |
* |
|
790 |
* <pre> |
|
791 |
* cs.newDecoder() |
|
792 |
* .onMalformedInput(CodingErrorAction.REPLACE) |
|
793 |
* .onUnmappableCharacter(CodingErrorAction.REPLACE) |
|
794 |
* .decode(bb); </pre> |
|
795 |
* |
|
796 |
* except that it is potentially more efficient because it can cache |
|
797 |
* decoders between successive invocations. |
|
798 |
* |
|
799 |
* <p> This method always replaces malformed-input and unmappable-character |
|
800 |
* sequences with this charset's default replacement string. In order |
|
801 |
* to detect such sequences, use the {@link |
|
802 |
* CharsetDecoder#decode(java.nio.ByteBuffer)} method directly. </p> |
|
803 |
* |
|
804 |
* @param bb The byte buffer to be decoded |
|
805 |
* |
|
806 |
* @return A char buffer containing the decoded characters |
|
807 |
*/ |
|
808 |
public final CharBuffer decode(ByteBuffer bb) { |
|
809 |
try { |
|
810 |
return ThreadLocalCoders.decoderFor(this) |
|
811 |
.onMalformedInput(CodingErrorAction.REPLACE) |
|
812 |
.onUnmappableCharacter(CodingErrorAction.REPLACE) |
|
813 |
.decode(bb); |
|
814 |
} catch (CharacterCodingException x) { |
|
815 |
throw new Error(x); // Can't happen |
|
816 |
} |
|
817 |
} |
|
818 |
||
819 |
/** |
|
820 |
* Convenience method that encodes Unicode characters into bytes in this |
|
821 |
* charset. |
|
822 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
823 |
* <p> An invocation of this method upon a charset {@code cs} returns the |
2 | 824 |
* same result as the expression |
825 |
* |
|
826 |
* <pre> |
|
827 |
* cs.newEncoder() |
|
828 |
* .onMalformedInput(CodingErrorAction.REPLACE) |
|
829 |
* .onUnmappableCharacter(CodingErrorAction.REPLACE) |
|
830 |
* .encode(cb); </pre> |
|
831 |
* |
|
832 |
* except that it is potentially more efficient because it can cache |
|
833 |
* encoders between successive invocations. |
|
834 |
* |
|
835 |
* <p> This method always replaces malformed-input and unmappable-character |
|
836 |
* sequences with this charset's default replacement byte array. In order to |
|
837 |
* detect such sequences, use the {@link |
|
838 |
* CharsetEncoder#encode(java.nio.CharBuffer)} method directly. </p> |
|
839 |
* |
|
840 |
* @param cb The char buffer to be encoded |
|
841 |
* |
|
842 |
* @return A byte buffer containing the encoded characters |
|
843 |
*/ |
|
844 |
public final ByteBuffer encode(CharBuffer cb) { |
|
845 |
try { |
|
846 |
return ThreadLocalCoders.encoderFor(this) |
|
847 |
.onMalformedInput(CodingErrorAction.REPLACE) |
|
848 |
.onUnmappableCharacter(CodingErrorAction.REPLACE) |
|
849 |
.encode(cb); |
|
850 |
} catch (CharacterCodingException x) { |
|
851 |
throw new Error(x); // Can't happen |
|
852 |
} |
|
853 |
} |
|
854 |
||
855 |
/** |
|
856 |
* Convenience method that encodes a string into bytes in this charset. |
|
857 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
858 |
* <p> An invocation of this method upon a charset {@code cs} returns the |
2 | 859 |
* same result as the expression |
860 |
* |
|
861 |
* <pre> |
|
862 |
* cs.encode(CharBuffer.wrap(str)); </pre> |
|
863 |
* |
|
864 |
* @param str The string to be encoded |
|
865 |
* |
|
866 |
* @return A byte buffer containing the encoded characters |
|
867 |
*/ |
|
868 |
public final ByteBuffer encode(String str) { |
|
869 |
return encode(CharBuffer.wrap(str)); |
|
870 |
} |
|
871 |
||
872 |
/** |
|
873 |
* Compares this charset to another. |
|
874 |
* |
|
875 |
* <p> Charsets are ordered by their canonical names, without regard to |
|
876 |
* case. </p> |
|
877 |
* |
|
878 |
* @param that |
|
879 |
* The charset to which this charset is to be compared |
|
880 |
* |
|
881 |
* @return A negative integer, zero, or a positive integer as this charset |
|
882 |
* is less than, equal to, or greater than the specified charset |
|
883 |
*/ |
|
884 |
public final int compareTo(Charset that) { |
|
885 |
return (name().compareToIgnoreCase(that.name())); |
|
886 |
} |
|
887 |
||
888 |
/** |
|
18164 | 889 |
* Computes a hashcode for this charset. |
2 | 890 |
* |
891 |
* @return An integer hashcode |
|
892 |
*/ |
|
893 |
public final int hashCode() { |
|
894 |
return name().hashCode(); |
|
895 |
} |
|
896 |
||
897 |
/** |
|
898 |
* Tells whether or not this object is equal to another. |
|
899 |
* |
|
900 |
* <p> Two charsets are equal if, and only if, they have the same canonical |
|
901 |
* names. A charset is never equal to any other type of object. </p> |
|
902 |
* |
|
32143
394ab6a6658d
8133459: replace <tt> tags (obsolete in html5) in java.nio docs
avstepan
parents:
30818
diff
changeset
|
903 |
* @return {@code true} if, and only if, this charset is equal to the |
2 | 904 |
* given object |
905 |
*/ |
|
906 |
public final boolean equals(Object ob) { |
|
907 |
if (!(ob instanceof Charset)) |
|
908 |
return false; |
|
909 |
if (this == ob) |
|
910 |
return true; |
|
911 |
return name.equals(((Charset)ob).name()); |
|
912 |
} |
|
913 |
||
914 |
/** |
|
18164 | 915 |
* Returns a string describing this charset. |
2 | 916 |
* |
917 |
* @return A string describing this charset |
|
918 |
*/ |
|
919 |
public final String toString() { |
|
920 |
return name(); |
|
921 |
} |
|
922 |
||
923 |
} |