12005
|
1 |
/*
|
|
2 |
* Copyright (c) 2006, Oracle and/or its affiliates. All rights reserved.
|
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
4 |
*
|
|
5 |
* This code is free software; you can redistribute it and/or modify it
|
|
6 |
* under the terms of the GNU General Public License version 2 only, as
|
|
7 |
* published by the Free Software Foundation. Oracle designates this
|
|
8 |
* particular file as subject to the "Classpath" exception as provided
|
|
9 |
* by Oracle in the LICENSE file that accompanied this code.
|
|
10 |
*
|
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that
|
|
15 |
* accompanied this code).
|
|
16 |
*
|
|
17 |
* You should have received a copy of the GNU General Public License version
|
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
20 |
*
|
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
22 |
* or visit www.oracle.com if you need additional information or have any
|
|
23 |
* questions.
|
|
24 |
*/
|
|
25 |
|
|
26 |
package com.sun.xml.internal.stream.writers;
|
|
27 |
|
|
28 |
import java.io.Writer;
|
|
29 |
import java.io.OutputStream;
|
|
30 |
import java.io.IOException;
|
|
31 |
|
|
32 |
import com.sun.org.apache.xerces.internal.util.XMLChar;
|
|
33 |
|
|
34 |
/**
 * <p>This class is used to write a stream of chars as a stream of
 * bytes using the UTF-8 encoding. It assumes that the underlying
 * output stream is buffered or does not need additional buffering.</p>
 *
 * <p>It is more efficient than using a <code>java.io.OutputStreamWriter</code>
 * because it does not need to be wrapped in a
 * <code>java.io.BufferedWriter</code>. Creating multiple instances
 * of <code>java.io.BufferedWriter</code> has been shown to be very
 * expensive in JAX-WS.</p>
 *
 * <p>Characters outside the Basic Multilingual Plane must be supplied as a
 * UTF-16 high surrogate immediately followed by a low surrogate. The high
 * surrogate is held back until the low surrogate arrives, at which point the
 * pair is emitted as a single four-byte UTF-8 sequence. Unpaired surrogates
 * are rejected with an <code>IOException</code>.</p>
 *
 * @author Santiago PericasGeertsen
 */
public final class UTF8OutputStreamWriter extends Writer {

    /**
     * Underlying output stream. This class assumes that this
     * output stream does not need buffering.
     */
    OutputStream out;

    /**
     * Java represents chars that are not in the Basic Multilingual
     * Plane (BMP) in UTF-16. This int stores the first code unit
     * (the high surrogate) for a code point encoded in two UTF-16
     * code units, or 0 when no code unit is pending.
     */
    int lastUTF16CodePoint = 0;

    /**
     * Creates a writer that encodes chars as UTF-8 onto the given stream.
     *
     * @param out the stream receiving the encoded bytes
     */
    public UTF8OutputStreamWriter(OutputStream out) {
        this.out = out;
    }

    /**
     * Returns the name of the encoding produced by this writer.
     *
     * @return always <code>"UTF-8"</code>
     */
    public String getEncoding() {
        return "UTF-8";
    }

    /**
     * Encodes a single UTF-16 code unit as UTF-8.
     *
     * <p>As specified by <code>java.io.Writer#write(int)</code>, only the
     * 16 low-order bits of <code>c</code> are used. A high surrogate
     * produces no output until the matching low surrogate is written.</p>
     *
     * @param c int whose 16 low-order bits hold the char to write
     * @throws IOException if the underlying stream fails, or if the char
     *         forms an unpaired surrogate (a high surrogate not followed
     *         by a low surrogate, or a low surrogate with no preceding
     *         high surrogate)
     */
    @Override
    public void write(int c) throws IOException {
        // Writer contract: the 16 high-order bits are ignored.
        final int unit = c & 0xFFFF;

        // A high surrogate is pending: this unit must complete the pair.
        if (lastUTF16CodePoint != 0) {
            final int high = lastUTF16CodePoint;
            // Clear the pending state first so a failure below does not
            // leave the writer stuck waiting for a low surrogate.
            lastUTF16CodePoint = 0;

            if (!Character.isLowSurrogate((char) unit)) {
                throw new IOException("Attempting to write unpaired UTF-16 high surrogate 0x"
                        + Integer.toHexString(high) + " (followed by 0x"
                        + Integer.toHexString(unit) + ")");
            }

            // Combine the 10 payload bits of each surrogate into the code point.
            final int uc = (((high & 0x3ff) << 10) | (unit & 0x3ff)) + 0x10000;

            // 4 bytes, 21 bits
            out.write(0xF0 | (uc >> 18));
            out.write(0x80 | ((uc >> 12) & 0x3F));
            out.write(0x80 | ((uc >> 6) & 0x3F));
            out.write(0x80 | (uc & 0x3F));
            return;
        }

        // Otherwise, encode the char as defined in UTF-8
        if (unit < 0x80) {
            // 1 byte, 7 bits
            out.write(unit);
        }
        else if (unit < 0x800) {
            // 2 bytes, 11 bits
            out.write(0xC0 | (unit >> 6));          // first 5
            out.write(0x80 | (unit & 0x3F));        // second 6
        }
        else if (Character.isHighSurrogate((char) unit)) {
            // First half of a surrogate pair: hold it until the low surrogate arrives.
            lastUTF16CodePoint = unit;
        }
        else if (Character.isLowSurrogate((char) unit)) {
            throw new IOException("Attempting to write unpaired UTF-16 low surrogate 0x"
                    + Integer.toHexString(unit));
        }
        else {
            // 3 bytes, 16 bits
            out.write(0xE0 | (unit >> 12));         // first 4
            out.write(0x80 | ((unit >> 6) & 0x3F)); // second 6
            out.write(0x80 | (unit & 0x3F));        // third 6
        }
    }

    /**
     * Encodes every char of the array, in order.
     *
     * @param cbuf chars to write
     * @throws IOException if the underlying stream fails or an unpaired
     *         surrogate is encountered; chars before the failure have
     *         already been written
     */
    @Override
    public void write(char[] cbuf) throws IOException {
        write(cbuf, 0, cbuf.length);
    }

    /**
     * Encodes <code>len</code> chars of the array starting at <code>off</code>.
     *
     * @param cbuf chars to write
     * @param off index of the first char to write
     * @param len number of chars to write
     * @throws IOException if the underlying stream fails or an unpaired
     *         surrogate is encountered; chars before the failure have
     *         already been written
     */
    @Override
    public void write(char[] cbuf, int off, int len) throws IOException {
        for (int i = 0; i < len; i++) {
            write(cbuf[off + i]);
        }
    }

    /**
     * Encodes every char of the string, in order.
     *
     * @param str string to write
     * @throws IOException if the underlying stream fails or an unpaired
     *         surrogate is encountered; chars before the failure have
     *         already been written
     */
    @Override
    public void write(String str) throws IOException {
        final int len = str.length();
        for (int i = 0; i < len; i++) {
            write(str.charAt(i));
        }
    }

    /**
     * Encodes <code>len</code> chars of the string starting at <code>off</code>.
     *
     * @param str string to write
     * @param off index of the first char to write
     * @param len number of chars to write
     * @throws IOException if the underlying stream fails or an unpaired
     *         surrogate is encountered; chars before the failure have
     *         already been written
     */
    @Override
    public void write(String str, int off, int len) throws IOException {
        for (int i = 0; i < len; i++) {
            write(str.charAt(off + i));
        }
    }

    /**
     * Flushes the underlying stream. A pending high surrogate, if any,
     * is not emitted by this call.
     *
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void flush() throws IOException {
        out.flush();
    }

    /**
     * Closes the underlying stream.
     *
     * <p>The stream is closed even when a high surrogate is still waiting
     * for its low surrogate, so that the resource is not leaked; the
     * incomplete state is then reported.</p>
     *
     * @throws IOException if closing the underlying stream fails
     * @throws IllegalStateException if a high surrogate was written without
     *         its matching low surrogate
     */
    @Override
    public void close() throws IOException {
        final boolean awaitingLowSurrogate = lastUTF16CodePoint != 0;
        // Drop the pending unit so a repeated close() does not fail again.
        lastUTF16CodePoint = 0;

        out.close();

        if (awaitingLowSurrogate) {
            throw new IllegalStateException("Attempting to close a UTF8OutputStreamWriter"
                    + " while awaiting for a UTF-16 code unit");
        }
    }

}
|