1 /* |
|
2 * Copyright (c) 1996, 1999, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. Oracle designates this |
|
8 * particular file as subject to the "Classpath" exception as provided |
|
9 * by Oracle in the LICENSE file that accompanied this code. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 */ |
|
25 |
|
26 package sun.io; |
|
27 import java.io.*; |
|
28 |
|
29 |
|
30 /** |
|
31 * Convert byte arrays containing Unicode characters into arrays of actual |
|
32 * Unicode characters. This class may be used directly, in which case it |
|
33 * expects the input byte array to begin with a byte-order mark, or it may be |
|
34 * subclassed in order to preset the byte order and mark behavior. |
|
35 * |
|
36 * <p> Whether or not a mark is expected, if a mark that does not match the |
|
37 * established byte order is later discovered then a |
|
38 * <tt>MalformedInputException</tt> will be thrown by the <tt>convert</tt> |
|
39 * method. If a correct mark is seen later in the input stream, it is passed |
|
40 * through as a character. |
|
41 * |
|
42 * @see ByteToCharUnicodeLittle |
|
43 * @see ByteToCharUnicodeLittleUnmarked |
|
44 * @see ByteToCharUnicodeBig |
|
45 * @see ByteToCharUnicodeBigUnmarked |
|
46 * |
|
47 * @author Mark Reinhold |
|
48 */ |
|
49 |
|
50 public class ByteToCharUnicode extends ByteToCharConverter { |
|
51 |
|
52 static final char BYTE_ORDER_MARK = (char) 0xfeff; |
|
53 static final char REVERSED_MARK = (char) 0xfffe; |
|
54 |
|
55 static final int AUTO = 0; |
|
56 static final int BIG = 1; |
|
57 static final int LITTLE = 2; |
|
58 |
|
59 int originalByteOrder; /* Byte order specified at creation */ |
|
60 int byteOrder; /* Byte order in use */ |
|
61 boolean usesMark; /* Look for a mark and interpret it */ |
|
62 |
|
63 /** |
|
64 * Creates a Unicode byte-to-char converter that expects the first pair of |
|
65 * input bytes to be a byte-order mark, which will be interpreted and |
|
66 * discarded. If the first pair of bytes is not such a mark then a |
|
67 * <tt>MalformedInputException</tt> will be thrown by the convert method. |
|
68 */ |
|
69 public ByteToCharUnicode() { |
|
70 originalByteOrder = byteOrder = AUTO; |
|
71 usesMark = true; |
|
72 } |
|
73 |
|
74 /** |
|
75 * Creates a Unicode byte-to-char converter that uses the given byte order |
|
76 * and may or may not insist upon an initial byte-order mark. |
|
77 */ |
|
78 protected ByteToCharUnicode(int bo, boolean m) { |
|
79 originalByteOrder = byteOrder = bo; |
|
80 usesMark = m; |
|
81 } |
|
82 |
|
83 public String getCharacterEncoding() { |
|
84 switch (originalByteOrder) { |
|
85 case BIG: |
|
86 return usesMark ? "UnicodeBig" : "UnicodeBigUnmarked"; |
|
87 case LITTLE: |
|
88 return usesMark ? "UnicodeLittle" : "UnicodeLittleUnmarked"; |
|
89 default: |
|
90 return "Unicode"; |
|
91 } |
|
92 } |
|
93 |
|
94 boolean started = false; |
|
95 int leftOverByte; |
|
96 boolean leftOver = false; |
|
97 |
|
98 public int convert(byte[] in, int inOff, int inEnd, |
|
99 char[] out, int outOff, int outEnd) |
|
100 throws ConversionBufferFullException, MalformedInputException |
|
101 { |
|
102 byteOff = inOff; |
|
103 charOff = outOff; |
|
104 |
|
105 if (inOff >= inEnd) |
|
106 return 0; |
|
107 |
|
108 int b1, b2; |
|
109 int bc = 0; |
|
110 int inI = inOff, outI = outOff; |
|
111 |
|
112 if (leftOver) { |
|
113 b1 = leftOverByte & 0xff; |
|
114 leftOver = false; |
|
115 } |
|
116 else { |
|
117 b1 = in[inI++] & 0xff; |
|
118 } |
|
119 bc = 1; |
|
120 |
|
121 if (usesMark && !started) { /* Read initial byte-order mark */ |
|
122 if (inI < inEnd) { |
|
123 b2 = in[inI++] & 0xff; |
|
124 bc = 2; |
|
125 |
|
126 char c = (char) ((b1 << 8) | b2); |
|
127 int bo = AUTO; |
|
128 |
|
129 if (c == BYTE_ORDER_MARK) |
|
130 bo = BIG; |
|
131 else if (c == REVERSED_MARK) |
|
132 bo = LITTLE; |
|
133 |
|
134 if (byteOrder == AUTO) { |
|
135 if (bo == AUTO) { |
|
136 badInputLength = bc; |
|
137 throw new |
|
138 MalformedInputException("Missing byte-order mark"); |
|
139 } |
|
140 byteOrder = bo; |
|
141 if (inI < inEnd) { |
|
142 b1 = in[inI++] & 0xff; |
|
143 bc = 1; |
|
144 } |
|
145 } |
|
146 else if (bo == AUTO) { |
|
147 inI--; |
|
148 bc = 1; |
|
149 } |
|
150 else if (byteOrder == bo) { |
|
151 if (inI < inEnd) { |
|
152 b1 = in[inI++] & 0xff; |
|
153 bc = 1; |
|
154 } |
|
155 } |
|
156 else { |
|
157 badInputLength = bc; |
|
158 throw new |
|
159 MalformedInputException("Incorrect byte-order mark"); |
|
160 } |
|
161 |
|
162 started = true; |
|
163 } |
|
164 } |
|
165 |
|
166 /* Loop invariant: (b1 contains the next input byte) && (bc == 1) */ |
|
167 while (inI < inEnd) { |
|
168 b2 = in[inI++] & 0xff; |
|
169 bc = 2; |
|
170 |
|
171 char c; |
|
172 if (byteOrder == BIG) |
|
173 c = (char) ((b1 << 8) | b2); |
|
174 else |
|
175 c = (char) ((b2 << 8) | b1); |
|
176 |
|
177 if (c == REVERSED_MARK) |
|
178 throw new |
|
179 MalformedInputException("Reversed byte-order mark"); |
|
180 |
|
181 if (outI >= outEnd) |
|
182 throw new ConversionBufferFullException(); |
|
183 out[outI++] = c; |
|
184 byteOff = inI; |
|
185 charOff = outI; |
|
186 |
|
187 if (inI < inEnd) { |
|
188 b1 = in[inI++] & 0xff; |
|
189 bc = 1; |
|
190 } |
|
191 } |
|
192 |
|
193 if (bc == 1) { |
|
194 leftOverByte = b1; |
|
195 byteOff = inI; |
|
196 leftOver = true; |
|
197 } |
|
198 |
|
199 return outI - outOff; |
|
200 } |
|
201 |
|
202 public void reset() { |
|
203 leftOver = false; |
|
204 byteOff = charOff = 0; |
|
205 started = false; |
|
206 byteOrder = originalByteOrder; |
|
207 } |
|
208 |
|
209 public int flush(char buf[], int off, int len) |
|
210 throws MalformedInputException |
|
211 { |
|
212 if (leftOver) { |
|
213 reset(); |
|
214 throw new MalformedInputException(); |
|
215 } |
|
216 byteOff = charOff = 0; |
|
217 return 0; |
|
218 } |
|
219 |
|
220 } |
|