2
|
1 |
/*
|
5506
|
2 |
* Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved.
|
2
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
4 |
*
|
|
5 |
* This code is free software; you can redistribute it and/or modify it
|
|
6 |
* under the terms of the GNU General Public License version 2 only, as
|
5506
|
7 |
* published by the Free Software Foundation. Oracle designates this
|
2
|
8 |
* particular file as subject to the "Classpath" exception as provided
|
5506
|
9 |
* by Oracle in the LICENSE file that accompanied this code.
|
2
|
10 |
*
|
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that
|
|
15 |
* accompanied this code).
|
|
16 |
*
|
|
17 |
* You should have received a copy of the GNU General Public License version
|
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
20 |
*
|
5506
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
22 |
* or visit www.oracle.com if you need additional information or have any
|
|
23 |
* questions.
|
2
|
24 |
*/
|
|
25 |
|
|
26 |
/**
|
|
27 |
* This FilterReader class processes a sequence of characters from
|
|
28 |
* a source stream containing a mixture of 7-bit ASCII data and
|
|
29 |
* backslash-u escaped sequences representing characters which have
|
|
30 |
* the possibility of being encoded in a user specified encoding.
|
|
31 |
* The filter relies on knowing the target encoding and makes a
|
|
32 |
* determination as to whether a given supplied character in its
|
|
33 |
* source character stream is encodeable in the target encoding.
|
|
34 |
* If not, it remains in its backslash-u escaped form.
|
|
35 |
*/
|
|
36 |
|
|
37 |
package sun.tools.native2ascii;
|
|
38 |
import java.io.*;
|
|
39 |
|
|
40 |
|
|
41 |
class A2NFilter extends FilterReader {
|
|
42 |
|
|
43 |
// maintain a trailing buffer to hold any incompleted
|
|
44 |
// unicode escaped sequences
|
|
45 |
private char[] trailChars = null;
|
|
46 |
|
|
47 |
public A2NFilter(Reader in) {
|
|
48 |
super(in);
|
|
49 |
}
|
|
50 |
|
|
51 |
public int read(char[] buf, int off, int len) throws IOException {
|
|
52 |
int numChars = 0; // how many characters have been read
|
|
53 |
int retChars = 0; // how many characters we'll return
|
|
54 |
|
|
55 |
char[] cBuf = new char[len];
|
|
56 |
int cOffset = 0; // offset at which we'll start reading
|
|
57 |
boolean eof = false;
|
|
58 |
|
|
59 |
// copy trailing chars from previous invocation to input buffer
|
|
60 |
if (trailChars != null) {
|
|
61 |
for (int i = 0; i < trailChars.length; i++)
|
|
62 |
cBuf[i] = trailChars[i];
|
|
63 |
numChars = trailChars.length;
|
|
64 |
trailChars = null;
|
|
65 |
}
|
|
66 |
|
|
67 |
int n = in.read(cBuf, numChars, len - numChars);
|
|
68 |
if (n < 0) {
|
|
69 |
eof = true;
|
|
70 |
if (numChars == 0)
|
|
71 |
return -1; // EOF;
|
|
72 |
} else {
|
|
73 |
numChars += n;
|
|
74 |
}
|
|
75 |
|
|
76 |
for (int i = 0; i < numChars;) {
|
|
77 |
char c = cBuf[i++];
|
|
78 |
|
|
79 |
if (c != '\\' || (eof && numChars <= 5)) {
|
|
80 |
// Not a backslash, so copy and continue
|
|
81 |
// Always pass non backslash chars straight thru
|
|
82 |
// for regular encoding. If backslash occurs in
|
|
83 |
// input stream at the final 5 chars then don't
|
|
84 |
// attempt to read-ahead and de-escape since these
|
|
85 |
// are literal occurrences of U+005C which need to
|
|
86 |
// be encoded verbatim in the target encoding.
|
|
87 |
buf[retChars++] = c;
|
|
88 |
continue;
|
|
89 |
}
|
|
90 |
|
|
91 |
int remaining = numChars - i;
|
|
92 |
if (remaining < 5) {
|
|
93 |
// Might be the first character of a unicode escape, but we
|
|
94 |
// don't have enough characters to tell, so save it and finish
|
|
95 |
trailChars = new char[1 + remaining];
|
|
96 |
trailChars[0] = c;
|
|
97 |
for (int j = 0; j < remaining; j++)
|
|
98 |
trailChars[1 + j] = cBuf[i + j];
|
|
99 |
break;
|
|
100 |
}
|
|
101 |
// At this point we have at least five characters remaining
|
|
102 |
|
|
103 |
c = cBuf[i++];
|
|
104 |
if (c != 'u') {
|
|
105 |
// Not a unicode escape, so copy and continue
|
|
106 |
buf[retChars++] = '\\';
|
|
107 |
buf[retChars++] = c;
|
|
108 |
continue;
|
|
109 |
}
|
|
110 |
|
|
111 |
// The next four characters are the hex part of a unicode escape
|
|
112 |
char rc = 0;
|
|
113 |
boolean isUE = true;
|
|
114 |
try {
|
|
115 |
rc = (char)Integer.parseInt(new String(cBuf, i, 4), 16);
|
|
116 |
} catch (NumberFormatException x) {
|
|
117 |
isUE = false;
|
|
118 |
}
|
|
119 |
if (isUE && Main.canConvert(rc)) {
|
|
120 |
// We'll be able to convert this
|
|
121 |
buf[retChars++] = rc;
|
|
122 |
i += 4; // Align beyond the current uXXXX sequence
|
|
123 |
} else {
|
|
124 |
// We won't, so just retain the original sequence
|
|
125 |
buf[retChars++] = '\\';
|
|
126 |
buf[retChars++] = 'u';
|
|
127 |
continue;
|
|
128 |
}
|
|
129 |
|
|
130 |
}
|
|
131 |
|
|
132 |
return retChars;
|
|
133 |
}
|
|
134 |
|
|
135 |
public int read() throws IOException {
|
|
136 |
char[] buf = new char[1];
|
|
137 |
|
|
138 |
if (read(buf, 0, 1) == -1)
|
|
139 |
return -1;
|
|
140 |
else
|
|
141 |
return (int)buf[0];
|
|
142 |
}
|
|
143 |
|
|
144 |
}
|