/*
 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
|
|
23 |
|
|
24 |
import java.io.BufferedReader;
|
|
25 |
import java.io.BufferedWriter;
|
|
26 |
import java.io.FilterReader;
|
|
27 |
import java.io.FilterWriter;
|
|
28 |
import java.io.IOException;
|
|
29 |
import java.io.Reader;
|
|
30 |
import java.io.Writer;
|
|
31 |
import java.nio.charset.Charset;
|
|
32 |
import java.nio.charset.CharsetEncoder;
|
|
33 |
import java.nio.file.Files;
|
|
34 |
import java.nio.file.Path;
|
|
35 |
import static java.nio.charset.StandardCharsets.*;
|
|
36 |
|
|
37 |
/**
 * Simple utility to convert a text file between a native encoding and ASCII,
 * in either direction. In the ASCII form every character above U+007F is
 * written as a Unicode escape: a backslash, the letter 'u' and exactly four
 * lowercase hexadecimal digits.
 *
 * <p>Both conversions work line by line: lines are read with
 * {@link BufferedReader#readLine()} and written back followed by
 * {@link BufferedWriter#newLine()}, so line terminators are rewritten to the
 * platform line separator and the last line always ends with one.
 */
public class Native2Ascii {
    /** Length of a complete escape: backslash, 'u' and four hex digits. */
    private static final int ESCAPE_LENGTH = 6;

    /** The native charset used for the non-ASCII side of a conversion. */
    final Charset cs;

    /** Encoder of {@link #cs}; used to test whether an escape is representable. */
    final CharsetEncoder encoder;

    /**
     * Creates a converter for the given native charset.
     *
     * @param cs the native charset; must support encoding, since
     *           {@link Charset#newEncoder()} is called on it
     */
    public Native2Ascii(Charset cs) {
        this.cs = cs;
        this.encoder = cs.newEncoder();
    }

    /**
     * ASCII to Native conversion.
     *
     * <p>Reads {@code infile} as US-ASCII, replaces every Unicode escape whose
     * character can be encoded in the native charset by that character, and
     * writes the result to {@code outfile} in the native charset. Escapes that
     * are malformed or not encodable are copied unchanged.
     *
     * @param infile  the ASCII input file
     * @param outfile the native-encoded output file (created or overwritten)
     * @throws IOException if reading or writing fails
     */
    public void asciiToNative(Path infile, Path outfile) throws IOException {
        try (BufferedReader in = Files.newBufferedReader(infile, US_ASCII);
             BufferedReader reader = new BufferedReader(new A2NFilter(in));
             BufferedWriter writer = Files.newBufferedWriter(outfile, cs)) {
            copyLines(reader, writer);
        }
    }

    /**
     * Native to ASCII conversion.
     *
     * <p>Reads {@code infile} in the native charset and writes it to
     * {@code outfile} as US-ASCII, replacing every character above U+007F by
     * its Unicode escape.
     *
     * @param infile  the native-encoded input file
     * @param outfile the ASCII output file (created or overwritten)
     * @throws IOException if reading or writing fails
     */
    public void nativeToAscii(Path infile, Path outfile) throws IOException {
        try (BufferedReader reader = Files.newBufferedReader(infile, cs);
             BufferedWriter out = Files.newBufferedWriter(outfile, US_ASCII);
             BufferedWriter writer = new BufferedWriter(new N2AFilter(out))) {
            copyLines(reader, writer);
        }
    }

    /** Copies every line of {@code reader} to {@code writer}, each followed by a line separator. */
    private static void copyLines(BufferedReader reader, BufferedWriter writer)
            throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            writer.write(line);
            writer.newLine();
        }
    }

    /**
     * Parses four hexadecimal digits starting at {@code start}.
     *
     * <p>Unlike {@code Integer.parseInt(s, 16)} this rejects a leading sign,
     * so a backslash-u followed by "+041" is not mistaken for an escape.
     *
     * @return the value (0..0xFFFF), or -1 if any of the four characters is
     *         not a hex digit according to {@link Character#digit(char, int)}
     */
    private static int parseHex4(char[] chars, int start) {
        int value = 0;
        for (int k = 0; k < 4; k++) {
            int digit = Character.digit(chars[start + k], 16);
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }

    // A copy of native2ascii N2AFilter
    /**
     * Writer that escapes every character above U+007F as a Unicode escape
     * and passes all other characters through unchanged.
     */
    class N2AFilter extends FilterWriter {
        public N2AFilter(Writer out) { super(out); }

        /** Writes a single character, escaping it if it is above U+007F. */
        public void write(char b) throws IOException {
            write((int) b);
        }

        /**
         * Writes a single character (the low 16 bits of {@code c}), escaping
         * it if necessary. Overridden because the inherited implementation
         * forwards straight to the underlying writer without escaping.
         */
        @Override
        public void write(int c) throws IOException {
            write(new char[] {(char) c}, 0, 1);
        }

        /**
         * Writes {@code len} characters of {@code buf} starting at
         * {@code off}, escaping each one that is above U+007F.
         */
        @Override
        public void write(char[] buf, int off, int len) throws IOException {
            int end = off + len;
            for (int i = off; i < end; i++) {
                char c = buf[i];
                if (c > '\u007f') {
                    writeEscape(c);
                } else {
                    out.write(c);
                }
            }
        }

        /**
         * Writes a portion of a string with escaping. Overridden because the
         * inherited implementation forwards straight to the underlying
         * writer without escaping.
         */
        @Override
        public void write(String str, int off, int len) throws IOException {
            write(str.substring(off, off + len).toCharArray(), 0, len);
        }

        /** Emits backslash, 'u' and the four-digit lowercase hex value of {@code c}. */
        private void writeEscape(char c) throws IOException {
            out.write('\\');
            out.write('u');
            for (int shift = 12; shift >= 0; shift -= 4) {
                out.write(Character.forDigit((c >> shift) & 0xF, 16));
            }
        }
    }

    // A copy of native2ascii A2NFilter
    /**
     * Reader that replaces Unicode escapes by the character they denote when
     * that character can be encoded by the enclosing converter's encoder.
     *
     * <p>A backslash followed by any character other than 'u' is copied
     * together with that character, so a doubled backslash never starts an
     * escape. Malformed or unencodable escapes are copied unchanged.
     * NOTE: {@link CharsetEncoder#canEncode(char)} is documented to return
     * false for lone surrogates, so escaped surrogate halves stay escaped.
     */
    class A2NFilter extends FilterReader {
        // Raw input held back from the previous fill because it starts with a
        // backslash whose possible escape was not completely available yet.
        // Never longer than ESCAPE_LENGTH - 1 characters.
        private char[] trailChars = null;

        // Decoded characters that have not been handed to the caller yet.
        private char[] decoded = new char[0];
        private int decodedPos = 0;
        private int decodedLen = 0;

        public A2NFilter(Reader in) {
            super(in);
        }

        /**
         * Reads up to {@code len} decoded characters into {@code buf}
         * starting at {@code off}.
         *
         * @return the number of characters stored (at least 1 when
         *         {@code len > 0}), 0 when {@code len == 0}, or -1 at end of
         *         stream
         * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not
         *         describe a valid range of {@code buf}
         */
        @Override
        public int read(char[] buf, int off, int len) throws IOException {
            if (off < 0 || len < 0 || len > buf.length - off) {
                throw new IndexOutOfBoundsException();
            }
            if (len == 0) {
                return 0;
            }
            // A fill may decode nothing when all it saw was the start of an
            // escape; keep filling until there is output or the stream ends.
            while (decodedPos == decodedLen) {
                if (!fill(len)) {
                    return -1;
                }
            }
            int count = Math.min(len, decodedLen - decodedPos);
            System.arraycopy(decoded, decodedPos, buf, off, count);
            decodedPos += count;
            return count;
        }

        /**
         * Reads one chunk of raw input (prefixed by any held-back characters)
         * and decodes it into the internal output buffer.
         *
         * @param requested the caller's requested length; the chunk is at
         *                  least ESCAPE_LENGTH long so that a held-back
         *                  escape prefix can always grow
         * @return false if the stream is exhausted and nothing is pending
         */
        private boolean fill(int requested) throws IOException {
            char[] cBuf = new char[Math.max(requested, ESCAPE_LENGTH)];
            int numChars = 0; // how many raw characters are in cBuf

            // copy trailing chars from previous invocation to input buffer
            if (trailChars != null) {
                numChars = trailChars.length;
                System.arraycopy(trailChars, 0, cBuf, 0, numChars);
                trailChars = null;
            }

            boolean eof = false;
            int n = in.read(cBuf, numChars, cBuf.length - numChars);
            if (n < 0) {
                if (numChars == 0) {
                    return false; // EOF and nothing held back
                }
                eof = true;
            } else {
                numChars += n;
            }

            // Decoding never produces more characters than it consumes.
            char[] result = new char[numChars];
            int retChars = 0;

            int i = 0;
            while (i < numChars) {
                char c = cBuf[i++];
                int remaining = numChars - i;

                if (c != '\\' || (eof && remaining < ESCAPE_LENGTH - 1)) {
                    // Ordinary character, or a backslash so close to the end
                    // of the stream that it cannot start a full escape: these
                    // are copied verbatim.
                    result[retChars++] = c;
                    continue;
                }

                if (remaining < ESCAPE_LENGTH - 1) {
                    // Might be the start of an escape, but not enough input
                    // has arrived to tell; hold it back for the next fill.
                    trailChars = new char[1 + remaining];
                    System.arraycopy(cBuf, i - 1, trailChars, 0, 1 + remaining);
                    break;
                }
                // At this point we have at least five characters remaining

                c = cBuf[i++];
                if (c != 'u') {
                    // Not a unicode escape, so copy both characters
                    result[retChars++] = '\\';
                    result[retChars++] = c;
                    continue;
                }

                // The next four characters should be the hex part
                int code = parseHex4(cBuf, i);
                if (code >= 0 && encoder.canEncode((char) code)) {
                    // We'll be able to convert this
                    result[retChars++] = (char) code;
                    i += 4; // skip the four hex digits
                } else {
                    // We won't, so retain the original sequence; the four
                    // following characters are then processed normally.
                    result[retChars++] = '\\';
                    result[retChars++] = 'u';
                }
            }

            decoded = result;
            decodedPos = 0;
            decodedLen = retChars;
            return true;
        }

        /** Reads a single decoded character, or returns -1 at end of stream. */
        @Override
        public int read() throws IOException {
            char[] one = new char[1];
            return read(one, 0, 1) == -1 ? -1 : one[0];
        }
    }
}
|