/*
 * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
|
25 |
|
26 package build.tools.charsetmapping; |
|
27 |
|
28 import java.io.*; |
|
29 import java.util.ArrayList; |
|
30 import java.util.Scanner; |
|
31 import java.util.Formatter; |
|
32 import java.util.regex.*; |
|
33 import java.nio.charset.*; |
|
34 import static build.tools.charsetmapping.CharsetMapping.*; |
|
35 |
|
36 public class GenerateEUC_TW { |
|
37 |
|
38 public static void genEUC_TW(String args[]) throws Exception { |
|
39 genClass(new FileInputStream(new File(args[0], "euc_tw.map")), |
|
40 new PrintStream(new File(args[1], "EUC_TWMapping.java"), "ISO-8859-1"), |
|
41 getCopyright(new File(args[3]))); |
|
42 } |
|
43 |
|
44 private static String getCopyright(File f) throws IOException { |
|
45 Scanner s = new Scanner(f, "ISO-8859-1"); |
|
46 StringBuilder sb = new StringBuilder(); |
|
47 while (s.hasNextLine()) { |
|
48 String ln = s.nextLine(); |
|
49 sb.append(ln + "\n"); |
|
50 // assume we have the copyright as the first comment |
|
51 if (ln.matches("^\\s\\*\\/$")) |
|
52 break; |
|
53 } |
|
54 s.close(); |
|
55 return sb.toString(); |
|
56 } |
|
57 |
|
58 private static char[] toCharArray(int[] db, |
|
59 int b1Min, int b1Max, |
|
60 int b2Min, int b2Max) |
|
61 { |
|
62 char[] ca = new char[(b1Max - b1Min + 1) * (b2Max - b2Min + 1)]; |
|
63 int off = 0; |
|
64 for (int b1 = b1Min; b1 <= b1Max; b1++) { |
|
65 for (int b2 = b2Min; b2 <= b2Max; b2++) { |
|
66 ca[off++] = (char)(db[b1 * 256 + b2] & 0xffff); |
|
67 } |
|
68 } |
|
69 return ca; |
|
70 } |
|
71 |
|
72 private static void toChar(Formatter out, String fmt, char c) { |
|
73 switch (c) { |
|
74 case '\b': |
|
75 out.format("\\b"); break; |
|
76 case '\t': |
|
77 out.format("\\t"); break; |
|
78 case '\n': |
|
79 out.format("\\n"); break; |
|
80 case '\f': |
|
81 out.format("\\f"); break; |
|
82 case '\r': |
|
83 out.format("\\r"); break; |
|
84 case '\"': |
|
85 out.format("\\\""); break; |
|
86 case '\'': |
|
87 out.format("\\'"); break; |
|
88 case '\\': |
|
89 out.format("\\\\"); break; |
|
90 default: |
|
91 out.format(fmt, c & 0xffff); |
|
92 } |
|
93 } |
|
94 |
|
95 private static void toString(Formatter out, char[] date, String endStr) |
|
96 { |
|
97 int off = 0; |
|
98 int end = date.length; |
|
99 while (off < end) { |
|
100 out.format(" \""); |
|
101 for (int j = 0; j < 8 && off < end; j++) { |
|
102 toChar(out, "\\u%04X", date[off++]); |
|
103 } |
|
104 if (off == end) |
|
105 out.format("\"%s%n", endStr); |
|
106 else |
|
107 out.format("\" +%n"); |
|
108 } |
|
109 } |
|
110 |
|
111 private static char[] toCharArray(byte[] ba, |
|
112 int b1Min, int b1Max, |
|
113 int b2Min, int b2Max) |
|
114 { |
|
115 char[] ca = new char[(b1Max - b1Min + 1) * (b2Max - b2Min + 1)]; |
|
116 int off = 0; |
|
117 for (int b1 = b1Min; b1 <= b1Max; b1++) { |
|
118 int b2 = b2Min; |
|
119 while (b2 <= b2Max) { |
|
120 ca[off++] = (char)(((ba[b1 * 256 + b2++] & 0xff) << 8) | |
|
121 (ba[b1 * 256 + b2++] & 0xff)); |
|
122 } |
|
123 } |
|
124 return ca; |
|
125 } |
|
126 |
|
127 private static void toCharArray(Formatter out, char[] date) { |
|
128 int off = 0; |
|
129 int end = date.length; |
|
130 while (off < end) { |
|
131 out.format(" "); |
|
132 for (int j = 0; j < 8 && off < end; j++) { |
|
133 toChar(out, "'\\u%04X',", date[off++]); |
|
134 } |
|
135 out.format("%n"); |
|
136 } |
|
137 } |
|
138 |
|
139 private static int initC2BIndex(char[] index) { |
|
140 int off = 0; |
|
141 for (int i = 0; i < index.length; i++) { |
|
142 if (index[i] != 0) { |
|
143 index[i] = (char)off; |
|
144 off += 0x100; |
|
145 } else { |
|
146 index[i] = CharsetMapping.UNMAPPABLE_ENCODING; |
|
147 } |
|
148 } |
|
149 return off; |
|
150 } |
|
151 |
|
152 private static Pattern euctw = Pattern.compile("(?:8ea)?(\\p{XDigit}++)\\s++(\\p{XDigit}++)?\\s*+.*"); |
|
153 |
|
154 private static void genClass(InputStream is, PrintStream ps, String copyright) |
|
155 throws Exception |
|
156 { |
|
157 // ranges of byte1 and byte2, something should come from a "config" file |
|
158 int b1Min = 0xa1; |
|
159 int b1Max = 0xfe; |
|
160 int b2Min = 0xa1; |
|
161 int b2Max = 0xfe; |
|
162 |
|
163 try { |
|
164 int[][] db = new int[8][0x10000]; // doublebyte |
|
165 byte[] suppFlag = new byte[0x10000]; // doublebyte |
|
166 char[] indexC2B = new char[256]; |
|
167 char[] indexC2BSupp = new char[256]; |
|
168 |
|
169 for (int i = 0; i < 8; i++) |
|
170 for (int j = 0; j < 0x10000; j++) |
|
171 db[i][j] = CharsetMapping.UNMAPPABLE_DECODING; |
|
172 |
|
173 CharsetMapping.Parser p = new CharsetMapping.Parser(is, euctw); |
|
174 CharsetMapping.Entry e = null; |
|
175 while ((e = p.next()) != null) { |
|
176 int plane = 0; |
|
177 if (e.bs >= 0x10000) { |
|
178 plane = ((e.bs >> 16) & 0xff) - 1; |
|
179 if (plane >= 14) |
|
180 plane = 7; |
|
181 e.bs = e.bs & 0xffff; |
|
182 } |
|
183 db[plane][e.bs] = e.cp; |
|
184 if (e.cp < 0x10000) { |
|
185 indexC2B[e.cp>>8] = 1; |
|
186 } else { |
|
187 indexC2BSupp[(e.cp&0xffff)>>8] = 1; |
|
188 suppFlag[e.bs] |= (1 << plane); |
|
189 } |
|
190 } |
|
191 |
|
192 StringBuilder out = new StringBuilder(); |
|
193 Formatter fm = new Formatter(out); |
|
194 |
|
195 fm.format(copyright); |
|
196 fm.format("%n// -- This file was mechanically generated: Do not edit! -- //%n"); |
|
197 fm.format("package sun.nio.cs.ext;%n%n"); |
|
198 fm.format("class EUC_TWMapping {%n%n"); |
|
199 |
|
200 // boundaries |
|
201 fm.format(" final static int b1Min = 0x%x;%n", b1Min); |
|
202 fm.format(" final static int b1Max = 0x%x;%n", b1Max); |
|
203 fm.format(" final static int b2Min = 0x%x;%n", b2Min); |
|
204 fm.format(" final static int b2Max = 0x%x;%n", b2Max); |
|
205 |
|
206 // b2c tables |
|
207 fm.format("%n final static String[] b2c = {%n"); |
|
208 for (int plane = 0; plane < 8; plane++) { |
|
209 fm.format(" // Plane %d%n", plane); |
|
210 toString(fm, toCharArray(db[plane], |
|
211 b1Min, b1Max, b2Min, b2Max), |
|
212 ","); |
|
213 fm.format("%n"); |
|
214 } |
|
215 fm.format(" };%n"); |
|
216 |
|
217 // c2bIndex |
|
218 fm.format("%n static final int C2BSIZE = 0x%x;%n", |
|
219 initC2BIndex(indexC2B)); |
|
220 fm.format("%n static char[] c2bIndex = new char[] {%n"); |
|
221 toCharArray(fm, indexC2B); |
|
222 fm.format(" };%n"); |
|
223 |
|
224 // c2bIndexSupp |
|
225 fm.format("%n static final int C2BSUPPSIZE = 0x%x;%n", |
|
226 initC2BIndex(indexC2BSupp)); |
|
227 fm.format("%n static char[] c2bSuppIndex = new char[] {%n"); |
|
228 toCharArray(fm, indexC2BSupp); |
|
229 fm.format(" };%n"); |
|
230 |
|
231 // suppFlags |
|
232 fm.format("%n static String b2cIsSuppStr =%n"); |
|
233 toString(fm, toCharArray(suppFlag, |
|
234 b1Min, b1Max, b2Min, b2Max), |
|
235 ";"); |
|
236 |
|
237 fm.format("}"); |
|
238 fm.close(); |
|
239 |
|
240 ps.println(out.toString()); |
|
241 ps.close(); |
|
242 } catch (Exception x) { |
|
243 x.printStackTrace(); |
|
244 } |
|
245 } |
|
246 } |
|