2
|
1 |
/*
|
|
2 |
* Copyright 1999 Sun Microsystems, Inc. All Rights Reserved.
|
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
4 |
*
|
|
5 |
* This code is free software; you can redistribute it and/or modify it
|
|
6 |
* under the terms of the GNU General Public License version 2 only, as
|
|
7 |
* published by the Free Software Foundation. Sun designates this
|
|
8 |
* particular file as subject to the "Classpath" exception as provided
|
|
9 |
* by Sun in the LICENSE file that accompanied this code.
|
|
10 |
*
|
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that
|
|
15 |
* accompanied this code).
|
|
16 |
*
|
|
17 |
* You should have received a copy of the GNU General Public License version
|
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
20 |
*
|
|
21 |
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
|
|
22 |
* CA 95054 USA or visit www.sun.com if you need additional information or
|
|
23 |
* have any questions.
|
|
24 |
*/
|
|
25 |
|
|
26 |
package sun.io;
|
|
27 |
|
|
28 |
import sun.nio.cs.ext.ISCII91;
|
|
29 |
|
|
30 |
/*
|
|
31 |
* Copyright (c) 1998 International Business Machines.
|
|
32 |
* All Rights Reserved.
|
|
33 |
*
|
|
34 |
* Author : Sunanda Bera, C. Thirumalesh
|
|
35 |
* Last Modified : 11,December,1998
|
|
36 |
*
|
|
37 |
* Purpose : Defines class CharToByteISCII91.
|
|
38 |
*
|
|
39 |
*
|
|
40 |
* Revision History
|
|
41 |
* ======== =======
|
|
42 |
*
|
|
43 |
* Date By Description
|
|
44 |
* ---- -- -----------
|
|
45 |
* March 29, 1999 John Raley Removed MalformedInputException; modified substitution logic
|
|
46 |
*
|
|
47 |
*/
|
|
48 |
|
|
49 |
/**
|
|
50 |
* Converter class. Converts Unicode characters to the ISCII91 encoding.
|
|
51 |
* ISCII91 is the character encoding as defined in Indian Standard document
|
|
52 |
* IS 13194:1991 ( Indian Script Code for Information Interchange ).
|
|
53 |
*
|
|
54 |
* @see sun.io.CharToByteConverter
|
|
55 |
*/
|
|
56 |
|
|
57 |
/*
|
|
58 |
* {jbr} I am not sure this class adheres to code converter conventions.
|
|
59 |
* Need to investigate.
|
|
60 |
* It might be better to recode it as a subclass of CharToByteSingleByte.
|
|
61 |
*/
|
|
62 |
|
|
63 |
public class CharToByteISCII91 extends CharToByteConverter {
|
|
64 |
|
|
65 |
private static final byte NO_CHAR = (byte)255;
|
|
66 |
|
|
67 |
//private final static ISCII91 nioCoder = new ISCII91();
|
|
68 |
private final static byte[] directMapTable = ISCII91.getEncoderMappingTable();
|
|
69 |
|
|
70 |
private static final char NUKTA_CHAR = '\u093c';
|
|
71 |
private static final char HALANT_CHAR = '\u094d';
|
|
72 |
|
|
73 |
|
|
74 |
/**
|
|
75 |
* @return true for Devanagari and ASCII range and for the special characters
|
|
76 |
* Zero Width Joiner and Zero Width Non-Joiner
|
|
77 |
* @see sun.io.CharToByteConverter#canConvert
|
|
78 |
*
|
|
79 |
*/
|
|
80 |
public boolean canConvert(char ch) {
|
|
81 |
//check for Devanagari range,ZWJ,ZWNJ and ASCII range.
|
|
82 |
return ((ch >= 0x0900 && ch <= 0x097f) || (ch == 0x200d || ch == 0x200c)
|
|
83 |
|| (ch >= 0x0000 && ch <= 0x007f) );
|
|
84 |
} //canConvert()
|
|
85 |
/**
|
|
86 |
* Converts both Devanagari and ASCII range of characters.
|
|
87 |
* @see sun.io.CharToByteConverter#convert
|
|
88 |
*/
|
|
89 |
public int convert(char[] input, int inStart, int inEnd, byte[] output, int outStart, int outEnd) throws MalformedInputException, UnknownCharacterException, ConversionBufferFullException {
|
|
90 |
|
|
91 |
charOff = inStart;
|
|
92 |
byteOff = outStart;
|
|
93 |
|
|
94 |
for (;charOff < inEnd; charOff++) {
|
|
95 |
|
|
96 |
char inputChar = input[charOff];
|
|
97 |
int index = Integer.MIN_VALUE;
|
|
98 |
boolean isSurrogatePair = false;
|
|
99 |
|
|
100 |
//check if input is in ASCII RANGE
|
|
101 |
if (inputChar >= 0x0000 && inputChar <= 0x007f) {
|
|
102 |
if (byteOff >= outEnd) {
|
|
103 |
throw new ConversionBufferFullException();
|
|
104 |
}
|
|
105 |
output[byteOff++] = (byte) inputChar;
|
|
106 |
continue;
|
|
107 |
}
|
|
108 |
|
|
109 |
// if inputChar == ZWJ replace it with halant
|
|
110 |
// if inputChar == ZWNJ replace it with Nukta
|
|
111 |
if (inputChar == 0x200c) {
|
|
112 |
inputChar = HALANT_CHAR;
|
|
113 |
}
|
|
114 |
else if (inputChar == 0x200d) {
|
|
115 |
inputChar = NUKTA_CHAR;
|
|
116 |
}
|
|
117 |
|
|
118 |
if (inputChar >= 0x0900 && inputChar <= 0x097f) {
|
|
119 |
index = ((int)(inputChar) - 0x0900)*2;
|
|
120 |
}
|
|
121 |
|
|
122 |
// If input char is a high surrogate, ensure that the following
|
|
123 |
// char is a low surrogate. If not, throw a MalformedInputException.
|
|
124 |
// Leave index untouched so substitution or an UnknownCharacterException
|
|
125 |
// will result.
|
|
126 |
else if (inputChar >= 0xd800 && inputChar <= 0xdbff) {
|
|
127 |
if (charOff < inEnd-1) {
|
|
128 |
char nextChar = input[charOff];
|
|
129 |
if (nextChar >= 0xdc00 && nextChar <= 0xdfff) {
|
|
130 |
charOff++;
|
|
131 |
isSurrogatePair = true;
|
|
132 |
}
|
|
133 |
}
|
|
134 |
if (!isSurrogatePair) {
|
|
135 |
badInputLength = 1;
|
|
136 |
throw new MalformedInputException();
|
|
137 |
}
|
|
138 |
}
|
|
139 |
else if (inputChar >= 0xdc00 && inputChar <= 0xdfff) {
|
|
140 |
badInputLength = 1;
|
|
141 |
throw new MalformedInputException();
|
|
142 |
}
|
|
143 |
|
|
144 |
if (index == Integer.MIN_VALUE || directMapTable[index] == NO_CHAR) {
|
|
145 |
if (subMode) {
|
|
146 |
if (byteOff + subBytes.length >= outEnd) {
|
|
147 |
throw new ConversionBufferFullException();
|
|
148 |
}
|
|
149 |
System.arraycopy(subBytes, 0, output, byteOff, subBytes.length);
|
|
150 |
byteOff += subBytes.length;
|
|
151 |
} else {
|
|
152 |
badInputLength = isSurrogatePair? 2 : 1;
|
|
153 |
throw new UnknownCharacterException();
|
|
154 |
}
|
|
155 |
}
|
|
156 |
else {
|
|
157 |
if(byteOff >= outEnd) {
|
|
158 |
throw new ConversionBufferFullException();
|
|
159 |
}
|
|
160 |
output[byteOff++] = directMapTable[index++];
|
|
161 |
if(directMapTable[index] != NO_CHAR) {
|
|
162 |
if(byteOff >= outEnd) {
|
|
163 |
throw new ConversionBufferFullException();
|
|
164 |
}
|
|
165 |
output[byteOff++] = directMapTable[index];
|
|
166 |
}
|
|
167 |
}
|
|
168 |
|
|
169 |
} //end for
|
|
170 |
|
|
171 |
return byteOff - outStart;
|
|
172 |
} //end of routine convert.
|
|
173 |
|
|
174 |
/**
|
|
175 |
* @see sun.io.CharToByteConverter#flush
|
|
176 |
*/
|
|
177 |
public int flush( byte[] output, int outStart, int outEnd )
|
|
178 |
throws MalformedInputException, ConversionBufferFullException {
|
|
179 |
byteOff = charOff = 0;
|
|
180 |
return 0;
|
|
181 |
}//flush()
|
|
182 |
/**
|
|
183 |
* @return The character encoding as a String.
|
|
184 |
*/
|
|
185 |
public String getCharacterEncoding() {
|
|
186 |
return "ISCII91";
|
|
187 |
}//getCharacterEncoding
|
|
188 |
/**
|
|
189 |
* @see sun.io.CharToByteConverter#getMaxBytesPerChar
|
|
190 |
*/
|
|
191 |
public int getMaxBytesPerChar() {
|
|
192 |
return 2;
|
|
193 |
}//getMaxBytesPerChar()
|
|
194 |
/**
|
|
195 |
* @see sun.io.CharToByteConverter#reset
|
|
196 |
*/
|
|
197 |
public void reset() {
|
|
198 |
byteOff = charOff = 0;
|
|
199 |
}
|
|
200 |
} //end of class definition
|