diff -r 345857eb0f3b -r d9d491a5a169 jdk/src/share/classes/sun/io/CharToByteCp970.java --- a/jdk/src/share/classes/sun/io/CharToByteCp970.java Wed May 20 13:55:50 2009 -0700 +++ b/jdk/src/share/classes/sun/io/CharToByteCp970.java Thu May 21 23:32:46 2009 -0700 @@ -24,419 +24,17 @@ */ package sun.io; -import sun.nio.cs.ext.IBM970; - -/** -* @author Malcolm Ayres -*/ - -public class CharToByteCp970 extends CharToByteConverter -{ - private static final char SBase = '\uAC00'; - private static final char LBase = '\u1100'; - private static final char VBase = '\u1161'; - private static final char TBase = '\u11A7'; - private static final int VCount = 21; - private static final int TCount = 28; - private static final byte G0 = 0; - private static final byte G1 = 1; - private static final byte G2 = 2; - private static final byte G3 = 3; - private byte charState = G0; - private char l, v, t; - - private byte[] outputByte; - - private char highHalfZoneCode; - private int mask1; - private int mask2; - private int shift; - - private short[] index1; - private String index2; - private String index2a; - - private final static IBM970 nioCoder = new IBM970(); - - public CharToByteCp970() { - super(); - highHalfZoneCode = 0; - outputByte = new byte[2]; - mask1 = 0xFFF8; - mask2 = 0x0007; - shift = 3; - index1 = nioCoder.getEncoderIndex1(); - index2 = nioCoder.getEncoderIndex2(); - index2a = nioCoder.getEncoderIndex2a(); - } - - /** - * flush out any residual data and reset the buffer state - */ - public int flush(byte[] output, int outStart, int outEnd) - throws MalformedInputException, - ConversionBufferFullException - { - int bytesOut; +import sun.nio.cs.ext.*; - byteOff = outStart; - - if (highHalfZoneCode != 0) { - reset(); - badInputLength = 0; - throw new MalformedInputException(); - } - - if (charState != G0) { - try { - unicodeToBuffer(composeHangul() ,output, outEnd); - } - catch(UnknownCharacterException e) { - reset(); - badInputLength = 0; - throw new MalformedInputException(); - } - charState = G0; - } - - bytesOut = byteOff - outStart; - - reset(); - return bytesOut; - } +// EUC_Simple is the same as DBCS_ASCII +public class CharToByteCp970 extends CharToByteDBCS_ASCII { - /** - * Resets converter to its initial state. - */ - public void reset() { - highHalfZoneCode = 0; - charState = G0; - charOff = byteOff = 0; - } - - /** - * Returns true if the given character can be converted to the - * target character encoding. - */ - public boolean canConvert(char ch) { - int index; - int theBytes; - - index = index1[((ch & mask1) >> shift)] + (ch & mask2); - if (index < 15000) - theBytes = (int)(index2.charAt(index)); - else - theBytes = (int)(index2a.charAt(index-15000)); - - if (theBytes != 0) - return (true); - - // only return true if input char was unicode null - all others are - // undefined - return( ch == '\u0000'); + // Return the character set id + public String getCharacterEncoding() { + return "Cp970"; } - /** - * Character conversion - */ - - public int convert(char[] input, int inOff, int inEnd, - byte[] output, int outOff, int outEnd) - throws UnknownCharacterException, MalformedInputException, - ConversionBufferFullException - { - char inputChar; - int inputSize; - - charOff = inOff; - byteOff = outOff; - - while (charOff < inEnd) { - - if (highHalfZoneCode == 0) { - inputChar = input[charOff]; - inputSize = 1; - } else { - inputChar = highHalfZoneCode; - inputSize = 0; - highHalfZoneCode = 0; - } - - switch (charState) { - case G0: - - l = LBase; - v = VBase; - t = TBase; - - if ( isLeadingC(inputChar) ) { // Leading Consonant - l = inputChar; - charState = G1; - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = inputChar; - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - break; - - case G1: - if ( isLeadingC(inputChar) ) { // Leading Consonant - l = composeLL(l, inputChar); - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = inputChar; - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - break; - - case G2: - if ( isLeadingC(inputChar) ) { // Leading Consonant - - unicodeToBuffer(composeHangul(), output, outEnd); - - l = inputChar; - v = VBase; - t = TBase; - charState = G1; - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = composeVV(l, inputChar); - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - - break; - - case G3: - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = composeTT(t, inputChar); - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - - break; - } - - if (charState != G0) - charOff++; - else { - - // Is this a high surrogate? - if(inputChar >= '\ud800' && inputChar <= '\udbff') { - // Is this the last character of the input? - if (charOff + inputSize >= inEnd) { - highHalfZoneCode = inputChar; - charOff += inputSize; - break; - } - - // Is there a low surrogate following? - inputChar = input[charOff + inputSize]; - if (inputChar >= '\udc00' && inputChar <= '\udfff') { - // We have a valid surrogate pair. Too bad we don't do - // surrogates. Is substitution enabled? - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; - } else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; - } - - bytesToBuffer(outputByte, output, outEnd); - inputSize++; - } else { - badInputLength = 2; - throw new UnknownCharacterException(); - } - } else { - // We have a malformed surrogate pair - badInputLength = 1; - throw new MalformedInputException(); - } - } - - // Is this an unaccompanied low surrogate? - else - if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { - badInputLength = 1; - throw new MalformedInputException(); - } else { - unicodeToBuffer(inputChar, output, outEnd); - } - - charOff += inputSize; - - } - - } - - return byteOff - outOff; - + public CharToByteCp970() { + super((DoubleByte.Encoder)new IBM970().newEncoder()); } - - private char composeHangul() { - int lIndex, vIndex, tIndex; - - lIndex = l - LBase; - vIndex = v - VBase; - tIndex = t - TBase; - - return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase); - } - - private char composeLL(char l1, char l2) { - return l2; - } - - private char composeVV(char v1, char v2) { - return v2; - } - - private char composeTT(char t1, char t2) { - return t2; - } - - private boolean isLeadingC(char c) { - return (c >= LBase && c <= '\u1159'); - } - - private boolean isVowel(char c) { - return (c >= VBase && c <= '\u11a2'); - } - - private boolean isTrailingC(char c) { - return (c >= TBase && c <= '\u11f9'); - } - - /** - * returns the maximum number of bytes needed to convert a char - */ - public int getMaxBytesPerChar() { - return 2; - } - - - /** - * Return the character set ID - */ - public String getCharacterEncoding() { - return "Cp970"; - } - - /** - * private function to add the bytes to the output buffer - */ - private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd) - throws ConversionBufferFullException, - UnknownCharacterException { - - int spaceNeeded; - - // ensure sufficient space for the bytes(s) - - if (theBytes[0] == 0x00) - spaceNeeded = 1; - else - spaceNeeded = 2; - - if (byteOff + spaceNeeded > outEnd) - throw new ConversionBufferFullException(); - - // move the data into the buffer - - if (spaceNeeded == 1) - output[byteOff++] = theBytes[1]; - else { - output[byteOff++] = theBytes[0]; - output[byteOff++] = theBytes[1]; - } - - } - - /** - * private function to add a unicode character to the output buffer - */ - private void unicodeToBuffer(char unicode, byte[] output, int outEnd) - throws ConversionBufferFullException, - UnknownCharacterException { - - int index; - int theBytes; - - // first we convert the unicode to its byte representation - - index = index1[((unicode & mask1) >> shift)] + (unicode & mask2); - if (index < 15000) { - theBytes = (int)(index2.charAt(index)); - } else { - theBytes = (int)(index2a.charAt(index-15000)); - } - outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8); - outputByte[1] = (byte)(theBytes & 0x000000ff); - - // if the unicode was not mappable - look for the substitution bytes - - if (outputByte[0] == 0x00 && outputByte[1] == 0x00 - && unicode != '\u0000') { - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; - } else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; - } - } else { - badInputLength = 1; - throw new UnknownCharacterException(); - } - } - - // now put the bytes in the buffer - - bytesToBuffer(outputByte, output, outEnd); - - } - }