diff -r 17adc050898b -r 2a753e3fc714 jdk/src/jdk.charsets/unix/classes/sun/nio/cs/ext/COMPOUND_TEXT_Encoder.java --- a/jdk/src/jdk.charsets/unix/classes/sun/nio/cs/ext/COMPOUND_TEXT_Encoder.java Mon Apr 13 19:39:40 2015 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,349 +0,0 @@ -/* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ -package sun.nio.cs.ext; - -import java.io.ByteArrayOutputStream; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.*; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -public class COMPOUND_TEXT_Encoder extends CharsetEncoder { - - /** - * NOTE: The following four static variables should be used *only* for - * testing whether a encoder can encode a specific character. They - * cannot be used for actual encoding because they are shared across all - * COMPOUND_TEXT encoders and may be stateful. - */ - private static final Map encodingToEncoderMap = - Collections.synchronizedMap(new HashMap(21, 1.0f)); - private static final CharsetEncoder latin1Encoder; - private static final CharsetEncoder defaultEncoder; - private static final boolean defaultEncodingSupported; - - static { - CharsetEncoder encoder = Charset.defaultCharset().newEncoder(); - String encoding = encoder.charset().name(); - if ("ISO8859_1".equals(encoding)) { - latin1Encoder = encoder; - defaultEncoder = encoder; - defaultEncodingSupported = true; - } else { - try { - latin1Encoder = - Charset.forName("ISO8859_1").newEncoder(); - } catch (IllegalArgumentException e) { - throw new ExceptionInInitializerError - ("ISO8859_1 unsupported"); - } - defaultEncoder = encoder; - defaultEncodingSupported = CompoundTextSupport.getEncodings(). - contains(defaultEncoder.charset().name()); - } - } - - private CharsetEncoder encoder; - private char[] charBuf = new char[1]; - private CharBuffer charbuf = CharBuffer.wrap(charBuf); - private ByteArrayOutputStream nonStandardCharsetBuffer; - private byte[] byteBuf; - private ByteBuffer bytebuf; - private int numNonStandardChars, nonStandardEncodingLen; - - public COMPOUND_TEXT_Encoder(Charset cs) { - super(cs, - (float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2), - (float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2)); - try { - encoder = Charset.forName("ISO8859_1").newEncoder(); - } catch (IllegalArgumentException cannotHappen) {} - initEncoder(encoder); - } - - protected CoderResult encodeLoop(CharBuffer src, ByteBuffer des) { - CoderResult cr = CoderResult.UNDERFLOW; - char[] input = src.array(); - int inOff = src.arrayOffset() + src.position(); - int inEnd = src.arrayOffset() + src.limit(); - - try { - while (inOff < inEnd && cr.isUnderflow()) { - charBuf[0] = input[inOff]; - if (charBuf[0] <= '\u0008' || - (charBuf[0] >= '\u000B' && charBuf[0] <= '\u001F') || - (charBuf[0] >= '\u0080' && charBuf[0] <= '\u009F')) { - // The compound text specification only permits the octets - // 0x09, 0x0A, 0x1B, and 0x9B in C0 and C1. Of these, 1B and - // 9B must also be removed because they initiate control - // sequences. - charBuf[0] = '?'; - } - - CharsetEncoder enc = getEncoder(charBuf[0]); - //System.out.println("char=" + charBuf[0] + ", enc=" + enc); - if (enc == null) { - if (unmappableCharacterAction() - == CodingErrorAction.REPORT) { - charBuf[0] = '?'; - enc = latin1Encoder; - } else { - return CoderResult.unmappableForLength(1); - } - } - if (enc != encoder) { - if (nonStandardCharsetBuffer != null) { - cr = flushNonStandardCharsetBuffer(des); - } else { - //cr= encoder.flush(des); - flushEncoder(encoder, des); - } - if (!cr.isUnderflow()) - return cr; - byte[] escSequence = CompoundTextSupport. - getEscapeSequence(enc.charset().name()); - if (escSequence == null) { - throw new InternalError("Unknown encoding: " + - enc.charset().name()); - } else if (escSequence[1] == (byte)0x25 && - escSequence[2] == (byte)0x2F) { - initNonStandardCharsetBuffer(enc, escSequence); - } else if (des.remaining() >= escSequence.length) { - des.put(escSequence, 0, escSequence.length); - } else { - return CoderResult.OVERFLOW; - } - encoder = enc; - continue; - } - charbuf.rewind(); - if (nonStandardCharsetBuffer == null) { - cr = encoder.encode(charbuf, des, false); - } else { - bytebuf.clear(); - cr = encoder.encode(charbuf, bytebuf, false); - bytebuf.flip(); - nonStandardCharsetBuffer.write(byteBuf, - 0, bytebuf.limit()); - numNonStandardChars++; - } - inOff++; - } - return cr; - } finally { - src.position(inOff - src.arrayOffset()); - } - } - - protected CoderResult implFlush(ByteBuffer out) { - CoderResult cr = (nonStandardCharsetBuffer != null) - ? flushNonStandardCharsetBuffer(out) - //: encoder.flush(out); - : flushEncoder(encoder, out); - reset(); - return cr; - } - - private void initNonStandardCharsetBuffer(CharsetEncoder c, - byte[] escSequence) - { - nonStandardCharsetBuffer = new ByteArrayOutputStream(); - byteBuf = new byte[(int)c.maxBytesPerChar()]; - bytebuf = ByteBuffer.wrap(byteBuf); - nonStandardCharsetBuffer.write(escSequence, 0, escSequence.length); - nonStandardCharsetBuffer.write(0); // M placeholder - nonStandardCharsetBuffer.write(0); // L placeholder - byte[] encoding = CompoundTextSupport. - getEncoding(c.charset().name()); - if (encoding == null) { - throw new InternalError - ("Unknown encoding: " + encoder.charset().name()); - } - nonStandardCharsetBuffer.write(encoding, 0, encoding.length); - nonStandardCharsetBuffer.write(0x02); // divider - nonStandardEncodingLen = encoding.length + 1; - } - - private CoderResult flushNonStandardCharsetBuffer(ByteBuffer out) { - if (numNonStandardChars > 0) { - byte[] flushBuf = new byte[(int)encoder.maxBytesPerChar() * - numNonStandardChars]; - ByteBuffer bb = ByteBuffer.wrap(flushBuf); - flushEncoder(encoder, bb); - bb.flip(); - nonStandardCharsetBuffer.write(flushBuf, 0, bb.limit()); - numNonStandardChars = 0; - } - - int numBytes = nonStandardCharsetBuffer.size(); - int nonStandardBytesOff = 6 + nonStandardEncodingLen; - - if (out.remaining() < (numBytes - nonStandardBytesOff) + - nonStandardBytesOff * (((numBytes - nonStandardBytesOff) / - ((1 << 14) - 1)) + 1)) - { - return CoderResult.OVERFLOW; - } - - byte[] nonStandardBytes = - nonStandardCharsetBuffer.toByteArray(); - - // The non-standard charset header only supports 2^14-1 bytes of data. - // If we have more than that, we have to repeat the header. - do { - out.put((byte)0x1B); - out.put((byte)0x25); - out.put((byte)0x2F); - out.put(nonStandardBytes[3]); - - int toWrite = Math.min(numBytes - nonStandardBytesOff, - (1 << 14) - 1 - nonStandardEncodingLen); - - out.put((byte) - (((toWrite + nonStandardEncodingLen) / 0x80) | 0x80)); // M - out.put((byte) - (((toWrite + nonStandardEncodingLen) % 0x80) | 0x80)); // L - out.put(nonStandardBytes, 6, nonStandardEncodingLen); - out.put(nonStandardBytes, nonStandardBytesOff, toWrite); - nonStandardBytesOff += toWrite; - } while (nonStandardBytesOff < numBytes); - - nonStandardCharsetBuffer = null; - byteBuf = null; - nonStandardEncodingLen = 0; - return CoderResult.UNDERFLOW; - } - - /** - * Resets the encoder. - * Call this method to reset the encoder to its initial state - */ - protected void implReset() { - numNonStandardChars = nonStandardEncodingLen = 0; - nonStandardCharsetBuffer = null; - byteBuf = null; - try { - encoder = Charset.forName("ISO8859_1").newEncoder(); - } catch (IllegalArgumentException cannotHappen) { - } - initEncoder(encoder); - } - - /** - * Return whether a character is mappable or not - * @return true if a character is mappable - */ - public boolean canEncode(char ch) { - return getEncoder(ch) != null; - } - - protected void implOnMalformedInput(CodingErrorAction newAction) { - encoder.onUnmappableCharacter(newAction); - } - - protected void implOnUnmappableCharacter(CodingErrorAction newAction) { - encoder.onUnmappableCharacter(newAction); - } - - protected void implReplaceWith(byte[] newReplacement) { - if (encoder != null) - encoder.replaceWith(newReplacement); - } - - /** - * Try to figure out which CharsetEncoder to use for conversion - * of the specified Unicode character. The target character encoding - * of the returned encoder is approved to be used with Compound Text. - * - * @param ch Unicode character - * @return CharsetEncoder to convert the given character - */ - private CharsetEncoder getEncoder(char ch) { - // 1. Try the current encoder. - if (encoder.canEncode(ch)) { - return encoder; - } - - // 2. Try the default encoder. - if (defaultEncodingSupported && defaultEncoder.canEncode(ch)) { - CharsetEncoder retval = null; - try { - retval = defaultEncoder.charset().newEncoder(); - } catch (UnsupportedOperationException cannotHappen) { - } - initEncoder(retval); - return retval; - } - - // 3. Try ISO8859-1. - if (latin1Encoder.canEncode(ch)) { - CharsetEncoder retval = null; - try { - retval = latin1Encoder.charset().newEncoder(); - } catch (UnsupportedOperationException cannotHappen) {} - initEncoder(retval); - return retval; - } - - // 4. Brute force search of all supported encodings. - for (String encoding : CompoundTextSupport.getEncodings()) - { - CharsetEncoder enc = encodingToEncoderMap.get(encoding); - if (enc == null) { - enc = CompoundTextSupport.getEncoder(encoding); - if (enc == null) { - throw new InternalError("Unsupported encoding: " + - encoding); - } - encodingToEncoderMap.put(encoding, enc); - } - if (enc.canEncode(ch)) { - CharsetEncoder retval = CompoundTextSupport.getEncoder(encoding); - initEncoder(retval); - return retval; - } - } - - return null; - } - - private void initEncoder(CharsetEncoder enc) { - try { - enc.onUnmappableCharacter(CodingErrorAction.REPLACE) - .replaceWith(replacement()); - } catch (IllegalArgumentException x) {} - } - - private CharBuffer fcb= CharBuffer.allocate(0); - private CoderResult flushEncoder(CharsetEncoder enc, ByteBuffer bb) { - enc.encode(fcb, bb, true); - return enc.flush(bb); - } -}