jdk/src/jdk.charsets/unix/classes/sun/nio/cs/ext/COMPOUND_TEXT_Encoder.java
changeset 30456 2a753e3fc714
parent 30455 17adc050898b
child 30457 8934b8474110
equal deleted inserted replaced
30455:17adc050898b 30456:2a753e3fc714
     1 /*
       
     2  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 package sun.nio.cs.ext;
       
    26 
       
    27 import java.io.ByteArrayOutputStream;
       
    28 import java.nio.ByteBuffer;
       
    29 import java.nio.CharBuffer;
       
    30 import java.nio.charset.*;
       
    31 
       
    32 import java.util.Collections;
       
    33 import java.util.HashMap;
       
    34 import java.util.Iterator;
       
    35 import java.util.List;
       
    36 import java.util.Map;
       
    37 
       
    38 public class COMPOUND_TEXT_Encoder extends CharsetEncoder {
       
    39 
       
    /**
     * NOTE: The following four static variables should be used *only* for
     * testing whether an encoder can encode a specific character. They
     * cannot be used for actual encoding because they are shared across all
     * COMPOUND_TEXT encoders and may be stateful.
     */
    private static final Map<String,CharsetEncoder> encodingToEncoderMap =
        Collections.synchronizedMap(
            new HashMap<String,CharsetEncoder>(21, 1.0f));
    private static final CharsetEncoder latin1Encoder;
    private static final CharsetEncoder defaultEncoder;
    private static final boolean defaultEncodingSupported;

    // Sets up the shared probe encoders: one for the platform default
    // charset and one for ISO 8859-1 (the same object when the default
    // charset reports the name "ISO8859_1").
    static {
        final CharsetEncoder platformEncoder =
            Charset.defaultCharset().newEncoder();
        final String platformName = platformEncoder.charset().name();

        defaultEncoder = platformEncoder;
        if ("ISO8859_1".equals(platformName)) {
            latin1Encoder = platformEncoder;
            defaultEncodingSupported = true;
        } else {
            try {
                latin1Encoder = Charset.forName("ISO8859_1").newEncoder();
            } catch (IllegalArgumentException e) {
                throw new ExceptionInInitializerError("ISO8859_1 unsupported");
            }
            // The default charset is only usable if Compound Text lists it.
            defaultEncodingSupported = CompoundTextSupport.getEncodings()
                .contains(platformEncoder.charset().name());
        }
    }
       
    72 
       
    73     private CharsetEncoder encoder;
       
    74     private char[] charBuf = new char[1];
       
    75     private CharBuffer charbuf = CharBuffer.wrap(charBuf);
       
    76     private ByteArrayOutputStream nonStandardCharsetBuffer;
       
    77     private byte[] byteBuf;
       
    78     private ByteBuffer bytebuf;
       
    79     private int numNonStandardChars, nonStandardEncodingLen;
       
    80 
       
    81     public COMPOUND_TEXT_Encoder(Charset cs) {
       
    82         super(cs,
       
    83               (float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2),
       
    84               (float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2));
       
    85         try {
       
    86             encoder = Charset.forName("ISO8859_1").newEncoder();
       
    87         } catch (IllegalArgumentException cannotHappen) {}
       
    88         initEncoder(encoder);
       
    89     }
       
    90 
       
    91     protected CoderResult encodeLoop(CharBuffer src, ByteBuffer des) {
       
    92         CoderResult cr = CoderResult.UNDERFLOW;
       
    93         char[] input = src.array();
       
    94         int inOff = src.arrayOffset() + src.position();
       
    95         int inEnd = src.arrayOffset() + src.limit();
       
    96 
       
    97         try {
       
    98             while (inOff < inEnd && cr.isUnderflow()) {
       
    99                 charBuf[0] = input[inOff];
       
   100                 if (charBuf[0] <= '\u0008' ||
       
   101                     (charBuf[0] >= '\u000B' && charBuf[0] <= '\u001F') ||
       
   102                     (charBuf[0] >= '\u0080' && charBuf[0] <= '\u009F')) {
       
   103                     // The compound text specification only permits the octets
       
   104                     // 0x09, 0x0A, 0x1B, and 0x9B in C0 and C1. Of these, 1B and
       
   105                     // 9B must also be removed because they initiate control
       
   106                     // sequences.
       
   107                     charBuf[0] = '?';
       
   108                 }
       
   109 
       
   110                 CharsetEncoder enc = getEncoder(charBuf[0]);
       
   111                 //System.out.println("char=" + charBuf[0] + ", enc=" + enc);
       
   112                 if (enc == null) {
       
   113                     if (unmappableCharacterAction()
       
   114                         == CodingErrorAction.REPORT) {
       
   115                         charBuf[0] = '?';
       
   116                         enc = latin1Encoder;
       
   117                     } else {
       
   118                         return CoderResult.unmappableForLength(1);
       
   119                     }
       
   120                 }
       
   121                 if (enc != encoder) {
       
   122                     if (nonStandardCharsetBuffer != null) {
       
   123                         cr = flushNonStandardCharsetBuffer(des);
       
   124                     } else {
       
   125                         //cr= encoder.flush(des);
       
   126                         flushEncoder(encoder, des);
       
   127                     }
       
   128                     if (!cr.isUnderflow())
       
   129                         return cr;
       
   130                     byte[] escSequence = CompoundTextSupport.
       
   131                         getEscapeSequence(enc.charset().name());
       
   132                     if (escSequence == null) {
       
   133                         throw new InternalError("Unknown encoding: " +
       
   134                                                 enc.charset().name());
       
   135                     } else if (escSequence[1] == (byte)0x25 &&
       
   136                                escSequence[2] == (byte)0x2F) {
       
   137                         initNonStandardCharsetBuffer(enc, escSequence);
       
   138                     } else if (des.remaining() >= escSequence.length) {
       
   139                         des.put(escSequence, 0, escSequence.length);
       
   140                     } else {
       
   141                         return CoderResult.OVERFLOW;
       
   142                     }
       
   143                     encoder = enc;
       
   144                     continue;
       
   145                 }
       
   146                 charbuf.rewind();
       
   147                 if (nonStandardCharsetBuffer == null) {
       
   148                     cr = encoder.encode(charbuf, des, false);
       
   149                 } else {
       
   150                     bytebuf.clear();
       
   151                     cr = encoder.encode(charbuf, bytebuf, false);
       
   152                     bytebuf.flip();
       
   153                     nonStandardCharsetBuffer.write(byteBuf,
       
   154                                                    0, bytebuf.limit());
       
   155                     numNonStandardChars++;
       
   156                 }
       
   157                 inOff++;
       
   158             }
       
   159             return cr;
       
   160         } finally {
       
   161             src.position(inOff - src.arrayOffset());
       
   162         }
       
   163     }
       
   164 
       
   165     protected CoderResult implFlush(ByteBuffer out) {
       
   166         CoderResult cr = (nonStandardCharsetBuffer != null)
       
   167             ? flushNonStandardCharsetBuffer(out)
       
   168             //: encoder.flush(out);
       
   169             : flushEncoder(encoder, out);
       
   170         reset();
       
   171         return cr;
       
   172     }
       
   173 
       
   174     private void initNonStandardCharsetBuffer(CharsetEncoder c,
       
   175                                               byte[] escSequence)
       
   176     {
       
   177         nonStandardCharsetBuffer = new ByteArrayOutputStream();
       
   178         byteBuf = new byte[(int)c.maxBytesPerChar()];
       
   179         bytebuf = ByteBuffer.wrap(byteBuf);
       
   180         nonStandardCharsetBuffer.write(escSequence, 0, escSequence.length);
       
   181         nonStandardCharsetBuffer.write(0); // M placeholder
       
   182         nonStandardCharsetBuffer.write(0); // L placeholder
       
   183         byte[] encoding = CompoundTextSupport.
       
   184             getEncoding(c.charset().name());
       
   185         if (encoding == null) {
       
   186             throw new InternalError
       
   187                 ("Unknown encoding: " + encoder.charset().name());
       
   188         }
       
   189         nonStandardCharsetBuffer.write(encoding, 0, encoding.length);
       
   190         nonStandardCharsetBuffer.write(0x02); // divider
       
   191         nonStandardEncodingLen = encoding.length + 1;
       
   192     }
       
   193 
       
   194     private CoderResult flushNonStandardCharsetBuffer(ByteBuffer out) {
       
   195         if (numNonStandardChars > 0) {
       
   196             byte[] flushBuf = new byte[(int)encoder.maxBytesPerChar() *
       
   197                                        numNonStandardChars];
       
   198             ByteBuffer bb = ByteBuffer.wrap(flushBuf);
       
   199             flushEncoder(encoder, bb);
       
   200             bb.flip();
       
   201             nonStandardCharsetBuffer.write(flushBuf, 0, bb.limit());
       
   202             numNonStandardChars = 0;
       
   203         }
       
   204 
       
   205         int numBytes = nonStandardCharsetBuffer.size();
       
   206         int nonStandardBytesOff = 6 + nonStandardEncodingLen;
       
   207 
       
   208         if (out.remaining() < (numBytes - nonStandardBytesOff) +
       
   209             nonStandardBytesOff * (((numBytes - nonStandardBytesOff) /
       
   210                                     ((1 << 14) - 1)) + 1))
       
   211         {
       
   212             return CoderResult.OVERFLOW;
       
   213         }
       
   214 
       
   215         byte[] nonStandardBytes =
       
   216             nonStandardCharsetBuffer.toByteArray();
       
   217 
       
   218         // The non-standard charset header only supports 2^14-1 bytes of data.
       
   219         // If we have more than that, we have to repeat the header.
       
   220         do {
       
   221             out.put((byte)0x1B);
       
   222             out.put((byte)0x25);
       
   223             out.put((byte)0x2F);
       
   224             out.put(nonStandardBytes[3]);
       
   225 
       
   226             int toWrite = Math.min(numBytes - nonStandardBytesOff,
       
   227                                    (1 << 14) - 1 - nonStandardEncodingLen);
       
   228 
       
   229             out.put((byte)
       
   230                 (((toWrite + nonStandardEncodingLen) / 0x80) | 0x80)); // M
       
   231             out.put((byte)
       
   232                 (((toWrite + nonStandardEncodingLen) % 0x80) | 0x80)); // L
       
   233             out.put(nonStandardBytes, 6, nonStandardEncodingLen);
       
   234             out.put(nonStandardBytes, nonStandardBytesOff, toWrite);
       
   235             nonStandardBytesOff += toWrite;
       
   236         } while (nonStandardBytesOff < numBytes);
       
   237 
       
   238         nonStandardCharsetBuffer = null;
       
   239         byteBuf = null;
       
   240         nonStandardEncodingLen = 0;
       
   241         return CoderResult.UNDERFLOW;
       
   242     }
       
   243 
       
   244     /**
       
   245      * Resets the encoder.
       
   246      * Call this method to reset the encoder to its initial state
       
   247      */
       
   248     protected void implReset() {
       
   249         numNonStandardChars = nonStandardEncodingLen = 0;
       
   250         nonStandardCharsetBuffer = null;
       
   251         byteBuf = null;
       
   252         try {
       
   253             encoder = Charset.forName("ISO8859_1").newEncoder();
       
   254         } catch (IllegalArgumentException cannotHappen) {
       
   255         }
       
   256         initEncoder(encoder);
       
   257     }
       
   258 
       
   259     /**
       
   260      * Return whether a character is mappable or not
       
   261      * @return true if a character is mappable
       
   262      */
       
   263     public boolean canEncode(char ch) {
       
   264         return getEncoder(ch) != null;
       
   265     }
       
   266 
       
   267     protected void implOnMalformedInput(CodingErrorAction newAction) {
       
   268         encoder.onUnmappableCharacter(newAction);
       
   269     }
       
   270 
       
   271     protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
       
   272         encoder.onUnmappableCharacter(newAction);
       
   273     }
       
   274 
       
   275     protected void implReplaceWith(byte[] newReplacement) {
       
   276         if (encoder != null)
       
   277             encoder.replaceWith(newReplacement);
       
   278     }
       
   279 
       
   280     /**
       
   281      * Try to figure out which CharsetEncoder to use for conversion
       
   282      * of the specified Unicode character. The target character encoding
       
   283      * of the returned encoder is approved to be used with Compound Text.
       
   284      *
       
   285      * @param ch Unicode character
       
   286      * @return CharsetEncoder to convert the given character
       
   287      */
       
   288     private CharsetEncoder getEncoder(char ch) {
       
   289         // 1. Try the current encoder.
       
   290         if (encoder.canEncode(ch)) {
       
   291             return encoder;
       
   292         }
       
   293 
       
   294         // 2. Try the default encoder.
       
   295         if (defaultEncodingSupported && defaultEncoder.canEncode(ch)) {
       
   296             CharsetEncoder retval = null;
       
   297             try {
       
   298                 retval = defaultEncoder.charset().newEncoder();
       
   299             } catch (UnsupportedOperationException cannotHappen) {
       
   300             }
       
   301             initEncoder(retval);
       
   302             return retval;
       
   303         }
       
   304 
       
   305         // 3. Try ISO8859-1.
       
   306         if (latin1Encoder.canEncode(ch)) {
       
   307             CharsetEncoder retval = null;
       
   308             try {
       
   309                 retval = latin1Encoder.charset().newEncoder();
       
   310             } catch (UnsupportedOperationException cannotHappen) {}
       
   311             initEncoder(retval);
       
   312             return retval;
       
   313         }
       
   314 
       
   315         // 4. Brute force search of all supported encodings.
       
   316         for (String encoding : CompoundTextSupport.getEncodings())
       
   317         {
       
   318             CharsetEncoder enc = encodingToEncoderMap.get(encoding);
       
   319             if (enc == null) {
       
   320                 enc = CompoundTextSupport.getEncoder(encoding);
       
   321                 if (enc == null) {
       
   322                     throw new InternalError("Unsupported encoding: " +
       
   323                                             encoding);
       
   324                 }
       
   325                 encodingToEncoderMap.put(encoding, enc);
       
   326             }
       
   327             if (enc.canEncode(ch)) {
       
   328                 CharsetEncoder retval = CompoundTextSupport.getEncoder(encoding);
       
   329                 initEncoder(retval);
       
   330                 return retval;
       
   331             }
       
   332         }
       
   333 
       
   334         return null;
       
   335     }
       
   336 
       
   337     private void initEncoder(CharsetEncoder enc) {
       
   338         try {
       
   339             enc.onUnmappableCharacter(CodingErrorAction.REPLACE)
       
   340                 .replaceWith(replacement());
       
   341         } catch (IllegalArgumentException x) {}
       
   342     }
       
   343 
       
   344     private CharBuffer fcb= CharBuffer.allocate(0);
       
   345     private CoderResult flushEncoder(CharsetEncoder enc, ByteBuffer bb) {
       
   346         enc.encode(fcb, bb, true);
       
   347         return enc.flush(bb);
       
   348     }
       
   349 }