jdk/src/java.base/share/classes/sun/misc/CharacterDecoder.java
changeset 34763 138d9e3f9da7
parent 34762 d68b7daca533
parent 34748 3b2cde99bd99
child 34764 f9bcdce2df26
equal deleted inserted replaced
34762:d68b7daca533 34763:138d9e3f9da7
     1 /*
       
     2  * Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package sun.misc;
       
    27 
       
    28 import java.io.OutputStream;
       
    29 import java.io.ByteArrayOutputStream;
       
    30 import java.io.InputStream;
       
    31 import java.io.PushbackInputStream;
       
    32 import java.io.ByteArrayInputStream;
       
    33 import java.io.IOException;
       
    34 import java.nio.ByteBuffer;
       
    35 
       
    36 /**
       
    37  * This class defines the decoding half of character encoders.
       
     38  * A character decoder is an algorithm for transforming 8 bit
       
    39  * binary data that has been encoded into text by a character
       
    40  * encoder, back into original binary form.
       
    41  *
       
    42  * The character encoders, in general, have been structured
       
    43  * around a central theme that binary data can be encoded into
       
    44  * text that has the form:
       
    45  *
       
    46  * <pre>
       
    47  *      [Buffer Prefix]
       
    48  *      [Line Prefix][encoded data atoms][Line Suffix]
       
    49  *      [Buffer Suffix]
       
    50  * </pre>
       
    51  *
       
    52  * Of course in the simplest encoding schemes, the buffer has no
       
     53  * distinct prefix or suffix, however all have some fixed relationship
       
    54  * between the text in an 'atom' and the binary data itself.
       
    55  *
       
    56  * In the CharacterEncoder and CharacterDecoder classes, one complete
       
    57  * chunk of data is referred to as a <i>buffer</i>. Encoded buffers
       
    58  * are all text, and decoded buffers (sometimes just referred to as
       
    59  * buffers) are binary octets.
       
    60  *
       
     61  * To create a custom decoder, you must, at a minimum, override three
       
    62  * abstract methods in this class.
       
    63  * <DL>
       
    64  * <DD>bytesPerAtom which tells the decoder how many bytes to
       
    65  * expect from decodeAtom
       
    66  * <DD>decodeAtom which decodes the bytes sent to it as text.
       
     67  * <DD>bytesPerLine which tells the decoder the maximum number of
       
    68  * bytes per line.
       
    69  * </DL>
       
    70  *
       
    71  * In general, the character decoders return error in the form of a
       
    72  * CEFormatException. The syntax of the detail string is
       
    73  * <pre>
       
    74  *      DecoderClassName: Error message.
       
    75  * </pre>
       
    76  *
       
    77  * Several useful decoders have already been written and are
       
    78  * referenced in the See Also list below.
       
    79  *
       
    80  * @author      Chuck McManis
       
    81  * @see         CEFormatException
       
    82  * @see         CharacterEncoder
       
    83  * @see         UCDecoder
       
    84  * @see         UUDecoder
       
    85  * @see         BASE64Decoder
       
    86  */
       
    87 
       
    88 public abstract class CharacterDecoder {
       
    89 
       
    90     /** Return the number of bytes per atom of decoding */
       
    91     protected abstract int bytesPerAtom();
       
    92 
       
    93     /** Return the maximum number of bytes that can be encoded per line */
       
    94     protected abstract int bytesPerLine();
       
    95 
       
    96     /** decode the beginning of the buffer, by default this is a NOP. */
       
    97     protected void decodeBufferPrefix(PushbackInputStream aStream, OutputStream bStream) throws IOException { }
       
    98 
       
    99     /** decode the buffer suffix, again by default it is a NOP. */
       
   100     protected void decodeBufferSuffix(PushbackInputStream aStream, OutputStream bStream) throws IOException { }
       
   101 
       
   102     /**
       
   103      * This method should return, if it knows, the number of bytes
       
   104      * that will be decoded. Many formats such as uuencoding provide
       
   105      * this information. By default we return the maximum bytes that
       
   106      * could have been encoded on the line.
       
   107      */
       
   108     protected int decodeLinePrefix(PushbackInputStream aStream, OutputStream bStream) throws IOException {
       
   109         return (bytesPerLine());
       
   110     }
       
   111 
       
   112     /**
       
   113      * This method post processes the line, if there are error detection
       
   114      * or correction codes in a line, they are generally processed by
       
   115      * this method. The simplest version of this method looks for the
       
   116      * (newline) character.
       
   117      */
       
   118     protected void decodeLineSuffix(PushbackInputStream aStream, OutputStream bStream) throws IOException { }
       
   119 
       
   120     /**
       
   121      * This method does an actual decode. It takes the decoded bytes and
       
   122      * writes them to the OutputStream. The integer <i>l</i> tells the
       
   123      * method how many bytes are required. This is always {@literal <=} bytesPerAtom().
       
   124      */
       
   125     protected void decodeAtom(PushbackInputStream aStream, OutputStream bStream, int l) throws IOException {
       
   126         throw new CEStreamExhausted();
       
   127     }
       
   128 
       
   129     /**
       
   130      * This method works around the bizarre semantics of BufferedInputStream's
       
   131      * read method.
       
   132      */
       
   133     protected int readFully(InputStream in, byte buffer[], int offset, int len)
       
   134         throws java.io.IOException {
       
   135         for (int i = 0; i < len; i++) {
       
   136             int q = in.read();
       
   137             if (q == -1)
       
   138                 return ((i == 0) ? -1 : i);
       
   139             buffer[i+offset] = (byte)q;
       
   140         }
       
   141         return len;
       
   142     }
       
   143 
       
   144     /**
       
   145      * Decode the text from the InputStream and write the decoded
       
   146      * octets to the OutputStream. This method runs until the stream
       
   147      * is exhausted.
       
   148      * @exception CEFormatException An error has occurred while decoding
       
   149      * @exception CEStreamExhausted The input stream is unexpectedly out of data
       
   150      */
       
   151     public void decodeBuffer(InputStream aStream, OutputStream bStream) throws IOException {
       
   152         int     i;
       
   153         int     totalBytes = 0;
       
   154 
       
   155         PushbackInputStream ps = new PushbackInputStream (aStream);
       
   156         decodeBufferPrefix(ps, bStream);
       
   157         while (true) {
       
   158             int length;
       
   159 
       
   160             try {
       
   161                 length = decodeLinePrefix(ps, bStream);
       
   162                 for (i = 0; (i+bytesPerAtom()) < length; i += bytesPerAtom()) {
       
   163                     decodeAtom(ps, bStream, bytesPerAtom());
       
   164                     totalBytes += bytesPerAtom();
       
   165                 }
       
   166                 if ((i + bytesPerAtom()) == length) {
       
   167                     decodeAtom(ps, bStream, bytesPerAtom());
       
   168                     totalBytes += bytesPerAtom();
       
   169                 } else {
       
   170                     decodeAtom(ps, bStream, length - i);
       
   171                     totalBytes += (length - i);
       
   172                 }
       
   173                 decodeLineSuffix(ps, bStream);
       
   174             } catch (CEStreamExhausted e) {
       
   175                 break;
       
   176             }
       
   177         }
       
   178         decodeBufferSuffix(ps, bStream);
       
   179     }
       
   180 
       
   181     /**
       
   182      * Alternate decode interface that takes a String containing the encoded
       
   183      * buffer and returns a byte array containing the data.
       
   184      * @exception CEFormatException An error has occurred while decoding
       
   185      */
       
   186     public byte[] decodeBuffer(String inputString) throws IOException {
       
   187         byte inputBuffer[] = inputString.getBytes("ISO-8859-1");
       
   188         ByteArrayInputStream inStream = new ByteArrayInputStream(inputBuffer);
       
   189         ByteArrayOutputStream outStream = new ByteArrayOutputStream();
       
   190         decodeBuffer(inStream, outStream);
       
   191         return outStream.toByteArray();
       
   192     }
       
   193 
       
   194     /**
       
   195      * Decode the contents of the inputstream into a buffer.
       
   196      */
       
   197     public byte[] decodeBuffer(InputStream in) throws IOException {
       
   198         ByteArrayOutputStream outStream = new ByteArrayOutputStream();
       
   199         decodeBuffer(in, outStream);
       
   200         return outStream.toByteArray();
       
   201     }
       
   202 
       
   203     /**
       
   204      * Decode the contents of the String into a ByteBuffer.
       
   205      */
       
   206     public ByteBuffer decodeBufferToByteBuffer(String inputString)
       
   207         throws IOException {
       
   208         return ByteBuffer.wrap(decodeBuffer(inputString));
       
   209     }
       
   210 
       
   211     /**
       
   212      * Decode the contents of the inputStream into a ByteBuffer.
       
   213      */
       
   214     public ByteBuffer decodeBufferToByteBuffer(InputStream in)
       
   215         throws IOException {
       
   216         return ByteBuffer.wrap(decodeBuffer(in));
       
   217     }
       
   218 }