jdk/src/java.base/share/classes/sun/misc/CharacterEncoder.java
changeset 25859 3317bb8137f4
parent 23742 c2b6216ef41d
child 30655 d83f50188ca9
equal deleted inserted replaced
25858:836adbf7a2cd 25859:3317bb8137f4
       
     1 /*
       
     2  * Copyright (c) 1995, 2005, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package sun.misc;
       
    27 
       
    28 import java.io.InputStream;
       
    29 import java.io.ByteArrayInputStream;
       
    30 import java.io.OutputStream;
       
    31 import java.io.ByteArrayOutputStream;
       
    32 import java.io.PrintStream;
       
    33 import java.io.IOException;
       
    34 import java.nio.ByteBuffer;
       
    35 
       
    36 
       
    37 /**
       
    38  * This class defines the encoding half of character encoders.
       
     39  * A character encoder is an algorithm for transforming 8 bit binary
       
    40  * data into text (generally 7 bit ASCII or 8 bit ISO-Latin-1 text)
       
     41  * for transmission over text channels such as e-mail and network news.
       
    42  *
       
    43  * The character encoders have been structured around a central theme
       
    44  * that, in general, the encoded text has the form:
       
    45  *
       
    46  * <pre>
       
    47  *      [Buffer Prefix]
       
    48  *      [Line Prefix][encoded data atoms][Line Suffix]
       
    49  *      [Buffer Suffix]
       
    50  * </pre>
       
    51  *
       
    52  * In the CharacterEncoder and CharacterDecoder classes, one complete
       
    53  * chunk of data is referred to as a <i>buffer</i>. Encoded buffers
       
    54  * are all text, and decoded buffers (sometimes just referred to as
       
    55  * buffers) are binary octets.
       
    56  *
       
     57  * To create a custom encoder, you must, at a minimum, override three
       
    58  * abstract methods in this class.
       
    59  * <DL>
       
    60  * <DD>bytesPerAtom which tells the encoder how many bytes to
       
    61  * send to encodeAtom
       
    62  * <DD>encodeAtom which encodes the bytes sent to it as text.
       
    63  * <DD>bytesPerLine which tells the encoder the maximum number of
       
    64  * bytes per line.
       
    65  * </DL>
       
    66  *
       
    67  * Several useful encoders have already been written and are
       
    68  * referenced in the See Also list below.
       
    69  *
       
    70  * @author      Chuck McManis
       
     71  * @see         CharacterDecoder
       
    72  * @see         UCEncoder
       
    73  * @see         UUEncoder
       
    74  * @see         BASE64Encoder
       
    75  */
       
    76 public abstract class CharacterEncoder {
       
    77 
       
    78     /** Stream that understands "printing" */
       
    79     protected PrintStream pStream;
       
    80 
       
    81     /** Return the number of bytes per atom of encoding */
       
    82     abstract protected int bytesPerAtom();
       
    83 
       
    84     /** Return the number of bytes that can be encoded per line */
       
    85     abstract protected int bytesPerLine();
       
    86 
       
    87     /**
       
    88      * Encode the prefix for the entire buffer. By default is simply
       
    89      * opens the PrintStream for use by the other functions.
       
    90      */
       
    91     protected void encodeBufferPrefix(OutputStream aStream) throws IOException {
       
    92         pStream = new PrintStream(aStream);
       
    93     }
       
    94 
       
    95     /**
       
    96      * Encode the suffix for the entire buffer.
       
    97      */
       
    98     protected void encodeBufferSuffix(OutputStream aStream) throws IOException {
       
    99     }
       
   100 
       
   101     /**
       
   102      * Encode the prefix that starts every output line.
       
   103      */
       
   104     protected void encodeLinePrefix(OutputStream aStream, int aLength)
       
   105     throws IOException {
       
   106     }
       
   107 
       
   108     /**
       
   109      * Encode the suffix that ends every output line. By default
       
   110      * this method just prints a <newline> into the output stream.
       
   111      */
       
   112     protected void encodeLineSuffix(OutputStream aStream) throws IOException {
       
   113         pStream.println();
       
   114     }
       
   115 
       
   116     /** Encode one "atom" of information into characters. */
       
   117     abstract protected void encodeAtom(OutputStream aStream, byte someBytes[],
       
   118                 int anOffset, int aLength) throws IOException;
       
   119 
       
   120     /**
       
   121      * This method works around the bizarre semantics of BufferedInputStream's
       
   122      * read method.
       
   123      */
       
   124     protected int readFully(InputStream in, byte buffer[])
       
   125         throws java.io.IOException {
       
   126         for (int i = 0; i < buffer.length; i++) {
       
   127             int q = in.read();
       
   128             if (q == -1)
       
   129                 return i;
       
   130             buffer[i] = (byte)q;
       
   131         }
       
   132         return buffer.length;
       
   133     }
       
   134 
       
   135     /**
       
   136      * Encode bytes from the input stream, and write them as text characters
       
   137      * to the output stream. This method will run until it exhausts the
       
   138      * input stream, but does not print the line suffix for a final
       
   139      * line that is shorter than bytesPerLine().
       
   140      */
       
   141     public void encode(InputStream inStream, OutputStream outStream)
       
   142         throws IOException {
       
   143         int     j;
       
   144         int     numBytes;
       
   145         byte    tmpbuffer[] = new byte[bytesPerLine()];
       
   146 
       
   147         encodeBufferPrefix(outStream);
       
   148 
       
   149         while (true) {
       
   150             numBytes = readFully(inStream, tmpbuffer);
       
   151             if (numBytes == 0) {
       
   152                 break;
       
   153             }
       
   154             encodeLinePrefix(outStream, numBytes);
       
   155             for (j = 0; j < numBytes; j += bytesPerAtom()) {
       
   156 
       
   157                 if ((j + bytesPerAtom()) <= numBytes) {
       
   158                     encodeAtom(outStream, tmpbuffer, j, bytesPerAtom());
       
   159                 } else {
       
   160                     encodeAtom(outStream, tmpbuffer, j, (numBytes)- j);
       
   161                 }
       
   162             }
       
   163             if (numBytes < bytesPerLine()) {
       
   164                 break;
       
   165             } else {
       
   166                 encodeLineSuffix(outStream);
       
   167             }
       
   168         }
       
   169         encodeBufferSuffix(outStream);
       
   170     }
       
   171 
       
   172     /**
       
   173      * Encode the buffer in <i>aBuffer</i> and write the encoded
       
   174      * result to the OutputStream <i>aStream</i>.
       
   175      */
       
   176     public void encode(byte aBuffer[], OutputStream aStream)
       
   177     throws IOException {
       
   178         ByteArrayInputStream inStream = new ByteArrayInputStream(aBuffer);
       
   179         encode(inStream, aStream);
       
   180     }
       
   181 
       
   182     /**
       
   183      * A 'streamless' version of encode that simply takes a buffer of
       
   184      * bytes and returns a string containing the encoded buffer.
       
   185      */
       
   186     public String encode(byte aBuffer[]) {
       
   187         ByteArrayOutputStream   outStream = new ByteArrayOutputStream();
       
   188         ByteArrayInputStream    inStream = new ByteArrayInputStream(aBuffer);
       
   189         String retVal = null;
       
   190         try {
       
   191             encode(inStream, outStream);
       
   192             // explicit ascii->unicode conversion
       
   193             retVal = outStream.toString("ISO-8859-1");
       
   194         } catch (Exception IOException) {
       
   195             // This should never happen.
       
   196             throw new Error("CharacterEncoder.encode internal error");
       
   197         }
       
   198         return (retVal);
       
   199     }
       
   200 
       
   201     /**
       
   202      * Return a byte array from the remaining bytes in this ByteBuffer.
       
   203      * <P>
       
   204      * The ByteBuffer's position will be advanced to ByteBuffer's limit.
       
   205      * <P>
       
   206      * To avoid an extra copy, the implementation will attempt to return the
       
   207      * byte array backing the ByteBuffer.  If this is not possible, a
       
   208      * new byte array will be created.
       
   209      */
       
   210     private byte [] getBytes(ByteBuffer bb) {
       
   211         /*
       
   212          * This should never return a BufferOverflowException, as we're
       
   213          * careful to allocate just the right amount.
       
   214          */
       
   215         byte [] buf = null;
       
   216 
       
   217         /*
       
   218          * If it has a usable backing byte buffer, use it.  Use only
       
   219          * if the array exactly represents the current ByteBuffer.
       
   220          */
       
   221         if (bb.hasArray()) {
       
   222             byte [] tmp = bb.array();
       
   223             if ((tmp.length == bb.capacity()) &&
       
   224                     (tmp.length == bb.remaining())) {
       
   225                 buf = tmp;
       
   226                 bb.position(bb.limit());
       
   227             }
       
   228         }
       
   229 
       
   230         if (buf == null) {
       
   231             /*
       
   232              * This class doesn't have a concept of encode(buf, len, off),
       
   233              * so if we have a partial buffer, we must reallocate
       
   234              * space.
       
   235              */
       
   236             buf = new byte[bb.remaining()];
       
   237 
       
   238             /*
       
   239              * position() automatically updated
       
   240              */
       
   241             bb.get(buf);
       
   242         }
       
   243 
       
   244         return buf;
       
   245     }
       
   246 
       
   247     /**
       
   248      * Encode the <i>aBuffer</i> ByteBuffer and write the encoded
       
   249      * result to the OutputStream <i>aStream</i>.
       
   250      * <P>
       
   251      * The ByteBuffer's position will be advanced to ByteBuffer's limit.
       
   252      */
       
   253     public void encode(ByteBuffer aBuffer, OutputStream aStream)
       
   254         throws IOException {
       
   255         byte [] buf = getBytes(aBuffer);
       
   256         encode(buf, aStream);
       
   257     }
       
   258 
       
   259     /**
       
   260      * A 'streamless' version of encode that simply takes a ByteBuffer
       
   261      * and returns a string containing the encoded buffer.
       
   262      * <P>
       
   263      * The ByteBuffer's position will be advanced to ByteBuffer's limit.
       
   264      */
       
   265     public String encode(ByteBuffer aBuffer) {
       
   266         byte [] buf = getBytes(aBuffer);
       
   267         return encode(buf);
       
   268     }
       
   269 
       
   270     /**
       
   271      * Encode bytes from the input stream, and write them as text characters
       
   272      * to the output stream. This method will run until it exhausts the
       
   273      * input stream. It differs from encode in that it will add the
       
   274      * line at the end of a final line that is shorter than bytesPerLine().
       
   275      */
       
   276     public void encodeBuffer(InputStream inStream, OutputStream outStream)
       
   277         throws IOException {
       
   278         int     j;
       
   279         int     numBytes;
       
   280         byte    tmpbuffer[] = new byte[bytesPerLine()];
       
   281 
       
   282         encodeBufferPrefix(outStream);
       
   283 
       
   284         while (true) {
       
   285             numBytes = readFully(inStream, tmpbuffer);
       
   286             if (numBytes == 0) {
       
   287                 break;
       
   288             }
       
   289             encodeLinePrefix(outStream, numBytes);
       
   290             for (j = 0; j < numBytes; j += bytesPerAtom()) {
       
   291                 if ((j + bytesPerAtom()) <= numBytes) {
       
   292                     encodeAtom(outStream, tmpbuffer, j, bytesPerAtom());
       
   293                 } else {
       
   294                     encodeAtom(outStream, tmpbuffer, j, (numBytes)- j);
       
   295                 }
       
   296             }
       
   297             encodeLineSuffix(outStream);
       
   298             if (numBytes < bytesPerLine()) {
       
   299                 break;
       
   300             }
       
   301         }
       
   302         encodeBufferSuffix(outStream);
       
   303     }
       
   304 
       
   305     /**
       
   306      * Encode the buffer in <i>aBuffer</i> and write the encoded
       
   307      * result to the OutputStream <i>aStream</i>.
       
   308      */
       
   309     public void encodeBuffer(byte aBuffer[], OutputStream aStream)
       
   310     throws IOException {
       
   311         ByteArrayInputStream inStream = new ByteArrayInputStream(aBuffer);
       
   312         encodeBuffer(inStream, aStream);
       
   313     }
       
   314 
       
   315     /**
       
   316      * A 'streamless' version of encode that simply takes a buffer of
       
   317      * bytes and returns a string containing the encoded buffer.
       
   318      */
       
   319     public String encodeBuffer(byte aBuffer[]) {
       
   320         ByteArrayOutputStream   outStream = new ByteArrayOutputStream();
       
   321         ByteArrayInputStream    inStream = new ByteArrayInputStream(aBuffer);
       
   322         try {
       
   323             encodeBuffer(inStream, outStream);
       
   324         } catch (Exception IOException) {
       
   325             // This should never happen.
       
   326             throw new Error("CharacterEncoder.encodeBuffer internal error");
       
   327         }
       
   328         return (outStream.toString());
       
   329     }
       
   330 
       
   331     /**
       
   332      * Encode the <i>aBuffer</i> ByteBuffer and write the encoded
       
   333      * result to the OutputStream <i>aStream</i>.
       
   334      * <P>
       
   335      * The ByteBuffer's position will be advanced to ByteBuffer's limit.
       
   336      */
       
   337     public void encodeBuffer(ByteBuffer aBuffer, OutputStream aStream)
       
   338         throws IOException {
       
   339         byte [] buf = getBytes(aBuffer);
       
   340         encodeBuffer(buf, aStream);
       
   341     }
       
   342 
       
   343     /**
       
   344      * A 'streamless' version of encode that simply takes a ByteBuffer
       
   345      * and returns a string containing the encoded buffer.
       
   346      * <P>
       
   347      * The ByteBuffer's position will be advanced to ByteBuffer's limit.
       
   348      */
       
   349     public String encodeBuffer(ByteBuffer aBuffer) {
       
   350         byte [] buf = getBytes(aBuffer);
       
   351         return encodeBuffer(buf);
       
   352     }
       
   353 
       
   354 }