jaxws/src/share/jaxws_classes/com/sun/xml/internal/messaging/saaj/packaging/mime/internet/MimeUtility.java
changeset 12009 4abb694f273a
child 16791 fe5141eabb0e
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jaxws/src/share/jaxws_classes/com/sun/xml/internal/messaging/saaj/packaging/mime/internet/MimeUtility.java	Tue Mar 06 16:09:35 2012 -0800
@@ -0,0 +1,1507 @@
+/*
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @(#)MimeUtility.java       1.45 03/03/10
+ */
+
+
+
+package com.sun.xml.internal.messaging.saaj.packaging.mime.internet;
+
+import java.io.*;
+import java.util.*;
+
+import javax.activation.DataHandler;
+import javax.activation.DataSource;
+
+import com.sun.xml.internal.messaging.saaj.packaging.mime.MessagingException;
+import com.sun.xml.internal.messaging.saaj.packaging.mime.util.*;
+import com.sun.xml.internal.messaging.saaj.util.SAAJUtil;
+
+/**
+ * This is a utility class that provides various MIME related
+ * functionality. <p>
+ *
+ * There are a set of methods to encode and decode MIME headers as
+ * per RFC 2047. A brief description on handling such headers is
+ * given below: <p>
+ *
+ * RFC 822 mail headers <strong>must</strong> contain only US-ASCII
+ * characters. Headers that contain non US-ASCII characters must be
+ * encoded so that they contain only US-ASCII characters. Basically,
+ * this process involves using either BASE64 or QP to encode certain
+ * characters. RFC 2047 describes this in detail. <p>
+ *
+ * In Java, Strings contain (16 bit) Unicode characters. ASCII is a
+ * subset of Unicode (and occupies the range 0 - 127). A String
+ * that contains only ASCII characters is already mail-safe. If the
+ * String contains non US-ASCII characters, it must be encoded. An
+ * additional complexity in this step is that since Unicode is not
+ * yet a widely used charset, one might want to first charset-encode
+ * the String into another charset and then do the transfer-encoding.
+ * <p>
+ * Note that to get the actual bytes of a mail-safe String (say,
+ * for sending over SMTP), one must do
+ * <p><blockquote><pre>
+ *
+ *      byte[] bytes = string.getBytes("iso-8859-1");
+ *
+ * </pre></blockquote><p>
+ *
+ * The <code>setHeader</code> and <code>addHeader</code> methods
+ * on MimeMessage and MimeBodyPart assume that the given header values
+ * are Unicode strings that contain only US-ASCII characters. Hence
+ * the callers of those methods must insure that the values they pass
+ * do not contain non US-ASCII characters. The methods in this class
+ * help do this. <p>
+ *
+ * The <code>getHeader</code> family of methods on MimeMessage and
+ * MimeBodyPart return the raw header value. These might be encoded
+ * as per RFC 2047, and if so, must be decoded into Unicode Strings.
+ * The methods in this class help to do this. <p>
+ *
+ * Several System properties control strict conformance to the MIME
+ * spec.  Note that these are not session properties but must be set
+ * globally as System properties. <p>
+ *
+ * The <code>mail.mime.decodetext.strict</code> property controls
+ * decoding of MIME encoded words.  The MIME spec requires that encoded
+ * words start at the beginning of a whitespace separated word.  Some
+ * mailers incorrectly include encoded words in the middle of a word.
+ * If the <code>mail.mime.decodetext.strict</code> System property is
+ * set to <code>"false"</code>, an attempt will be made to decode these
+ * illegal encoded words. The default is true. <p>
+ *
+ * The <code>mail.mime.encodeeol.strict</code> property controls the
+ * choice of Content-Transfer-Encoding for MIME parts that are not of
+ * type "text".  Often such parts will contain textual data for which
+ * an encoding that allows normal end of line conventions is appropriate.
+ * In rare cases, such a part will appear to contain entirely textual
+ * data, but will require an encoding that preserves CR and LF characters
+ * without change.  If the <code>mail.mime.decodetext.strict</code>
+ * System property is set to <code>"true"</code>, such an encoding will
+ * be used when necessary.  The default is false. <p>
+ *
+ * In addition, the <code>mail.mime.charset</code> System property can
+ * be used to specify the default MIME charset to use for encoded words
+ * and text parts that don't otherwise specify a charset.  Normally, the
+ * default MIME charset is derived from the default Java charset, as
+ * specified in the <code>file.encoding</code> System property.  Most
+ * applications will have no need to explicitly set the default MIME
+ * charset.  In cases where the default MIME charset to be used for
+ * mail messages is different than the charset used for files stored on
+ * the system, this property should be set.
+ *
+ * @version 1.45, 03/03/10
+ * @author  John Mani
+ * @author  Bill Shannon
+ */
+
+public class MimeUtility {
+
+    // This class cannot be instantiated
+    private MimeUtility() { }
+
+    public static final int ALL = -1;
+
+    private static final int BUFFER_SIZE = 1024;
+    private static boolean decodeStrict = true;
+    private static boolean encodeEolStrict = false;
+    private static boolean foldEncodedWords = false;
+    private static boolean foldText = true;
+
+    static {
+        try {
+            String s = SAAJUtil.getSystemProperty("mail.mime.decodetext.strict");
+            // default to true
+            decodeStrict = s == null || !s.equalsIgnoreCase("false");
+            s = SAAJUtil.getSystemProperty("mail.mime.encodeeol.strict");
+            // default to false
+            encodeEolStrict = s != null && s.equalsIgnoreCase("true");
+            s = SAAJUtil.getSystemProperty("mail.mime.foldencodedwords");
+            // default to false
+            foldEncodedWords = s != null && s.equalsIgnoreCase("true");
+            s = SAAJUtil.getSystemProperty("mail.mime.foldtext");
+            // default to true
+            foldText = s == null || !s.equalsIgnoreCase("false");
+        } catch (SecurityException sex) {
+            // ignore it
+        }
+    }
+
+
+    /**
+     * Get the content-transfer-encoding that should be applied
+     * to the input stream of this datasource, to make it mailsafe. <p>
+     *
+     * The algorithm used here is: <br>
+     * <ul>
+     * <li>
+     * If the primary type of this datasource is "text" and if all
+     * the bytes in its input stream are US-ASCII, then the encoding
+     * is "7bit". If more than half of the bytes are non-US-ASCII, then
+     * the encoding is "base64". If less than half of the bytes are
+     * non-US-ASCII, then the encoding is "quoted-printable".
+     * <li>
+     * If the primary type of this datasource is not "text", then if
+     * all the bytes of its input stream are US-ASCII, the encoding
+     * is "7bit". If there is even one non-US-ASCII character, the
+     * encoding is "base64".
+     * </ul>
+     *
+     * @param   ds      DataSource
+     * @return          the encoding. This is either "7bit",
+     *                  "quoted-printable" or "base64"
+     */
+    public static String getEncoding(DataSource ds) {
+        ContentType cType = null;
+        InputStream is = null;
+        String encoding = null;
+
+        try {
+            cType = new ContentType(ds.getContentType());
+            is = ds.getInputStream();
+        } catch (Exception ex) {
+            return "base64"; // what else ?!
+        }
+
+        boolean isText = cType.match("text/*");
+        // if not text, stop processing when we see non-ASCII
+        int i = checkAscii(is, ALL, !isText);
+        switch (i) {
+        case ALL_ASCII:
+            encoding = "7bit"; // all ascii
+            break;
+        case MOSTLY_ASCII:
+            encoding = "quoted-printable"; // mostly ascii
+            break;
+        default:
+            encoding = "base64"; // mostly binary
+            break;
+        }
+
+        // Close the input stream
+        try {
+            is.close();
+        } catch (IOException ioex) { }
+
+        return encoding;
+    }
+
+    /**
+     * Same as <code>getEncoding(DataSource)</code> except that instead
+     * of reading the data from an <code>InputStream</code> it uses the
+     * <code>writeTo</code> method to examine the data.  This is more
+     * efficient in the common case of a <code>DataHandler</code>
+     * created with an object and a MIME type (for example, a
+     * "text/plain" String) because all the I/O is done in this
+     * thread.  In the case requiring an <code>InputStream</code> the
+     * <code>DataHandler</code> uses a thread, a pair of pipe streams,
+     * and the <code>writeTo</code> method to produce the data. <p>
+     *
+     * @since   JavaMail 1.2
+     */
+    public static String getEncoding(DataHandler dh) {
+        ContentType cType = null;
+        String encoding = null;
+
+        /*
+         * Try to pick the most efficient means of determining the
+         * encoding.  If this DataHandler was created using a DataSource,
+         * the getEncoding(DataSource) method is typically faster.  If
+         * the DataHandler was created with an object, this method is
+         * much faster.  To distinguish the two cases, we use a heuristic.
+         * A DataHandler created with an object will always have a null name.
+         * A DataHandler created with a DataSource will usually have a
+         * non-null name.
+         *
+         * XXX - This is actually quite a disgusting hack, but it makes
+         *       a common case run over twice as fast.
+         */
+        if (dh.getName() != null)
+            return getEncoding(dh.getDataSource());
+
+        try {
+            cType = new ContentType(dh.getContentType());
+        } catch (Exception ex) {
+            return "base64"; // what else ?!
+        }
+
+        if (cType.match("text/*")) {
+            // Check all of the available bytes
+            AsciiOutputStream aos = new AsciiOutputStream(false, false);
+            try {
+                dh.writeTo(aos);
+            } catch (IOException ex) { }        // ignore it
+            switch (aos.getAscii()) {
+            case ALL_ASCII:
+                encoding = "7bit"; // all ascii
+                break;
+            case MOSTLY_ASCII:
+                encoding = "quoted-printable"; // mostly ascii
+                break;
+            default:
+                encoding = "base64"; // mostly binary
+                break;
+            }
+        } else { // not "text"
+            // Check all of available bytes, break out if we find
+            // at least one non-US-ASCII character
+            AsciiOutputStream aos =
+                        new AsciiOutputStream(true, encodeEolStrict);
+            try {
+                dh.writeTo(aos);
+            } catch (IOException ex) { }        // ignore it
+            if (aos.getAscii() == ALL_ASCII) // all ascii
+                encoding = "7bit";
+            else // found atleast one non-ascii character, use b64
+                encoding = "base64";
+        }
+
+        return encoding;
+    }
+
+    /**
+     * Decode the given input stream. The Input stream returned is
+     * the decoded input stream. All the encodings defined in RFC 2045
+     * are supported here. They include "base64", "quoted-printable",
+     * "7bit", "8bit", and "binary". In addition, "uuencode" is also
+     * supported.
+     *
+     * @param   is              input stream
+     * @param   encoding        the encoding of the stream.
+     * @return                  decoded input stream.
+     */
+    public static InputStream decode(InputStream is, String encoding)
+                throws MessagingException {
+        if (encoding.equalsIgnoreCase("base64"))
+            return new BASE64DecoderStream(is);
+        else if (encoding.equalsIgnoreCase("quoted-printable"))
+            return new QPDecoderStream(is);
+        else if (encoding.equalsIgnoreCase("uuencode") ||
+                 encoding.equalsIgnoreCase("x-uuencode") ||
+                 encoding.equalsIgnoreCase("x-uue"))
+            return new UUDecoderStream(is);
+        else if (encoding.equalsIgnoreCase("binary") ||
+                 encoding.equalsIgnoreCase("7bit") ||
+                 encoding.equalsIgnoreCase("8bit"))
+            return is;
+        else
+            throw new MessagingException("Unknown encoding: " + encoding);
+    }
+
+    /**
+     * Wrap an encoder around the given output stream.
+     * All the encodings defined in RFC 2045 are supported here.
+     * They include "base64", "quoted-printable", "7bit", "8bit" and
+     * "binary". In addition, "uuencode" is also supported.
+     *
+     * @param   os              output stream
+     * @param   encoding        the encoding of the stream.
+     * @return                  output stream that applies the
+     *                          specified encoding.
+     */
+    public static OutputStream encode(OutputStream os, String encoding)
+                throws MessagingException {
+        if (encoding == null)
+            return os;
+        else if (encoding.equalsIgnoreCase("base64"))
+            return new BASE64EncoderStream(os);
+        else if (encoding.equalsIgnoreCase("quoted-printable"))
+            return new QPEncoderStream(os);
+        else if (encoding.equalsIgnoreCase("uuencode") ||
+                 encoding.equalsIgnoreCase("x-uuencode") ||
+                 encoding.equalsIgnoreCase("x-uue"))
+            return new UUEncoderStream(os);
+        else if (encoding.equalsIgnoreCase("binary") ||
+                 encoding.equalsIgnoreCase("7bit") ||
+                 encoding.equalsIgnoreCase("8bit"))
+            return os;
+        else
+            throw new MessagingException("Unknown encoding: " +encoding);
+    }
+
+    /**
+     * Wrap an encoder around the given output stream.
+     * All the encodings defined in RFC 2045 are supported here.
+     * They include "base64", "quoted-printable", "7bit", "8bit" and
+     * "binary". In addition, "uuencode" is also supported.
+     * The <code>filename</code> parameter is used with the "uuencode"
+     * encoding and is included in the encoded output.
+     *
+     * @param   os              output stream
+     * @param   encoding        the encoding of the stream.
+     * @param   filename        name for the file being encoded (only used
+     *                          with uuencode)
+     * @return                  output stream that applies the
+     *                          specified encoding.
+     * @since                   JavaMail 1.2
+     */
+    public static OutputStream encode(OutputStream os, String encoding,
+                                      String filename)
+                throws MessagingException {
+        if (encoding == null)
+            return os;
+        else if (encoding.equalsIgnoreCase("base64"))
+            return new BASE64EncoderStream(os);
+        else if (encoding.equalsIgnoreCase("quoted-printable"))
+            return new QPEncoderStream(os);
+        else if (encoding.equalsIgnoreCase("uuencode") ||
+                 encoding.equalsIgnoreCase("x-uuencode") ||
+                 encoding.equalsIgnoreCase("x-uue"))
+            return new UUEncoderStream(os, filename);
+        else if (encoding.equalsIgnoreCase("binary") ||
+                 encoding.equalsIgnoreCase("7bit") ||
+                 encoding.equalsIgnoreCase("8bit"))
+            return os;
+        else
+            throw new MessagingException("Unknown encoding: " +encoding);
+    }
+
+    /**
+     * Encode a RFC 822 "text" token into mail-safe form as per
+     * RFC 2047. <p>
+     *
+     * The given Unicode string is examined for non US-ASCII
+     * characters. If the string contains only US-ASCII characters,
+     * it is returned as-is.  If the string contains non US-ASCII
+     * characters, it is first character-encoded using the platform's
+     * default charset, then transfer-encoded using either the B or
+     * Q encoding. The resulting bytes are then returned as a Unicode
+     * string containing only ASCII  characters. <p>
+     *
+     * Note that this method should be used to encode only
+     * "unstructured" RFC 822 headers. <p>
+     *
+     * Example of usage:
+     * <p><blockquote><pre>
+     *
+     *  MimeBodyPart part = ...
+     *  String rawvalue = "FooBar Mailer, Japanese version 1.1"
+     *  try {
+     *    // If we know for sure that rawvalue contains only US-ASCII
+     *    // characters, we can skip the encoding part
+     *    part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));
+     *  } catch (UnsupportedEncodingException e) {
+     *    // encoding failure
+     *  } catch (MessagingException me) {
+     *   // setHeader() failure
+     *  }
+     *
+     * </pre></blockquote><p>
+     *
+     * @param   text    unicode string
+     * @return  Unicode string containing only US-ASCII characters
+     * @exception UnsupportedEncodingException if the encoding fails
+     */
+    public static String encodeText(String text)
+                        throws UnsupportedEncodingException {
+        return encodeText(text, null, null);
+    }
+
+    /**
+     * Encode a RFC 822 "text" token into mail-safe form as per
+     * RFC 2047. <p>
+     *
+     * The given Unicode string is examined for non US-ASCII
+     * characters. If the string contains only US-ASCII characters,
+     * it is returned as-is.  If the string contains non US-ASCII
+     * characters, it is first character-encoded using the specified
+     * charset, then transfer-encoded using either the B or Q encoding.
+     * The resulting bytes are then returned as a Unicode string
+     * containing only ASCII characters. <p>
+     *
+     * Note that this method should be used to encode only
+     * "unstructured" RFC 822 headers.
+     *
+     * @param   text    the header value
+     * @param   charset the charset. If this parameter is null, the
+     *          platform's default chatset is used.
+     * @param   encoding the encoding to be used. Currently supported
+     *          values are "B" and "Q". If this parameter is null, then
+     *          the "Q" encoding is used if most of characters to be
+     *          encoded are in the ASCII charset, otherwise "B" encoding
+     *          is used.
+     * @return  Unicode string containing only US-ASCII characters
+     */
+    public static String encodeText(String text, String charset,
+                                    String encoding)
+                        throws UnsupportedEncodingException {
+        return encodeWord(text, charset, encoding, false);
+    }
+
+    /**
+     * Decode "unstructured" headers, that is, headers that are defined
+     * as '*text' as per RFC 822. <p>
+     *
+     * The string is decoded using the algorithm specified in
+     * RFC 2047, Section 6.1.1. If the charset-conversion fails
+     * for any sequence, an UnsupportedEncodingException is thrown.
+     * If the String is not an RFC 2047 style encoded header, it is
+     * returned as-is <p>
+     *
+     * Example of usage:
+     * <p><blockquote><pre>
+     *
+     *  MimeBodyPart part = ...
+     *  String rawvalue = null;
+     *  String  value = null;
+     *  try {
+     *    if ((rawvalue = part.getHeader("X-mailer")[0]) != null)
+     *      value = MimeUtility.decodeText(rawvalue);
+     *  } catch (UnsupportedEncodingException e) {
+     *      // Don't care
+     *      value = rawvalue;
+     *  } catch (MessagingException me) { }
+     *
+     *  return value;
+     *
+     * </pre></blockquote><p>
+     *
+     * @param   etext   the possibly encoded value
+     * @exception       UnsupportedEncodingException if the charset
+     *                  conversion failed.
+     */
+    public static String decodeText(String etext)
+                throws UnsupportedEncodingException {
+        /*
+         * We look for sequences separated by "linear-white-space".
+         * (as per RFC 2047, Section 6.1.1)
+         * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL.
+         */
+        String lwsp = " \t\n\r";
+        StringTokenizer st;
+
+        /*
+         * First, lets do a quick run thru the string and check
+         * whether the sequence "=?"  exists at all. If none exists,
+         * we know there are no encoded-words in here and we can just
+         * return the string as-is, without suffering thru the later
+         * decoding logic.
+         * This handles the most common case of unencoded headers
+         * efficiently.
+         */
+        if (etext.indexOf("=?") == -1)
+            return etext;
+
+        // Encoded words found. Start decoding ...
+
+        st = new StringTokenizer(etext, lwsp, true);
+        StringBuffer sb = new StringBuffer();  // decode buffer
+        StringBuffer wsb = new StringBuffer(); // white space buffer
+        boolean prevWasEncoded = false;
+
+        while (st.hasMoreTokens()) {
+            char c;
+            String s = st.nextToken();
+            // If whitespace, append it to the whitespace buffer
+            if (((c = s.charAt(0)) == ' ') || (c == '\t') ||
+                (c == '\r') || (c == '\n'))
+                wsb.append(c);
+            else {
+                // Check if token is an 'encoded-word' ..
+                String word;
+                try {
+                    word = decodeWord(s);
+                    // Yes, this IS an 'encoded-word'.
+                    if (!prevWasEncoded && wsb.length() > 0) {
+                        // if the previous word was also encoded, we
+                        // should ignore the collected whitespace. Else
+                        // we include the whitespace as well.
+                        sb.append(wsb);
+                    }
+                    prevWasEncoded = true;
+                } catch (ParseException pex) {
+                    // This is NOT an 'encoded-word'.
+                    word = s;
+                    // possibly decode inner encoded words
+                    if (!decodeStrict)
+                        word = decodeInnerWords(word);
+                    // include colleced whitespace ..
+                    if (wsb.length() > 0)
+                        sb.append(wsb);
+                    prevWasEncoded = false;
+                }
+                sb.append(word); // append the actual word
+                wsb.setLength(0); // reset wsb for reuse
+            }
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Encode a RFC 822 "word" token into mail-safe form as per
+     * RFC 2047. <p>
+     *
+     * The given Unicode string is examined for non US-ASCII
+     * characters. If the string contains only US-ASCII characters,
+     * it is returned as-is.  If the string contains non US-ASCII
+     * characters, it is first character-encoded using the platform's
+     * default charset, then transfer-encoded using either the B or
+     * Q encoding. The resulting bytes are then returned as a Unicode
+     * string containing only ASCII  characters. <p>
+     *
+     * This method is meant to be used when creating RFC 822 "phrases".
+     * The InternetAddress class, for example, uses this to encode
+     * it's 'phrase' component.
+     *
+     * @param   text    unicode string
+     * @return  Array of Unicode strings containing only US-ASCII
+     *          characters.
+     * @exception UnsupportedEncodingException if the encoding fails
+     */
+    public static String encodeWord(String word)
+                        throws UnsupportedEncodingException {
+        return encodeWord(word, null, null);
+    }
+
+    /**
+     * Encode a RFC 822 "word" token into mail-safe form as per
+     * RFC 2047. <p>
+     *
+     * The given Unicode string is examined for non US-ASCII
+     * characters. If the string contains only US-ASCII characters,
+     * it is returned as-is.  If the string contains non US-ASCII
+     * characters, it is first character-encoded using the specified
+     * charset, then transfer-encoded using either the B or Q encoding.
+     * The resulting bytes are then returned as a Unicode string
+     * containing only ASCII characters. <p>
+     *
+     * @param   text    unicode string
+     * @param   charset the MIME charset
+     * @param   encoding the encoding to be used. Currently supported
+     *          values are "B" and "Q". If this parameter is null, then
+     *          the "Q" encoding is used if most of characters to be
+     *          encoded are in the ASCII charset, otherwise "B" encoding
+     *          is used.
+     * @return  Unicode string containing only US-ASCII characters
+     * @exception UnsupportedEncodingException if the encoding fails
+     */
+    public static String encodeWord(String word, String charset,
+                                    String encoding)
+                        throws UnsupportedEncodingException {
+        return encodeWord(word, charset, encoding, true);
+    }
+
+    /*
+     * Encode the given string. The parameter 'encodingWord' should
+     * be true if a RFC 822 "word" token is being encoded and false if a
+     * RFC 822 "text" token is being encoded. This is because the
+     * "Q" encoding defined in RFC 2047 has more restrictions when
+     * encoding "word" tokens. (Sigh)
+     */
+    private static String encodeWord(String string, String charset,
+                                     String encoding, boolean encodingWord)
+                        throws UnsupportedEncodingException {
+
+        // If 'string' contains only US-ASCII characters, just
+        // return it.
+        int ascii = checkAscii(string);
+        if (ascii == ALL_ASCII)
+            return string;
+
+        // Else, apply the specified charset conversion.
+        String jcharset;
+        if (charset == null) { // use default charset
+            jcharset = getDefaultJavaCharset(); // the java charset
+            charset = getDefaultMIMECharset(); // the MIME equivalent
+        } else // MIME charset -> java charset
+            jcharset = javaCharset(charset);
+
+        // If no transfer-encoding is specified, figure one out.
+        if (encoding == null) {
+            if (ascii != MOSTLY_NONASCII)
+                encoding = "Q";
+            else
+                encoding = "B";
+        }
+
+        boolean b64;
+        if (encoding.equalsIgnoreCase("B"))
+            b64 = true;
+        else if (encoding.equalsIgnoreCase("Q"))
+            b64 = false;
+        else
+            throw new UnsupportedEncodingException(
+                        "Unknown transfer encoding: " + encoding);
+
+        StringBuffer outb = new StringBuffer(); // the output buffer
+        doEncode(string, b64, jcharset,
+                 // As per RFC 2047, size of an encoded string should not
+                 // exceed 75 bytes.
+                 // 7 = size of "=?", '?', 'B'/'Q', '?', "?="
+                 75 - 7 - charset.length(), // the available space
+                 "=?" + charset + "?" + encoding + "?", // prefix
+                 true, encodingWord, outb);
+
+        return outb.toString();
+    }
+
+    private static void doEncode(String string, boolean b64,
+                String jcharset, int avail, String prefix,
+                boolean first, boolean encodingWord, StringBuffer buf)
+                        throws UnsupportedEncodingException {
+
+        // First find out what the length of the encoded version of
+        // 'string' would be.
+        byte[] bytes = string.getBytes(jcharset);
+        int len;
+        if (b64) // "B" encoding
+            len = BEncoderStream.encodedLength(bytes);
+        else // "Q"
+            len = QEncoderStream.encodedLength(bytes, encodingWord);
+
+        int size;
+        if ((len > avail) && ((size = string.length()) > 1)) {
+            // If the length is greater than 'avail', split 'string'
+            // into two and recurse.
+            doEncode(string.substring(0, size/2), b64, jcharset,
+                     avail, prefix, first, encodingWord, buf);
+            doEncode(string.substring(size/2, size), b64, jcharset,
+                     avail, prefix, false, encodingWord, buf);
+        } else {
+            // length <= than 'avail'. Encode the given string
+            ByteArrayOutputStream os = new ByteArrayOutputStream(BUFFER_SIZE);
+            OutputStream eos; // the encoder
+            if (b64) // "B" encoding
+                eos = new BEncoderStream(os);
+            else // "Q" encoding
+                eos = new QEncoderStream(os, encodingWord);
+
+            try { // do the encoding
+                eos.write(bytes);
+                eos.close();
+            } catch (IOException ioex) { }
+
+            byte[] encodedBytes = os.toByteArray(); // the encoded stuff
+            // Now write out the encoded (all ASCII) bytes into our
+            // StringBuffer
+            if (!first) // not the first line of this sequence
+                if (foldEncodedWords)
+                    buf.append("\r\n "); // start a continuation line
+                else
+                    buf.append(" "); // line will be folded later
+
+            buf.append(prefix);
+            for (int i = 0; i < encodedBytes.length; i++)
+                buf.append((char)encodedBytes[i]);
+            buf.append("?="); // terminate the current sequence
+        }
+    }
+
+    /**
+     * The string is parsed using the rules in RFC 2047 for parsing
+     * an "encoded-word". If the parse fails, a ParseException is
+     * thrown. Otherwise, it is transfer-decoded, and then
+     * charset-converted into Unicode. If the charset-conversion
+     * fails, an UnsupportedEncodingException is thrown.<p>
+     *
+     * @param   eword   the possibly encoded value
+     * @exception       ParseException if the string is not an
+     *                  encoded-word as per RFC 2047.
+     * @exception       UnsupportedEncodingException if the charset
+     *                  conversion failed.
+     */
+    public static String decodeWord(String eword)
+                throws ParseException, UnsupportedEncodingException {
+
+        if (!eword.startsWith("=?")) // not an encoded word
+            throw new ParseException();
+
+        // get charset
+        int start = 2; int pos;
+        if ((pos = eword.indexOf('?', start)) == -1)
+            throw new ParseException();
+        String charset = javaCharset(eword.substring(start, pos));
+
+        // get encoding
+        start = pos+1;
+        if ((pos = eword.indexOf('?', start)) == -1)
+            throw new ParseException();
+        String encoding = eword.substring(start, pos);
+
+        // get encoded-sequence
+        start = pos+1;
+        if ((pos = eword.indexOf("?=", start)) == -1)
+            throw new ParseException();
+        String word = eword.substring(start, pos);
+
+        try {
+            // Extract the bytes from word
+            ByteArrayInputStream bis =
+                new ByteArrayInputStream(ASCIIUtility.getBytes(word));
+
+            // Get the appropriate decoder
+            InputStream is;
+            if (encoding.equalsIgnoreCase("B"))
+                is = new BASE64DecoderStream(bis);
+            else if (encoding.equalsIgnoreCase("Q"))
+                is = new QDecoderStream(bis);
+            else
+                throw new UnsupportedEncodingException(
+                                "unknown encoding: " + encoding);
+
+            // For b64 & q, size of decoded word <= size of word. So
+            // the decoded bytes must fit into the 'bytes' array. This
+            // is certainly more efficient than writing bytes into a
+            // ByteArrayOutputStream and then pulling out the byte[]
+            // from it.
+            int count = bis.available();
+            byte[] bytes = new byte[count];
+            // count is set to the actual number of decoded bytes
+            count = is.read(bytes, 0, count);
+
+            // Finally, convert the decoded bytes into a String using
+            // the specified charset
+            String s = new String(bytes, 0, count, charset);
+            if (pos + 2 < eword.length()) {
+                // there's still more text in the string
+                String rest = eword.substring(pos + 2);
+                if (!decodeStrict)
+                    rest = decodeInnerWords(rest);
+                s += rest;
+            }
+            return s;
+        } catch (UnsupportedEncodingException uex) {
+            // explicitly catch and rethrow this exception, otherwise
+            // the below IOException catch will swallow this up!
+            throw uex;
+        } catch (IOException ioex) {
+            // Shouldn't happen.
+            throw new ParseException();
+        } catch (IllegalArgumentException iex) {
+            /* An unknown charset of the form ISO-XXX-XXX, will cause
+             * the JDK to throw an IllegalArgumentException ... Since the
+             * JDK will attempt to create a classname using this string,
+             * but valid classnames must not contain the character '-',
+             * and this results in an IllegalArgumentException, rather than
+             * the expected UnsupportedEncodingException. Yikes
+             */
+            throw new UnsupportedEncodingException();
+        }
+    }
+
+    /**
+     * Look for encoded words within a word.  The MIME spec doesn't
+     * allow this, but many broken mailers, especially Japanese mailers,
+     * produce such incorrect encodings.
+     */
+    private static String decodeInnerWords(String word)
+                                throws UnsupportedEncodingException {
+        int start = 0, i;
+        StringBuffer buf = new StringBuffer();
+        while ((i = word.indexOf("=?", start)) >= 0) {
+            buf.append(word.substring(start, i));
+            int end = word.indexOf("?=", i);
+            if (end < 0)
+                break;
+            String s = word.substring(i, end + 2);
+            try {
+                s = decodeWord(s);
+            } catch (ParseException pex) {
+                // ignore it, just use the original string
+            }
+            buf.append(s);
+            start = end + 2;
+        }
+        if (start == 0)
+            return word;
+        if (start < word.length())
+            buf.append(word.substring(start));
+        return buf.toString();
+    }
+
+    /**
+     * A utility method to quote a word, if the word contains any
+     * characters from the specified 'specials' list.<p>
+     *
+     * The <code>HeaderTokenizer</code> class defines two special
+     * sets of delimiters - MIME and RFC 822. <p>
+     *
+     * This method is typically used during the generation of
+     * RFC 822 and MIME header fields.
+     *
+     * @param   word    word to be quoted
+     * @param   specials the set of special characters
+     * @return          the possibly quoted word
+     * @see     javax.mail.internet.HeaderTokenizer#MIME
+     * @see     javax.mail.internet.HeaderTokenizer#RFC822
+     */
+    public static String quote(String word, String specials) {
+        int len = word.length();
+
+        /*
+         * Look for any "bad" characters, Escape and
+         *  quote the entire string if necessary.
+         */
+        boolean needQuoting = false;
+        for (int i = 0; i < len; i++) {
+            char c = word.charAt(i);
+            if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
+                // need to escape them and then quote the whole string
+                StringBuffer sb = new StringBuffer(len + 3);
+                sb.append('"');
+                sb.append(word.substring(0, i));
+                int lastc = 0;
+                for (int j = i; j < len; j++) {
+                    char cc = word.charAt(j);
+                    if ((cc == '"') || (cc == '\\') ||
+                        (cc == '\r') || (cc == '\n'))
+                        if (cc == '\n' && lastc == '\r')
+                            ;   // do nothing, CR was already escaped
+                        else
+                            sb.append('\\');    // Escape the character
+                    sb.append(cc);
+                    lastc = cc;
+                }
+                sb.append('"');
+                return sb.toString();
+            } else if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0)
+                // These characters cause the string to be quoted
+                needQuoting = true;
+        }
+
+        if (needQuoting) {
+            StringBuffer sb = new StringBuffer(len + 2);
+            sb.append('"').append(word).append('"');
+            return sb.toString();
+        } else
+            return word;
+    }
+
+    /**
+     * Fold a string at linear whitespace so that each line is no longer
+     * than 76 characters, if possible.  If there are more than 76
+     * non-whitespace characters consecutively, the string is folded at
+     * the first whitespace after that sequence.  The parameter
+     * <code>used</code> indicates how many characters have been used in
+     * the current line; it is usually the length of the header name. <p>
+     *
+     * Note that line breaks in the string aren't escaped; they probably
+     * should be.
+     *
+     * @param   used    characters used in line so far
+     * @param   s       the string to fold
+     * @return          the folded string
+     */
+    /*public*/ static String fold(int used, String s) {
+        if (!foldText)
+            return s;
+
+        int end;
+        char c;
+        // Strip trailing spaces
+        for (end = s.length() - 1; end >= 0; end--) {
+            c = s.charAt(end);
+            if (c != ' ' && c != '\t')
+                break;
+        }
+        if (end != s.length() - 1)
+            s = s.substring(0, end + 1);
+
+        // if the string fits now, just return it
+        if (used + s.length() <= 76)
+            return s;
+
+        // have to actually fold the string
+        StringBuffer sb = new StringBuffer(s.length() + 4);
+        char lastc = 0;
+        while (used + s.length() > 76) {
+            int lastspace = -1;
+            for (int i = 0; i < s.length(); i++) {
+                if (lastspace != -1 && used + i > 76)
+                    break;
+                c = s.charAt(i);
+                if (c == ' ' || c == '\t')
+                    if (!(lastc == ' ' || lastc == '\t'))
+                        lastspace = i;
+                lastc = c;
+            }
+            if (lastspace == -1) {
+                // no space, use the whole thing
+                sb.append(s);
+                s = "";
+                used = 0;
+                break;
+            }
+            sb.append(s.substring(0, lastspace));
+            sb.append("\r\n");
+            lastc = s.charAt(lastspace);
+            sb.append(lastc);
+            s = s.substring(lastspace + 1);
+            used = 1;
+        }
+        sb.append(s);
+        return sb.toString();
+    }
+
+    /**
+     * Unfold a folded header.  Any line breaks that aren't escaped and
+     * are followed by whitespace are removed.
+     *
+     * @param   s       the string to unfold
+     * @return          the unfolded string
+     */
+    /*public*/ static String unfold(String s) {
+        if (!foldText)
+            return s;
+
+        StringBuffer sb = null;
+        int i;
+        while ((i = indexOfAny(s, "\r\n")) >= 0) {
+            int start = i;
+            int l = s.length();
+            i++;                // skip CR or NL
+            if (i < l && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n')
+                i++;    // skip LF
+            if (start == 0 || s.charAt(start - 1) != '\\') {
+                char c;
+                // if next line starts with whitespace, skip all of it
+                // XXX - always has to be true?
+                if (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) {
+                    i++;        // skip whitespace
+                    while (i < l && ((c = s.charAt(i)) == ' ' || c == '\t'))
+                        i++;
+                    if (sb == null)
+                        sb = new StringBuffer(s.length());
+                    if (start != 0) {
+                        sb.append(s.substring(0, start));
+                        sb.append(' ');
+                    }
+                    s = s.substring(i);
+                    continue;
+                }
+                // it's not a continuation line, just leave it in
+                if (sb == null)
+                    sb = new StringBuffer(s.length());
+                sb.append(s.substring(0, i));
+                s = s.substring(i);
+            } else {
+                // there's a backslash at "start - 1"
+                // strip it out, but leave in the line break
+                if (sb == null)
+                    sb = new StringBuffer(s.length());
+                sb.append(s.substring(0, start - 1));
+                sb.append(s.substring(start, i));
+                s = s.substring(i);
+            }
+        }
+        if (sb != null) {
+            sb.append(s);
+            return sb.toString();
+        } else
+            return s;
+    }
+
+    /**
+     * Return the first index of any of the characters in "any" in "s",
+     * or -1 if none are found.
+     *
+     * This should be a method on String.
+     */
+    private static int indexOfAny(String s, String any) {
+        return indexOfAny(s, any, 0);
+    }
+
+    private static int indexOfAny(String s, String any, int start) {
+        try {
+            int len = s.length();
+            for (int i = start; i < len; i++) {
+                if (any.indexOf(s.charAt(i)) >= 0)
+                    return i;
+            }
+            return -1;
+        } catch (StringIndexOutOfBoundsException e) {
+            return -1;
+        }
+    }
+
+    /**
+     * Convert a MIME charset name into a valid Java charset name. <p>
+     *
+     * @param charset   the MIME charset name
+     * @return  the Java charset equivalent. If a suitable mapping is
+     *          not available, the passed in charset is itself returned.
+     */
+    public static String javaCharset(String charset) {
+        if (mime2java == null || charset == null)
+            // no mapping table, or charset parameter is null
+            return charset;
+
+        String alias = (String)mime2java.get(charset.toLowerCase());
+        return alias == null ? charset : alias;
+    }
+
+    /**
+     * Convert a java charset into its MIME charset name. <p>
+     *
+     * Note that a future version of JDK (post 1.2) might provide
+     * this functionality, in which case, we may deprecate this
+     * method then.
+     *
+     * @param   charset    the JDK charset
+     * @return          the MIME/IANA equivalent. If a mapping
+     *                  is not possible, the passed in charset itself
+     *                  is returned.
+     * @since           JavaMail 1.1
+     */
+    public static String mimeCharset(String charset) {
+        if (java2mime == null || charset == null)
+            // no mapping table or charset param is null
+            return charset;
+
+        String alias = (String)java2mime.get(charset.toLowerCase());
+        return alias == null ? charset : alias;
+    }
+
+    private static String defaultJavaCharset;
+    private static String defaultMIMECharset;
+
+    /**
+     * Get the default charset corresponding to the system's current
+     * default locale.  If the System property <code>mail.mime.charset</code>
+     * is set, a system charset corresponding to this MIME charset will be
+     * returned. <p>
+     *
+     * @return  the default charset of the system's default locale,
+     *          as a Java charset. (NOT a MIME charset)
+     * @since   JavaMail 1.1
+     */
+    public static String getDefaultJavaCharset() {
+        if (defaultJavaCharset == null) {
+            /*
+             * If mail.mime.charset is set, it controls the default
+             * Java charset as well.
+             */
+            String mimecs = null;
+
+            mimecs = SAAJUtil.getSystemProperty("mail.mime.charset");
+
+            if (mimecs != null && mimecs.length() > 0) {
+                defaultJavaCharset = javaCharset(mimecs);
+                return defaultJavaCharset;
+            }
+
+            try {
+                defaultJavaCharset = System.getProperty("file.encoding",
+                                                        "8859_1");
+            } catch (SecurityException sex) {
+
+                class NullInputStream extends InputStream {
+                    public int read() {
+                        return 0;
+                    }
+                }
+                InputStreamReader reader =
+                        new InputStreamReader(new NullInputStream());
+                defaultJavaCharset = reader.getEncoding();
+                if (defaultJavaCharset == null)
+                    defaultJavaCharset = "8859_1";
+            }
+        }
+
+        return defaultJavaCharset;
+    }
+
+    /*
+     * Get the default MIME charset for this locale.
+     */
+    static String getDefaultMIMECharset() {
+        if (defaultMIMECharset == null) {
+                defaultMIMECharset = SAAJUtil.getSystemProperty("mail.mime.charset");
+        }
+        if (defaultMIMECharset == null)
+            defaultMIMECharset = mimeCharset(getDefaultJavaCharset());
+        return defaultMIMECharset;
+    }
+
+    // Tables to map MIME charset names to Java names and vice versa.
+    // XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset
+    private static Hashtable mime2java;
+    private static Hashtable java2mime;
+
+    static {
+        java2mime = new Hashtable(40);
+        mime2java = new Hashtable(10);
+
+        try {
+            // Use this class's classloader to load the mapping file
+            // XXX - we should use SecuritySupport, but it's in another package
+            InputStream is =
+                    com.sun.xml.internal.messaging.saaj.packaging.mime.internet.MimeUtility.class.getResourceAsStream(
+                    "/META-INF/javamail.charset.map");
+
+            if (is != null) {
+                is = new LineInputStream(is);
+
+                // Load the JDK-to-MIME charset mapping table
+                loadMappings((LineInputStream)is, java2mime);
+
+                // Load the MIME-to-JDK charset mapping table
+                loadMappings((LineInputStream)is, mime2java);
+            }
+        } catch (Exception ex) { }
+
+        // If we didn't load the tables, e.g., because we didn't have
+        // permission, load them manually.  The entries here should be
+        // the same as the default javamail.charset.map.
+        if (java2mime.isEmpty()) {
+            java2mime.put("8859_1", "ISO-8859-1");
+            java2mime.put("iso8859_1", "ISO-8859-1");
+            java2mime.put("ISO8859-1", "ISO-8859-1");
+
+            java2mime.put("8859_2", "ISO-8859-2");
+            java2mime.put("iso8859_2", "ISO-8859-2");
+            java2mime.put("ISO8859-2", "ISO-8859-2");
+
+            java2mime.put("8859_3", "ISO-8859-3");
+            java2mime.put("iso8859_3", "ISO-8859-3");
+            java2mime.put("ISO8859-3", "ISO-8859-3");
+
+            java2mime.put("8859_4", "ISO-8859-4");
+            java2mime.put("iso8859_4", "ISO-8859-4");
+            java2mime.put("ISO8859-4", "ISO-8859-4");
+
+            java2mime.put("8859_5", "ISO-8859-5");
+            java2mime.put("iso8859_5", "ISO-8859-5");
+            java2mime.put("ISO8859-5", "ISO-8859-5");
+
+            java2mime.put("8859_6", "ISO-8859-6");
+            java2mime.put("iso8859_6", "ISO-8859-6");
+            java2mime.put("ISO8859-6", "ISO-8859-6");
+
+            java2mime.put("8859_7", "ISO-8859-7");
+            java2mime.put("iso8859_7", "ISO-8859-7");
+            java2mime.put("ISO8859-7", "ISO-8859-7");
+
+            java2mime.put("8859_8", "ISO-8859-8");
+            java2mime.put("iso8859_8", "ISO-8859-8");
+            java2mime.put("ISO8859-8", "ISO-8859-8");
+
+            java2mime.put("8859_9", "ISO-8859-9");
+            java2mime.put("iso8859_9", "ISO-8859-9");
+            java2mime.put("ISO8859-9", "ISO-8859-9");
+
+            java2mime.put("SJIS", "Shift_JIS");
+            java2mime.put("MS932", "Shift_JIS");
+            java2mime.put("JIS", "ISO-2022-JP");
+            java2mime.put("ISO2022JP", "ISO-2022-JP");
+            java2mime.put("EUC_JP", "euc-jp");
+            java2mime.put("KOI8_R", "koi8-r");
+            java2mime.put("EUC_CN", "euc-cn");
+            java2mime.put("EUC_TW", "euc-tw");
+            java2mime.put("EUC_KR", "euc-kr");
+        }
+        if (mime2java.isEmpty()) {
+            mime2java.put("iso-2022-cn", "ISO2022CN");
+            mime2java.put("iso-2022-kr", "ISO2022KR");
+            mime2java.put("utf-8", "UTF8");
+            mime2java.put("utf8", "UTF8");
+            mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
+            mime2java.put("ja_jp.eucjp", "EUCJIS");
+            mime2java.put("euc-kr", "KSC5601");
+            mime2java.put("euckr", "KSC5601");
+            mime2java.put("us-ascii", "ISO-8859-1");
+            mime2java.put("x-us-ascii", "ISO-8859-1");
+        }
+    }
+
+    private static void loadMappings(LineInputStream is, Hashtable table) {
+        String currLine;
+
+        while (true) {
+            try {
+                currLine = is.readLine();
+            } catch (IOException ioex) {
+                break; // error in reading, stop
+            }
+
+            if (currLine == null) // end of file, stop
+                break;
+            if (currLine.startsWith("--") && currLine.endsWith("--"))
+                // end of this table
+                break;
+
+            // ignore empty lines and comments
+            if (currLine.trim().length() == 0 || currLine.startsWith("#"))
+                continue;
+
+            // A valid entry is of the form <key><separator><value>
+            // where, <separator> := SPACE | HT. Parse this
+            StringTokenizer tk = new StringTokenizer(currLine, " \t");
+            try {
+                String key = tk.nextToken();
+                String value = tk.nextToken();
+                table.put(key.toLowerCase(), value);
+            } catch (NoSuchElementException nex) { }
+        }
+    }
+
+    static final int ALL_ASCII          = 1;
+    static final int MOSTLY_ASCII       = 2;
+    static final int MOSTLY_NONASCII    = 3;
+
+    /**
+     * Check if the given string contains non US-ASCII characters.
+     * @param   s       string
+     * @return          ALL_ASCII if all characters in the string
+     *                  belong to the US-ASCII charset. MOSTLY_ASCII
+     *                  if more than half of the available characters
+     *                  are US-ASCII characters. Else MOSTLY_NONASCII.
+     */
+    static int checkAscii(String s) {
+        int ascii = 0, non_ascii = 0;
+        int l = s.length();
+
+        for (int i = 0; i < l; i++) {
+            if (nonascii((int)s.charAt(i))) // non-ascii
+                non_ascii++;
+            else
+                ascii++;
+        }
+
+        if (non_ascii == 0)
+            return ALL_ASCII;
+        if (ascii > non_ascii)
+            return MOSTLY_ASCII;
+
+        return MOSTLY_NONASCII;
+    }
+
+    /**
+     * Check if the given byte array contains non US-ASCII characters.
+     * @param   b       byte array
+     * @return          ALL_ASCII if all characters in the string
+     *                  belong to the US-ASCII charset. MOSTLY_ASCII
+     *                  if more than half of the available characters
+     *                  are US-ASCII characters. Else MOSTLY_NONASCII.
+     *
+     * XXX - this method is no longer used
+     */
+    static int checkAscii(byte[] b) {
+        int ascii = 0, non_ascii = 0;
+
+        for (int i=0; i < b.length; i++) {
+            // The '&' operator automatically causes b[i] to be promoted
+            // to an int, and we mask out the higher bytes in the int
+            // so that the resulting value is not a negative integer.
+            if (nonascii(b[i] & 0xff)) // non-ascii
+                non_ascii++;
+            else
+                ascii++;
+        }
+
+        if (non_ascii == 0)
+            return ALL_ASCII;
+        if (ascii > non_ascii)
+            return MOSTLY_ASCII;
+
+        return MOSTLY_NONASCII;
+    }
+
+    /**
+     * Check if the given input stream contains non US-ASCII characters.
+     * Upto <code>max</code> bytes are checked. If <code>max</code> is
+     * set to <code>ALL</code>, then all the bytes available in this
+     * input stream are checked. If <code>breakOnNonAscii</code> is true
+     * the check terminates when the first non-US-ASCII character is
+     * found and MOSTLY_NONASCII is returned. Else, the check continues
+     * till <code>max</code> bytes or till the end of stream.
+     *
+     * @param   is      the input stream
+     * @param   max     maximum bytes to check for. The special value
+     *                  ALL indicates that all the bytes in this input
+     *                  stream must be checked.
+     * @param   breakOnNonAscii if <code>true</code>, then terminate the
+     *                  the check when the first non-US-ASCII character
+     *                  is found.
+     * @return          ALL_ASCII if all characters in the string
+     *                  belong to the US-ASCII charset. MOSTLY_ASCII
+     *                  if more than half of the available characters
+     *                  are US-ASCII characters. Else MOSTLY_NONASCII.
+     */
+    static int checkAscii(InputStream is, int max, boolean breakOnNonAscii) {
+        int ascii = 0, non_ascii = 0;
+        int len;
+        int block = 4096;
+        int linelen = 0;
+        boolean longLine = false, badEOL = false;
+        boolean checkEOL = encodeEolStrict && breakOnNonAscii;
+        byte buf[] = null;
+        if (max != 0) {
+            block = (max == ALL) ? 4096 : Math.min(max, 4096);
+            buf = new byte[block];
+        }
+        while (max != 0) {
+            try {
+                if ((len = is.read(buf, 0, block)) == -1)
+                    break;
+                int lastb = 0;
+                for (int i = 0; i < len; i++) {
+                    // The '&' operator automatically causes b[i] to
+                    // be promoted to an int, and we mask out the higher
+                    // bytes in the int so that the resulting value is
+                    // not a negative integer.
+                    int b = buf[i] & 0xff;
+                    if (checkEOL &&
+                            ((lastb == '\r' && b != '\n') ||
+                            (lastb != '\r' && b == '\n')))
+                        badEOL = true;
+                    if (b == '\r' || b == '\n')
+                        linelen = 0;
+                    else {
+                        linelen++;
+                        if (linelen > 998)      // 1000 - CRLF
+                            longLine = true;
+                    }
+                    if (nonascii(b)) {  // non-ascii
+                        if (breakOnNonAscii) // we are done
+                            return MOSTLY_NONASCII;
+                        else
+                            non_ascii++;
+                    } else
+                        ascii++;
+                    lastb = b;
+                }
+            } catch (IOException ioex) {
+                break;
+            }
+            if (max != ALL)
+                max -= len;
+        }
+
+        if (max == 0 && breakOnNonAscii)
+            // We have been told to break on the first non-ascii character.
+            // We haven't got any non-ascii character yet, but then we
+            // have not checked all of the available bytes either. So we
+            // cannot say for sure that this input stream is ALL_ASCII,
+            // and hence we must play safe and return MOSTLY_NONASCII
+
+            return MOSTLY_NONASCII;
+
+        if (non_ascii == 0) { // no non-us-ascii characters so far
+            // If we're looking at non-text data, and we saw CR without LF
+            // or vice versa, consider this mostly non-ASCII so that it
+            // will be base64 encoded (since the quoted-printable encoder
+            // doesn't encode this case properly).
+            if (badEOL)
+                return MOSTLY_NONASCII;
+            // if we've seen a long line, we degrade to mostly ascii
+            else if (longLine)
+                return MOSTLY_ASCII;
+            else
+                return ALL_ASCII;
+        }
+        if (ascii > non_ascii) // mostly ascii
+            return MOSTLY_ASCII;
+        return MOSTLY_NONASCII;
+    }
+
+    static final boolean nonascii(int b) {
+        return b >= 0177 || (b < 040 && b != '\r' && b != '\n' && b != '\t');
+    }
+}
+
+/**
+ * An OutputStream that determines whether the data written to
+ * it is all ASCII, mostly ASCII, or mostly non-ASCII.
+ */
+class AsciiOutputStream extends OutputStream {
+    private boolean breakOnNonAscii;
+    private int ascii = 0, non_ascii = 0;
+    private int linelen = 0;
+    private boolean longLine = false;
+    private boolean badEOL = false;
+    private boolean checkEOL = false;
+    private int lastb = 0;
+    private int ret = 0;
+
+    public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) {
+        this.breakOnNonAscii = breakOnNonAscii;
+        checkEOL = encodeEolStrict && breakOnNonAscii;
+    }
+
+    public void write(int b) throws IOException {
+        check(b);
+    }
+
+    public void write(byte b[]) throws IOException {
+        write(b, 0, b.length);
+    }
+
+    public void write(byte b[], int off, int len) throws IOException {
+        len += off;
+        for (int i = off; i < len ; i++)
+            check(b[i]);
+    }
+
+    private final void check(int b) throws IOException {
+        b &= 0xff;
+        if (checkEOL &&
+                ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n')))
+            badEOL = true;
+        if (b == '\r' || b == '\n')
+            linelen = 0;
+        else {
+            linelen++;
+            if (linelen > 998)  // 1000 - CRLF
+                longLine = true;
+        }
+        if (MimeUtility.nonascii(b)) { // non-ascii
+            non_ascii++;
+            if (breakOnNonAscii) {      // we are done
+                ret = MimeUtility.MOSTLY_NONASCII;
+                throw new EOFException();
+            }
+        } else
+            ascii++;
+        lastb = b;
+    }
+
+    /**
+     * Return ASCII-ness of data stream.
+     */
+    public int getAscii() {
+        if (ret != 0)
+            return ret;
+        // If we're looking at non-text data, and we saw CR without LF
+        // or vice versa, consider this mostly non-ASCII so that it
+        // will be base64 encoded (since the quoted-printable encoder
+        // doesn't encode this case properly).
+        if (badEOL)
+            return MimeUtility.MOSTLY_NONASCII;
+        else if (non_ascii == 0) { // no non-us-ascii characters so far
+            // if we've seen a long line, we degrade to mostly ascii
+            if (longLine)
+                return MimeUtility.MOSTLY_ASCII;
+            else
+                return MimeUtility.ALL_ASCII;
+        }
+        if (ascii > non_ascii) // mostly ascii
+            return MimeUtility.MOSTLY_ASCII;
+        return MimeUtility.MOSTLY_NONASCII;
+    }
+}