diff -r cb15fc6cc038 -r 9a2e5d103695 jaxws/src/java.xml.soap/share/classes/com/sun/xml/internal/messaging/saaj/packaging/mime/internet/MimeUtility.java --- a/jaxws/src/java.xml.soap/share/classes/com/sun/xml/internal/messaging/saaj/packaging/mime/internet/MimeUtility.java Thu Jan 15 11:18:14 2015 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1507 +0,0 @@ -/* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- */ - -/* - * @(#)MimeUtility.java 1.45 03/03/10 - */ - - - -package com.sun.xml.internal.messaging.saaj.packaging.mime.internet; - -import java.io.*; -import java.util.*; - -import javax.activation.DataHandler; -import javax.activation.DataSource; - -import com.sun.xml.internal.messaging.saaj.packaging.mime.MessagingException; -import com.sun.xml.internal.messaging.saaj.packaging.mime.util.*; -import com.sun.xml.internal.messaging.saaj.util.SAAJUtil; - -/** - * This is a utility class that provides various MIME related - * functionality.

- * - * There is a set of methods to encode and decode MIME headers as - * per RFC 2047. A brief description of how such headers are handled is - * given below:

- * - * RFC 822 mail headers must contain only US-ASCII - * characters. Headers that contain non US-ASCII characters must be - * encoded so that they contain only US-ASCII characters. Basically, - * this process involves using either BASE64 or QP to encode certain - * characters. RFC 2047 describes this in detail.

- * - * In Java, Strings contain (16 bit) Unicode characters. ASCII is a - * subset of Unicode (and occupies the range 0 - 127). A String - * that contains only ASCII characters is already mail-safe. If the - * String contains non US-ASCII characters, it must be encoded. An - * additional complexity in this step is that since Unicode is not - * yet a widely used charset, one might want to first charset-encode - * the String into another charset and then do the transfer-encoding. - *

- * Note that to get the actual bytes of a mail-safe String (say, - * for sending over SMTP), one must do - *

- *
- *      byte[] bytes = string.getBytes("iso-8859-1");
- *
- * 

- * - * The setHeader and addHeader methods - * on MimeMessage and MimeBodyPart assume that the given header values - * are Unicode strings that contain only US-ASCII characters. Hence - * the callers of those methods must ensure that the values they pass - * do not contain non US-ASCII characters. The methods in this class - * help do this.

- * - * The getHeader family of methods on MimeMessage and - * MimeBodyPart return the raw header value. These might be encoded - * as per RFC 2047, and if so, must be decoded into Unicode Strings. - * The methods in this class help to do this.

- * - * Several System properties control strict conformance to the MIME - * spec. Note that these are not session properties but must be set - * globally as System properties.

- * - * The mail.mime.decodetext.strict property controls - * decoding of MIME encoded words. The MIME spec requires that encoded - * words start at the beginning of a whitespace separated word. Some - * mailers incorrectly include encoded words in the middle of a word. - * If the mail.mime.decodetext.strict System property is - * set to "false", an attempt will be made to decode these - * illegal encoded words. The default is true.

- * - * The mail.mime.encodeeol.strict property controls the - * choice of Content-Transfer-Encoding for MIME parts that are not of - * type "text". Often such parts will contain textual data for which - * an encoding that allows normal end of line conventions is appropriate. - * In rare cases, such a part will appear to contain entirely textual - * data, but will require an encoding that preserves CR and LF characters - * without change. If the mail.mime.encodeeol.strict - * System property is set to "true", such an encoding will - * be used when necessary. The default is false.

- * - * In addition, the mail.mime.charset System property can - * be used to specify the default MIME charset to use for encoded words - * and text parts that don't otherwise specify a charset. Normally, the - * default MIME charset is derived from the default Java charset, as - * specified in the file.encoding System property. Most - * applications will have no need to explicitly set the default MIME - * charset. In cases where the default MIME charset to be used for - * mail messages is different than the charset used for files stored on - * the system, this property should be set. - * - * @version 1.45, 03/03/10 - * @author John Mani - * @author Bill Shannon - */ - -public class MimeUtility { - - // This class cannot be instantiated - private MimeUtility() { } - - public static final int ALL = -1; - - private static final int BUFFER_SIZE = 1024; - private static boolean decodeStrict = true; - private static boolean encodeEolStrict = false; - private static boolean foldEncodedWords = false; - private static boolean foldText = true; - - static { - try { - String s = SAAJUtil.getSystemProperty("mail.mime.decodetext.strict"); - // default to true - decodeStrict = s == null || !s.equalsIgnoreCase("false"); - s = SAAJUtil.getSystemProperty("mail.mime.encodeeol.strict"); - // default to false - encodeEolStrict = s != null && s.equalsIgnoreCase("true"); - s = SAAJUtil.getSystemProperty("mail.mime.foldencodedwords"); - // default to false - foldEncodedWords = s != null && s.equalsIgnoreCase("true"); - s = SAAJUtil.getSystemProperty("mail.mime.foldtext"); - // default to true - foldText = s == null || !s.equalsIgnoreCase("false"); - } catch (SecurityException sex) { - // ignore it - } - } - - - /** - * Get the content-transfer-encoding that should be applied - * to the input stream of this datasource, to make it mailsafe.

- * - * The algorithm used here is:
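- * - * The bytes of the input stream are classified as all US-ASCII, - * mostly US-ASCII, or mostly non US-ASCII, which select "7bit", - * "quoted-printable" and "base64" respectively. - * For content types other than "text/*" the scan stops at the - * first non US-ASCII byte. - * If the content type or the input stream cannot be obtained, - * "base64" is returned. - *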

- * - * @param ds DataSource - * @return the encoding. This is either "7bit", - * "quoted-printable" or "base64" - */ - public static String getEncoding(DataSource ds) { - ContentType cType = null; - InputStream is = null; - String encoding = null; - - try { - cType = new ContentType(ds.getContentType()); - is = ds.getInputStream(); - } catch (Exception ex) { - return "base64"; // what else ?! - } - - boolean isText = cType.match("text/*"); - // if not text, stop processing when we see non-ASCII - int i = checkAscii(is, ALL, !isText); - switch (i) { - case ALL_ASCII: - encoding = "7bit"; // all ascii - break; - case MOSTLY_ASCII: - encoding = "quoted-printable"; // mostly ascii - break; - default: - encoding = "base64"; // mostly binary - break; - } - - // Close the input stream - try { - is.close(); - } catch (IOException ioex) { } - - return encoding; - } - - /** - * Same as getEncoding(DataSource) except that instead - * of reading the data from an InputStream it uses the - * writeTo method to examine the data. This is more - * efficient in the common case of a DataHandler - * created with an object and a MIME type (for example, a - * "text/plain" String) because all the I/O is done in this - * thread. In the case requiring an InputStream the - * DataHandler uses a thread, a pair of pipe streams, - * and the writeTo method to produce the data.

- * - * @since JavaMail 1.2 - */ - public static String getEncoding(DataHandler dh) { - ContentType cType = null; - String encoding = null; - - /* - * Try to pick the most efficient means of determining the - * encoding. If this DataHandler was created using a DataSource, - * the getEncoding(DataSource) method is typically faster. If - * the DataHandler was created with an object, this method is - * much faster. To distinguish the two cases, we use a heuristic. - * A DataHandler created with an object will always have a null name. - * A DataHandler created with a DataSource will usually have a - * non-null name. - * - * XXX - This is actually quite a disgusting hack, but it makes - * a common case run over twice as fast. - */ - if (dh.getName() != null) - return getEncoding(dh.getDataSource()); - - try { - cType = new ContentType(dh.getContentType()); - } catch (Exception ex) { - return "base64"; // what else ?! - } - - if (cType.match("text/*")) { - // Check all of the available bytes - AsciiOutputStream aos = new AsciiOutputStream(false, false); - try { - dh.writeTo(aos); - } catch (IOException ex) { } // ignore it - switch (aos.getAscii()) { - case ALL_ASCII: - encoding = "7bit"; // all ascii - break; - case MOSTLY_ASCII: - encoding = "quoted-printable"; // mostly ascii - break; - default: - encoding = "base64"; // mostly binary - break; - } - } else { // not "text" - // Check all of available bytes, break out if we find - // at least one non-US-ASCII character - AsciiOutputStream aos = - new AsciiOutputStream(true, encodeEolStrict); - try { - dh.writeTo(aos); - } catch (IOException ex) { } // ignore it - if (aos.getAscii() == ALL_ASCII) // all ascii - encoding = "7bit"; - else // found atleast one non-ascii character, use b64 - encoding = "base64"; - } - - return encoding; - } - - /** - * Decode the given input stream. The Input stream returned is - * the decoded input stream. All the encodings defined in RFC 2045 - * are supported here. 
They include "base64", "quoted-printable", - * "7bit", "8bit", and "binary". In addition, "uuencode" is also - * supported. - * - * @param is input stream - * @param encoding the encoding of the stream. - * @return decoded input stream. - */ - public static InputStream decode(InputStream is, String encoding) - throws MessagingException { - if (encoding.equalsIgnoreCase("base64")) - return new BASE64DecoderStream(is); - else if (encoding.equalsIgnoreCase("quoted-printable")) - return new QPDecoderStream(is); - else if (encoding.equalsIgnoreCase("uuencode") || - encoding.equalsIgnoreCase("x-uuencode") || - encoding.equalsIgnoreCase("x-uue")) - return new UUDecoderStream(is); - else if (encoding.equalsIgnoreCase("binary") || - encoding.equalsIgnoreCase("7bit") || - encoding.equalsIgnoreCase("8bit")) - return is; - else - throw new MessagingException("Unknown encoding: " + encoding); - } - - /** - * Wrap an encoder around the given output stream. - * All the encodings defined in RFC 2045 are supported here. - * They include "base64", "quoted-printable", "7bit", "8bit" and - * "binary". In addition, "uuencode" is also supported. - * - * @param os output stream - * @param encoding the encoding of the stream. - * @return output stream that applies the - * specified encoding. 
- */ - public static OutputStream encode(OutputStream os, String encoding) - throws MessagingException { - if (encoding == null) - return os; - else if (encoding.equalsIgnoreCase("base64")) - return new BASE64EncoderStream(os); - else if (encoding.equalsIgnoreCase("quoted-printable")) - return new QPEncoderStream(os); - else if (encoding.equalsIgnoreCase("uuencode") || - encoding.equalsIgnoreCase("x-uuencode") || - encoding.equalsIgnoreCase("x-uue")) - return new UUEncoderStream(os); - else if (encoding.equalsIgnoreCase("binary") || - encoding.equalsIgnoreCase("7bit") || - encoding.equalsIgnoreCase("8bit")) - return os; - else - throw new MessagingException("Unknown encoding: " +encoding); - } - - /** - * Wrap an encoder around the given output stream. - * All the encodings defined in RFC 2045 are supported here. - * They include "base64", "quoted-printable", "7bit", "8bit" and - * "binary". In addition, "uuencode" is also supported. - * The filename parameter is used with the "uuencode" - * encoding and is included in the encoded output. - * - * @param os output stream - * @param encoding the encoding of the stream. - * @param filename name for the file being encoded (only used - * with uuencode) - * @return output stream that applies the - * specified encoding. 
- * @since JavaMail 1.2 - */ - public static OutputStream encode(OutputStream os, String encoding, - String filename) - throws MessagingException { - if (encoding == null) - return os; - else if (encoding.equalsIgnoreCase("base64")) - return new BASE64EncoderStream(os); - else if (encoding.equalsIgnoreCase("quoted-printable")) - return new QPEncoderStream(os); - else if (encoding.equalsIgnoreCase("uuencode") || - encoding.equalsIgnoreCase("x-uuencode") || - encoding.equalsIgnoreCase("x-uue")) - return new UUEncoderStream(os, filename); - else if (encoding.equalsIgnoreCase("binary") || - encoding.equalsIgnoreCase("7bit") || - encoding.equalsIgnoreCase("8bit")) - return os; - else - throw new MessagingException("Unknown encoding: " +encoding); - } - - /** - * Encode a RFC 822 "text" token into mail-safe form as per - * RFC 2047.

- * - * The given Unicode string is examined for non US-ASCII - * characters. If the string contains only US-ASCII characters, - * it is returned as-is. If the string contains non US-ASCII - * characters, it is first character-encoded using the platform's - * default charset, then transfer-encoded using either the B or - * Q encoding. The resulting bytes are then returned as a Unicode - * string containing only ASCII characters.

- * - * Note that this method should be used to encode only - * "unstructured" RFC 822 headers.

- * - * Example of usage: - *

-     *
-     *  MimeBodyPart part = ...
-     *  String rawvalue = "FooBar Mailer, Japanese version 1.1"
-     *  try {
-     *    // If we know for sure that rawvalue contains only US-ASCII
-     *    // characters, we can skip the encoding part
-     *    part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));
-     *  } catch (UnsupportedEncodingException e) {
-     *    // encoding failure
-     *  } catch (MessagingException me) {
-     *   // setHeader() failure
-     *  }
-     *
-     * 

- * - * @param text unicode string - * @return Unicode string containing only US-ASCII characters - * @exception UnsupportedEncodingException if the encoding fails - */ - public static String encodeText(String text) - throws UnsupportedEncodingException { - return encodeText(text, null, null); - } - - /** - * Encode a RFC 822 "text" token into mail-safe form as per - * RFC 2047.

- * - * The given Unicode string is examined for non US-ASCII - * characters. If the string contains only US-ASCII characters, - * it is returned as-is. If the string contains non US-ASCII - * characters, it is first character-encoded using the specified - * charset, then transfer-encoded using either the B or Q encoding. - * The resulting bytes are then returned as a Unicode string - * containing only ASCII characters.

- * - * Note that this method should be used to encode only - * "unstructured" RFC 822 headers. - * - * @param text the header value - * @param charset the charset. If this parameter is null, the - * platform's default chatset is used. - * @param encoding the encoding to be used. Currently supported - * values are "B" and "Q". If this parameter is null, then - * the "Q" encoding is used if most of characters to be - * encoded are in the ASCII charset, otherwise "B" encoding - * is used. - * @return Unicode string containing only US-ASCII characters - */ - public static String encodeText(String text, String charset, - String encoding) - throws UnsupportedEncodingException { - return encodeWord(text, charset, encoding, false); - } - - /** - * Decode "unstructured" headers, that is, headers that are defined - * as '*text' as per RFC 822.

- * - * The string is decoded using the algorithm specified in - * RFC 2047, Section 6.1.1. If the charset-conversion fails - * for any sequence, an UnsupportedEncodingException is thrown. - * If the String is not an RFC 2047 style encoded header, it is - * returned as-is

- * - * Example of usage: - *

-     *
-     *  MimeBodyPart part = ...
-     *  String rawvalue = null;
-     *  String  value = null;
-     *  try {
-     *    if ((rawvalue = part.getHeader("X-mailer")[0]) != null)
-     *      value = MimeUtility.decodeText(rawvalue);
-     *  } catch (UnsupportedEncodingException e) {
-     *      // Don't care
-     *      value = rawvalue;
-     *  } catch (MessagingException me) { }
-     *
-     *  return value;
-     *
-     * 

- * - * @param etext the possibly encoded value - * @exception UnsupportedEncodingException if the charset - * conversion failed. - */ - public static String decodeText(String etext) - throws UnsupportedEncodingException { - /* - * We look for sequences separated by "linear-white-space". - * (as per RFC 2047, Section 6.1.1) - * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL. - */ - String lwsp = " \t\n\r"; - StringTokenizer st; - - /* - * First, lets do a quick run thru the string and check - * whether the sequence "=?" exists at all. If none exists, - * we know there are no encoded-words in here and we can just - * return the string as-is, without suffering thru the later - * decoding logic. - * This handles the most common case of unencoded headers - * efficiently. - */ - if (etext.indexOf("=?") == -1) - return etext; - - // Encoded words found. Start decoding ... - - st = new StringTokenizer(etext, lwsp, true); - StringBuffer sb = new StringBuffer(); // decode buffer - StringBuffer wsb = new StringBuffer(); // white space buffer - boolean prevWasEncoded = false; - - while (st.hasMoreTokens()) { - char c; - String s = st.nextToken(); - // If whitespace, append it to the whitespace buffer - if (((c = s.charAt(0)) == ' ') || (c == '\t') || - (c == '\r') || (c == '\n')) - wsb.append(c); - else { - // Check if token is an 'encoded-word' .. - String word; - try { - word = decodeWord(s); - // Yes, this IS an 'encoded-word'. - if (!prevWasEncoded && wsb.length() > 0) { - // if the previous word was also encoded, we - // should ignore the collected whitespace. Else - // we include the whitespace as well. - sb.append(wsb); - } - prevWasEncoded = true; - } catch (ParseException pex) { - // This is NOT an 'encoded-word'. - word = s; - // possibly decode inner encoded words - if (!decodeStrict) - word = decodeInnerWords(word); - // include colleced whitespace .. 
- if (wsb.length() > 0) - sb.append(wsb); - prevWasEncoded = false; - } - sb.append(word); // append the actual word - wsb.setLength(0); // reset wsb for reuse - } - } - return sb.toString(); - } - - /** - * Encode a RFC 822 "word" token into mail-safe form as per - * RFC 2047.

- * - * The given Unicode string is examined for non US-ASCII - * characters. If the string contains only US-ASCII characters, - * it is returned as-is. If the string contains non US-ASCII - * characters, it is first character-encoded using the platform's - * default charset, then transfer-encoded using either the B or - * Q encoding. The resulting bytes are then returned as a Unicode - * string containing only ASCII characters.

- * - * This method is meant to be used when creating RFC 822 "phrases". - * The InternetAddress class, for example, uses this to encode - * it's 'phrase' component. - * - * @param text unicode string - * @return Array of Unicode strings containing only US-ASCII - * characters. - * @exception UnsupportedEncodingException if the encoding fails - */ - public static String encodeWord(String word) - throws UnsupportedEncodingException { - return encodeWord(word, null, null); - } - - /** - * Encode a RFC 822 "word" token into mail-safe form as per - * RFC 2047.

- * - * The given Unicode string is examined for non US-ASCII - * characters. If the string contains only US-ASCII characters, - * it is returned as-is. If the string contains non US-ASCII - * characters, it is first character-encoded using the specified - * charset, then transfer-encoded using either the B or Q encoding. - * The resulting bytes are then returned as a Unicode string - * containing only ASCII characters.

- * - * @param text unicode string - * @param charset the MIME charset - * @param encoding the encoding to be used. Currently supported - * values are "B" and "Q". If this parameter is null, then - * the "Q" encoding is used if most of characters to be - * encoded are in the ASCII charset, otherwise "B" encoding - * is used. - * @return Unicode string containing only US-ASCII characters - * @exception UnsupportedEncodingException if the encoding fails - */ - public static String encodeWord(String word, String charset, - String encoding) - throws UnsupportedEncodingException { - return encodeWord(word, charset, encoding, true); - } - - /* - * Encode the given string. The parameter 'encodingWord' should - * be true if a RFC 822 "word" token is being encoded and false if a - * RFC 822 "text" token is being encoded. This is because the - * "Q" encoding defined in RFC 2047 has more restrictions when - * encoding "word" tokens. (Sigh) - */ - private static String encodeWord(String string, String charset, - String encoding, boolean encodingWord) - throws UnsupportedEncodingException { - - // If 'string' contains only US-ASCII characters, just - // return it. - int ascii = checkAscii(string); - if (ascii == ALL_ASCII) - return string; - - // Else, apply the specified charset conversion. - String jcharset; - if (charset == null) { // use default charset - jcharset = getDefaultJavaCharset(); // the java charset - charset = getDefaultMIMECharset(); // the MIME equivalent - } else // MIME charset -> java charset - jcharset = javaCharset(charset); - - // If no transfer-encoding is specified, figure one out. 
- if (encoding == null) { - if (ascii != MOSTLY_NONASCII) - encoding = "Q"; - else - encoding = "B"; - } - - boolean b64; - if (encoding.equalsIgnoreCase("B")) - b64 = true; - else if (encoding.equalsIgnoreCase("Q")) - b64 = false; - else - throw new UnsupportedEncodingException( - "Unknown transfer encoding: " + encoding); - - StringBuffer outb = new StringBuffer(); // the output buffer - doEncode(string, b64, jcharset, - // As per RFC 2047, size of an encoded string should not - // exceed 75 bytes. - // 7 = size of "=?", '?', 'B'/'Q', '?', "?=" - 75 - 7 - charset.length(), // the available space - "=?" + charset + "?" + encoding + "?", // prefix - true, encodingWord, outb); - - return outb.toString(); - } - - private static void doEncode(String string, boolean b64, - String jcharset, int avail, String prefix, - boolean first, boolean encodingWord, StringBuffer buf) - throws UnsupportedEncodingException { - - // First find out what the length of the encoded version of - // 'string' would be. - byte[] bytes = string.getBytes(jcharset); - int len; - if (b64) // "B" encoding - len = BEncoderStream.encodedLength(bytes); - else // "Q" - len = QEncoderStream.encodedLength(bytes, encodingWord); - - int size; - if ((len > avail) && ((size = string.length()) > 1)) { - // If the length is greater than 'avail', split 'string' - // into two and recurse. - doEncode(string.substring(0, size/2), b64, jcharset, - avail, prefix, first, encodingWord, buf); - doEncode(string.substring(size/2, size), b64, jcharset, - avail, prefix, false, encodingWord, buf); - } else { - // length <= than 'avail'. 
Encode the given string - ByteArrayOutputStream os = new ByteArrayOutputStream(BUFFER_SIZE); - OutputStream eos; // the encoder - if (b64) // "B" encoding - eos = new BEncoderStream(os); - else // "Q" encoding - eos = new QEncoderStream(os, encodingWord); - - try { // do the encoding - eos.write(bytes); - eos.close(); - } catch (IOException ioex) { } - - byte[] encodedBytes = os.toByteArray(); // the encoded stuff - // Now write out the encoded (all ASCII) bytes into our - // StringBuffer - if (!first) // not the first line of this sequence - if (foldEncodedWords) - buf.append("\r\n "); // start a continuation line - else - buf.append(" "); // line will be folded later - - buf.append(prefix); - for (int i = 0; i < encodedBytes.length; i++) - buf.append((char)encodedBytes[i]); - buf.append("?="); // terminate the current sequence - } - } - - /** - * The string is parsed using the rules in RFC 2047 for parsing - * an "encoded-word". If the parse fails, a ParseException is - * thrown. Otherwise, it is transfer-decoded, and then - * charset-converted into Unicode. If the charset-conversion - * fails, an UnsupportedEncodingException is thrown.

- * - * @param eword the possibly encoded value - * @exception ParseException if the string is not an - * encoded-word as per RFC 2047. - * @exception UnsupportedEncodingException if the charset - * conversion failed. - */ - public static String decodeWord(String eword) - throws ParseException, UnsupportedEncodingException { - - if (!eword.startsWith("=?")) // not an encoded word - throw new ParseException(); - - // get charset - int start = 2; int pos; - if ((pos = eword.indexOf('?', start)) == -1) - throw new ParseException(); - String charset = javaCharset(eword.substring(start, pos)); - - // get encoding - start = pos+1; - if ((pos = eword.indexOf('?', start)) == -1) - throw new ParseException(); - String encoding = eword.substring(start, pos); - - // get encoded-sequence - start = pos+1; - if ((pos = eword.indexOf("?=", start)) == -1) - throw new ParseException(); - String word = eword.substring(start, pos); - - try { - // Extract the bytes from word - ByteArrayInputStream bis = - new ByteArrayInputStream(ASCIIUtility.getBytes(word)); - - // Get the appropriate decoder - InputStream is; - if (encoding.equalsIgnoreCase("B")) - is = new BASE64DecoderStream(bis); - else if (encoding.equalsIgnoreCase("Q")) - is = new QDecoderStream(bis); - else - throw new UnsupportedEncodingException( - "unknown encoding: " + encoding); - - // For b64 & q, size of decoded word <= size of word. So - // the decoded bytes must fit into the 'bytes' array. This - // is certainly more efficient than writing bytes into a - // ByteArrayOutputStream and then pulling out the byte[] - // from it. 
- int count = bis.available(); - byte[] bytes = new byte[count]; - // count is set to the actual number of decoded bytes - count = is.read(bytes, 0, count); - - // Finally, convert the decoded bytes into a String using - // the specified charset - String s = new String(bytes, 0, count, charset); - if (pos + 2 < eword.length()) { - // there's still more text in the string - String rest = eword.substring(pos + 2); - if (!decodeStrict) - rest = decodeInnerWords(rest); - s += rest; - } - return s; - } catch (UnsupportedEncodingException uex) { - // explicitly catch and rethrow this exception, otherwise - // the below IOException catch will swallow this up! - throw uex; - } catch (IOException ioex) { - // Shouldn't happen. - throw new ParseException(); - } catch (IllegalArgumentException iex) { - /* An unknown charset of the form ISO-XXX-XXX, will cause - * the JDK to throw an IllegalArgumentException ... Since the - * JDK will attempt to create a classname using this string, - * but valid classnames must not contain the character '-', - * and this results in an IllegalArgumentException, rather than - * the expected UnsupportedEncodingException. Yikes - */ - throw new UnsupportedEncodingException(); - } - } - - /** - * Look for encoded words within a word. The MIME spec doesn't - * allow this, but many broken mailers, especially Japanese mailers, - * produce such incorrect encodings. 
- */ - private static String decodeInnerWords(String word) - throws UnsupportedEncodingException { - int start = 0, i; - StringBuffer buf = new StringBuffer(); - while ((i = word.indexOf("=?", start)) >= 0) { - buf.append(word.substring(start, i)); - int end = word.indexOf("?=", i); - if (end < 0) - break; - String s = word.substring(i, end + 2); - try { - s = decodeWord(s); - } catch (ParseException pex) { - // ignore it, just use the original string - } - buf.append(s); - start = end + 2; - } - if (start == 0) - return word; - if (start < word.length()) - buf.append(word.substring(start)); - return buf.toString(); - } - - /** - * A utility method to quote a word, if the word contains any - * characters from the specified 'specials' list.

- * - * The HeaderTokenizer class defines two special - * sets of delimiters - MIME and RFC 822.

- * - * This method is typically used during the generation of - * RFC 822 and MIME header fields. - * - * @param word word to be quoted - * @param specials the set of special characters - * @return the possibly quoted word - * @see javax.mail.internet.HeaderTokenizer#MIME - * @see javax.mail.internet.HeaderTokenizer#RFC822 - */ - public static String quote(String word, String specials) { - int len = word.length(); - - /* - * Look for any "bad" characters, Escape and - * quote the entire string if necessary. - */ - boolean needQuoting = false; - for (int i = 0; i < len; i++) { - char c = word.charAt(i); - if (c == '"' || c == '\\' || c == '\r' || c == '\n') { - // need to escape them and then quote the whole string - StringBuffer sb = new StringBuffer(len + 3); - sb.append('"'); - sb.append(word.substring(0, i)); - int lastc = 0; - for (int j = i; j < len; j++) { - char cc = word.charAt(j); - if ((cc == '"') || (cc == '\\') || - (cc == '\r') || (cc == '\n')) - if (cc == '\n' && lastc == '\r') - ; // do nothing, CR was already escaped - else - sb.append('\\'); // Escape the character - sb.append(cc); - lastc = cc; - } - sb.append('"'); - return sb.toString(); - } else if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0) - // These characters cause the string to be quoted - needQuoting = true; - } - - if (needQuoting) { - StringBuffer sb = new StringBuffer(len + 2); - sb.append('"').append(word).append('"'); - return sb.toString(); - } else - return word; - } - - /** - * Fold a string at linear whitespace so that each line is no longer - * than 76 characters, if possible. If there are more than 76 - * non-whitespace characters consecutively, the string is folded at - * the first whitespace after that sequence. The parameter - * used indicates how many characters have been used in - * the current line; it is usually the length of the header name.

- * - * Note that line breaks in the string aren't escaped; they probably - * should be. - * - * @param used characters used in line so far - * @param s the string to fold - * @return the folded string - */ - /*public*/ static String fold(int used, String s) { - if (!foldText) - return s; - - int end; - char c; - // Strip trailing spaces - for (end = s.length() - 1; end >= 0; end--) { - c = s.charAt(end); - if (c != ' ' && c != '\t') - break; - } - if (end != s.length() - 1) - s = s.substring(0, end + 1); - - // if the string fits now, just return it - if (used + s.length() <= 76) - return s; - - // have to actually fold the string - StringBuffer sb = new StringBuffer(s.length() + 4); - char lastc = 0; - while (used + s.length() > 76) { - int lastspace = -1; - for (int i = 0; i < s.length(); i++) { - if (lastspace != -1 && used + i > 76) - break; - c = s.charAt(i); - if (c == ' ' || c == '\t') - if (!(lastc == ' ' || lastc == '\t')) - lastspace = i; - lastc = c; - } - if (lastspace == -1) { - // no space, use the whole thing - sb.append(s); - s = ""; - used = 0; - break; - } - sb.append(s.substring(0, lastspace)); - sb.append("\r\n"); - lastc = s.charAt(lastspace); - sb.append(lastc); - s = s.substring(lastspace + 1); - used = 1; - } - sb.append(s); - return sb.toString(); - } - - /** - * Unfold a folded header. Any line breaks that aren't escaped and - * are followed by whitespace are removed. - * - * @param s the string to unfold - * @return the unfolded string - */ - /*public*/ static String unfold(String s) { - if (!foldText) - return s; - - StringBuffer sb = null; - int i; - while ((i = indexOfAny(s, "\r\n")) >= 0) { - int start = i; - int l = s.length(); - i++; // skip CR or NL - if (i < l && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n') - i++; // skip LF - if (start == 0 || s.charAt(start - 1) != '\\') { - char c; - // if next line starts with whitespace, skip all of it - // XXX - always has to be true? 
- if (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) { - i++; // skip whitespace - while (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) - i++; - if (sb == null) - sb = new StringBuffer(s.length()); - if (start != 0) { - sb.append(s.substring(0, start)); - sb.append(' '); - } - s = s.substring(i); - continue; - } - // it's not a continuation line, just leave it in - if (sb == null) - sb = new StringBuffer(s.length()); - sb.append(s.substring(0, i)); - s = s.substring(i); - } else { - // there's a backslash at "start - 1" - // strip it out, but leave in the line break - if (sb == null) - sb = new StringBuffer(s.length()); - sb.append(s.substring(0, start - 1)); - sb.append(s.substring(start, i)); - s = s.substring(i); - } - } - if (sb != null) { - sb.append(s); - return sb.toString(); - } else - return s; - } - - /** - * Return the first index of any of the characters in "any" in "s", - * or -1 if none are found. - * - * This should be a method on String. - */ - private static int indexOfAny(String s, String any) { - return indexOfAny(s, any, 0); - } - - private static int indexOfAny(String s, String any, int start) { - try { - int len = s.length(); - for (int i = start; i < len; i++) { - if (any.indexOf(s.charAt(i)) >= 0) - return i; - } - return -1; - } catch (StringIndexOutOfBoundsException e) { - return -1; - } - } - - /** - * Convert a MIME charset name into a valid Java charset name.

- * - * @param charset the MIME charset name - * @return the Java charset equivalent. If a suitable mapping is - * not available, the passed in charset is itself returned. - */ - public static String javaCharset(String charset) { - if (mime2java == null || charset == null) - // no mapping table, or charset parameter is null - return charset; - - String alias = (String)mime2java.get(charset.toLowerCase()); - return alias == null ? charset : alias; - } - - /** - * Convert a java charset into its MIME charset name.

- * - * Note that a future version of JDK (post 1.2) might provide - * this functionality, in which case, we may deprecate this - * method then. - * - * @param charset the JDK charset - * @return the MIME/IANA equivalent. If a mapping - * is not possible, the passed in charset itself - * is returned. - * @since JavaMail 1.1 - */ - public static String mimeCharset(String charset) { - if (java2mime == null || charset == null) - // no mapping table or charset param is null - return charset; - - String alias = (String)java2mime.get(charset.toLowerCase()); - return alias == null ? charset : alias; - } - - private static String defaultJavaCharset; - private static String defaultMIMECharset; - - /** - * Get the default charset corresponding to the system's current - * default locale. If the System property mail.mime.charset - * is set, a system charset corresponding to this MIME charset will be - * returned.

- * - * @return the default charset of the system's default locale, - * as a Java charset. (NOT a MIME charset) - * @since JavaMail 1.1 - */ - public static String getDefaultJavaCharset() { - if (defaultJavaCharset == null) { - /* - * If mail.mime.charset is set, it controls the default - * Java charset as well. - */ - String mimecs = null; - - mimecs = SAAJUtil.getSystemProperty("mail.mime.charset"); - - if (mimecs != null && mimecs.length() > 0) { - defaultJavaCharset = javaCharset(mimecs); - return defaultJavaCharset; - } - - try { - defaultJavaCharset = System.getProperty("file.encoding", - "8859_1"); - } catch (SecurityException sex) { - - class NullInputStream extends InputStream { - public int read() { - return 0; - } - } - InputStreamReader reader = - new InputStreamReader(new NullInputStream()); - defaultJavaCharset = reader.getEncoding(); - if (defaultJavaCharset == null) - defaultJavaCharset = "8859_1"; - } - } - - return defaultJavaCharset; - } - - /* - * Get the default MIME charset for this locale. - */ - static String getDefaultMIMECharset() { - if (defaultMIMECharset == null) { - defaultMIMECharset = SAAJUtil.getSystemProperty("mail.mime.charset"); - } - if (defaultMIMECharset == null) - defaultMIMECharset = mimeCharset(getDefaultJavaCharset()); - return defaultMIMECharset; - } - - // Tables to map MIME charset names to Java names and vice versa. 
- // XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset - private static Hashtable mime2java; - private static Hashtable java2mime; - - static { - java2mime = new Hashtable(40); - mime2java = new Hashtable(10); - - try { - // Use this class's classloader to load the mapping file - // XXX - we should use SecuritySupport, but it's in another package - InputStream is = - com.sun.xml.internal.messaging.saaj.packaging.mime.internet.MimeUtility.class.getResourceAsStream( - "/META-INF/javamail.charset.map"); - - if (is != null) { - is = new LineInputStream(is); - - // Load the JDK-to-MIME charset mapping table - loadMappings((LineInputStream)is, java2mime); - - // Load the MIME-to-JDK charset mapping table - loadMappings((LineInputStream)is, mime2java); - } - } catch (Exception ex) { } - - // If we didn't load the tables, e.g., because we didn't have - // permission, load them manually. The entries here should be - // the same as the default javamail.charset.map. - if (java2mime.isEmpty()) { - java2mime.put("8859_1", "ISO-8859-1"); - java2mime.put("iso8859_1", "ISO-8859-1"); - java2mime.put("ISO8859-1", "ISO-8859-1"); - - java2mime.put("8859_2", "ISO-8859-2"); - java2mime.put("iso8859_2", "ISO-8859-2"); - java2mime.put("ISO8859-2", "ISO-8859-2"); - - java2mime.put("8859_3", "ISO-8859-3"); - java2mime.put("iso8859_3", "ISO-8859-3"); - java2mime.put("ISO8859-3", "ISO-8859-3"); - - java2mime.put("8859_4", "ISO-8859-4"); - java2mime.put("iso8859_4", "ISO-8859-4"); - java2mime.put("ISO8859-4", "ISO-8859-4"); - - java2mime.put("8859_5", "ISO-8859-5"); - java2mime.put("iso8859_5", "ISO-8859-5"); - java2mime.put("ISO8859-5", "ISO-8859-5"); - - java2mime.put("8859_6", "ISO-8859-6"); - java2mime.put("iso8859_6", "ISO-8859-6"); - java2mime.put("ISO8859-6", "ISO-8859-6"); - - java2mime.put("8859_7", "ISO-8859-7"); - java2mime.put("iso8859_7", "ISO-8859-7"); - java2mime.put("ISO8859-7", "ISO-8859-7"); - - java2mime.put("8859_8", "ISO-8859-8"); - java2mime.put("iso8859_8", 
"ISO-8859-8"); - java2mime.put("ISO8859-8", "ISO-8859-8"); - - java2mime.put("8859_9", "ISO-8859-9"); - java2mime.put("iso8859_9", "ISO-8859-9"); - java2mime.put("ISO8859-9", "ISO-8859-9"); - - java2mime.put("SJIS", "Shift_JIS"); - java2mime.put("MS932", "Shift_JIS"); - java2mime.put("JIS", "ISO-2022-JP"); - java2mime.put("ISO2022JP", "ISO-2022-JP"); - java2mime.put("EUC_JP", "euc-jp"); - java2mime.put("KOI8_R", "koi8-r"); - java2mime.put("EUC_CN", "euc-cn"); - java2mime.put("EUC_TW", "euc-tw"); - java2mime.put("EUC_KR", "euc-kr"); - } - if (mime2java.isEmpty()) { - mime2java.put("iso-2022-cn", "ISO2022CN"); - mime2java.put("iso-2022-kr", "ISO2022KR"); - mime2java.put("utf-8", "UTF8"); - mime2java.put("utf8", "UTF8"); - mime2java.put("ja_jp.iso2022-7", "ISO2022JP"); - mime2java.put("ja_jp.eucjp", "EUCJIS"); - mime2java.put("euc-kr", "KSC5601"); - mime2java.put("euckr", "KSC5601"); - mime2java.put("us-ascii", "ISO-8859-1"); - mime2java.put("x-us-ascii", "ISO-8859-1"); - } - } - - private static void loadMappings(LineInputStream is, Hashtable table) { - String currLine; - - while (true) { - try { - currLine = is.readLine(); - } catch (IOException ioex) { - break; // error in reading, stop - } - - if (currLine == null) // end of file, stop - break; - if (currLine.startsWith("--") && currLine.endsWith("--")) - // end of this table - break; - - // ignore empty lines and comments - if (currLine.trim().length() == 0 || currLine.startsWith("#")) - continue; - - // A valid entry is of the form - // where, := SPACE | HT. Parse this - StringTokenizer tk = new StringTokenizer(currLine, " \t"); - try { - String key = tk.nextToken(); - String value = tk.nextToken(); - table.put(key.toLowerCase(), value); - } catch (NoSuchElementException nex) { } - } - } - - static final int ALL_ASCII = 1; - static final int MOSTLY_ASCII = 2; - static final int MOSTLY_NONASCII = 3; - - /** - * Check if the given string contains non US-ASCII characters. 
- * @param s string - * @return ALL_ASCII if all characters in the string - * belong to the US-ASCII charset. MOSTLY_ASCII - * if more than half of the available characters - * are US-ASCII characters. Else MOSTLY_NONASCII. - */ - static int checkAscii(String s) { - int ascii = 0, non_ascii = 0; - int l = s.length(); - - for (int i = 0; i < l; i++) { - if (nonascii((int)s.charAt(i))) // non-ascii - non_ascii++; - else - ascii++; - } - - if (non_ascii == 0) - return ALL_ASCII; - if (ascii > non_ascii) - return MOSTLY_ASCII; - - return MOSTLY_NONASCII; - } - - /** - * Check if the given byte array contains non US-ASCII characters. - * @param b byte array - * @return ALL_ASCII if all characters in the string - * belong to the US-ASCII charset. MOSTLY_ASCII - * if more than half of the available characters - * are US-ASCII characters. Else MOSTLY_NONASCII. - * - * XXX - this method is no longer used - */ - static int checkAscii(byte[] b) { - int ascii = 0, non_ascii = 0; - - for (int i=0; i < b.length; i++) { - // The '&' operator automatically causes b[i] to be promoted - // to an int, and we mask out the higher bytes in the int - // so that the resulting value is not a negative integer. - if (nonascii(b[i] & 0xff)) // non-ascii - non_ascii++; - else - ascii++; - } - - if (non_ascii == 0) - return ALL_ASCII; - if (ascii > non_ascii) - return MOSTLY_ASCII; - - return MOSTLY_NONASCII; - } - - /** - * Check if the given input stream contains non US-ASCII characters. - * Upto max bytes are checked. If max is - * set to ALL, then all the bytes available in this - * input stream are checked. If breakOnNonAscii is true - * the check terminates when the first non-US-ASCII character is - * found and MOSTLY_NONASCII is returned. Else, the check continues - * till max bytes or till the end of stream. - * - * @param is the input stream - * @param max maximum bytes to check for. The special value - * ALL indicates that all the bytes in this input - * stream must be checked. 
- * @param breakOnNonAscii if true, then terminate the - * the check when the first non-US-ASCII character - * is found. - * @return ALL_ASCII if all characters in the string - * belong to the US-ASCII charset. MOSTLY_ASCII - * if more than half of the available characters - * are US-ASCII characters. Else MOSTLY_NONASCII. - */ - static int checkAscii(InputStream is, int max, boolean breakOnNonAscii) { - int ascii = 0, non_ascii = 0; - int len; - int block = 4096; - int linelen = 0; - boolean longLine = false, badEOL = false; - boolean checkEOL = encodeEolStrict && breakOnNonAscii; - byte buf[] = null; - if (max != 0) { - block = (max == ALL) ? 4096 : Math.min(max, 4096); - buf = new byte[block]; - } - while (max != 0) { - try { - if ((len = is.read(buf, 0, block)) == -1) - break; - int lastb = 0; - for (int i = 0; i < len; i++) { - // The '&' operator automatically causes b[i] to - // be promoted to an int, and we mask out the higher - // bytes in the int so that the resulting value is - // not a negative integer. - int b = buf[i] & 0xff; - if (checkEOL && - ((lastb == '\r' && b != '\n') || - (lastb != '\r' && b == '\n'))) - badEOL = true; - if (b == '\r' || b == '\n') - linelen = 0; - else { - linelen++; - if (linelen > 998) // 1000 - CRLF - longLine = true; - } - if (nonascii(b)) { // non-ascii - if (breakOnNonAscii) // we are done - return MOSTLY_NONASCII; - else - non_ascii++; - } else - ascii++; - lastb = b; - } - } catch (IOException ioex) { - break; - } - if (max != ALL) - max -= len; - } - - if (max == 0 && breakOnNonAscii) - // We have been told to break on the first non-ascii character. - // We haven't got any non-ascii character yet, but then we - // have not checked all of the available bytes either. 
So we - // cannot say for sure that this input stream is ALL_ASCII, - // and hence we must play safe and return MOSTLY_NONASCII - - return MOSTLY_NONASCII; - - if (non_ascii == 0) { // no non-us-ascii characters so far - // If we're looking at non-text data, and we saw CR without LF - // or vice versa, consider this mostly non-ASCII so that it - // will be base64 encoded (since the quoted-printable encoder - // doesn't encode this case properly). - if (badEOL) - return MOSTLY_NONASCII; - // if we've seen a long line, we degrade to mostly ascii - else if (longLine) - return MOSTLY_ASCII; - else - return ALL_ASCII; - } - if (ascii > non_ascii) // mostly ascii - return MOSTLY_ASCII; - return MOSTLY_NONASCII; - } - - static final boolean nonascii(int b) { - return b >= 0177 || (b < 040 && b != '\r' && b != '\n' && b != '\t'); - } -} - -/** - * An OutputStream that determines whether the data written to - * it is all ASCII, mostly ASCII, or mostly non-ASCII. - */ -class AsciiOutputStream extends OutputStream { - private boolean breakOnNonAscii; - private int ascii = 0, non_ascii = 0; - private int linelen = 0; - private boolean longLine = false; - private boolean badEOL = false; - private boolean checkEOL = false; - private int lastb = 0; - private int ret = 0; - - public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) { - this.breakOnNonAscii = breakOnNonAscii; - checkEOL = encodeEolStrict && breakOnNonAscii; - } - - public void write(int b) throws IOException { - check(b); - } - - public void write(byte b[]) throws IOException { - write(b, 0, b.length); - } - - public void write(byte b[], int off, int len) throws IOException { - len += off; - for (int i = off; i < len ; i++) - check(b[i]); - } - - private final void check(int b) throws IOException { - b &= 0xff; - if (checkEOL && - ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n'))) - badEOL = true; - if (b == '\r' || b == '\n') - linelen = 0; - else { - linelen++; - if (linelen > 998) 
// 1000 - CRLF - longLine = true; - } - if (MimeUtility.nonascii(b)) { // non-ascii - non_ascii++; - if (breakOnNonAscii) { // we are done - ret = MimeUtility.MOSTLY_NONASCII; - throw new EOFException(); - } - } else - ascii++; - lastb = b; - } - - /** - * Return ASCII-ness of data stream. - */ - public int getAscii() { - if (ret != 0) - return ret; - // If we're looking at non-text data, and we saw CR without LF - // or vice versa, consider this mostly non-ASCII so that it - // will be base64 encoded (since the quoted-printable encoder - // doesn't encode this case properly). - if (badEOL) - return MimeUtility.MOSTLY_NONASCII; - else if (non_ascii == 0) { // no non-us-ascii characters so far - // if we've seen a long line, we degrade to mostly ascii - if (longLine) - return MimeUtility.MOSTLY_ASCII; - else - return MimeUtility.ALL_ASCII; - } - if (ascii > non_ascii) // mostly ascii - return MimeUtility.MOSTLY_ASCII; - return MimeUtility.MOSTLY_NONASCII; - } -}