jaxp/src/share/classes/com/sun/org/apache/xml/internal/serializer/Encodings.java
changeset 12005 a754d69d5e60
equal deleted inserted replaced
11943:16ba58282d11 12005:a754d69d5e60
       
     1 /*
       
     2  * reserved comment block
       
     3  * DO NOT REMOVE OR ALTER!
       
     4  */
       
     5 /*
       
     6  * Copyright 1999-2004 The Apache Software Foundation.
       
     7  *
       
     8  * Licensed under the Apache License, Version 2.0 (the "License");
       
     9  * you may not use this file except in compliance with the License.
       
    10  * You may obtain a copy of the License at
       
    11  *
       
    12  *     http://www.apache.org/licenses/LICENSE-2.0
       
    13  *
       
    14  * Unless required by applicable law or agreed to in writing, software
       
    15  * distributed under the License is distributed on an "AS IS" BASIS,
       
    16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
       
    17  * See the License for the specific language governing permissions and
       
    18  * limitations under the License.
       
    19  */
       
    20 /*
       
    21  * $Id: Encodings.java,v 1.3 2005/09/28 13:49:04 pvedula Exp $
       
    22  */
       
    23 package com.sun.org.apache.xml.internal.serializer;
       
    24 
       
    25 import java.io.InputStream;
       
    26 import java.io.OutputStream;
       
    27 import java.io.OutputStreamWriter;
       
    28 import java.io.UnsupportedEncodingException;
       
    29 import java.io.Writer;
       
    30 import java.io.BufferedWriter;
       
    31 import java.net.URL;
       
    32 import java.util.Enumeration;
       
    33 import java.util.HashMap;
       
    34 import java.util.Properties;
       
    35 import java.util.StringTokenizer;
       
    36 
       
    37 
       
    38 /**
       
    39  * Provides information about encodings. Depends on the Java runtime
       
    40  * to provide writers for the different encodings, but can be used
       
    41  * to override encoding names and provide the last printable character
       
    42  * for each encoding.
       
    43  *
       
    44  * @version $Revision: 1.9 $ $Date: 2009/12/01 22:17:31 $
       
    45  * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
       
    46  */
       
    47 
       
    48 public final class Encodings extends Object
       
    49 {
       
    50 
       
    51     /**
       
    52      * The last printable character (0x7F) assumed for unknown encodings; this is the value returned by getLastPrintable().
       
    53      */
       
    54     private static final int m_defaultLastPrintable = 0x7F;
       
    55 
       
    56     /**
       
    57      * Resource name of the bundled properties file with encodings data; loadEncodingInfo() reads it when no external file has been opened.
       
    58      */
       
    59     private static final String ENCODINGS_FILE = "com/sun/org/apache/xml/internal/serializer/Encodings.properties";
       
    60 
       
    61     /**
       
    62      * Name of the system property (not a file name) whose value, when non-empty, is used by loadEncodingInfo() as the URL of an external encodings properties file.
       
    63      */
       
    64     private static final String ENCODINGS_PROP = "com.sun.org.apache.xalan.internal.serialize.encodings";
       
    65 
       
    66 
       
    67     /**
       
    68      * Returns a writer for the specified encoding based on
       
    69      * an output stream.
       
    70      *
       
    71      * @param output The output stream
       
    72      * @param encoding The encoding
       
    73      * @return A suitable writer
       
    74      * @throws UnsupportedEncodingException There is no convertor
       
    75      *  to support this encoding
       
    76      */
       
    77     static Writer getWriter(OutputStream output, String encoding)
       
    78         throws UnsupportedEncodingException
       
    79     {
       
    80 
       
    81         for (int i = 0; i < _encodings.length; ++i)
       
    82         {
       
    83             if (_encodings[i].name.equalsIgnoreCase(encoding))
       
    84             {
       
    85                 try
       
    86                 {
       
    87                     return new BufferedWriter(new OutputStreamWriter(
       
    88                         output,
       
    89                         _encodings[i].javaName));
       
    90                 }
       
    91                 catch (java.lang.IllegalArgumentException iae) // java 1.1.8
       
    92                 {
       
    93                     // keep trying
       
    94                 }
       
    95                 catch (UnsupportedEncodingException usee)
       
    96                 {
       
    97 
       
    98                     // keep trying
       
    99                 }
       
   100             }
       
   101         }
       
   102 
       
   103         try
       
   104         {
       
   105             return new BufferedWriter(new OutputStreamWriter(output, encoding));
       
   106         }
       
   107         catch (java.lang.IllegalArgumentException iae) // java 1.1.8
       
   108         {
       
   109             throw new UnsupportedEncodingException(encoding);
       
   110         }
       
   111     }
       
   112 
       
   113 
       
   114     /**
       
   115      * Returns the last printable character for an unspecified
       
   116      * encoding.
       
   117      *
       
   118      * @return the default size
       
   119      */
       
   120     public static int getLastPrintable()
       
   121     {
       
   122         return m_defaultLastPrintable;
       
   123     }
       
   124 
       
   125 
       
   126 
       
   127     /**
       
   128      * Returns the EncodingInfo object for the specified
       
   129      * encoding.
       
   130      * <p>
       
   131      * This is not a public API.
       
   132      *
       
   133      * @param encoding The encoding
       
   134      * @return The object that is used to determine if
       
   135      * characters are in the given encoding.
       
   136      * @xsl.usage internal
       
   137      */
       
   138     static EncodingInfo getEncodingInfo(String encoding)
       
   139     {
       
   140         EncodingInfo ei;
       
   141 
       
   142         String normalizedEncoding = toUpperCaseFast(encoding);
       
   143         ei = (EncodingInfo) _encodingTableKeyJava.get(normalizedEncoding);
       
   144         if (ei == null)
       
   145             ei = (EncodingInfo) _encodingTableKeyMime.get(normalizedEncoding);
       
   146         if (ei == null) {
       
   147             // We shouldn't have to do this, but just in case.
       
   148             ei = new EncodingInfo(null,null);
       
   149         }
       
   150 
       
   151         return ei;
       
   152     }
       
   153 
       
   154     /**
       
   155      * A fast and cheap way to uppercase a String that is
       
   156      * only made of printable ASCII characters.
       
   157      * <p>
       
   158      * This is not a public API.
       
   159      * @param s a String of ASCII characters
       
   160      * @return an uppercased version of the input String,
       
   161      * possibly the same String.
       
   162      * @xsl.usage internal
       
   163      */
       
   164     static private String toUpperCaseFast(final String s) {
       
   165 
       
   166         boolean different = false;
       
   167         final int mx = s.length();
       
   168                 char[] chars = new char[mx];
       
   169         for (int i=0; i < mx; i++) {
       
   170                 char ch = s.charAt(i);
       
   171             // is the character a lower case ASCII one?
       
   172                 if ('a' <= ch && ch <= 'z') {
       
   173                 // a cheap and fast way to uppercase that is good enough
       
   174                         ch = (char) (ch + ('A' - 'a'));
       
   175                         different = true; // the uppercased String is different
       
   176                 }
       
   177                 chars[i] = ch;
       
   178         }
       
   179 
       
   180         // A little optimization, don't call String.valueOf() if
       
   181         // the uppercased string is the same as the input string.
       
   182         final String upper;
       
   183         if (different)
       
   184                 upper = String.valueOf(chars);
       
   185         else
       
   186                 upper = s;
       
   187 
       
   188         return upper;
       
   189     }
       
   190 
       
   191     /** The default encoding, ISO style; getMimeEncoding falls back to it when no encoding was given and the platform one is unusable. */
       
   192     static final String DEFAULT_MIME_ENCODING = "UTF-8";
       
   193 
       
   194     /**
       
   195      * Get the proper mime encoding.  From the XSLT recommendation: "The encoding
       
   196      * attribute specifies the preferred encoding to use for outputting the result
       
   197      * tree. XSLT processors are required to respect values of UTF-8 and UTF-16.
       
   198      * For other values, if the XSLT processor does not support the specified
       
   199      * encoding it may signal an error; if it does not signal an error it should
       
   200      * use UTF-8 or UTF-16 instead. The XSLT processor must not use an encoding
       
   201      * whose name does not match the EncName production of the XML Recommendation
       
   202      * [XML]. If no encoding attribute is specified, then the XSLT processor should
       
   203      * use either UTF-8 or UTF-16."
       
   204      *
       
   205      * @param encoding Reference to java-style encoding string, which may be null,
       
   206      * in which case a default will be found.
       
   207      *
       
   208      * @return The ISO-style encoding string, or null if failure.
       
   209      */
       
   210     static String getMimeEncoding(String encoding)
       
   211     {
       
   212 
       
   213         if (null == encoding)
       
   214         {
       
   215             try
       
   216             {
       
   217 
       
   218                 // Get the default system character encoding.  This may be
       
   219                 // incorrect if they passed in a writer, but right now there
       
   220                 // seems to be no way to get the encoding from a writer.
       
   221                 encoding = System.getProperty("file.encoding", "UTF8");
       
   222 
       
   223                 if (null != encoding)
       
   224                 {
       
   225 
       
   226                     /*
       
   227                     * See if the mime type is equal to UTF8.  If you don't
       
   228                     * do that, then  convertJava2MimeEncoding will convert
       
   229                     * 8859_1 to "ISO-8859-1", which is not what we want,
       
   230                     * I think, and I don't think I want to alter the tables
       
   231                     * to convert everything to UTF-8.
       
   232                     */
       
   233                     String jencoding =
       
   234                         (encoding.equalsIgnoreCase("Cp1252")
       
   235                             || encoding.equalsIgnoreCase("ISO8859_1")
       
   236                             || encoding.equalsIgnoreCase("8859_1")
       
   237                             || encoding.equalsIgnoreCase("UTF8"))
       
   238                             ? DEFAULT_MIME_ENCODING
       
   239                             : convertJava2MimeEncoding(encoding);
       
   240 
       
   241                     encoding =
       
   242                         (null != jencoding) ? jencoding : DEFAULT_MIME_ENCODING;
       
   243                 }
       
   244                 else
       
   245                 {
       
   246                     encoding = DEFAULT_MIME_ENCODING;
       
   247                 }
       
   248             }
       
   249             catch (SecurityException se)
       
   250             {
       
   251                 encoding = DEFAULT_MIME_ENCODING;
       
   252             }
       
   253         }
       
   254         else
       
   255         {
       
   256             encoding = convertJava2MimeEncoding(encoding);
       
   257         }
       
   258 
       
   259         return encoding;
       
   260     }
       
   261 
       
   262     /**
       
   263      * Try the best we can to convert a Java encoding to a XML-style encoding.
       
   264      *
       
   265      * @param encoding non-null reference to encoding string, java style.
       
   266      *
       
   267      * @return ISO-style encoding string.
       
   268      */
       
   269     private static String convertJava2MimeEncoding(String encoding)
       
   270     {
       
   271         EncodingInfo enc =
       
   272             (EncodingInfo) _encodingTableKeyJava.get(encoding.toUpperCase());
       
   273         if (null != enc)
       
   274             return enc.name;
       
   275         return encoding;
       
   276     }
       
   277 
       
   278     /**
       
   279      * Try the best we can to convert a Java encoding to a XML-style encoding.
       
   280      *
       
   281      * @param encoding non-null reference to encoding string, java style.
       
   282      *
       
   283      * @return ISO-style encoding string.
       
   284      */
       
   285     public static String convertMime2JavaEncoding(String encoding)
       
   286     {
       
   287 
       
   288         for (int i = 0; i < _encodings.length; ++i)
       
   289         {
       
   290             if (_encodings[i].name.equalsIgnoreCase(encoding))
       
   291             {
       
   292                 return _encodings[i].javaName;
       
   293             }
       
   294         }
       
   295 
       
   296         return encoding;
       
   297     }
       
   298 
       
   299     /**
       
   300      * Load a list of all the supported encodings.
       
   301      *
       
   302      * System property "encodings" formatted using URL syntax may define an
       
   303      * external encodings list. Thanks to Sergey Ushakov for the code
       
   304      * contribution!
       
   305      */
       
   306     private static EncodingInfo[] loadEncodingInfo()
       
   307     {
       
   308         try
       
   309         {
       
   310             String urlString = null;
       
   311             InputStream is = null;
       
   312 
       
   313             try
       
   314             {
       
   315                 urlString = System.getProperty(ENCODINGS_PROP, "");
       
   316             }
       
   317             catch (SecurityException e)
       
   318             {
       
   319             }
       
   320 
       
   321             if (urlString != null && urlString.length() > 0) {
       
   322                 URL url = new URL(urlString);
       
   323                 is = url.openStream();
       
   324             }
       
   325 
       
   326             if (is == null) {
       
   327                 SecuritySupport ss = SecuritySupport.getInstance();
       
   328                 is = ss.getResourceAsStream(ObjectFactory.findClassLoader(),
       
   329                                             ENCODINGS_FILE);
       
   330             }
       
   331 
       
   332             Properties props = new Properties();
       
   333             if (is != null) {
       
   334                 props.load(is);
       
   335                 is.close();
       
   336             } else {
       
   337                 // Seems to be no real need to force failure here, let the
       
   338                 // system do its best... The issue is not really very critical,
       
   339                 // and the output will be in any case _correct_ though maybe not
       
   340                 // always human-friendly... :)
       
   341                 // But maybe report/log the resource problem?
       
   342                 // Any standard ways to report/log errors (in static context)?
       
   343             }
       
   344 
       
   345             int totalEntries = props.size();
       
   346             int totalMimeNames = 0;
       
   347             Enumeration keys = props.keys();
       
   348             for (int i = 0; i < totalEntries; ++i)
       
   349             {
       
   350                 String javaName = (String) keys.nextElement();
       
   351                 String val = props.getProperty(javaName);
       
   352                 totalMimeNames++;
       
   353                 int pos = val.indexOf(' ');
       
   354                 for (int j = 0; j < pos; ++j)
       
   355                     if (val.charAt(j) == ',')
       
   356                         totalMimeNames++;
       
   357             }
       
   358             EncodingInfo[] ret = new EncodingInfo[totalMimeNames];
       
   359             int j = 0;
       
   360             keys = props.keys();
       
   361             for (int i = 0; i < totalEntries; ++i)
       
   362             {
       
   363                 String javaName = (String) keys.nextElement();
       
   364                 String val = props.getProperty(javaName);
       
   365                 int pos = val.indexOf(' ');
       
   366                 String mimeName;
       
   367                 //int lastPrintable;
       
   368                 if (pos < 0)
       
   369                 {
       
   370                     // Maybe report/log this problem?
       
   371                     //  "Last printable character not defined for encoding " +
       
   372                     //  mimeName + " (" + val + ")" ...
       
   373                     mimeName = val;
       
   374                     //lastPrintable = 0x00FF;
       
   375                 }
       
   376                 else
       
   377                 {
       
   378                     //lastPrintable =
       
   379                     //    Integer.decode(val.substring(pos).trim()).intValue();
       
   380                     StringTokenizer st =
       
   381                         new StringTokenizer(val.substring(0, pos), ",");
       
   382                     for (boolean first = true;
       
   383                         st.hasMoreTokens();
       
   384                         first = false)
       
   385                     {
       
   386                         mimeName = st.nextToken();
       
   387                         ret[j] =
       
   388                             new EncodingInfo(mimeName, javaName);
       
   389                         _encodingTableKeyMime.put(
       
   390                             mimeName.toUpperCase(),
       
   391                             ret[j]);
       
   392                         if (first)
       
   393                             _encodingTableKeyJava.put(
       
   394                                 javaName.toUpperCase(),
       
   395                                 ret[j]);
       
   396                         j++;
       
   397                     }
       
   398                 }
       
   399             }
       
   400             return ret;
       
   401         }
       
   402         catch (java.net.MalformedURLException mue)
       
   403         {
       
   404             throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(mue);
       
   405         }
       
   406         catch (java.io.IOException ioe)
       
   407         {
       
   408             throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(ioe);
       
   409         }
       
   410     }
       
   411 
       
   412     /**
       
   413      * Return true if the character is the high member of a surrogate pair.
       
   414      * <p>
       
   415      * This is not a public API.
       
   416      * @param ch the character to test
       
   417      * @xsl.usage internal
       
   418      */
       
   419     static boolean isHighUTF16Surrogate(char ch) {
       
   420         return ('\uD800' <= ch && ch <= '\uDBFF');
       
   421     }
       
   422     /**
       
   423      * Return true if the character is the low member of a surrogate pair.
       
   424      * <p>
       
   425      * This is not a public API.
       
   426      * @param ch the character to test
       
   427      * @xsl.usage internal
       
   428      */
       
   429     static boolean isLowUTF16Surrogate(char ch) {
       
   430         return ('\uDC00' <= ch && ch <= '\uDFFF');
       
   431     }
       
   432     /**
       
   433      * Return the unicode code point represented by the high/low surrogate pair.
       
   434      * <p>
       
   435      * This is not a public API.
       
   436      * @param highSurrogate the high char of the high/low pair
       
   437      * @param lowSurrogate the low char of the high/low pair
       
   438      * @xsl.usage internal
       
   439      */
       
   440     static int toCodePoint(char highSurrogate, char lowSurrogate) {
       
   441         int codePoint =
       
   442             ((highSurrogate - 0xd800) << 10)
       
   443                 + (lowSurrogate - 0xdc00)
       
   444                 + 0x10000;
       
   445         return codePoint;
       
   446     }
       
   447     /**
       
   448      * Return the unicode code point represented by the char.
       
   449      * A bit of a dummy method, since all it does is return the char,
       
   450      * but as an int value.
       
   451      * <p>
       
   452      * This is not a public API.
       
   453      * @param ch the char.
       
   454      * @xsl.usage internal
       
   455      */
       
   456     static int toCodePoint(char ch) {
       
   457         int codePoint = ch;
       
   458         return codePoint;
       
   459     }
       
   460 
       
   461     private static final HashMap _encodingTableKeyJava = new HashMap(); // upper-cased Java encoding name -> EncodingInfo; filled by loadEncodingInfo()
       
   462     private static final HashMap _encodingTableKeyMime = new HashMap(); // upper-cased MIME encoding name -> EncodingInfo; filled by loadEncodingInfo()
       
   463     private static final EncodingInfo[] _encodings = loadEncodingInfo(); // keep this after the two tables: loadEncodingInfo() puts entries into them, so they must be initialized first
       
   464 }