jaxp/src/com/sun/org/apache/xml/internal/serializer/Encodings.java
changeset 17533 93a2cadbbd33
parent 16953 a44e04deb948
child 25264 040625ce9b72
equal deleted inserted replaced
17267:4df3e7ceb396 17533:93a2cadbbd33
    31 import java.net.URL;
    31 import java.net.URL;
    32 import java.util.Enumeration;
    32 import java.util.Enumeration;
    33 import java.util.HashMap;
    33 import java.util.HashMap;
    34 import java.util.Properties;
    34 import java.util.Properties;
    35 import java.util.StringTokenizer;
    35 import java.util.StringTokenizer;
       
    36 import java.io.IOException;
       
    37 import java.net.MalformedURLException;
       
    38 import java.nio.charset.Charset;
       
    39 import java.nio.charset.IllegalCharsetNameException;
       
    40 import java.nio.charset.UnsupportedCharsetException;
       
    41 import java.util.Collections;
       
    42 import java.util.Map;
       
    43 import java.util.Map.Entry;
    36 
    44 
    37 import com.sun.org.apache.xalan.internal.utils.SecuritySupport;
    45 import com.sun.org.apache.xalan.internal.utils.SecuritySupport;
    38 
    46 
    39 /**
    47 /**
    40  * Provides information about encodings. Depends on the Java runtime
    48  * Provides information about encodings. Depends on the Java runtime
    77      */
    85      */
    78     static Writer getWriter(OutputStream output, String encoding)
    86     static Writer getWriter(OutputStream output, String encoding)
    79         throws UnsupportedEncodingException
    87         throws UnsupportedEncodingException
    80     {
    88     {
    81 
    89 
    82         for (int i = 0; i < _encodings.length; ++i)
    90         final EncodingInfo ei = _encodingInfos.findEncoding(toUpperCaseFast(encoding));
    83         {
    91         if (ei != null) {
    84             if (_encodings[i].name.equalsIgnoreCase(encoding))
    92             try {
    85             {
    93                 return new BufferedWriter(new OutputStreamWriter(
    86                 try
    94                         output, ei.javaName));
    87                 {
    95             } catch (UnsupportedEncodingException usee) {
    88                     return new BufferedWriter(new OutputStreamWriter(
    96                 // keep trying
    89                         output,
    97             }
    90                         _encodings[i].javaName));
    98         }
    91                 }
    99 
    92                 catch (java.lang.IllegalArgumentException iae) // java 1.1.8
   100         return new BufferedWriter(new OutputStreamWriter(output, encoding));
    93                 {
       
    94                     // keep trying
       
    95                 }
       
    96                 catch (UnsupportedEncodingException usee)
       
    97                 {
       
    98 
       
    99                     // keep trying
       
   100                 }
       
   101             }
       
   102         }
       
   103 
       
   104         try
       
   105         {
       
   106             return new BufferedWriter(new OutputStreamWriter(output, encoding));
       
   107         }
       
   108         catch (java.lang.IllegalArgumentException iae) // java 1.1.8
       
   109         {
       
   110             throw new UnsupportedEncodingException(encoding);
       
   111         }
       
   112     }
   101     }
   113 
   102 
   114 
   103 
   115     /**
   104     /**
   116      * Returns the last printable character for an unspecified
   105      * Returns the last printable character for an unspecified
   139     static EncodingInfo getEncodingInfo(String encoding)
   128     static EncodingInfo getEncodingInfo(String encoding)
   140     {
   129     {
   141         EncodingInfo ei;
   130         EncodingInfo ei;
   142 
   131 
   143         String normalizedEncoding = toUpperCaseFast(encoding);
   132         String normalizedEncoding = toUpperCaseFast(encoding);
   144         ei = (EncodingInfo) _encodingTableKeyJava.get(normalizedEncoding);
   133         ei = _encodingInfos.findEncoding(normalizedEncoding);
   145         if (ei == null)
       
   146             ei = (EncodingInfo) _encodingTableKeyMime.get(normalizedEncoding);
       
   147         if (ei == null) {
   134         if (ei == null) {
   148             // We shouldn't have to do this, but just in case.
   135             // We shouldn't have to do this, but just in case.
   149             ei = new EncodingInfo(null,null);
   136             try {
       
   137                 // This may happen if the caller tries to use
       
   138                 // an encoding that wasn't registered in the
       
   139                 // (java name)->(preferred mime name) mapping file.
       
   140                 // In that case we attempt to load the charset for the
       
   141                 // given encoding, and if that succeeds - we create a new
       
   142                 // EncodingInfo instance - assuming the canonical name
       
   143                 // of the charset can be used as the mime name.
       
   144                 final Charset c = Charset.forName(encoding);
       
   145                 final String name = c.name();
       
   146                 ei = new EncodingInfo(name, name);
       
   147                 _encodingInfos.putEncoding(normalizedEncoding, ei);
       
   148             } catch (IllegalCharsetNameException | UnsupportedCharsetException x) {
       
   149                 ei = new EncodingInfo(null,null);
       
   150             }
   150         }
   151         }
   151 
   152 
   152         return ei;
   153         return ei;
   153     }
   154     }
   154 
   155 
   267      *
   268      *
   268      * @return ISO-style encoding string.
   269      * @return ISO-style encoding string.
   269      */
   270      */
   270     private static String convertJava2MimeEncoding(String encoding)
   271     private static String convertJava2MimeEncoding(String encoding)
   271     {
   272     {
   272         EncodingInfo enc =
   273         final EncodingInfo enc =
   273             (EncodingInfo) _encodingTableKeyJava.get(encoding.toUpperCase());
   274              _encodingInfos.getEncodingFromJavaKey(toUpperCaseFast(encoding));
   274         if (null != enc)
   275         if (null != enc)
   275             return enc.name;
   276             return enc.name;
   276         return encoding;
   277         return encoding;
   277     }
   278     }
   278 
   279 
   283      *
   284      *
   284      * @return ISO-style encoding string.
   285      * @return ISO-style encoding string.
   285      */
   286      */
   286     public static String convertMime2JavaEncoding(String encoding)
   287     public static String convertMime2JavaEncoding(String encoding)
   287     {
   288     {
   288 
   289         final EncodingInfo info = _encodingInfos.findEncoding(toUpperCaseFast(encoding));
   289         for (int i = 0; i < _encodings.length; ++i)
   290         return info != null ? info.javaName : encoding;
   290         {
   291     }
   291             if (_encodings[i].name.equalsIgnoreCase(encoding))
   292 
   292             {
   293     // Using an inner static class here prevents initialization races
   293                 return _encodings[i].javaName;
   294     // where the hash maps could be used before they were populated.
   294             }
   295     //
   295         }
   296     private final static class EncodingInfos {
   296 
   297         // These maps are final and not modified after initialization.
   297         return encoding;
   298         private final Map<String, EncodingInfo> _encodingTableKeyJava = new HashMap<>();
   298     }
   299         private final Map<String, EncodingInfo> _encodingTableKeyMime = new HashMap<>();
   299 
   300         // This map will be added to after initialization: make sure it's
   300     /**
   301         // thread-safe. This map should not be used frequently - only in cases
   301      * Load a list of all the supported encodings.
   302         // where the mapping requested was not declared in the Encodings.properties
   302      *
   303         // file.
   303      * System property "encodings" formatted using URL syntax may define an
   304         private final Map<String, EncodingInfo> _encodingDynamicTable =
   304      * external encodings list. Thanks to Sergey Ushakov for the code
   305                 Collections.synchronizedMap(new HashMap<String, EncodingInfo>());
   305      * contribution!
   306 
   306      */
   307         private EncodingInfos() {
   307     private static EncodingInfo[] loadEncodingInfo()
   308             loadEncodingInfo();
   308     {
   309         }
   309         try
   310 
   310         {
   311         // Opens the file/resource containing java charset name -> preferred mime
       
   312         // name mapping and returns it as an InputStream.
       
   313         private InputStream openEncodingsFileStream() throws MalformedURLException, IOException {
   311             String urlString = null;
   314             String urlString = null;
   312             InputStream is = null;
   315             InputStream is = null;
   313 
   316 
   314             try
   317             try {
   315             {
       
   316                 urlString = SecuritySupport.getSystemProperty(ENCODINGS_PROP, "");
   318                 urlString = SecuritySupport.getSystemProperty(ENCODINGS_PROP, "");
   317             }
   319             } catch (SecurityException e) {
   318             catch (SecurityException e)
       
   319             {
       
   320             }
   320             }
   321 
   321 
   322             if (urlString != null && urlString.length() > 0) {
   322             if (urlString != null && urlString.length() > 0) {
   323                 URL url = new URL(urlString);
   323                 URL url = new URL(urlString);
   324                 is = url.openStream();
   324                 is = url.openStream();
   325             }
   325             }
   326 
   326 
   327             if (is == null) {
   327             if (is == null) {
   328                 is = SecuritySupport.getResourceAsStream(ENCODINGS_FILE);
   328                 is = SecuritySupport.getResourceAsStream(ENCODINGS_FILE);
   329             }
   329             }
   330 
   330             return is;
       
   331         }
       
   332 
       
   333         // Loads the Properties resource containing the mapping:
       
   334         //    java charset name -> preferred mime name
       
   335         // and returns it.
       
   336         private Properties loadProperties() throws MalformedURLException, IOException {
   331             Properties props = new Properties();
   337             Properties props = new Properties();
   332             if (is != null) {
   338             try (InputStream is = openEncodingsFileStream()) {
   333                 props.load(is);
   339                 if (is != null) {
   334                 is.close();
   340                     props.load(is);
   335             } else {
   341                 } else {
   336                 // Seems to be no real need to force failure here, let the
   342                     // Seems to be no real need to force failure here, let the
   337                 // system do its best... The issue is not really very critical,
   343                     // system do its best... The issue is not really very critical,
   338                 // and the output will be in any case _correct_ though maybe not
   344                     // and the output will be in any case _correct_ though maybe not
   339                 // always human-friendly... :)
   345                     // always human-friendly... :)
   340                 // But maybe report/log the resource problem?
   346                     // But maybe report/log the resource problem?
   341                 // Any standard ways to report/log errors (in static context)?
   347                     // Any standard ways to report/log errors (in static context)?
   342             }
       
   343 
       
   344             int totalEntries = props.size();
       
   345             int totalMimeNames = 0;
       
   346             Enumeration keys = props.keys();
       
   347             for (int i = 0; i < totalEntries; ++i)
       
   348             {
       
   349                 String javaName = (String) keys.nextElement();
       
   350                 String val = props.getProperty(javaName);
       
   351                 totalMimeNames++;
       
   352                 int pos = val.indexOf(' ');
       
   353                 for (int j = 0; j < pos; ++j)
       
   354                     if (val.charAt(j) == ',')
       
   355                         totalMimeNames++;
       
   356             }
       
   357             EncodingInfo[] ret = new EncodingInfo[totalMimeNames];
       
   358             int j = 0;
       
   359             keys = props.keys();
       
   360             for (int i = 0; i < totalEntries; ++i)
       
   361             {
       
   362                 String javaName = (String) keys.nextElement();
       
   363                 String val = props.getProperty(javaName);
       
   364                 int pos = val.indexOf(' ');
       
   365                 String mimeName;
       
   366                 //int lastPrintable;
       
   367                 if (pos < 0)
       
   368                 {
       
   369                     // Maybe report/log this problem?
       
   370                     //  "Last printable character not defined for encoding " +
       
   371                     //  mimeName + " (" + val + ")" ...
       
   372                     mimeName = val;
       
   373                     //lastPrintable = 0x00FF;
       
   374                 }
   348                 }
   375                 else
   349             }
   376                 {
   350             return props;
   377                     //lastPrintable =
   351         }
   378                     //    Integer.decode(val.substring(pos).trim()).intValue();
   352 
   379                     StringTokenizer st =
   353         // Parses the mime list associated to a java charset name.
   380                         new StringTokenizer(val.substring(0, pos), ",");
   354         // The first mime name in the list is supposed to be the preferred
   381                     for (boolean first = true;
   355         // mime name.
   382                         st.hasMoreTokens();
   356         private String[] parseMimeTypes(String val) {
   383                         first = false)
   357             int pos = val.indexOf(' ');
   384                     {
   358             //int lastPrintable;
   385                         mimeName = st.nextToken();
   359             if (pos < 0) {
   386                         ret[j] =
   360                 // Maybe report/log this problem?
   387                             new EncodingInfo(mimeName, javaName);
   361                 //  "Last printable character not defined for encoding " +
   388                         _encodingTableKeyMime.put(
   362                 //  mimeName + " (" + val + ")" ...
   389                             mimeName.toUpperCase(),
   363                 return new String[] { val };
   390                             ret[j]);
   364                 //lastPrintable = 0x00FF;
   391                         if (first)
   365             }
   392                             _encodingTableKeyJava.put(
   366             //lastPrintable =
   393                                 javaName.toUpperCase(),
   367             //    Integer.decode(val.substring(pos).trim()).intValue();
   394                                 ret[j]);
   368             StringTokenizer st =
   395                         j++;
   369                     new StringTokenizer(val.substring(0, pos), ",");
       
   370             String[] values = new String[st.countTokens()];
       
   371             for (int i=0; st.hasMoreTokens(); i++) {
       
   372                 values[i] = st.nextToken();
       
   373             }
       
   374             return values;
       
   375         }
       
   376 
       
   377         // This method here attempts to find the canonical charset name for the
       
   378         // given name - which is supposed to be either a java name or a mime
       
   379         // name.
       
   380         // For that, it attempts to load the charset using the given name, and
       
   381         // then returns the charset's canonical name.
       
   382         // If the charset could not be loaded from the given name,
       
   383         // the method returns null.
       
   384         private String findCharsetNameFor(String name) {
       
   385             try {
       
   386                 return Charset.forName(name).name();
       
   387             } catch (Exception x) {
       
   388                 return null;
       
   389             }
       
   390         }
       
   391 
       
   392         // This method here attempts to find the canonical charset name for the
       
   393         // set javaName+mimeNames - which are supposed to all refer to the
       
   394         // same charset.
       
   395         // For that it attempts to load the charset using the javaName, and if
       
   396         // not found, attempts again using each of the mime names in turn.
       
   397         // If the charset could be loaded from the javaName, then the javaName
       
   398         // itself is returned as charset name. Otherwise, each of the mime names
       
   399         // is tried in turn, until a charset can be loaded from one of the names,
       
   400         // and the loaded charset's canonical name is returned.
       
   401         // If no charset can be loaded from either the javaName or one of the
       
   402         // mime names, then null is returned.
       
   403         //
       
   404         // Note that the returned name is the 'java' name that will be used in
       
   405         // instances of EncodingInfo.
       
   406         // This is important because EncodingInfo uses that 'java name' later on
       
   407         // in calls to String.getBytes(javaName).
       
   408         // As it happens, sometimes only one element of the set mime names/javaName
       
   409         // is known by Charset: sometimes only one of the mime names is known,
       
   410         // sometimes only the javaName is known, sometimes all are known.
       
   411         //
       
   412         // By using this method here, we fix the problem where one of the mime
       
   413         // names is known but the javaName is unknown, by associating the charset
       
   414         // loaded from one of the mime names with the unrecognized javaName.
       
   415         //
       
   416         // When none of the mime names or javaName are known - there's not much we can
       
   417         // do... It can mean that this encoding is not supported for this
       
   418         // OS. If such a charset is ever used, it will result in having all characters
       
   419         // escaped.
       
   420         //
       
   421         private String findCharsetNameFor(String javaName, String[] mimes) {
       
   422             String cs = findCharsetNameFor(javaName);
       
   423             if (cs != null) return javaName;
       
   424             for (String m : mimes) {
       
   425                 cs = findCharsetNameFor(m);
       
   426                 if (cs != null) break;
       
   427             }
       
   428             return cs;
       
   429         }
       
   430 
       
   431         /**
       
   432          * Loads a list of all the supported encodings.
       
   433          *
       
   434          * System property "encodings" formatted using URL syntax may define an
       
   435          * external encodings list. Thanks to Sergey Ushakov for the code
       
   436          * contribution!
       
   437          */
       
   438         private void loadEncodingInfo() {
       
   439             try {
       
   440                 // load (java name)->(preferred mime name) mapping.
       
   441                 final Properties props = loadProperties();
       
   442 
       
   443                 // create instances of EncodingInfo from the loaded mapping
       
   444                 Enumeration keys = props.keys();
       
   445                 Map<String, EncodingInfo> canonicals = new HashMap<>();
       
   446                 while (keys.hasMoreElements()) {
       
   447                     final String javaName = (String) keys.nextElement();
       
   448                     final String[] mimes = parseMimeTypes(props.getProperty(javaName));
       
   449 
       
   450                     final String charsetName = findCharsetNameFor(javaName, mimes);
       
   451                     if (charsetName != null) {
       
   452                         final String kj = toUpperCaseFast(javaName);
       
   453                         final String kc = toUpperCaseFast(charsetName);
       
   454                         for (int i = 0; i < mimes.length; ++i) {
       
   455                             final String mimeName = mimes[i];
       
   456                             final String km = toUpperCaseFast(mimeName);
       
   457                             EncodingInfo info = new EncodingInfo(mimeName, charsetName);
       
   458                             _encodingTableKeyMime.put(km, info);
       
   459                             if (!canonicals.containsKey(kc)) {
       
   460                                 // canonicals will map the charset name to
       
   461                                 //   the info containing the preferred mime name
       
   462                                 //   (the preferred mime name is the first mime
       
   463                                 //   name in the list).
       
   464                                 canonicals.put(kc, info);
       
   465                                 _encodingTableKeyJava.put(kc, info);
       
   466                             }
       
   467                             _encodingTableKeyJava.put(kj, info);
       
   468                         }
       
   469                     } else {
       
   470                         // None of the java or mime names on the line were
       
   471                         // recognized => this charset is not supported?
   396                     }
   472                     }
   397                 }
   473                 }
   398             }
   474 
   399             return ret;
   475                 // Fix up the _encodingTableKeyJava so that the info mapped to
   400         }
   476                 // the java name contains the preferred mime name.
   401         catch (java.net.MalformedURLException mue)
   477                 // (a given java name can correspond to several mime names,
   402         {
   478                 //  but we want the _encodingTableKeyJava to point to the
   403             throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(mue);
   479                 //  preferred mime name).
   404         }
   480                 for (Entry<String, EncodingInfo> e : _encodingTableKeyJava.entrySet()) {
   405         catch (java.io.IOException ioe)
   481                     e.setValue(canonicals.get(toUpperCaseFast(e.getValue().javaName)));
   406         {
   482                 }
   407             throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(ioe);
   483 
       
   484             } catch (java.net.MalformedURLException mue) {
       
   485                 throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(mue);
       
   486             } catch (java.io.IOException ioe) {
       
   487                 throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(ioe);
       
   488             }
       
   489         }
       
   490 
       
   491         EncodingInfo findEncoding(String normalizedEncoding) {
       
   492             EncodingInfo info = _encodingTableKeyJava.get(normalizedEncoding);
       
   493             if (info == null) {
       
   494                 info = _encodingTableKeyMime.get(normalizedEncoding);
       
   495             }
       
   496             if (info == null) {
       
   497                 info = _encodingDynamicTable.get(normalizedEncoding);
       
   498             }
       
   499             return info;
       
   500         }
       
   501 
       
   502         EncodingInfo getEncodingFromMimeKey(String normalizedMimeName) {
       
   503             return _encodingTableKeyMime.get(normalizedMimeName);
       
   504         }
       
   505 
       
   506         EncodingInfo getEncodingFromJavaKey(String normalizedJavaName) {
       
   507             return _encodingTableKeyJava.get(normalizedJavaName);
       
   508         }
       
   509 
       
   510         void putEncoding(String key, EncodingInfo info) {
       
   511             _encodingDynamicTable.put(key, info);
   408         }
   512         }
   409     }
   513     }
   410 
   514 
   411     /**
   515     /**
   412      * Return true if the character is the high member of a surrogate pair.
   516      * Return true if the character is the high member of a surrogate pair.
   455     static int toCodePoint(char ch) {
   559     static int toCodePoint(char ch) {
   456         int codePoint = ch;
   560         int codePoint = ch;
   457         return codePoint;
   561         return codePoint;
   458     }
   562     }
   459 
   563 
   460     private static final HashMap _encodingTableKeyJava = new HashMap();
   564     private final static EncodingInfos _encodingInfos = new EncodingInfos();
   461     private static final HashMap _encodingTableKeyMime = new HashMap();
   565 
   462     private static final EncodingInfo[] _encodings = loadEncodingInfo();
       
   463 }
   566 }