283 * |
284 * |
284 * @return ISO-style encoding string. |
285 * @return ISO-style encoding string. |
285 */ |
286 */ |
286 public static String convertMime2JavaEncoding(String encoding) |
287 public static String convertMime2JavaEncoding(String encoding) |
287 { |
288 { |
288 |
289 final EncodingInfo info = _encodingInfos.findEncoding(toUpperCaseFast(encoding)); |
289 for (int i = 0; i < _encodings.length; ++i) |
290 return info != null ? info.javaName : encoding; |
290 { |
291 } |
291 if (_encodings[i].name.equalsIgnoreCase(encoding)) |
292 |
292 { |
293 // Using an inner static class here prevent initialization races |
293 return _encodings[i].javaName; |
294 // where the hash maps could be used before they were populated. |
294 } |
295 // |
295 } |
296 private final static class EncodingInfos { |
296 |
297 // These maps are final and not modified after initialization. |
297 return encoding; |
298 private final Map<String, EncodingInfo> _encodingTableKeyJava = new HashMap<>(); |
298 } |
299 private final Map<String, EncodingInfo> _encodingTableKeyMime = new HashMap<>(); |
299 |
300 // This map will be added to after initialization: make sure it's |
300 /** |
301 // thread-safe. This map should not be used frequently - only in cases |
301 * Load a list of all the supported encodings. |
302 // where the mapping requested was not declared in the Encodings.properties |
302 * |
303 // file. |
303 * System property "encodings" formatted using URL syntax may define an |
304 private final Map<String, EncodingInfo> _encodingDynamicTable = |
304 * external encodings list. Thanks to Sergey Ushakov for the code |
305 Collections.synchronizedMap(new HashMap<String, EncodingInfo>()); |
305 * contribution! |
306 |
306 */ |
307 private EncodingInfos() { |
307 private static EncodingInfo[] loadEncodingInfo() |
308 loadEncodingInfo(); |
308 { |
309 } |
309 try |
310 |
310 { |
311 // Opens the file/resource containing java charset name -> preferred mime |
|
312 // name mapping and returns it as an InputStream. |
|
313 private InputStream openEncodingsFileStream() throws MalformedURLException, IOException { |
311 String urlString = null; |
314 String urlString = null; |
312 InputStream is = null; |
315 InputStream is = null; |
313 |
316 |
314 try |
317 try { |
315 { |
|
316 urlString = SecuritySupport.getSystemProperty(ENCODINGS_PROP, ""); |
318 urlString = SecuritySupport.getSystemProperty(ENCODINGS_PROP, ""); |
317 } |
319 } catch (SecurityException e) { |
318 catch (SecurityException e) |
|
319 { |
|
320 } |
320 } |
321 |
321 |
322 if (urlString != null && urlString.length() > 0) { |
322 if (urlString != null && urlString.length() > 0) { |
323 URL url = new URL(urlString); |
323 URL url = new URL(urlString); |
324 is = url.openStream(); |
324 is = url.openStream(); |
325 } |
325 } |
326 |
326 |
327 if (is == null) { |
327 if (is == null) { |
328 is = SecuritySupport.getResourceAsStream(ENCODINGS_FILE); |
328 is = SecuritySupport.getResourceAsStream(ENCODINGS_FILE); |
329 } |
329 } |
330 |
330 return is; |
|
331 } |
|
332 |
|
333 // Loads the Properties resource containing the mapping: |
|
334 // java charset name -> preferred mime name |
|
335 // and returns it. |
|
336 private Properties loadProperties() throws MalformedURLException, IOException { |
331 Properties props = new Properties(); |
337 Properties props = new Properties(); |
332 if (is != null) { |
338 try (InputStream is = openEncodingsFileStream()) { |
333 props.load(is); |
339 if (is != null) { |
334 is.close(); |
340 props.load(is); |
335 } else { |
341 } else { |
336 // Seems to be no real need to force failure here, let the |
342 // Seems to be no real need to force failure here, let the |
337 // system do its best... The issue is not really very critical, |
343 // system do its best... The issue is not really very critical, |
338 // and the output will be in any case _correct_ though maybe not |
344 // and the output will be in any case _correct_ though maybe not |
339 // always human-friendly... :) |
345 // always human-friendly... :) |
340 // But maybe report/log the resource problem? |
346 // But maybe report/log the resource problem? |
341 // Any standard ways to report/log errors (in static context)? |
347 // Any standard ways to report/log errors (in static context)? |
342 } |
|
343 |
|
344 int totalEntries = props.size(); |
|
345 int totalMimeNames = 0; |
|
346 Enumeration keys = props.keys(); |
|
347 for (int i = 0; i < totalEntries; ++i) |
|
348 { |
|
349 String javaName = (String) keys.nextElement(); |
|
350 String val = props.getProperty(javaName); |
|
351 totalMimeNames++; |
|
352 int pos = val.indexOf(' '); |
|
353 for (int j = 0; j < pos; ++j) |
|
354 if (val.charAt(j) == ',') |
|
355 totalMimeNames++; |
|
356 } |
|
357 EncodingInfo[] ret = new EncodingInfo[totalMimeNames]; |
|
358 int j = 0; |
|
359 keys = props.keys(); |
|
360 for (int i = 0; i < totalEntries; ++i) |
|
361 { |
|
362 String javaName = (String) keys.nextElement(); |
|
363 String val = props.getProperty(javaName); |
|
364 int pos = val.indexOf(' '); |
|
365 String mimeName; |
|
366 //int lastPrintable; |
|
367 if (pos < 0) |
|
368 { |
|
369 // Maybe report/log this problem? |
|
370 // "Last printable character not defined for encoding " + |
|
371 // mimeName + " (" + val + ")" ... |
|
372 mimeName = val; |
|
373 //lastPrintable = 0x00FF; |
|
374 } |
348 } |
375 else |
349 } |
376 { |
350 return props; |
377 //lastPrintable = |
351 } |
378 // Integer.decode(val.substring(pos).trim()).intValue(); |
352 |
379 StringTokenizer st = |
353 // Parses the mime list associated to a java charset name. |
380 new StringTokenizer(val.substring(0, pos), ","); |
354 // The first mime name in the list is supposed to be the preferred |
381 for (boolean first = true; |
355 // mime name. |
382 st.hasMoreTokens(); |
356 private String[] parseMimeTypes(String val) { |
383 first = false) |
357 int pos = val.indexOf(' '); |
384 { |
358 //int lastPrintable; |
385 mimeName = st.nextToken(); |
359 if (pos < 0) { |
386 ret[j] = |
360 // Maybe report/log this problem? |
387 new EncodingInfo(mimeName, javaName); |
361 // "Last printable character not defined for encoding " + |
388 _encodingTableKeyMime.put( |
362 // mimeName + " (" + val + ")" ... |
389 mimeName.toUpperCase(), |
363 return new String[] { val }; |
390 ret[j]); |
364 //lastPrintable = 0x00FF; |
391 if (first) |
365 } |
392 _encodingTableKeyJava.put( |
366 //lastPrintable = |
393 javaName.toUpperCase(), |
367 // Integer.decode(val.substring(pos).trim()).intValue(); |
394 ret[j]); |
368 StringTokenizer st = |
395 j++; |
369 new StringTokenizer(val.substring(0, pos), ","); |
|
370 String[] values = new String[st.countTokens()]; |
|
371 for (int i=0; st.hasMoreTokens(); i++) { |
|
372 values[i] = st.nextToken(); |
|
373 } |
|
374 return values; |
|
375 } |
|
376 |
|
377 // This method here attempts to find the canonical charset name for the |
|
378 // the given name - which is supposed to be either a java name or a mime |
|
379 // name. |
|
380 // For that, it attempts to load the charset using the given name, and |
|
381 // then returns the charset's canonical name. |
|
382 // If the charset could not be loaded from the given name, |
|
383 // the method returns null. |
|
384 private String findCharsetNameFor(String name) { |
|
385 try { |
|
386 return Charset.forName(name).name(); |
|
387 } catch (Exception x) { |
|
388 return null; |
|
389 } |
|
390 } |
|
391 |
|
392 // This method here attempts to find the canonical charset name for the |
|
393 // the set javaName+mimeNames - which are supposed to all refer to the |
|
394 // same charset. |
|
395 // For that it attempts to load the charset using the javaName, and if |
|
396 // not found, attempts again using each of the mime names in turn. |
|
397 // If the charset could be loaded from the javaName, then the javaName |
|
398 // itself is returned as charset name. Otherwise, each of the mime names |
|
399 // is tried in turn, until a charset can be loaded from one of the names, |
|
400 // and the loaded charset's canonical name is returned. |
|
401 // If no charset can be loaded from either the javaName or one of the |
|
402 // mime names, then null is returned. |
|
403 // |
|
404 // Note that the returned name is the 'java' name that will be used in |
|
405 // instances of EncodingInfo. |
|
406 // This is important because EncodingInfo uses that 'java name' later on |
|
407 // in calls to String.getBytes(javaName). |
|
408 // As it happens, sometimes only one element of the set mime names/javaName |
|
409 // is known by Charset: sometimes only one of the mime names is known, |
|
410 // sometime only the javaName is known, sometimes all are known. |
|
411 // |
|
412 // By using this method here, we fix the problem where one of the mime |
|
413 // names is known but the javaName is unknown, by associating the charset |
|
414 // loaded from one of the mime names with the unrecognized javaName. |
|
415 // |
|
416 // When none of the mime names or javaName are known - there's not much we can |
|
417 // do... It can mean that this encoding is not supported for this |
|
418 // OS. If such a charset is ever use it will result in having all characters |
|
419 // escaped. |
|
420 // |
|
421 private String findCharsetNameFor(String javaName, String[] mimes) { |
|
422 String cs = findCharsetNameFor(javaName); |
|
423 if (cs != null) return javaName; |
|
424 for (String m : mimes) { |
|
425 cs = findCharsetNameFor(m); |
|
426 if (cs != null) break; |
|
427 } |
|
428 return cs; |
|
429 } |
|
430 |
|
431 /** |
|
432 * Loads a list of all the supported encodings. |
|
433 * |
|
434 * System property "encodings" formatted using URL syntax may define an |
|
435 * external encodings list. Thanks to Sergey Ushakov for the code |
|
436 * contribution! |
|
437 */ |
|
438 private void loadEncodingInfo() { |
|
439 try { |
|
440 // load (java name)->(preferred mime name) mapping. |
|
441 final Properties props = loadProperties(); |
|
442 |
|
443 // create instances of EncodingInfo from the loaded mapping |
|
444 Enumeration keys = props.keys(); |
|
445 Map<String, EncodingInfo> canonicals = new HashMap<>(); |
|
446 while (keys.hasMoreElements()) { |
|
447 final String javaName = (String) keys.nextElement(); |
|
448 final String[] mimes = parseMimeTypes(props.getProperty(javaName)); |
|
449 |
|
450 final String charsetName = findCharsetNameFor(javaName, mimes); |
|
451 if (charsetName != null) { |
|
452 final String kj = toUpperCaseFast(javaName); |
|
453 final String kc = toUpperCaseFast(charsetName); |
|
454 for (int i = 0; i < mimes.length; ++i) { |
|
455 final String mimeName = mimes[i]; |
|
456 final String km = toUpperCaseFast(mimeName); |
|
457 EncodingInfo info = new EncodingInfo(mimeName, charsetName); |
|
458 _encodingTableKeyMime.put(km, info); |
|
459 if (!canonicals.containsKey(kc)) { |
|
460 // canonicals will map the charset name to |
|
461 // the info containing the prefered mime name |
|
462 // (the preferred mime name is the first mime |
|
463 // name in the list). |
|
464 canonicals.put(kc, info); |
|
465 _encodingTableKeyJava.put(kc, info); |
|
466 } |
|
467 _encodingTableKeyJava.put(kj, info); |
|
468 } |
|
469 } else { |
|
470 // None of the java or mime names on the line were |
|
471 // recognized => this charset is not supported? |
396 } |
472 } |
397 } |
473 } |
398 } |
474 |
399 return ret; |
475 // Fix up the _encodingTableKeyJava so that the info mapped to |
400 } |
476 // the java name contains the preferred mime name. |
401 catch (java.net.MalformedURLException mue) |
477 // (a given java name can correspond to several mime name, |
402 { |
478 // but we want the _encodingTableKeyJava to point to the |
403 throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(mue); |
479 // preferred mime name). |
404 } |
480 for (Entry<String, EncodingInfo> e : _encodingTableKeyJava.entrySet()) { |
405 catch (java.io.IOException ioe) |
481 e.setValue(canonicals.get(toUpperCaseFast(e.getValue().javaName))); |
406 { |
482 } |
407 throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(ioe); |
483 |
|
484 } catch (java.net.MalformedURLException mue) { |
|
485 throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(mue); |
|
486 } catch (java.io.IOException ioe) { |
|
487 throw new com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException(ioe); |
|
488 } |
|
489 } |
|
490 |
|
491 EncodingInfo findEncoding(String normalizedEncoding) { |
|
492 EncodingInfo info = _encodingTableKeyJava.get(normalizedEncoding); |
|
493 if (info == null) { |
|
494 info = _encodingTableKeyMime.get(normalizedEncoding); |
|
495 } |
|
496 if (info == null) { |
|
497 info = _encodingDynamicTable.get(normalizedEncoding); |
|
498 } |
|
499 return info; |
|
500 } |
|
501 |
|
502 EncodingInfo getEncodingFromMimeKey(String normalizedMimeName) { |
|
503 return _encodingTableKeyMime.get(normalizedMimeName); |
|
504 } |
|
505 |
|
506 EncodingInfo getEncodingFromJavaKey(String normalizedJavaName) { |
|
507 return _encodingTableKeyJava.get(normalizedJavaName); |
|
508 } |
|
509 |
|
510 void putEncoding(String key, EncodingInfo info) { |
|
511 _encodingDynamicTable.put(key, info); |
408 } |
512 } |
409 } |
513 } |
410 |
514 |
411 /** |
515 /** |
412 * Return true if the character is the high member of a surrogate pair. |
516 * Return true if the character is the high member of a surrogate pair. |