8027607: (rb) Provide UTF-8 based properties resource bundles
Reviewed-by: okutsu, sherman
--- a/jdk/src/java.base/share/classes/java/util/PropertyResourceBundle.java Wed Jul 29 11:47:19 2015 +0200
+++ b/jdk/src/java.base/share/classes/java/util/PropertyResourceBundle.java Wed Jul 29 13:36:53 2015 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -40,8 +40,17 @@
package java.util;
import java.io.InputStream;
+import java.io.InputStreamReader;
import java.io.Reader;
import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.MalformedInputException;
+import java.nio.charset.StandardCharsets;
+import java.nio.charset.UnmappableCharacterException;
+import java.security.AccessController;
+import java.util.Locale;
+import sun.security.action.GetPropertyAction;
+import sun.util.PropertyResourceBundleCharset;
import sun.util.ResourceBundleEnumeration;
/**
@@ -108,11 +117,20 @@
* <strong>Note:</strong> PropertyResourceBundle can be constructed either
* from an InputStream or a Reader, which represents a property file.
* Constructing a PropertyResourceBundle instance from an InputStream requires
- * that the input stream be encoded in ISO-8859-1. In that case, characters
- * that cannot be represented in ISO-8859-1 encoding must be represented by Unicode Escapes
- * as defined in section 3.3 of
- * <cite>The Java™ Language Specification</cite>
+ * that the input stream be encoded in UTF-8. By default, if a
+ * {@link java.nio.charset.MalformedInputException} or an
+ * {@link java.nio.charset.UnmappableCharacterException} occurs on reading the
+ * input stream, then the PropertyResourceBundle instance resets to the state
+ * before the exception, re-reads the input stream in {@code ISO-8859-1}, and
+ * continues reading. If the system property
+ * {@code java.util.PropertyResourceBundle.encoding} is set to either
+ * "ISO-8859-1" or "UTF-8", the input stream is solely read in that encoding,
+ * and throws the exception if it encounters an invalid sequence.
+ * If "ISO-8859-1" is specified, characters that cannot be represented in
+ * ISO-8859-1 encoding must be represented by Unicode Escapes as defined in section
+ * 3.3 of <cite>The Java™ Language Specification</cite>
* whereas the other constructor which takes a Reader does not have that limitation.
+ * Other encoding values are ignored for this system property.
*
* @see ResourceBundle
* @see ListResourceBundle
@@ -120,10 +138,26 @@
* @since 1.1
*/
public class PropertyResourceBundle extends ResourceBundle {
+
+ // Upper-cased value of the "java.util.PropertyResourceBundle.encoding" system property ("" if unset).
+ // Only "ISO-8859-1" and "UTF-8" select a strict encoding; any other value is ignored.
+ private static final String encoding =
+ AccessController.doPrivileged(
+ new GetPropertyAction("java.util.PropertyResourceBundle.encoding", ""))
+ .toUpperCase(Locale.ROOT);
+
/**
* Creates a property resource bundle from an {@link java.io.InputStream
- * InputStream}. The property file read with this constructor
- * must be encoded in ISO-8859-1.
+ * InputStream}. This constructor reads the property file in UTF-8 by default.
+ * If a {@link java.nio.charset.MalformedInputException} or an
+ * {@link java.nio.charset.UnmappableCharacterException} occurs on reading the
+ * input stream, then the PropertyResourceBundle instance resets to the state
+ * before the exception, re-reads the input stream in {@code ISO-8859-1} and
+ * continues reading. If the system property
+ * {@code java.util.PropertyResourceBundle.encoding} is set to either
+ * "ISO-8859-1" or "UTF-8", the input stream is solely read in that encoding,
+ * and throws the exception if it encounters an invalid sequence. Other
+ * encoding values are ignored for this system property.
*
* @param stream an InputStream that represents a property file
* to read from.
@@ -131,12 +165,19 @@
* @throws NullPointerException if <code>stream</code> is null
* @throws IllegalArgumentException if {@code stream} contains a
* malformed Unicode escape sequence.
+ * @throws MalformedInputException if the system property
+ * {@code java.util.PropertyResourceBundle.encoding} is set to "UTF-8"
+ * and {@code stream} contains an invalid UTF-8 byte sequence.
+ * @throws UnmappableCharacterException if the system property
+ * {@code java.util.PropertyResourceBundle.encoding} is set to "UTF-8"
+ * and {@code stream} contains an unmappable UTF-8 byte sequence.
*/
@SuppressWarnings({"unchecked", "rawtypes"})
public PropertyResourceBundle (InputStream stream) throws IOException {
- Properties properties = new Properties();
- properties.load(stream);
- lookup = new HashMap(properties);
+ this(new InputStreamReader(stream, // wrap the stream in a Reader and delegate to another constructor
+ "ISO-8859-1".equals(encoding) ? // strict ISO-8859-1 was requested via the system property
+ StandardCharsets.ISO_8859_1.newDecoder() :
+ new PropertyResourceBundleCharset("UTF-8".equals(encoding)).newDecoder())); // true = strict UTF-8; false = UTF-8 with ISO-8859-1 fallback
}
/**
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.base/share/classes/sun/util/PropertyResourceBundleCharset.java Wed Jul 29 13:36:53 2015 -0700
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.util;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+import java.util.Objects;
+import sun.util.logging.PlatformLogger;
+
+/**
+ * A Charset used by PropertyResourceBundle for loading properties files.
+ * Its decoder first reads the input as UTF-8. Unless strict UTF-8 was
+ * requested, a malformed or unmappable sequence makes it switch to ISO-8859-1.
+ */
+public class PropertyResourceBundleCharset extends Charset {
+
+ private boolean strictUTF8 = false; // when true, UTF-8 decoding errors are returned instead of triggering the fallback
+
+ public PropertyResourceBundleCharset(boolean strictUTF8) {
+ this(PropertyResourceBundleCharset.class.getCanonicalName(), null); // charset name is this class's canonical name; no aliases
+ this.strictUTF8 = strictUTF8;
+ }
+
+ public PropertyResourceBundleCharset(String canonicalName, String[] aliases) {
+ super(canonicalName, aliases);
+ }
+
+ @Override
+ public boolean contains(Charset cs) {
+ return false;
+ }
+
+ @Override
+ public CharsetDecoder newDecoder() {
+ return new PropertiesFileDecoder(this, 1.0f, 1.0f);
+ }
+
+ @Override
+ public CharsetEncoder newEncoder() {
+ throw new UnsupportedOperationException("Encoding is not supported"); // this charset is decode-only
+ }
+
+ private final class PropertiesFileDecoder extends CharsetDecoder {
+
+ private CharsetDecoder cdUTF_8 = StandardCharsets.UTF_8.newDecoder()
+ .onMalformedInput(CodingErrorAction.REPORT) // REPORT so decodeLoop sees the error result
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+ private CharsetDecoder cdISO_8859_1 = null; // created on the first UTF-8 decoding error in non-strict mode
+
+ protected PropertiesFileDecoder(Charset cs,
+ float averageCharsPerByte, float maxCharsPerByte) {
+ super(cs, averageCharsPerByte, maxCharsPerByte);
+ }
+
+ protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+ if (Objects.nonNull(cdISO_8859_1)) { // fallback already happened: keep decoding as ISO-8859-1
+ return cdISO_8859_1.decode(in, out, false);
+ }
+ in.mark(); // remember both positions so a failed UTF-8 attempt can be rewound
+ out.mark(); // NOTE(review): marks are per call, so output of earlier calls is not re-decoded
+
+ CoderResult cr = cdUTF_8.decode(in, out, false);
+ if (cr.isUnderflow() || cr.isOverflow() ||
+ PropertyResourceBundleCharset.this.strictUTF8) {
+ return cr; // no decoding error, or strict mode: hand the UTF-8 result to the caller unchanged
+ }
+
+ in.reset(); // rewind to the positions marked above
+ out.reset();
+
+ PlatformLogger.getLogger(getClass().getCanonicalName()).info(
+ "Invalid or unmappable UTF-8 sequence detected. " +
+ "Switching encoding from UTF-8 to ISO-8859-1");
+ cdISO_8859_1 = StandardCharsets.ISO_8859_1.newDecoder(); // non-null from now on, so later calls take the branch at the top
+ return cdISO_8859_1.decode(in, out, false);
+ }
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/util/ResourceBundle/UTF8Properties/CodePointTest.java Wed Jul 29 13:36:53 2015 -0700
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ * @test
+ * @bug 8027607
+ * @summary Test UTF-8 based properties files can be loaded successfully.
+ * @run main CodePointTest
+ * @run main/othervm -Djava.util.PropertyResourceBundle.encoding=ISO-8859-1 CodePointTest
+ * @run main/othervm -Djava.util.PropertyResourceBundle.encoding=UTF-8 CodePointTest
+ */
+
+import java.io.*;
+import java.nio.charset.*;
+import java.nio.file.*;
+import java.util.*;
+import static java.util.ResourceBundle.Control;
+import java.util.stream.*;
+
+/*
+ * Dumps every legal character in ISO-8859-1/UTF-8/US-ASCII into
+ * a <CharSet>.properties file. Each entry has a form of
+ * "keyXXXX=c", where "XXXX" is a code point (variable length)
+ * and "c" is the character encoded in the passed character set.
+ * Then, loads it with ResourceBundle.Control.newBundle() and compares both
+ * contents. This confirms the following two functions:
+ * - For UTF-8.properties, UTF-8 code points are loaded correctly
+ * - For ISO-8859-1.properties, UTF-8->ISO-8859-1 fallback works
+ * Repeats the test with "java.util.PropertyResourceBundle.encoding" set
+ * to "ISO-8859-1", where a content mismatch is tolerated for UTF-8.properties,
+ * and to "UTF-8", where loading ISO-8859-1.properties must throw.
+ */
+public class CodePointTest {
+ static final Charset[] props = {StandardCharsets.ISO_8859_1,
+ StandardCharsets.UTF_8,
+ StandardCharsets.US_ASCII};
+ static final String encoding =
+ System.getProperty("java.util.PropertyResourceBundle.encoding", "");
+
+ public static void main(String[] args) {
+ for (Charset cs : props) {
+ try {
+ checkProps(cs,
+ cs == StandardCharsets.UTF_8 &&
+ encoding.equals("ISO-8859-1")); // shouldFail: UTF-8 file read under strict ISO-8859-1
+
+ if (cs == StandardCharsets.ISO_8859_1 &&
+ encoding.equals("UTF-8")) {
+ // checkProps returned normally, but strict UTF-8 should have rejected this file
+ throw new RuntimeException("Reading ISO-8859-1 properties in "+
+ "strict UTF-8 encoding should throw an exception");
+ }
+ } catch (IOException e) {
+ if ((e instanceof MalformedInputException ||
+ e instanceof UnmappableCharacterException) &&
+ cs == StandardCharsets.ISO_8859_1 &&
+ encoding.equals("UTF-8")) {
+ // Expected exception is correctly detected.
+ } else {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+ }
+
+ static void checkProps(Charset cs, boolean shouldFail) throws IOException {
+ int start = Character.MIN_CODE_POINT;
+ int end= 0;
+
+ switch (cs.name()) { // pick the highest code point written for this charset
+ case "ISO-8859-1":
+ end = 0xff;
+ break;
+ case "UTF-8":
+ end = Character.MAX_CODE_POINT;
+ break;
+ case "US-ASCII":
+ end = 0x7f;
+ break;
+ default:
+ assert false;
+ }
+
+ Properties p = new Properties();
+ String outputName = cs.name() + ".properties";
+
+ // Forget previous test artifacts
+ ResourceBundle.clearCache();
+
+ IntStream.range(start, end+1).forEach(c ->
+ {
+ if (Character.isDefined(c) &&
+ (Character.isSupplementaryCodePoint(c) ||
+ !Character.isSurrogate((char)c))) { // skip undefined code points and surrogate code units
+ p.setProperty("key"+Integer.toHexString(c),
+ Character.isSupplementaryCodePoint(c) ?
+ String.valueOf(Character.toChars(c)) :
+ Character.toString((char)c));
+ }
+ }
+ );
+
+ try (BufferedWriter bw = Files.newBufferedWriter(
+ FileSystems.getDefault().getPath(System.getProperty("test.classes", "."),
+ outputName), cs)) { // file is written in the charset under test
+ p.store(bw, null);
+ } catch (IOException ex) {
+ throw new RuntimeException(ex);
+ }
+
+ // try loading it; the bundle base name is the charset name
+ Control c = Control.getControl(Control.FORMAT_PROPERTIES);
+ ResourceBundle rb;
+ try {
+ rb = c.newBundle(cs.name(), Locale.ROOT, "java.properties",
+ CodePointTest.class.getClassLoader(), false);
+ } catch (IllegalAccessException |
+ InstantiationException ex) {
+ throw new RuntimeException(ex);
+ }
+ Properties result = new Properties();
+ rb.keySet().stream().forEach((key) -> {
+ result.setProperty(key, rb.getString(key));
+ });
+
+ if (!p.equals(result) && !shouldFail) { // NOTE(review): when shouldFail is true, an unexpected match is not reported
+ System.out.println("Charset: "+cs);
+ rb.keySet().stream().sorted().forEach((key) -> {
+ if (!p.getProperty(key).equals(result.getProperty(key))) {
+ System.out.println(key+": file: "+p.getProperty(key)+", RB: "+result.getProperty(key));
+ }
+ });
+ throw new RuntimeException("not equal!");
+ }
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/test/java/util/ResourceBundle/UTF8Properties/IllegalSequenceTest.java Wed Jul 29 13:36:53 2015 -0700
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ * @test
+ * @bug 8027607
+ * @summary Test whether illegal UTF-8 sequences are handled correctly.
+ * @run main/othervm -Djava.util.PropertyResourceBundle.encoding=UTF-8 IllegalSequenceTest
+ */
+
+import java.io.*;
+import java.nio.charset.*;
+import java.util.*;
+
+public class IllegalSequenceTest { // every sequence below must be rejected; the test runs with encoding=UTF-8 (see @run)
+ static final byte[][] illegalSequences = {
+ {(byte)0xc0, (byte)0xaf}, // non-shortest UTF-8
+ {(byte)0xc2, (byte)0xe0}, // consecutive leading bytes
+ {(byte)0xc2, (byte)0x80, (byte)0x80}, // two byte leading + 2 trailing
+ {(byte)0xe0, (byte)0x80}, // three byte leading + 1 trailing
+ {(byte)0xf4, (byte)0x90, (byte)0x80, (byte)0x80}, // 0x110000 (over U+10FFFF)
+ };
+
+ public static void main(String[] args) throws IOException {
+ for (byte[] illegalSec: illegalSequences) {
+ try (InputStream is = new ByteArrayInputStream(illegalSec)) {
+ ResourceBundle rb = new PropertyResourceBundle(is); // expected to throw for each illegal sequence
+ rb.getString("key");
+ } catch (MalformedInputException |
+ UnmappableCharacterException e) {
+ // success
+ continue;
+ }
+ throw new RuntimeException("Excepted exception was not thrown."); // NOTE(review): "Excepted" is presumably a typo for "Expected"
+ }
+ }
+}