src/demo/share/jpackager/JNLPConverter/src/jnlp/converter/parser/xml/XMLEncoding.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/demo/share/jpackager/JNLPConverter/src/jnlp/converter/parser/xml/XMLEncoding.java Fri Oct 12 19:00:51 2018 -0400
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2006, 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package jnlp.converter.parser.xml;
+
+import java.io.ByteArrayInputStream;
+import java.io.EOFException;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
+
/**
 * Decodes the raw bytes of an XML document into a {@code String}, guessing
 * the character encoding from a Byte Order Mark (BOM) or from the XML
 * declaration at the start of the document.
 */
public class XMLEncoding {
    /**
     * Decodes a byte stream into a String by testing for a Byte Order Mark
     * (BOM) or an XML declaration.
     * <br />
     * Detection begins by examining the first four octets of the stream for a
     * BOM or for the characters {@code <?xm} in a known encoding family. When
     * the document starts with an XML declaration in an ASCII-compatible
     * encoding or in EBCDIC, the {@code encoding} pseudo-attribute of that
     * declaration is honoured. If the encoding still can not be determined at
     * this point, then UTF-8 is assumed.
     *
     * @param data an array of bytes containing an encoded XML document;
     *             must not be {@code null}.
     *
     * @return A string containing the decoded XML document. A UTF-8 BOM is
     *         not part of the returned string.
     *
     * @throws EOFException if {@code data} holds fewer than four bytes.
     * @throws UnsupportedEncodingException if the leading octets indicate an
     *         unusual 32-bit octet order, or if the detected or declared
     *         encoding is not supported by the running Java platform.
     * @throws IOException if reading the XML declaration fails.
     */
    public static String decodeXML(byte[] data) throws IOException {
        if (data.length < BOM_LENGTH) {
            throw new EOFException("encoding.error.not.xml");
        }

        // pack the first four octets, most significant first, so they can be
        // compared against the signature constants below
        int firstFour = ((data[0] & 0xff) << 24)
                | ((data[1] & 0xff) << 16)
                | ((data[2] & 0xff) << 8)
                | (data[3] & 0xff);

        int start = 0;
        String encoding;

        switch (firstFour) {
            case EBCDIC:
                // '<?xm' in EBCDIC; the declaration may name a specific flavour
                encoding = examineEncodingDeclaration(data, IBM037_ENC);
                break;

            case XML_DECLARATION:
                // '<?xm' in an ASCII-compatible encoding: assume UTF-8 unless
                // the declaration says otherwise
                encoding = examineEncodingDeclaration(data, UTF_8_ENC);
                break;

            case UTF_16BE:
                encoding = UTF_16BE_ENC;
                break;

            case UTF_16LE:
                encoding = UTF_16LE_ENC;
                break;

            case UNUSUAL_OCTET_1:
            case UNUSUAL_OCTET_2:
                throw new UnsupportedEncodingException("encoding.error.unusual.octet");

            case UTF_32_BE_BOM:
            case UTF_32_LE_BOM:
                // the "UTF-32" decoder uses the BOM to pick the byte order
                encoding = UTF_32_ENC;
                break;

            default:
                if ((firstFour & 0xffffff00) == UTF_8_BOM) {
                    // the UTF-8 decoder doesn't strip the BOM, so skip those
                    // three bytes ourselves
                    start = 3;
                    encoding = UTF_8_ENC;
                } else {
                    int firstTwo = firstFour & 0xffff0000;
                    if ((firstTwo == UTF_16_BE_BOM) || (firstTwo == UTF_16_LE_BOM)) {
                        // the "UTF-16" decoder uses the BOM to pick the byte order
                        encoding = UTF_16_ENC;
                    } else {
                        // this is probably UTF-8 without an XML declaration
                        encoding = UTF_8_ENC;
                    }
                }
                break;
        }

        return new String(data, start, data.length - start, encoding);
    }

    /**
     * Looks for the encoding named by the XML declaration at the start of
     * {@code data}, reading the bytes with the supplied provisional encoding.
     * <pre>
     * [3]  S            ::= ( #x20 | #x09 | #x0d | #x0a )
     * [23] XMLDecl      ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?&gt;'
     * [24] VersionInfo  ::= S 'version' Eq ( '"' VersionNum '"' |
     *                                        "'" VersionNum "'" )
     * [25] Eq           ::= S? '=' S?
     * [26] VersionNum   ::= ([a-zA-Z0-9_.:] | '-')+
     * [80] EncodingDecl ::= S 'encoding' Eq ( '"' EncName '"' |
     *                                         "'" EncName "'" )
     * [81] EncName      ::= [a-zA-Z] ([a-zA-Z0-9_.] | '-')*
     * </pre>
     *
     * @param data     the encoded document.
     * @param encoding the provisional encoding used to read the declaration;
     *                 {@code null} means UTF-8.
     *
     * @return the declared encoding name, or the provisional encoding when no
     *         well-formed encoding declaration is present.
     *
     * @throws IOException if the provisional encoding is not supported or
     *         the bytes cannot be read.
     */
    private static String examineEncodingDeclaration(byte[] data,
            String encoding) throws IOException {
        String fallback = (encoding != null) ? encoding : UTF_8_ENC;

        try (Reader reader =
                new InputStreamReader(new ByteArrayInputStream(data), fallback)) {
            String declared = scanForEncodingName(reader);
            return (declared != null) ? declared : fallback;
        }
    }

    /**
     * Scans an XML declaration for its {@code encoding} pseudo-attribute.
     *
     * @return the encoding name, or {@code null} if the text is not an XML
     *         declaration, the declaration ends ({@code ?>}) or the input runs
     *         out before an encoding is found, or the encoding declaration is
     *         malformed.
     */
    private static String scanForEncodingName(Reader reader) throws IOException {
        // an XML declaration starts with the text '<?xml' ...
        for (int i = 0; i < XML_DECL_START.length(); i++) {
            if (reader.read() != XML_DECL_START.charAt(i)) {
                return null;
            }
        }

        // ... followed by at least one whitespace character; anything else
        // (for example '<?xml-stylesheet') is an ordinary processing
        // instruction, not a declaration
        int ch = reader.read();
        if (!isWhitespace(ch)) {
            return null;
        }

        int matched = 0;              // characters of 'encoding' matched so far
        boolean afterQuestion = false; // previous character was '?'

        while (ch != -1) {
            if (afterQuestion && (ch == '>')) {
                // '?>' closes the declaration: no encoding was declared
                return null;
            }
            afterQuestion = (ch == '?');

            if (ch == ENCODING_DECL.charAt(matched)) {
                matched++;
            } else {
                // restart the match; the mismatching character may itself be
                // the first letter of the keyword
                matched = (ch == ENCODING_DECL.charAt(0)) ? 1 : 0;
            }
            ch = reader.read();

            if (matched < ENCODING_DECL.length()) {
                continue;
            }

            // saw the whole keyword; expect  S? '=' S? quote
            matched = 0;
            ch = skipWhitespace(reader, ch);
            if (ch == EQUAL) {
                ch = skipWhitespace(reader, reader.read());
                // anything but a quote after the '=' is not valid XML, so punt
                return isQuote(ch) ? readEncodingName(reader, ch) : null;
            }
            if (isQuote(ch)) {
                // got a quote before the equal; this is not valid XML, so punt
                return null;
            }
            // no '=' followed, so this wasn't the encoding declaration; keep
            // scanning from the current character
        }

        // unexpected EOF
        return null;
    }

    /**
     * Reads an EncName (production [81]) that must be terminated by the same
     * quote character that opened it.
     *
     * @param openingQuote the quote character that preceded the name.
     *
     * @return the name, or {@code null} if it is empty, contains an illegal
     *         character, or is not closed by {@code openingQuote}.
     */
    private static String readEncodingName(Reader reader, int openingQuote)
            throws IOException {
        int ch = reader.read();
        if (!isAsciiLetter(ch)) {
            return null;
        }

        StringBuilder name = new StringBuilder(ENC_NAME_CAPACITY);
        do {
            name.append((char) ch);
            ch = reader.read();
        } while (isEncNameChar(ch));

        return (ch == openingQuote) ? name.toString() : null;
    }

    /** Returns the first non-whitespace character at or after {@code ch}. */
    private static int skipWhitespace(Reader reader, int ch) throws IOException {
        while (isWhitespace(ch)) {
            ch = reader.read();
        }
        return ch;
    }

    /** Tests for XML whitespace (production [3]). */
    private static boolean isWhitespace(int ch) {
        return (ch == SPACE) || (ch == TAB) || (ch == LINEFEED) || (ch == RETURN);
    }

    /** Tests for a single or double quote. */
    private static boolean isQuote(int ch) {
        return (ch == DOUBLE_QUOTE) || (ch == SINGLE_QUOTE);
    }

    /** Tests for a character that may start an EncName. */
    private static boolean isAsciiLetter(int ch) {
        return ((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z'));
    }

    /** Tests for a character that may continue an EncName. */
    private static boolean isEncNameChar(int ch) {
        return isAsciiLetter(ch)
                || ((ch >= '0') && (ch <= '9'))
                || (ch == '_') || (ch == '.') || (ch == '-');
    }

    // number of leading octets needed for detection
    private static final int BOM_LENGTH = 4;
    // initial buffer size for an encoding name; not a limit
    private static final int ENC_NAME_CAPACITY = 32;

    private static final int SPACE = 0x00000020;
    private static final int TAB = 0x00000009;
    private static final int LINEFEED = 0x0000000a;
    private static final int RETURN = 0x0000000d;
    private static final int EQUAL = '=';
    private static final int DOUBLE_QUOTE = '\"';
    private static final int SINGLE_QUOTE = '\'';

    // signatures compared against the first four octets (or, for the UTF-8
    // and UTF-16 BOMs, against the first three / two octets with the rest
    // masked to zero)
    private static final int UTF_32_BE_BOM = 0x0000feff;
    private static final int UTF_32_LE_BOM = 0xfffe0000;
    private static final int UTF_16_BE_BOM = 0xfeff0000;
    private static final int UTF_16_LE_BOM = 0xfffe0000;
    private static final int UTF_8_BOM = 0xefbbbf00;
    private static final int UNUSUAL_OCTET_1 = 0x00003c00;
    private static final int UNUSUAL_OCTET_2 = 0x003c0000;
    private static final int UTF_16BE = 0x003c003f;        // '<?' in UTF-16BE
    private static final int UTF_16LE = 0x3c003f00;        // '<?' in UTF-16LE
    private static final int EBCDIC = 0x4c6fa794;          // '<?xm' in EBCDIC
    private static final int XML_DECLARATION = 0x3c3f786d; // '<?xm' in ASCII

    private static final String UTF_32_ENC = "UTF-32";
    private static final String UTF_16_ENC = "UTF-16";
    private static final String UTF_16BE_ENC = "UTF-16BE";
    private static final String UTF_16LE_ENC = "UTF-16LE";
    private static final String UTF_8_ENC = "UTF-8";
    private static final String IBM037_ENC = "IBM037";

    private static final String XML_DECL_START = "<?xml";
    private static final String ENCODING_DECL = "encoding";
}