8207760: SAXException: Invalid UTF-16 surrogate detected: d83c ?
authorjoehw
Tue, 18 Sep 2018 09:44:20 -0700
changeset 51786 c93f14a4ae29
parent 51785 05b05af6c160
child 51787 ba51515b64e5
8207760: SAXException: Invalid UTF-16 surrogate detected: d83c ? Reviewed-by: lancea, dfuchs
src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToHTMLStream.java
src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToStream.java
src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToTextStream.java
test/jaxp/javax/xml/jaxp/unittest/transform/JDK8207760.java
--- a/src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToHTMLStream.java	Tue Sep 18 11:55:33 2018 -0400
+++ b/src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToHTMLStream.java	Tue Sep 18 09:44:20 2018 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
  */
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -40,6 +40,7 @@
  * because it is used from another package.
  *
  * @xsl.usage internal
+ * @LastModified: Sept 2018
  */
 public final class ToHTMLStream extends ToStream
 {
@@ -1049,7 +1050,7 @@
         String name,
         String value,
         ElemDesc elemDesc)
-        throws IOException
+        throws IOException, SAXException
     {
         writer.write(' ');
 
@@ -1373,7 +1374,7 @@
      */
     public void writeAttrString(
         final java.io.Writer writer, String string, String encoding)
-        throws IOException
+        throws IOException, SAXException
     {
         final int end = string.length();
         if (end > m_attrBuff.length)
@@ -1425,13 +1426,16 @@
                 }
                 else
                 {
-                    if (Encodings.isHighUTF16Surrogate(ch))
+                    if (Encodings.isHighUTF16Surrogate(ch) ||
+                            Encodings.isLowUTF16Surrogate(ch))
                     {
-
-                            writeUTF16Surrogate(ch, chars, i, end);
-                            i++; // two input characters processed
-                                 // this increments by one and the for()
-                                 // loop itself increments by another one.
+                        if (writeUTF16Surrogate(ch, chars, i, end) >= 0) {
+                            // move the index if the low surrogate is consumed
+                            // as writeUTF16Surrogate has written the pair
+                            if (Encodings.isHighUTF16Surrogate(ch)) {
+                                i++;
+                            }
+                        }
                     }
 
                     // The next is kind of a hack to keep from escaping in the case
--- a/src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToStream.java	Tue Sep 18 11:55:33 2018 -0400
+++ b/src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToStream.java	Tue Sep 18 09:44:20 2018 -0700
@@ -51,7 +51,7 @@
  * serializers (xml, html, text ...) that write output to a stream.
  *
  * @xsl.usage internal
- * @LastModified: Feb 2018
+ * @LastModified: Sept 2018
  */
 abstract public class ToStream extends SerializerBase {
 
@@ -193,6 +193,8 @@
      */
     private boolean m_expandDTDEntities = true;
 
+    private char m_highSurrogate = 0;
+
     /**
      * Default constructor
      */
@@ -953,45 +955,46 @@
      * @param ch Character array.
      * @param i position Where the surrogate was detected.
      * @param end The end index of the significant characters.
-     * @return 0 if the pair of characters was written out as-is,
-     * the unicode code point of the character represented by
-     * the surrogate pair if an entity reference with that value
-     * was written out.
+     * @return the status of writing a surrogate pair.
+     *        -1 -- nothing is written
+     *         0 -- the pair is written as-is
+     *         code point -- the pair is written as an entity reference
      *
      * @throws IOException
      * @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
      */
     protected int writeUTF16Surrogate(char c, char ch[], int i, int end)
-        throws IOException
+        throws IOException, SAXException
     {
-        int codePoint = 0;
+        int status = -1;
         if (i + 1 >= end)
         {
-            throw new IOException(
-                Utils.messages.createMessage(
-                    MsgKey.ER_INVALID_UTF16_SURROGATE,
-                    new Object[] { Integer.toHexString((int) c)}));
+            m_highSurrogate = c;
+            return status;
         }
 
-        final char high = c;
-        final char low = ch[i+1];
+        char high, low;
+        if (m_highSurrogate == 0) {
+            high = c;
+            low = ch[i+1];
+            status = 0;
+        } else {
+            high = m_highSurrogate;
+            low = c;
+            m_highSurrogate = 0;
+        }
+
         if (!Encodings.isLowUTF16Surrogate(low)) {
-            throw new IOException(
-                Utils.messages.createMessage(
-                    MsgKey.ER_INVALID_UTF16_SURROGATE,
-                    new Object[] {
-                        Integer.toHexString((int) c)
-                            + " "
-                            + Integer.toHexString(low)}));
+            throwIOE(high, low);
         }
 
         final Writer writer = m_writer;
 
         // If we make it to here we have a valid high, low surrogate pair
-        if (m_encodingInfo.isInEncoding(c,low)) {
+        if (m_encodingInfo.isInEncoding(high,low)) {
             // If the character formed by the surrogate pair
             // is in the encoding, so just write it out
-            writer.write(ch,i,2);
+            writer.write(new char[]{high, low}, 0, 2);
         }
         else {
             // Don't know what to do with this char, it is
@@ -999,24 +1002,16 @@
             // a surrogate pair, so write out as an entity ref
             final String encoding = getEncoding();
             if (encoding != null) {
-                /* The output encoding is known,
-                 * so somthing is wrong.
-                  */
-                codePoint = Encodings.toCodePoint(high, low);
-                // not in the encoding, so write out a character reference
-                writer.write('&');
-                writer.write('#');
-                writer.write(Integer.toString(codePoint));
-                writer.write(';');
+                status = writeCharRef(writer, high, low);
             } else {
                 /* The output encoding is not known,
                  * so just write it out as-is.
                  */
-                writer.write(ch, i, 2);
+                writer.write(new char[]{high, low}, 0, 2);
             }
         }
         // non-zero only if character reference was written out.
-        return codePoint;
+        return status;
     }
 
     /**
@@ -1106,32 +1101,7 @@
             }
             else if (isCData && (!escapingNotNeeded(c)))
             {
-                //                if (i != 0)
-                if (m_cdataTagOpen)
-                    closeCDATA();
-
-                // This needs to go into a function...
-                if (Encodings.isHighUTF16Surrogate(c))
-                {
-                    writeUTF16Surrogate(c, ch, i, end);
-                    i++ ; // process two input characters
-                }
-                else
-                {
-                    writer.write("&#");
-
-                    String intStr = Integer.toString((int) c);
-
-                    writer.write(intStr);
-                    writer.write(';');
-                }
-
-                //                if ((i != 0) && (i < (end - 1)))
-                //                if (!m_cdataTagOpen && (i < (end - 1)))
-                //                {
-                //                    writer.write(CDATA_DELIMITER_OPEN);
-                //                    m_cdataTagOpen = true;
-                //                }
+                i = handleEscaping(writer, c, ch, i, end);
             }
             else if (
                 isCData
@@ -1155,25 +1125,8 @@
                     }
                     writer.write(c);
                 }
-
-                // This needs to go into a function...
-                else if (Encodings.isHighUTF16Surrogate(c))
-                {
-                    if (m_cdataTagOpen)
-                        closeCDATA();
-                    writeUTF16Surrogate(c, ch, i, end);
-                    i++; // process two input characters
-                }
-                else
-                {
-                    if (m_cdataTagOpen)
-                        closeCDATA();
-                    writer.write("&#");
-
-                    String intStr = Integer.toString((int) c);
-
-                    writer.write(intStr);
-                    writer.write(';');
+                else {
+                    i = handleEscaping(writer, c, ch, i, end);
                 }
             }
         }
@@ -1181,6 +1134,38 @@
     }
 
     /**
+     * Handles escaping, writes either with a surrogate pair or a character
+     * reference.
+     *
+     * @param c the current char
+     * @param ch the character array
+     * @param i the current position
+     * @param end the end index of the array
+     * @return the next index
+     *
+     * @throws IOException
+     * @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
+     */
+    private int handleEscaping(Writer writer, char c, char ch[], int i, int end)
+            throws IOException, SAXException {
+        if (Encodings.isHighUTF16Surrogate(c) || Encodings.isLowUTF16Surrogate(c))
+        {
+            if (writeUTF16Surrogate(c, ch, i, end) >= 0) {
+                // move the index if the low surrogate is consumed
+                // as writeUTF16Surrogate has written the pair
+                if (Encodings.isHighUTF16Surrogate(c)) {
+                    i++ ;
+                }
+            }
+        }
+        else
+        {
+            writeCharRef(writer, c);
+        }
+        return i;
+    }
+
+    /**
      * Ends an un-escaping section.
      *
      * @see #startNonEscaping
@@ -1246,7 +1231,7 @@
                 m_elemContext.m_startTagOpen = false;
             }
 
-            if (shouldIndent())
+            if (!m_cdataTagOpen && shouldIndent())
                 indent();
 
             boolean writeCDataBrackets =
@@ -1644,7 +1629,7 @@
         int i,
         char ch,
         int lastDirty,
-        boolean fromTextNode) throws IOException
+        boolean fromTextNode) throws IOException, SAXException
     {
         int startClean = lastDirty + 1;
         // if we have some clean characters accumulated
@@ -1723,54 +1708,40 @@
         int len,
         boolean fromTextNode,
         boolean escLF)
-        throws IOException
+        throws IOException, SAXException
     {
 
         int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF);
 
         if (i == pos)
         {
+            if (m_highSurrogate != 0) {
+                if (!(Encodings.isLowUTF16Surrogate(ch))) {
+                    throwIOE(m_highSurrogate, ch);
+                }
+                writeCharRef(writer, m_highSurrogate, ch);
+                m_highSurrogate = 0;
+                return ++pos;
+            }
+
             if (Encodings.isHighUTF16Surrogate(ch))
             {
-
-                // Should be the UTF-16 low surrogate of the hig/low pair.
-                char next;
-                // Unicode code point formed from the high/low pair.
-                int codePoint = 0;
-
                 if (i + 1 >= len)
                 {
-                    throw new IOException(
-                        Utils.messages.createMessage(
-                            MsgKey.ER_INVALID_UTF16_SURROGATE,
-                            new Object[] { Integer.toHexString(ch)}));
-                    //"Invalid UTF-16 surrogate detected: "
-
-                    //+Integer.toHexString(ch)+ " ?");
+                    // save for the next read
+                    m_highSurrogate = ch;
+                    pos++;
                 }
                 else
                 {
-                    next = chars[++i];
-
+                    // the next should be the UTF-16 low surrogate of the hig/low pair.
+                    char next = chars[++i];
                     if (!(Encodings.isLowUTF16Surrogate(next)))
-                        throw new IOException(
-                            Utils.messages.createMessage(
-                                MsgKey
-                                    .ER_INVALID_UTF16_SURROGATE,
-                                new Object[] {
-                                    Integer.toHexString(ch)
-                                        + " "
-                                        + Integer.toHexString(next)}));
-                    //"Invalid UTF-16 surrogate detected: "
-
-                    //+Integer.toHexString(ch)+" "+Integer.toHexString(next));
-                    codePoint = Encodings.toCodePoint(ch,next);
+                        throwIOE(ch, next);
+
+                    writeCharRef(writer, ch, next);
+                    pos += 2; // count the two characters that went into writing out this entity
                 }
-
-                writer.write("&#");
-                writer.write(Integer.toString(codePoint));
-                writer.write(';');
-                pos += 2; // count the two characters that went into writing out this entity
             }
             else
             {
@@ -1782,18 +1753,14 @@
                 if (isCharacterInC0orC1Range(ch) ||
                         (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
                 {
-                    writer.write("&#");
-                    writer.write(Integer.toString(ch));
-                    writer.write(';');
+                    writeCharRef(writer, ch);
                 }
                 else if ((!escapingNotNeeded(ch) ||
                     (  (fromTextNode && m_charInfo.isSpecialTextChar(ch))
                      || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
-                && m_elemContext.m_currentElemDepth > 0)
+                     && m_elemContext.m_currentElemDepth > 0)
                 {
-                    writer.write("&#");
-                    writer.write(Integer.toString(ch));
-                    writer.write(';');
+                    writeCharRef(writer, ch);
                 }
                 else
                 {
@@ -1807,6 +1774,45 @@
     }
 
     /**
+     * Writes out a character reference.
+     * @param writer the writer
+     * @param c the character
+     * @throws IOException
+     */
+    private void writeCharRef(Writer writer, char c) throws IOException, SAXException {
+        if (m_cdataTagOpen)
+            closeCDATA();
+        writer.write("&#");
+        writer.write(Integer.toString(c));
+        writer.write(';');
+    }
+
+    /**
+     * Writes out a pair of surrogates as a character reference
+     * @param writer the writer
+     * @param high the high surrogate
+     * @param low the low surrogate
+     * @throws IOException
+     */
+    private int writeCharRef(Writer writer, char high, char low) throws IOException, SAXException {
+        if (m_cdataTagOpen)
+            closeCDATA();
+        // Unicode code point formed from the high/low pair.
+        int codePoint = Encodings.toCodePoint(high, low);
+        writer.write("&#");
+        writer.write(Integer.toString(codePoint));
+        writer.write(';');
+        return codePoint;
+    }
+
+    private void throwIOE(char ch, char next) throws IOException {
+        throw new IOException(Utils.messages.createMessage(
+                MsgKey.ER_INVALID_UTF16_SURROGATE,
+                new Object[] {Integer.toHexString(ch) + " "
+                        + Integer.toHexString(next)}));
+    }
+
+    /**
      * Receive notification of the beginning of an element, although this is a
      * SAX method additional namespace or attribute information can occur before
      * or after this call, that is associated with this element.
@@ -2053,7 +2059,7 @@
         Writer writer,
         String string,
         String encoding)
-        throws IOException
+        throws IOException, SAXException
     {
         final int len = string.length();
         if (len > m_attrBuff.length)
--- a/src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToTextStream.java	Tue Sep 18 11:55:33 2018 -0400
+++ b/src/java.xml/share/classes/com/sun/org/apache/xml/internal/serializer/ToTextStream.java	Tue Sep 18 09:44:20 2018 -0700
@@ -1,6 +1,5 @@
 /*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
  */
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -34,6 +33,7 @@
  * This class converts SAX or SAX-like calls to a
  * serialized document for xsl:output method of "text".
  * @xsl.usage internal
+ * @LastModified: Sept 2018
  */
 public final class ToTextStream extends ToStream
 {
@@ -295,23 +295,32 @@
             } else if (m_encodingInfo.isInEncoding(c)) {
                 writer.write(c);
                 // one input char processed
-            } else if (Encodings.isHighUTF16Surrogate(c)) {
+            } else if (Encodings.isHighUTF16Surrogate(c) ||
+                       Encodings.isLowUTF16Surrogate(c)) {
                 final int codePoint = writeUTF16Surrogate(c, ch, i, end);
-                if (codePoint != 0) {
-                    // I think we can just emit the message,
-                    // not crash and burn.
-                    final String integralValue = Integer.toString(codePoint);
-                    final String msg = Utils.messages.createMessage(
-                        MsgKey.ER_ILLEGAL_CHARACTER,
-                        new Object[] { integralValue, encoding });
+                if (codePoint >= 0) {
+                    // move the index if the low surrogate is consumed
+                    // as writeUTF16Surrogate has written the pair
+                    if (Encodings.isHighUTF16Surrogate(c)) {
+                        i++;
+                    }
 
-                    //Older behavior was to throw the message,
-                    //but newer gentler behavior is to write a message to System.err
-                    //throw new SAXException(msg);
-                    System.err.println(msg);
+                    // printing to the console is not appropriate, but will leave
+                    // it as is for compatibility.
+                    if (codePoint >0) {
+                        // I think we can just emit the message,
+                        // not crash and burn.
+                        final String integralValue = Integer.toString(codePoint);
+                        final String msg = Utils.messages.createMessage(
+                            MsgKey.ER_ILLEGAL_CHARACTER,
+                            new Object[] { integralValue, encoding });
 
+                        //Older behavior was to throw the message,
+                        //but newer gentler behavior is to write a message to System.err
+                        //throw new SAXException(msg);
+                        System.err.println(msg);
+                    }
                 }
-                i++; // two input chars processed
             } else {
                 // Don't know what to do with this char, it is
                 // not in the encoding and not a high char in
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jaxp/javax/xml/jaxp/unittest/transform/JDK8207760.java	Tue Sep 18 09:44:20 2018 -0700
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package transform;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.nio.charset.StandardCharsets;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.stream.StreamResult;
+import javax.xml.transform.stream.StreamSource;
+
+import org.testng.Assert;
+import org.testng.annotations.Listeners;
+import org.testng.annotations.Test;
+import java.util.Random;
+import javax.xml.transform.OutputKeys;
+import org.testng.annotations.DataProvider;
+
+/*
+ * @test
+ * @library /javax/xml/jaxp/libs /javax/xml/jaxp/unittest
+ * @run testng/othervm transform.JDK8207760
+ * @summary Verifies that a surrogate pair at the edge of a buffer is properly handled
+ * @bug 8207760
+ */
+@Listeners({jaxp.library.FilePolicy.class})
+public class JDK8207760 {
+    final String xsl8207760 =
+        "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">\n" +
+        "    <xsl:output omit-xml-declaration=\"yes\" indent=\"no\" />\n" +
+        "\n" +
+        "    <xsl:template match=\"node()|@*\">\n" +
+        "        <xsl:copy>\n" +
+        "            <xsl:apply-templates select=\"node()|@*\" />\n" +
+        "        </xsl:copy>\n" +
+        "    </xsl:template>\n" +
+        "</xsl:stylesheet>\n";
+
+    final String xsl8207760_2 = "<xsl:stylesheet \n" +
+        "  version=\"1.0\" \n" +
+        "  xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">\n" +
+        "\n" +
+        "  <xsl:output method=\"xml\" indent=\"no\" cdata-section-elements=\"source\"/>\n" +
+        "\n" +
+        "  <xsl:template match=\"source\">\n" +
+                "        <xsl:copy>\n" +
+                "            <xsl:apply-templates select=\"node()\" />\n" +
+                "        </xsl:copy>\n" +
+        "  </xsl:template>\n" +
+        "\n" +
+        "</xsl:stylesheet>";
+
+    final String xsl8207760_3 = "<xsl:stylesheet \n" +
+        "  version=\"1.0\" \n" +
+        "  xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">\n" +
+        "\n" +
+        "  <xsl:output method=\"xml\" indent=\"no\" cdata-section-elements=\"source\"/>\n" +
+        "\n" +
+        "  <xsl:template match=\"source\">\n" +
+        "    <xsl:copy>\n" +
+        "      <!-- Copy the attributes -->\n" +
+        "      <xsl:apply-templates select=\"@*\"/>\n" +
+        "      <!-- Convert the contained nodes (elements and text) into text -->\n" +
+        "      <xsl:variable name=\"subElementsText\">\n" +
+        "        <xsl:apply-templates select=\"node()\"/>\n" +
+        "      </xsl:variable>\n" +
+        "      <!-- Output the XML directive and the converted nodes -->\n" +
+        "      <xsl:value-of select=\"$subElementsText\"/>\n" +
+        "    </xsl:copy>\n" +
+        "  </xsl:template>\n" +
+        "\n" +
+        "</xsl:stylesheet>";
+
+    @DataProvider(name = "xsls")
+    public Object[][] getDataBug8207760_cdata() {
+        return new Object[][]{
+            {xsl8207760_2},
+            {xsl8207760_3},
+        };
+    }
+
+    /*
+     * @bug 8207760
+     * Verifies that a surrogate pair at the edge of a buffer is properly handled
+     * when serializing into a Character section.
+     */
+    @Test
+    public final void testBug8207760() throws Exception {
+        String[] xmls = prepareXML(false);
+        Transformer t = createTransformerFromInputstream(
+                new ByteArrayInputStream(xsl8207760.getBytes(StandardCharsets.UTF_8)));
+        t.setOutputProperty(OutputKeys.ENCODING, StandardCharsets.UTF_8.name());
+        StringWriter sw = new StringWriter();
+        t.transform(new StreamSource(new StringReader(xmls[0])), new StreamResult(sw));
+        Assert.assertEquals(sw.toString().replaceAll(System.lineSeparator(), "\n"), xmls[1]);
+    }
+
+    /*
+     * @bug 8207760
+     * Verifies that a surrogate pair at the edge of a buffer is properly handled
+     * when serializing into a CDATA section.
+     */
+    @Test(dataProvider = "xsls")
+    public final void testBug8207760_cdata(String xsl) throws Exception {
+        String[] xmls = prepareXML(true);
+        Transformer t = createTransformerFromInputstream(
+                new ByteArrayInputStream(xsl.getBytes(StandardCharsets.UTF_8)));
+        t.setOutputProperty(OutputKeys.ENCODING, StandardCharsets.UTF_8.name());
+        StringWriter sw = new StringWriter();
+        t.transform(new StreamSource(new StringReader(xmls[0])), new StreamResult(sw));
+        Assert.assertEquals(sw.toString().replaceAll(System.lineSeparator(), "\n"), xmls[1]);
+    }
+
+    private String[] prepareXML(boolean cdata) {
+        String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><source>";
+        if (cdata) {
+            xml += "<![CDATA[";
+        }
+        String tail = "abc 123 </source>";
+        if (cdata) {
+            tail = "abc 123 ]]></source>";
+        }
+        String temp = generateString(1023);
+        xml = xml + temp + '\uD83C' + '\uDF42' + tail;
+        //xml = xml + temp + tail;
+        String expected = (!cdata) ? "<source>" + temp + "&#127810;" + tail
+                : xml;
+
+        return new String[]{xml, expected};
+    }
+
+    static final char[] CHARS = "abcdefghijklmnopqrstuvwxyz \n".toCharArray();
+    StringBuilder sb = new StringBuilder(1024 << 4);
+    Random random = new Random();
+
+    private String generateString(int size) {
+        sb.setLength(0);
+        for (int i = 0; i < size; i++) {
+            char c = CHARS[random.nextInt(CHARS.length)];
+            sb.append(c);
+        }
+
+        return sb.toString();
+    }
+
+    private Transformer createTransformerFromInputstream(InputStream xslStream)
+            throws TransformerException {
+        return TransformerFactory.newInstance().newTransformer(new StreamSource(xslStream));
+    }
+}