diff -r 5c5a64ec0839 -r 0a840d92fa30 jaxp/src/com/sun/org/apache/xml/internal/serializer/ToStream.java --- a/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToStream.java Wed Jul 05 18:12:32 2017 +0200 +++ b/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToStream.java Thu Jun 07 13:47:53 2012 -0700 @@ -919,8 +919,7 @@ { // This is the old/fast code here, but is this // correct for all encodings? - if (ch >= CharInfo.S_SPACE || (CharInfo.S_LINEFEED == ch || - CharInfo.S_CARRIAGERETURN == ch || CharInfo.S_HORIZONAL_TAB == ch)) + if (ch >= 0x20 || (0x0A == ch || 0x0D == ch || 0x09 == ch)) ret= true; else ret = false; @@ -1029,7 +1028,7 @@ * * @throws java.io.IOException */ - int accumDefaultEntity( + protected int accumDefaultEntity( java.io.Writer writer, char ch, int i, @@ -1048,7 +1047,7 @@ { // if this is text node character and a special one of those, // or if this is a character from attribute value and a special one of those - if ((fromTextNode && m_charInfo.shouldMapTextChar(ch)) || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch))) + if ((fromTextNode && m_charInfo.isSpecialTextChar(ch)) || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))) { String outputStringForChar = m_charInfo.getOutputStringForChar(ch); @@ -1399,6 +1398,7 @@ if (m_cdataTagOpen) closeCDATA(); + // the check with _escaping is a bit of a hack for XLSTC if (m_disableOutputEscapingStates.peekOrFalse() || (!m_escaping)) { @@ -1421,173 +1421,82 @@ try { int i; + char ch1; int startClean; // skip any leading whitspace // don't go off the end and use a hand inlined version // of isWhitespace(ch) final int end = start + length; - int lastDirtyCharProcessed = start - 1; // last non-clean character that was processed - // that was processed - final Writer writer = m_writer; - boolean isAllWhitespace = true; - - // process any leading whitspace - i = start; - while (i < end && isAllWhitespace) { - char ch1 = chars[i]; - - if (m_charInfo.shouldMapTextChar(ch1)) { - // The character is supposed to be replaced by a String - // so write out the clean whitespace characters accumulated - // so far - // then the String. - writeOutCleanChars(chars, i, lastDirtyCharProcessed); - String outputStringForChar = m_charInfo - .getOutputStringForChar(ch1); - writer.write(outputStringForChar); - // We can't say that everything we are writing out is - // all whitespace, we just wrote out a String. - isAllWhitespace = false; - lastDirtyCharProcessed = i; // mark the last non-clean - // character processed - i++; - } else { - // The character is clean, but is it a whitespace ? - switch (ch1) { - // TODO: Any other whitespace to consider? - case CharInfo.S_SPACE: - // Just accumulate the clean whitespace - i++; - break; - case CharInfo.S_LINEFEED: - lastDirtyCharProcessed = processLineFeed(chars, i, - lastDirtyCharProcessed, writer); - i++; - break; - case CharInfo.S_CARRIAGERETURN: - writeOutCleanChars(chars, i, lastDirtyCharProcessed); - writer.write(" "); - lastDirtyCharProcessed = i; - i++; - break; - case CharInfo.S_HORIZONAL_TAB: - // Just accumulate the clean whitespace - i++; - break; - default: - // The character was clean, but not a whitespace - // so break the loop to continue with this character - // (we don't increment index i !!) - isAllWhitespace = false; - break; + int lastDirty = start - 1; // last character that needed processing + for (i = start; + ((i < end) + && ((ch1 = chars[i]) == 0x20 + || (ch1 == 0xA && m_lineSepUse) + || ch1 == 0xD + || ch1 == 0x09)); + i++) + { + /* + * We are processing leading whitespace, but are doing the same + * processing for dirty characters here as for non-whitespace. + * + */ + if (!m_charInfo.isTextASCIIClean(ch1)) + { + lastDirty = processDirty(chars,end, i,ch1, lastDirty, true); + i = lastDirty; } } - } /* If there is some non-whitespace, mark that we may need * to preserve this. This is only important if we have indentation on. */ - if (i < end || !isAllWhitespace) + if (i < end) m_ispreserve = true; + +// int lengthClean; // number of clean characters in a row +// final boolean[] isAsciiClean = m_charInfo.getASCIIClean(); + + final boolean isXML10 = XMLVERSION10.equals(getVersion()); + // we've skipped the leading whitespace, now deal with the rest for (; i < end; i++) { - char ch = chars[i]; - - if (m_charInfo.shouldMapTextChar(ch)) { - // The character is supposed to be replaced by a String - // e.g. '&' --> "&" - // e.g. '<' --> "<" - writeOutCleanChars(chars, i, lastDirtyCharProcessed); - String outputStringForChar = m_charInfo.getOutputStringForChar(ch); - writer.write(outputStringForChar); - lastDirtyCharProcessed = i; - } - else { - if (ch <= 0x1F) { - // Range 0x00 through 0x1F inclusive - // - // This covers the non-whitespace control characters - // in the range 0x1 to 0x1F inclusive. - // It also covers the whitespace control characters in the same way: - // 0x9 TAB - // 0xA NEW LINE - // 0xD CARRIAGE RETURN - // - // We also cover 0x0 ... It isn't valid - // but we will output "�" - - // The default will handle this just fine, but this - // is a little performance boost to handle the more - // common TAB, NEW-LINE, CARRIAGE-RETURN - switch (ch) { - - case CharInfo.S_HORIZONAL_TAB: - // Leave whitespace TAB as a real character + { + // A tight loop to skip over common clean chars + // This tight loop makes it easier for the JIT + // to optimize. + char ch2; + while (i startClean) { int lengthClean = i - startClean; @@ -1606,32 +1515,6 @@ if (m_tracer != null) super.fireCharEvent(chars, start, length); } - - private int processLineFeed(final char[] chars, int i, int lastProcessed, final Writer writer) throws IOException { - if (!m_lineSepUse - || (m_lineSepLen ==1 && m_lineSep[0] == CharInfo.S_LINEFEED)){ - // We are leaving the new-line alone, and it is just - // being added to the 'clean' characters, - // so the last dirty character processed remains unchanged - } - else { - writeOutCleanChars(chars, i, lastProcessed); - writer.write(m_lineSep, 0, m_lineSepLen); - lastProcessed = i; - } - return lastProcessed; - } - - private void writeOutCleanChars(final char[] chars, int i, int lastProcessed) throws IOException { - int startClean; - startClean = lastProcessed + 1; - if (startClean < i) - { - int lengthClean = i - startClean; - m_writer.write(chars, startClean, lengthClean); - } - } - /** * This method checks if a given character is between C0 or C1 range * of Control characters. @@ -1751,7 +1634,7 @@ * * @throws org.xml.sax.SAXException */ - private int accumDefaultEscape( + protected int accumDefaultEscape( Writer writer, char ch, int i, @@ -1815,15 +1698,16 @@ * to write it out as Numeric Character Reference(NCR) regardless of XML Version * being used for output document. */ - if (isCharacterInC0orC1Range(ch) || isNELorLSEPCharacter(ch)) + if (isCharacterInC0orC1Range(ch) || + (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch))) { writer.write("&#"); writer.write(Integer.toString(ch)); writer.write(';'); } else if ((!escapingNotNeeded(ch) || - ( (fromTextNode && m_charInfo.shouldMapTextChar(ch)) - || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch)))) + ( (fromTextNode && m_charInfo.isSpecialTextChar(ch)) + || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch)))) && m_elemContext.m_currentElemDepth > 0) { writer.write("&#"); @@ -2087,86 +1971,28 @@ string.getChars(0,len, m_attrBuff, 0); final char[] stringChars = m_attrBuff; - for (int i = 0; i < len;) + for (int i = 0; i < len; ) { char ch = stringChars[i]; - - if (m_charInfo.shouldMapAttrChar(ch) || !(escapingNotNeeded(ch))) { - // The character is supposed to be replaced by a String - // e.g. '&' --> "&" - // e.g. '<' --> "<" + if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch))) + { + writer.write(ch); + i++; + } + else + { // I guess the parser doesn't normalize cr/lf in attributes. -sb +// if ((CharInfo.S_CARRIAGERETURN == ch) +// && ((i + 1) < len) +// && (CharInfo.S_LINEFEED == stringChars[i + 1])) +// { +// i++; +// ch = CharInfo.S_LINEFEED; +// } + i = accumDefaultEscape(writer, ch, i, stringChars, len, false, true); } - else { - i++; - if (0x0 <= ch && ch <= 0x1F) { - // Range 0x00 through 0x1F inclusive - // This covers the non-whitespace control characters - // in the range 0x1 to 0x1F inclusive. - // It also covers the whitespace control characters in the same way: - // 0x9 TAB - // 0xA NEW LINE - // 0xD CARRIAGE RETURN - // - // We also cover 0x0 ... It isn't valid - // but we will output "�" - - // The default will handle this just fine, but this - // is a little performance boost to handle the more - // common TAB, NEW-LINE, CARRIAGE-RETURN - switch (ch) { - - case CharInfo.S_HORIZONAL_TAB: - writer.write(" "); - break; - case CharInfo.S_LINEFEED: - writer.write(" "); - break; - case CharInfo.S_CARRIAGERETURN: - writer.write(" "); - break; - default: - writer.write("&#"); - writer.write(Integer.toString(ch)); - writer.write(';'); - break; - } - } - else if (ch < 0x7F) { - // Range 0x20 through 0x7E inclusive - // Normal ASCII chars - writer.write(ch); - } - else if (ch <= 0x9F){ - // Range 0x7F through 0x9F inclusive - // More control characters - writer.write("&#"); - writer.write(Integer.toString(ch)); - writer.write(';'); - } - else if (ch == CharInfo.S_LINE_SEPARATOR) { - // LINE SEPARATOR - writer.write("
"); - } - else if (m_encodingInfo.isInEncoding(ch)) { - // If the character is in the encoding, and - // not in the normal ASCII range, we also - // just write it out - writer.write(ch); - } - else { - // This is a fallback plan, we should never get here - // but if the character wasn't previously handled - // (i.e. isn't in the encoding, etc.) then what - // should we do? We choose to write out a character ref - writer.write("&#"); - writer.write(Integer.toString(ch)); - writer.write(';'); - } - - } - } + } /** @@ -2936,14 +2762,6 @@ closeCDATA(); m_cdataTagOpen = false; } - if (m_writer != null) { - try { - m_writer.flush(); - } - catch(IOException e) { - // what? me worry? - } - } } public void setContentHandler(ContentHandler ch)