--- a/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToStream.java Thu Apr 12 08:38:26 2012 -0700
+++ b/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToStream.java Tue Apr 17 11:17:59 2012 -0700
@@ -919,7 +919,8 @@
{
// This is the old/fast code here, but is this
// correct for all encodings?
- if (ch >= 0x20 || (0x0A == ch || 0x0D == ch || 0x09 == ch))
+ if (ch >= CharInfo.S_SPACE || (CharInfo.S_LINEFEED == ch ||
+ CharInfo.S_CARRIAGERETURN == ch || CharInfo.S_HORIZONAL_TAB == ch))
ret= true;
else
ret = false;
@@ -1028,7 +1029,7 @@
*
* @throws java.io.IOException
*/
- protected int accumDefaultEntity(
+ int accumDefaultEntity(
java.io.Writer writer,
char ch,
int i,
@@ -1047,7 +1048,7 @@
{
// if this is text node character and a special one of those,
// or if this is a character from attribute value and a special one of those
- if ((fromTextNode && m_charInfo.isSpecialTextChar(ch)) || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch)))
+ if ((fromTextNode && m_charInfo.shouldMapTextChar(ch)) || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch)))
{
String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
@@ -1398,7 +1399,6 @@
if (m_cdataTagOpen)
closeCDATA();
- // the check with _escaping is a bit of a hack for XLSTC
if (m_disableOutputEscapingStates.peekOrFalse() || (!m_escaping))
{
@@ -1421,82 +1421,173 @@
try
{
int i;
- char ch1;
int startClean;
// skip any leading whitspace
// don't go off the end and use a hand inlined version
// of isWhitespace(ch)
final int end = start + length;
- int lastDirty = start - 1; // last character that needed processing
- for (i = start;
- ((i < end)
- && ((ch1 = chars[i]) == 0x20
- || (ch1 == 0xA && m_lineSepUse)
- || ch1 == 0xD
- || ch1 == 0x09));
- i++)
- {
- /*
- * We are processing leading whitespace, but are doing the same
- * processing for dirty characters here as for non-whitespace.
- *
- */
- if (!m_charInfo.isTextASCIIClean(ch1))
- {
- lastDirty = processDirty(chars,end, i,ch1, lastDirty, true);
- i = lastDirty;
+ int lastDirtyCharProcessed = start - 1; // index of the last non-clean character
+ // that was processed
+ final Writer writer = m_writer;
+ boolean isAllWhitespace = true;
+
+ // process any leading whitespace
+ i = start;
+ while (i < end && isAllWhitespace) {
+ char ch1 = chars[i];
+
+ if (m_charInfo.shouldMapTextChar(ch1)) {
+ // The character is supposed to be replaced by a String
+ // so write out the clean whitespace characters accumulated
+ // so far
+ // then the String.
+ writeOutCleanChars(chars, i, lastDirtyCharProcessed);
+ String outputStringForChar = m_charInfo
+ .getOutputStringForChar(ch1);
+ writer.write(outputStringForChar);
+ // We can't say that everything we are writing out is
+ // all whitespace, we just wrote out a String.
+ isAllWhitespace = false;
+ lastDirtyCharProcessed = i; // mark the last non-clean
+ // character processed
+ i++;
+ } else {
+ // The character is clean, but is it a whitespace ?
+ switch (ch1) {
+ // TODO: Any other whitespace to consider?
+ case CharInfo.S_SPACE:
+ // Just accumulate the clean whitespace
+ i++;
+ break;
+ case CharInfo.S_LINEFEED:
+ lastDirtyCharProcessed = processLineFeed(chars, i,
+ lastDirtyCharProcessed, writer);
+ i++;
+ break;
+ case CharInfo.S_CARRIAGERETURN:
+ writeOutCleanChars(chars, i, lastDirtyCharProcessed);
+ writer.write("&#13;");
+ lastDirtyCharProcessed = i;
+ i++;
+ break;
+ case CharInfo.S_HORIZONAL_TAB:
+ // Just accumulate the clean whitespace
+ i++;
+ break;
+ default:
+ // The character was clean, but not a whitespace
+ // so break the loop to continue with this character
+ // (we don't increment index i !!)
+ isAllWhitespace = false;
+ break;
}
}
+ }
/* If there is some non-whitespace, mark that we may need
* to preserve this. This is only important if we have indentation on.
*/
- if (i < end)
+ if (i < end || !isAllWhitespace)
m_ispreserve = true;
-
-// int lengthClean; // number of clean characters in a row
-// final boolean[] isAsciiClean = m_charInfo.getASCIIClean();
-
- final boolean isXML10 = XMLVERSION10.equals(getVersion());
- // we've skipped the leading whitespace, now deal with the rest
for (; i < end; i++)
{
- {
- // A tight loop to skip over common clean chars
- // This tight loop makes it easier for the JIT
- // to optimize.
- char ch2;
- while (i<end
- && ((ch2 = chars[i])<127)
- && m_charInfo.isTextASCIIClean(ch2))
- i++;
- if (i == end)
- break;
+ char ch = chars[i];
+
+ if (m_charInfo.shouldMapTextChar(ch)) {
+ // The character is supposed to be replaced by a String
+ // e.g. '&' --> "&amp;"
+ // e.g. '<' --> "&lt;"
+ writeOutCleanChars(chars, i, lastDirtyCharProcessed);
+ String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
+ writer.write(outputStringForChar);
+ lastDirtyCharProcessed = i;
}
-
- final char ch = chars[i];
- /* The check for isCharacterInC0orC1Ranger and
- * isNELorLSEPCharacter has been added
- * to support Control Characters in XML 1.1
- */
- if (!isCharacterInC0orC1Range(ch) &&
- (isXML10 || !isNELorLSEPCharacter(ch)) &&
- (escapingNotNeeded(ch) && (!m_charInfo.isSpecialTextChar(ch)))
- || ('"' == ch))
- {
- ; // a character needing no special processing
+ else {
+ if (ch <= 0x1F) {
+ // Range 0x00 through 0x1F inclusive
+ //
+ // This covers the non-whitespace control characters
+ // in the range 0x1 to 0x1F inclusive.
+ // It also covers the whitespace control characters in the same way:
+ // 0x9 TAB
+ // 0xA NEW LINE
+ // 0xD CARRIAGE RETURN
+ //
+ // We also cover 0x0 ... It isn't valid
+ // but we will output "&#0;"
+
+ // The default will handle this just fine, but this
+ // is a little performance boost to handle the more
+ // common TAB, NEW-LINE, CARRIAGE-RETURN
+ switch (ch) {
+
+ case CharInfo.S_HORIZONAL_TAB:
+ // Leave whitespace TAB as a real character
+ break;
+ case CharInfo.S_LINEFEED:
+ lastDirtyCharProcessed = processLineFeed(chars, i, lastDirtyCharProcessed, writer);
+ break;
+ case CharInfo.S_CARRIAGERETURN:
+ writeOutCleanChars(chars, i, lastDirtyCharProcessed);
+ writer.write("&#13;");
+ lastDirtyCharProcessed = i;
+ // The carriage return is written as a character reference, not as a raw character
+ break;
+ default:
+ writeOutCleanChars(chars, i, lastDirtyCharProcessed);
+ writer.write("&#");
+ writer.write(Integer.toString(ch));
+ writer.write(';');
+ lastDirtyCharProcessed = i;
+ break;
+
}
- else
- {
- lastDirty = processDirty(chars,end, i, ch, lastDirty, true);
- i = lastDirty;
+ }
+ else if (ch < 0x7F) {
+ // Range 0x20 through 0x7E inclusive
+ // Normal ASCII chars, do nothing, just add it to
+ // the clean characters
+
+ }
+ else if (ch <= 0x9F){
+ // Range 0x7F through 0x9F inclusive
+ // More control characters, including NEL (0x85)
+ writeOutCleanChars(chars, i, lastDirtyCharProcessed);
+ writer.write("&#");
+ writer.write(Integer.toString(ch));
+ writer.write(';');
+ lastDirtyCharProcessed = i;
+ }
+ else if (ch == CharInfo.S_LINE_SEPARATOR) {
+ // LINE SEPARATOR
+ writeOutCleanChars(chars, i, lastDirtyCharProcessed);
+ writer.write("&#8232;");
+ lastDirtyCharProcessed = i;
+ }
+ else if (m_encodingInfo.isInEncoding(ch)) {
+ // If the character is in the encoding, and
+ // not in the normal ASCII range, we also
+ // just leave it get added on to the clean characters
+
+ }
+ else {
+ // This is a fallback plan, we should never get here
+ // but if the character wasn't previously handled
+ // (i.e. isn't in the encoding, etc.) then what
+ // should we do? We choose to write out an entity
+ writeOutCleanChars(chars, i, lastDirtyCharProcessed);
+ writer.write("&#");
+ writer.write(Integer.toString(ch));
+ writer.write(';');
+ lastDirtyCharProcessed = i;
+ }
}
}
// we've reached the end. Any clean characters at the
// end of the array than need to be written out?
- startClean = lastDirty + 1;
+ startClean = lastDirtyCharProcessed + 1;
if (i > startClean)
{
int lengthClean = i - startClean;
@@ -1515,6 +1606,32 @@
if (m_tracer != null)
super.fireCharEvent(chars, start, length);
}
+
+ private int processLineFeed(final char[] chars, int i, int lastProcessed, final Writer writer) throws IOException { // handles the line feed found at chars[i]
+ if (!m_lineSepUse
+ || (m_lineSepLen ==1 && m_lineSep[0] == CharInfo.S_LINEFEED)){
+ // Either m_lineSepUse is false, or m_lineSep is itself a single
+ // line feed: the character is left among the 'clean' characters
+ // still to be written, so lastProcessed is returned unchanged.
+ }
+ else {
+ writeOutCleanChars(chars, i, lastProcessed); // write the clean run that precedes chars[i]
+ writer.write(m_lineSep, 0, m_lineSepLen); // write m_lineSep in place of the line feed
+ lastProcessed = i; // chars[i] has now been dealt with
+ }
+ return lastProcessed; // caller stores this as its new last-processed index
+ }
+
+ private void writeOutCleanChars(final char[] chars, int i, int lastProcessed) throws IOException { // writes chars[lastProcessed+1 .. i-1] to m_writer unchanged
+ int startClean;
+ startClean = lastProcessed + 1; // first character after lastProcessed
+ if (startClean < i) // an empty range writes nothing
+ {
+ int lengthClean = i - startClean; // chars[i] itself is excluded
+ m_writer.write(chars, startClean, lengthClean);
+ }
+ }
+
/**
* This method checks if a given character is between C0 or C1 range
* of Control characters.
@@ -1634,7 +1751,7 @@
*
* @throws org.xml.sax.SAXException
*/
- protected int accumDefaultEscape(
+ private int accumDefaultEscape(
Writer writer,
char ch,
int i,
@@ -1698,16 +1815,15 @@
* to write it out as Numeric Character Reference(NCR) regardless of XML Version
* being used for output document.
*/
- if (isCharacterInC0orC1Range(ch) ||
- (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
+ if (isCharacterInC0orC1Range(ch) || isNELorLSEPCharacter(ch))
{
writer.write("&#");
writer.write(Integer.toString(ch));
writer.write(';');
}
else if ((!escapingNotNeeded(ch) ||
- ( (fromTextNode && m_charInfo.isSpecialTextChar(ch))
- || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
+ ( (fromTextNode && m_charInfo.shouldMapTextChar(ch))
+ || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch))))
&& m_elemContext.m_currentElemDepth > 0)
{
writer.write("&#");
@@ -1971,28 +2087,86 @@
string.getChars(0,len, m_attrBuff, 0);
final char[] stringChars = m_attrBuff;
- for (int i = 0; i < len; )
+ for (int i = 0; i < len;)
{
char ch = stringChars[i];
- if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
- {
- writer.write(ch);
- i++;
- }
- else
- { // I guess the parser doesn't normalize cr/lf in attributes. -sb
-// if ((CharInfo.S_CARRIAGERETURN == ch)
-// && ((i + 1) < len)
-// && (CharInfo.S_LINEFEED == stringChars[i + 1]))
-// {
-// i++;
-// ch = CharInfo.S_LINEFEED;
-// }
-
+
+ if (m_charInfo.shouldMapAttrChar(ch) || !(escapingNotNeeded(ch))) {
+ // The character is supposed to be replaced by a String
+ // e.g. '&' --> "&amp;"
+ // e.g. '<' --> "&lt;"
i = accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
}
+ else {
+ i++;
+ if (0x0 <= ch && ch <= 0x1F) {
+ // Range 0x00 through 0x1F inclusive
+ // This covers the non-whitespace control characters
+ // in the range 0x1 to 0x1F inclusive.
+ // It also covers the whitespace control characters in the same way:
+ // 0x9 TAB
+ // 0xA NEW LINE
+ // 0xD CARRIAGE RETURN
+ //
+ // We also cover 0x0 ... It isn't valid
+ // but we will output "&#0;"
+
+ // The default will handle this just fine, but this
+ // is a little performance boost to handle the more
+ // common TAB, NEW-LINE, CARRIAGE-RETURN
+ switch (ch) {
+
+ case CharInfo.S_HORIZONAL_TAB:
+ writer.write("&#9;");
+ break;
+ case CharInfo.S_LINEFEED:
+ writer.write("&#10;");
+ break;
+ case CharInfo.S_CARRIAGERETURN:
+ writer.write("&#13;");
+ break;
+ default:
+ writer.write("&#");
+ writer.write(Integer.toString(ch));
+ writer.write(';');
+ break;
+
}
-
+ }
+ else if (ch < 0x7F) {
+ // Range 0x20 through 0x7E inclusive
+ // Normal ASCII chars
+ writer.write(ch);
+ }
+ else if (ch <= 0x9F){
+ // Range 0x7F through 0x9F inclusive
+ // More control characters
+ writer.write("&#");
+ writer.write(Integer.toString(ch));
+ writer.write(';');
+ }
+ else if (ch == CharInfo.S_LINE_SEPARATOR) {
+ // LINE SEPARATOR
+ writer.write("&#8232;");
+ }
+ else if (m_encodingInfo.isInEncoding(ch)) {
+ // If the character is in the encoding, and
+ // not in the normal ASCII range, we also
+ // just write it out
+ writer.write(ch);
+ }
+ else {
+ // This is a fallback plan, we should never get here
+ // but if the character wasn't previously handled
+ // (i.e. isn't in the encoding, etc.) then what
+ // should we do? We choose to write out a character ref
+ writer.write("&#");
+ writer.write(Integer.toString(ch));
+ writer.write(';');
+ }
+
+ }
+ }
}
/**
@@ -2219,13 +2393,17 @@
try
{
- if (shouldIndent())
+ if (shouldIndent() && m_isStandalone)
indent();
final int limit = start + length;
boolean wasDash = false;
if (m_cdataTagOpen)
closeCDATA();
+
+ if (shouldIndent() && !m_isStandalone)
+ indent();
+
final java.io.Writer writer = m_writer;
writer.write(COMMENT_BEGIN);
// Detect occurrences of two consecutive dashes, handle as necessary.
@@ -2258,6 +2436,15 @@
throw new SAXException(e);
}
+ /*
+ * Don't write out any indentation whitespace now,
+ * because there may be non-whitespace text after this.
+ *
+ * Simply mark that at this point if we do decide
+ * to indent that we should
+ * add a newline on the end of the current line before
+ * the indentation at the start of the next line.
+ */
m_startNewLine = true;
// time to generate comment event
if (m_tracer != null)
@@ -2506,7 +2693,7 @@
*/
protected boolean shouldIndent()
{
- return m_doIndent && (!m_ispreserve && !m_isprevtext);
+ return m_doIndent && (!m_ispreserve && !m_isprevtext) && (m_elemContext.m_currentElemDepth > 0 || m_isStandalone);
}
/**
@@ -2749,6 +2936,14 @@
closeCDATA();
m_cdataTagOpen = false;
}
+ if (m_writer != null) {
+ try {
+ m_writer.flush();
+ }
+ catch(IOException e) {
+ // best-effort flush: an IOException raised here is ignored
+ }
+ }
}
public void setContentHandler(ContentHandler ch)