jdk-sandbox: comparison jaxp/src/com/sun/org/apache/xml/internal/serializer/ToStream.java

equal deleted inserted replaced

-:5c5a64ec0839
+:0a840d92fa30
 final boolean ret;
 if (ch < 127)
 {
 // This is the old/fast code here, but is this
 // correct for all encodings?
-if (ch >= CharInfo.S_SPACE || (CharInfo.S_LINEFEED == ch ||
+if (ch >= 0x20 || (0x0A == ch || 0x0D == ch || 0x09 == ch))
-CharInfo.S_CARRIAGERETURN == ch || CharInfo.S_HORIZONAL_TAB == ch))
 ret= true;
 else
 ret = false;
 }
 else {
 *
 * @return i+1 if the character was written, else i.
 *
 * @throws java.io.IOException
 */
-int accumDefaultEntity(
+protected int accumDefaultEntity(
 java.io.Writer writer,
 char ch,
 int i,
 char[] chars,
 int len,
 }
 else
 {
 // if this is text node character and a special one of those,
 // or if this is a character from attribute value and a special one of those
-if ((fromTextNode && m_charInfo.shouldMapTextChar(ch)) || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch)))
+if ((fromTextNode && m_charInfo.isSpecialTextChar(ch)) || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch)))
 {
 String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
 if (null != outputStringForChar)
 {
 return;
 }
 if (m_cdataTagOpen)
 closeCDATA();
+// the check with _escaping is a bit of a hack for XSLTC
 if (m_disableOutputEscapingStates.peekOrFalse() || (!m_escaping))
 {
 charactersRaw(chars, start, length);
 try
 {
 int i;
+char ch1;
 int startClean;
 // skip any leading whitespace
 // don't go off the end and use a hand inlined version
 // of isWhitespace(ch)
 final int end = start + length;
-int lastDirtyCharProcessed = start - 1; // last non-clean character that was processed
+int lastDirty = start - 1; // last character that needed processing
-// that was processed
+for (i = start;
-final Writer writer = m_writer;
+((i < end)
-boolean isAllWhitespace = true;
+&& ((ch1 = chars[i]) == 0x20
+|| (ch1 == 0xA && m_lineSepUse)
-// process any leading whitspace
+|| ch1 == 0xD
-i = start;
+|| ch1 == 0x09));
-while (i < end && isAllWhitespace) {
+i++)
-char ch1 = chars[i];
+{
+/*
-if (m_charInfo.shouldMapTextChar(ch1)) {
+* We are processing leading whitespace, but are doing the same
-// The character is supposed to be replaced by a String
+* processing for dirty characters here as for non-whitespace.
-// so write out the clean whitespace characters accumulated
+*
-// so far
+*/
-// then the String.
+if (!m_charInfo.isTextASCIIClean(ch1))
-writeOutCleanChars(chars, i, lastDirtyCharProcessed);
+{
-String outputStringForChar = m_charInfo
+lastDirty = processDirty(chars,end, i,ch1, lastDirty, true);
-.getOutputStringForChar(ch1);
+i = lastDirty;
-writer.write(outputStringForChar);
-// We can't say that everything we are writing out is
-// all whitespace, we just wrote out a String.
-isAllWhitespace = false;
-lastDirtyCharProcessed = i; // mark the last non-clean
-// character processed
-i++;
-} else {
-// The character is clean, but is it a whitespace ?
-switch (ch1) {
-// TODO: Any other whitespace to consider?
-case CharInfo.S_SPACE:
-// Just accumulate the clean whitespace
-i++;
-break;
-case CharInfo.S_LINEFEED:
-lastDirtyCharProcessed = processLineFeed(chars, i,
-lastDirtyCharProcessed, writer);
-i++;
-break;
-case CharInfo.S_CARRIAGERETURN:
-writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-writer.write("&#13;");
-lastDirtyCharProcessed = i;
-i++;
-break;
-case CharInfo.S_HORIZONAL_TAB:
-// Just accumulate the clean whitespace
-i++;
-break;
-default:
-// The character was clean, but not a whitespace
-// so break the loop to continue with this character
-// (we don't increment index i !!)
-isAllWhitespace = false;
-break;
 }
-}
 }
 /* If there is some non-whitespace, mark that we may need
 * to preserve this. This is only important if we have indentation on.
 */
-if (i < end || !isAllWhitespace)
+if (i < end)
 m_ispreserve = true;
+//            int lengthClean;    // number of clean characters in a row
+//            final boolean[] isAsciiClean = m_charInfo.getASCIIClean();
+final boolean isXML10 = XMLVERSION10.equals(getVersion());
+// we've skipped the leading whitespace, now deal with the rest
 for (; i < end; i++)
 {
-char ch = chars[i];
+{
+// A tight loop to skip over common clean chars
-if (m_charInfo.shouldMapTextChar(ch)) {
+// This tight loop makes it easier for the JIT
-// The character is supposed to be replaced by a String
+// to optimize.
-// e.g.   '&'  -->  "&amp;"
+char ch2;
-// e.g.   '<'  -->  "&lt;"
+while (i<end
-writeOutCleanChars(chars, i, lastDirtyCharProcessed);
+&& ((ch2 = chars[i])<127)
-String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
+&& m_charInfo.isTextASCIIClean(ch2))
-writer.write(outputStringForChar);
+i++;
-lastDirtyCharProcessed = i;
+if (i == end)
+break;
 }
-else {
-if (ch <= 0x1F) {
+final char ch = chars[i];
-// Range 0x00 through 0x1F inclusive
+/*  The check for isCharacterInC0orC1Range and
-//
+*  isNELorLSEPCharacter has been added
-// This covers the non-whitespace control characters
+*  to support Control Characters in XML 1.1
-// in the range 0x1 to 0x1F inclusive.
+*/
-// It also covers the whitespace control characters in the same way:
+if (!isCharacterInC0orC1Range(ch) &&
-// 0x9   TAB
+(isXML10 || !isNELorLSEPCharacter(ch)) &&
-// 0xA   NEW LINE
+(escapingNotNeeded(ch) && (!m_charInfo.isSpecialTextChar(ch)))
-// 0xD   CARRIAGE RETURN
+|| ('"' == ch))
-//
+{
-// We also cover 0x0 ... It isn't valid
+; // a character needing no special processing
-// but we will output "&#0;"
-// The default will handle this just fine, but this
-// is a little performance boost to handle the more
-// common TAB, NEW-LINE, CARRIAGE-RETURN
-switch (ch) {
-case CharInfo.S_HORIZONAL_TAB:
-// Leave whitespace TAB as a real character
-break;
-case CharInfo.S_LINEFEED:
-lastDirtyCharProcessed = processLineFeed(chars, i, lastDirtyCharProcessed, writer);
-break;
-case CharInfo.S_CARRIAGERETURN:
-writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-writer.write("&#13;");
-lastDirtyCharProcessed = i;
-// Leave whitespace carriage return as a real character
-break;
-default:
-writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-writer.write("&#");
-writer.write(Integer.toString(ch));
-writer.write(';');
-lastDirtyCharProcessed = i;
-break;
 }
-}
+else
-else if (ch < 0x7F) {
+{
-// Range 0x20 through 0x7E inclusive
+lastDirty = processDirty(chars,end, i, ch, lastDirty, true);
-// Normal ASCII chars, do nothing, just add it to
+i = lastDirty;
-// the clean characters
-}
-else if (ch <= 0x9F){
-// Range 0x7F through 0x9F inclusive
-// More control characters, including NEL (0x85)
-writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-writer.write("&#");
-writer.write(Integer.toString(ch));
-writer.write(';');
-lastDirtyCharProcessed = i;
-}
-else if (ch == CharInfo.S_LINE_SEPARATOR) {
-// LINE SEPARATOR
-writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-writer.write("&#8232;");
-lastDirtyCharProcessed = i;
-}
-else if (m_encodingInfo.isInEncoding(ch)) {
-// If the character is in the encoding, and
-// not in the normal ASCII range, we also
-// just leave it get added on to the clean characters
-}
-else {
-// This is a fallback plan, we should never get here
-// but if the character wasn't previously handled
-// (i.e. isn't in the encoding, etc.) then what
-// should we do?  We choose to write out an entity
-writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-writer.write("&#");
-writer.write(Integer.toString(ch));
-writer.write(';');
-lastDirtyCharProcessed = i;
-}
 }
 }
 // we've reached the end. Any clean characters at the
 // end of the array that need to be written out?
-startClean = lastDirtyCharProcessed + 1;
+startClean = lastDirty + 1;
 if (i > startClean)
 {
 int lengthClean = i - startClean;
 m_writer.write(chars, startClean, lengthClean);
 }
 // time to fire off characters generation event
 if (m_tracer != null)
 super.fireCharEvent(chars, start, length);
 }
-private int processLineFeed(final char[] chars, int i, int lastProcessed, final Writer writer) throws IOException {
-if (!m_lineSepUse
-|| (m_lineSepLen ==1 && m_lineSep[0] == CharInfo.S_LINEFEED)){
-// We are leaving the new-line alone, and it is just
-// being added to the 'clean' characters,
-// so the last dirty character processed remains unchanged
-}
-else {
-writeOutCleanChars(chars, i, lastProcessed);
-writer.write(m_lineSep, 0, m_lineSepLen);
-lastProcessed = i;
-}
-return lastProcessed;
-}
-private void writeOutCleanChars(final char[] chars, int i, int lastProcessed) throws IOException {
-int startClean;
-startClean = lastProcessed + 1;
-if (startClean < i)
-{
-int lengthClean = i - startClean;
-m_writer.write(chars, startClean, lengthClean);
-}
-}
 /**
 * This method checks if a given character is between C0 or C1 range
 * of Control characters.
 * This method is added to support Control Characters for XML 1.1
 * If a given character is TAB (0x09), LF (0x0A) or CR (0x0D), this method
 * @return i+1 if a character was written, i+2 if two characters
 * were written out, else return i.
 *
 * @throws org.xml.sax.SAXException
 */
-private int accumDefaultEscape(
+protected int accumDefaultEscape(
 Writer writer,
 char ch,
 int i,
 char[] chars,
 int len,
 /*  This if check is added to support control characters in XML 1.1.
 *  If a character is a Control Character within C0 and C1 range, it is desirable
 *  to write it out as Numeric Character Reference(NCR) regardless of XML Version
 *  being used for output document.
 */
-if (isCharacterInC0orC1Range(ch) || isNELorLSEPCharacter(ch))
+if (isCharacterInC0orC1Range(ch) ||
+(XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
 {
 writer.write("&#");
 writer.write(Integer.toString(ch));
 writer.write(';');
 }
 else if ((!escapingNotNeeded(ch) ||
-(  (fromTextNode && m_charInfo.shouldMapTextChar(ch))
+(  (fromTextNode && m_charInfo.isSpecialTextChar(ch))
-|| (!fromTextNode && m_charInfo.shouldMapAttrChar(ch))))
+|| (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
 && m_elemContext.m_currentElemDepth > 0)
 {
 writer.write("&#");
 writer.write(Integer.toString(ch));
 writer.write(';');
 m_attrBuff = new char[len*2 + 1];
 }
 string.getChars(0,len, m_attrBuff, 0);
 final char[] stringChars = m_attrBuff;
-for (int i = 0; i < len;)
+for (int i = 0; i < len; )
 {
 char ch = stringChars[i];
+if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
-if (m_charInfo.shouldMapAttrChar(ch) || !(escapingNotNeeded(ch))) {
+{
-// The character is supposed to be replaced by a String
+writer.write(ch);
-// e.g.   '&'  -->  "&amp;"
+i++;
-// e.g.   '<'  -->  "&lt;"
+}
+else
+{ // I guess the parser doesn't normalize cr/lf in attributes. -sb
+//                if ((CharInfo.S_CARRIAGERETURN == ch)
+//                    && ((i + 1) < len)
+//                    && (CharInfo.S_LINEFEED == stringChars[i + 1]))
+//                {
+//                    i++;
+//                    ch = CharInfo.S_LINEFEED;
+//                }
 i = accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
 }
-else {
+}
-i++;
-if (0x0 <= ch && ch <= 0x1F) {
-// Range 0x00 through 0x1F inclusive
-// This covers the non-whitespace control characters
-// in the range 0x1 to 0x1F inclusive.
-// It also covers the whitespace control characters in the same way:
-// 0x9   TAB
-// 0xA   NEW LINE
-// 0xD   CARRIAGE RETURN
-//
-// We also cover 0x0 ... It isn't valid
-// but we will output "&#0;"
-// The default will handle this just fine, but this
-// is a little performance boost to handle the more
-// common TAB, NEW-LINE, CARRIAGE-RETURN
-switch (ch) {
-case CharInfo.S_HORIZONAL_TAB:
-writer.write("&#9;");
-break;
-case CharInfo.S_LINEFEED:
-writer.write("&#10;");
-break;
-case CharInfo.S_CARRIAGERETURN:
-writer.write("&#13;");
-break;
-default:
-writer.write("&#");
-writer.write(Integer.toString(ch));
-writer.write(';');
-break;
-}
-}
-else if (ch < 0x7F) {
-// Range 0x20 through 0x7E inclusive
-// Normal ASCII chars
-writer.write(ch);
-}
-else if (ch <= 0x9F){
-// Range 0x7F through 0x9F inclusive
-// More control characters
-writer.write("&#");
-writer.write(Integer.toString(ch));
-writer.write(';');
-}
-else if (ch == CharInfo.S_LINE_SEPARATOR) {
-// LINE SEPARATOR
-writer.write("&#8232;");
-}
-else if (m_encodingInfo.isInEncoding(ch)) {
-// If the character is in the encoding, and
-// not in the normal ASCII range, we also
-// just write it out
-writer.write(ch);
-}
-else {
-// This is a fallback plan, we should never get here
-// but if the character wasn't previously handled
-// (i.e. isn't in the encoding, etc.) then what
-// should we do?  We choose to write out a character ref
-writer.write("&#");
-writer.write(Integer.toString(ch));
-writer.write(';');
-}
-}
-}
 }
 /**
 * Receive notification of the end of an element.
 *
 if (m_cdataTagOpen)
 {
 closeCDATA();
 m_cdataTagOpen = false;
-}
-if (m_writer != null) {
-try {
-m_writer.flush();
-}
-catch(IOException e) {
-// what? me worry?
-}
 }
 }
 public void setContentHandler(ContentHandler ch)
 {

changeset 12902	0a840d92fa30
parent 12458	d601e4bba306
child 16953	a44e04deb948