jaxp/src/com/sun/org/apache/xml/internal/serializer/ToStream.java
changeset 12902 0a840d92fa30
parent 12458 d601e4bba306
child 16953 a44e04deb948
equal deleted inserted replaced
12796:5c5a64ec0839 12902:0a840d92fa30
   917         final boolean ret;
   917         final boolean ret;
   918         if (ch < 127)
   918         if (ch < 127)
   919         {
   919         {
   920             // This is the old/fast code here, but is this
   920             // This is the old/fast code here, but is this
   921             // correct for all encodings?
   921             // correct for all encodings?
   922             if (ch >= CharInfo.S_SPACE || (CharInfo.S_LINEFEED == ch ||
   922             if (ch >= 0x20 || (0x0A == ch || 0x0D == ch || 0x09 == ch))
   923                     CharInfo.S_CARRIAGERETURN == ch || CharInfo.S_HORIZONAL_TAB == ch))
       
   924                 ret= true;
   923                 ret= true;
   925             else
   924             else
   926                 ret = false;
   925                 ret = false;
   927         }
   926         }
   928         else {
   927         else {
  1027      *
  1026      *
  1028      * @return i+1 if the character was written, else i.
  1027      * @return i+1 if the character was written, else i.
  1029      *
  1028      *
  1030      * @throws java.io.IOException
  1029      * @throws java.io.IOException
  1031      */
  1030      */
  1032     int accumDefaultEntity(
  1031     protected int accumDefaultEntity(
  1033         java.io.Writer writer,
  1032         java.io.Writer writer,
  1034         char ch,
  1033         char ch,
  1035         int i,
  1034         int i,
  1036         char[] chars,
  1035         char[] chars,
  1037         int len,
  1036         int len,
  1046         }
  1045         }
  1047         else
  1046         else
  1048         {
  1047         {
  1049             // if this is text node character and a special one of those,
  1048             // if this is text node character and a special one of those,
  1050             // or if this is a character from attribute value and a special one of those
  1049             // or if this is a character from attribute value and a special one of those
  1051             if ((fromTextNode && m_charInfo.shouldMapTextChar(ch)) || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch)))
  1050             if ((fromTextNode && m_charInfo.isSpecialTextChar(ch)) || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch)))
  1052             {
  1051             {
  1053                 String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
  1052                 String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
  1054 
  1053 
  1055                 if (null != outputStringForChar)
  1054                 if (null != outputStringForChar)
  1056                 {
  1055                 {
  1397             return;
  1396             return;
  1398         }
  1397         }
  1399 
  1398 
  1400         if (m_cdataTagOpen)
  1399         if (m_cdataTagOpen)
  1401             closeCDATA();
  1400             closeCDATA();
       
   1401         // the check with _escaping is a bit of a hack for XSLTC
  1402 
  1402 
  1403         if (m_disableOutputEscapingStates.peekOrFalse() || (!m_escaping))
  1403         if (m_disableOutputEscapingStates.peekOrFalse() || (!m_escaping))
  1404         {
  1404         {
  1405             charactersRaw(chars, start, length);
  1405             charactersRaw(chars, start, length);
  1406 
  1406 
  1419 
  1419 
  1420 
  1420 
  1421         try
  1421         try
  1422         {
  1422         {
  1423             int i;
  1423             int i;
       
  1424             char ch1;
  1424             int startClean;
  1425             int startClean;
  1425 
  1426 
   1426             // skip any leading whitespace
   1427             // skip any leading whitespace
  1427             // don't go off the end and use a hand inlined version
  1428             // don't go off the end and use a hand inlined version
  1428             // of isWhitespace(ch)
  1429             // of isWhitespace(ch)
  1429             final int end = start + length;
  1430             final int end = start + length;
  1430             int lastDirtyCharProcessed = start - 1; // last non-clean character that was processed
  1431             int lastDirty = start - 1; // last character that needed processing
  1431                                                                                                         // that was processed
  1432             for (i = start;
  1432             final Writer writer = m_writer;
  1433                 ((i < end)
  1433             boolean isAllWhitespace = true;
  1434                     && ((ch1 = chars[i]) == 0x20
  1434 
  1435                         || (ch1 == 0xA && m_lineSepUse)
   1435             // process any leading whitespace
  1436                         || ch1 == 0xD
  1436             i = start;
  1437                         || ch1 == 0x09));
  1437             while (i < end && isAllWhitespace) {
  1438                 i++)
  1438                 char ch1 = chars[i];
  1439             {
  1439 
  1440                 /*
  1440                 if (m_charInfo.shouldMapTextChar(ch1)) {
  1441                  * We are processing leading whitespace, but are doing the same
  1441                     // The character is supposed to be replaced by a String
  1442                  * processing for dirty characters here as for non-whitespace.
  1442                     // so write out the clean whitespace characters accumulated
  1443                  *
  1443                     // so far
  1444                  */
  1444                     // then the String.
  1445                 if (!m_charInfo.isTextASCIIClean(ch1))
  1445                     writeOutCleanChars(chars, i, lastDirtyCharProcessed);
  1446                 {
  1446                     String outputStringForChar = m_charInfo
  1447                     lastDirty = processDirty(chars,end, i,ch1, lastDirty, true);
  1447                             .getOutputStringForChar(ch1);
  1448                     i = lastDirty;
  1448                     writer.write(outputStringForChar);
       
  1449                     // We can't say that everything we are writing out is
       
  1450                     // all whitespace, we just wrote out a String.
       
  1451                     isAllWhitespace = false;
       
  1452                     lastDirtyCharProcessed = i; // mark the last non-clean
       
  1453                     // character processed
       
  1454                     i++;
       
  1455                 } else {
       
  1456                     // The character is clean, but is it a whitespace ?
       
  1457                     switch (ch1) {
       
  1458                     // TODO: Any other whitespace to consider?
       
  1459                     case CharInfo.S_SPACE:
       
  1460                         // Just accumulate the clean whitespace
       
  1461                         i++;
       
  1462                         break;
       
  1463                     case CharInfo.S_LINEFEED:
       
  1464                         lastDirtyCharProcessed = processLineFeed(chars, i,
       
  1465                                 lastDirtyCharProcessed, writer);
       
  1466                         i++;
       
  1467                         break;
       
  1468                     case CharInfo.S_CARRIAGERETURN:
       
  1469                         writeOutCleanChars(chars, i, lastDirtyCharProcessed);
       
  1470                         writer.write("&#13;");
       
  1471                         lastDirtyCharProcessed = i;
       
  1472                         i++;
       
  1473                         break;
       
  1474                     case CharInfo.S_HORIZONAL_TAB:
       
  1475                         // Just accumulate the clean whitespace
       
  1476                         i++;
       
  1477                         break;
       
  1478                     default:
       
  1479                         // The character was clean, but not a whitespace
       
  1480                         // so break the loop to continue with this character
       
  1481                         // (we don't increment index i !!)
       
  1482                         isAllWhitespace = false;
       
  1483                         break;
       
  1484                 }
  1449                 }
  1485             }
       
  1486             }
  1450             }
  1487             /* If there is some non-whitespace, mark that we may need
  1451             /* If there is some non-whitespace, mark that we may need
  1488              * to preserve this. This is only important if we have indentation on.
  1452              * to preserve this. This is only important if we have indentation on.
  1489              */
  1453              */
  1490             if (i < end || !isAllWhitespace)
  1454             if (i < end)
  1491                 m_ispreserve = true;
  1455                 m_ispreserve = true;
  1492 
  1456 
       
  1457 
       
  1458 //            int lengthClean;    // number of clean characters in a row
       
  1459 //            final boolean[] isAsciiClean = m_charInfo.getASCIIClean();
       
  1460 
       
  1461             final boolean isXML10 = XMLVERSION10.equals(getVersion());
       
  1462             // we've skipped the leading whitespace, now deal with the rest
  1493             for (; i < end; i++)
  1463             for (; i < end; i++)
  1494             {
  1464             {
  1495                 char ch = chars[i];
  1465                 {
  1496 
  1466                     // A tight loop to skip over common clean chars
  1497                 if (m_charInfo.shouldMapTextChar(ch)) {
  1467                     // This tight loop makes it easier for the JIT
  1498                     // The character is supposed to be replaced by a String
  1468                     // to optimize.
  1499                     // e.g.   '&'  -->  "&amp;"
  1469                     char ch2;
  1500                     // e.g.   '<'  -->  "&lt;"
  1470                     while (i<end
  1501                     writeOutCleanChars(chars, i, lastDirtyCharProcessed);
  1471                             && ((ch2 = chars[i])<127)
  1502                     String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
  1472                             && m_charInfo.isTextASCIIClean(ch2))
  1503                     writer.write(outputStringForChar);
  1473                             i++;
  1504                     lastDirtyCharProcessed = i;
  1474                     if (i == end)
       
  1475                         break;
  1505                 }
  1476                 }
  1506                 else {
  1477 
  1507                     if (ch <= 0x1F) {
  1478                 final char ch = chars[i];
  1508                         // Range 0x00 through 0x1F inclusive
   1479                 /*  The check for isCharacterInC0orC1Range and
  1509                         //
  1480                  *  isNELorLSEPCharacter has been added
  1510                         // This covers the non-whitespace control characters
  1481                  *  to support Control Characters in XML 1.1
  1511                         // in the range 0x1 to 0x1F inclusive.
  1482                  */
  1512                         // It also covers the whitespace control characters in the same way:
  1483                 if (!isCharacterInC0orC1Range(ch) &&
  1513                         // 0x9   TAB
  1484                     (isXML10 || !isNELorLSEPCharacter(ch)) &&
  1514                         // 0xA   NEW LINE
  1485                     (escapingNotNeeded(ch) && (!m_charInfo.isSpecialTextChar(ch)))
  1515                         // 0xD   CARRIAGE RETURN
  1486                         || ('"' == ch))
  1516                         //
  1487                 {
  1517                         // We also cover 0x0 ... It isn't valid
  1488                     ; // a character needing no special processing
  1518                         // but we will output "&#0;"
       
  1519 
       
  1520                         // The default will handle this just fine, but this
       
  1521                         // is a little performance boost to handle the more
       
  1522                         // common TAB, NEW-LINE, CARRIAGE-RETURN
       
  1523                         switch (ch) {
       
  1524 
       
  1525                         case CharInfo.S_HORIZONAL_TAB:
       
  1526                             // Leave whitespace TAB as a real character
       
  1527                         break;
       
  1528                         case CharInfo.S_LINEFEED:
       
  1529                             lastDirtyCharProcessed = processLineFeed(chars, i, lastDirtyCharProcessed, writer);
       
  1530                             break;
       
  1531                         case CharInfo.S_CARRIAGERETURN:
       
  1532                                 writeOutCleanChars(chars, i, lastDirtyCharProcessed);
       
  1533                                 writer.write("&#13;");
       
  1534                                 lastDirtyCharProcessed = i;
       
  1535                             // Leave whitespace carriage return as a real character
       
  1536                             break;
       
  1537                         default:
       
  1538                             writeOutCleanChars(chars, i, lastDirtyCharProcessed);
       
  1539                             writer.write("&#");
       
  1540                             writer.write(Integer.toString(ch));
       
  1541                             writer.write(';');
       
  1542                             lastDirtyCharProcessed = i;
       
  1543                             break;
       
  1544 
       
  1545                 }
  1489                 }
  1546                     }
  1490                 else
  1547                     else if (ch < 0x7F) {
  1491                 {
  1548                         // Range 0x20 through 0x7E inclusive
  1492                     lastDirty = processDirty(chars,end, i, ch, lastDirty, true);
  1549                         // Normal ASCII chars, do nothing, just add it to
  1493                     i = lastDirty;
  1550                         // the clean characters
       
  1551 
       
  1552                 }
       
  1553                     else if (ch <= 0x9F){
       
  1554                         // Range 0x7F through 0x9F inclusive
       
  1555                         // More control characters, including NEL (0x85)
       
  1556                         writeOutCleanChars(chars, i, lastDirtyCharProcessed);
       
  1557                         writer.write("&#");
       
  1558                         writer.write(Integer.toString(ch));
       
  1559                         writer.write(';');
       
  1560                         lastDirtyCharProcessed = i;
       
  1561                 }
       
  1562                     else if (ch == CharInfo.S_LINE_SEPARATOR) {
       
  1563                         // LINE SEPARATOR
       
  1564                         writeOutCleanChars(chars, i, lastDirtyCharProcessed);
       
  1565                         writer.write("&#8232;");
       
  1566                         lastDirtyCharProcessed = i;
       
  1567             }
       
  1568                     else if (m_encodingInfo.isInEncoding(ch)) {
       
  1569                         // If the character is in the encoding, and
       
  1570                         // not in the normal ASCII range, we also
       
  1571                         // just leave it get added on to the clean characters
       
  1572 
       
  1573                     }
       
  1574                     else {
       
  1575                         // This is a fallback plan, we should never get here
       
  1576                         // but if the character wasn't previously handled
       
  1577                         // (i.e. isn't in the encoding, etc.) then what
       
  1578                         // should we do?  We choose to write out an entity
       
  1579                         writeOutCleanChars(chars, i, lastDirtyCharProcessed);
       
  1580                         writer.write("&#");
       
  1581                         writer.write(Integer.toString(ch));
       
  1582                         writer.write(';');
       
  1583                         lastDirtyCharProcessed = i;
       
  1584                     }
       
  1585                 }
  1494                 }
  1586             }
  1495             }
  1587 
  1496 
  1588             // we've reached the end. Any clean characters at the
  1497             // we've reached the end. Any clean characters at the
   1589             // end of the array that need to be written out?
   1498             // end of the array that need to be written out?
  1590             startClean = lastDirtyCharProcessed + 1;
  1499             startClean = lastDirty + 1;
  1591             if (i > startClean)
  1500             if (i > startClean)
  1592             {
  1501             {
  1593                 int lengthClean = i - startClean;
  1502                 int lengthClean = i - startClean;
  1594                 m_writer.write(chars, startClean, lengthClean);
  1503                 m_writer.write(chars, startClean, lengthClean);
  1595             }
  1504             }
  1604 
  1513 
  1605         // time to fire off characters generation event
  1514         // time to fire off characters generation event
  1606         if (m_tracer != null)
  1515         if (m_tracer != null)
  1607             super.fireCharEvent(chars, start, length);
  1516             super.fireCharEvent(chars, start, length);
  1608     }
  1517     }
  1609 
       
  1610         private int processLineFeed(final char[] chars, int i, int lastProcessed, final Writer writer) throws IOException {
       
  1611                 if (!m_lineSepUse
       
  1612                 || (m_lineSepLen ==1 && m_lineSep[0] == CharInfo.S_LINEFEED)){
       
  1613                     // We are leaving the new-line alone, and it is just
       
  1614                     // being added to the 'clean' characters,
       
  1615                         // so the last dirty character processed remains unchanged
       
  1616                 }
       
  1617                 else {
       
  1618                     writeOutCleanChars(chars, i, lastProcessed);
       
  1619                     writer.write(m_lineSep, 0, m_lineSepLen);
       
  1620                     lastProcessed = i;
       
  1621                 }
       
  1622                 return lastProcessed;
       
  1623         }
       
  1624 
       
  1625     private void writeOutCleanChars(final char[] chars, int i, int lastProcessed) throws IOException {
       
  1626         int startClean;
       
  1627         startClean = lastProcessed + 1;
       
  1628         if (startClean < i)
       
  1629         {
       
  1630             int lengthClean = i - startClean;
       
  1631             m_writer.write(chars, startClean, lengthClean);
       
  1632         }
       
  1633      }
       
  1634 
       
  1635     /**
  1518     /**
   1636      * This method checks if a given character is in the C0 or C1 range
   1519      * This method checks if a given character is in the C0 or C1 range
  1637      * of Control characters.
  1520      * of Control characters.
  1638      * This method is added to support Control Characters for XML 1.1
  1521      * This method is added to support Control Characters for XML 1.1
  1639      * If a given character is TAB (0x09), LF (0x0A) or CR (0x0D), this method
  1522      * If a given character is TAB (0x09), LF (0x0A) or CR (0x0D), this method
  1749      * @return i+1 if a character was written, i+2 if two characters
  1632      * @return i+1 if a character was written, i+2 if two characters
  1750      * were written out, else return i.
  1633      * were written out, else return i.
  1751      *
  1634      *
  1752      * @throws org.xml.sax.SAXException
  1635      * @throws org.xml.sax.SAXException
  1753      */
  1636      */
  1754     private int accumDefaultEscape(
  1637     protected int accumDefaultEscape(
  1755         Writer writer,
  1638         Writer writer,
  1756         char ch,
  1639         char ch,
  1757         int i,
  1640         int i,
  1758         char[] chars,
  1641         char[] chars,
  1759         int len,
  1642         int len,
  1813                 /*  This if check is added to support control characters in XML 1.1.
  1696                 /*  This if check is added to support control characters in XML 1.1.
  1814                  *  If a character is a Control Character within C0 and C1 range, it is desirable
  1697                  *  If a character is a Control Character within C0 and C1 range, it is desirable
  1815                  *  to write it out as Numeric Character Reference(NCR) regardless of XML Version
  1698                  *  to write it out as Numeric Character Reference(NCR) regardless of XML Version
  1816                  *  being used for output document.
  1699                  *  being used for output document.
  1817                  */
  1700                  */
  1818                 if (isCharacterInC0orC1Range(ch) || isNELorLSEPCharacter(ch))
  1701                 if (isCharacterInC0orC1Range(ch) ||
       
  1702                         (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
  1819                 {
  1703                 {
  1820                     writer.write("&#");
  1704                     writer.write("&#");
  1821                     writer.write(Integer.toString(ch));
  1705                     writer.write(Integer.toString(ch));
  1822                     writer.write(';');
  1706                     writer.write(';');
  1823                 }
  1707                 }
  1824                 else if ((!escapingNotNeeded(ch) ||
  1708                 else if ((!escapingNotNeeded(ch) ||
  1825                     (  (fromTextNode && m_charInfo.shouldMapTextChar(ch))
  1709                     (  (fromTextNode && m_charInfo.isSpecialTextChar(ch))
  1826                      || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch))))
  1710                      || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
  1827                 && m_elemContext.m_currentElemDepth > 0)
  1711                 && m_elemContext.m_currentElemDepth > 0)
  1828                 {
  1712                 {
  1829                     writer.write("&#");
  1713                     writer.write("&#");
  1830                     writer.write(Integer.toString(ch));
  1714                     writer.write(Integer.toString(ch));
  1831                     writer.write(';');
  1715                     writer.write(';');
  2085            m_attrBuff = new char[len*2 + 1];
  1969            m_attrBuff = new char[len*2 + 1];
  2086         }
  1970         }
  2087         string.getChars(0,len, m_attrBuff, 0);
  1971         string.getChars(0,len, m_attrBuff, 0);
  2088         final char[] stringChars = m_attrBuff;
  1972         final char[] stringChars = m_attrBuff;
  2089 
  1973 
  2090         for (int i = 0; i < len;)
  1974         for (int i = 0; i < len; )
  2091         {
  1975         {
  2092             char ch = stringChars[i];
  1976             char ch = stringChars[i];
  2093 
  1977             if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
  2094             if (m_charInfo.shouldMapAttrChar(ch) || !(escapingNotNeeded(ch))) {
  1978             {
  2095                 // The character is supposed to be replaced by a String
  1979                 writer.write(ch);
  2096                 // e.g.   '&'  -->  "&amp;"
  1980                 i++;
  2097                 // e.g.   '<'  -->  "&lt;"
  1981             }
       
  1982             else
       
  1983             { // I guess the parser doesn't normalize cr/lf in attributes. -sb
       
  1984 //                if ((CharInfo.S_CARRIAGERETURN == ch)
       
  1985 //                    && ((i + 1) < len)
       
  1986 //                    && (CharInfo.S_LINEFEED == stringChars[i + 1]))
       
  1987 //                {
       
  1988 //                    i++;
       
  1989 //                    ch = CharInfo.S_LINEFEED;
       
  1990 //                }
       
  1991 
  2098                 i = accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
  1992                 i = accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
  2099             }
  1993             }
  2100             else {
  1994         }
  2101                 i++;
  1995 
  2102                 if (0x0 <= ch && ch <= 0x1F) {
       
  2103                     // Range 0x00 through 0x1F inclusive
       
  2104                     // This covers the non-whitespace control characters
       
  2105                     // in the range 0x1 to 0x1F inclusive.
       
  2106                     // It also covers the whitespace control characters in the same way:
       
  2107                     // 0x9   TAB
       
  2108                     // 0xA   NEW LINE
       
  2109                     // 0xD   CARRIAGE RETURN
       
  2110                     //
       
  2111                     // We also cover 0x0 ... It isn't valid
       
  2112                     // but we will output "&#0;"
       
  2113 
       
  2114                     // The default will handle this just fine, but this
       
  2115                     // is a little performance boost to handle the more
       
  2116                     // common TAB, NEW-LINE, CARRIAGE-RETURN
       
  2117                     switch (ch) {
       
  2118 
       
  2119                     case CharInfo.S_HORIZONAL_TAB:
       
  2120                         writer.write("&#9;");
       
  2121                         break;
       
  2122                     case CharInfo.S_LINEFEED:
       
  2123                         writer.write("&#10;");
       
  2124                         break;
       
  2125                     case CharInfo.S_CARRIAGERETURN:
       
  2126                         writer.write("&#13;");
       
  2127                         break;
       
  2128                     default:
       
  2129                         writer.write("&#");
       
  2130                         writer.write(Integer.toString(ch));
       
  2131                         writer.write(';');
       
  2132                         break;
       
  2133 
       
  2134         }
       
  2135                 }
       
  2136                 else if (ch < 0x7F) {
       
  2137                     // Range 0x20 through 0x7E inclusive
       
  2138                     // Normal ASCII chars
       
  2139                         writer.write(ch);
       
  2140                 }
       
  2141                 else if (ch <= 0x9F){
       
  2142                     // Range 0x7F through 0x9F inclusive
       
  2143                     // More control characters
       
  2144                     writer.write("&#");
       
  2145                     writer.write(Integer.toString(ch));
       
  2146                     writer.write(';');
       
  2147                 }
       
  2148                 else if (ch == CharInfo.S_LINE_SEPARATOR) {
       
  2149                     // LINE SEPARATOR
       
  2150                     writer.write("&#8232;");
       
  2151                 }
       
  2152                 else if (m_encodingInfo.isInEncoding(ch)) {
       
  2153                     // If the character is in the encoding, and
       
  2154                     // not in the normal ASCII range, we also
       
  2155                     // just write it out
       
  2156                     writer.write(ch);
       
  2157                 }
       
  2158                 else {
       
  2159                     // This is a fallback plan, we should never get here
       
  2160                     // but if the character wasn't previously handled
       
  2161                     // (i.e. isn't in the encoding, etc.) then what
       
  2162                     // should we do?  We choose to write out a character ref
       
  2163                     writer.write("&#");
       
  2164                     writer.write(Integer.toString(ch));
       
  2165                     writer.write(';');
       
  2166                 }
       
  2167 
       
  2168     }
       
  2169         }
       
  2170     }
  1996     }
  2171 
  1997 
  2172     /**
  1998     /**
  2173      * Receive notification of the end of an element.
  1999      * Receive notification of the end of an element.
  2174      *
  2000      *
  2933 
  2759 
  2934             if (m_cdataTagOpen)
  2760             if (m_cdataTagOpen)
  2935             {
  2761             {
  2936                 closeCDATA();
  2762                 closeCDATA();
  2937                 m_cdataTagOpen = false;
  2763                 m_cdataTagOpen = false;
  2938             }
       
  2939             if (m_writer != null) {
       
  2940                 try {
       
  2941                     m_writer.flush();
       
  2942     }
       
  2943                 catch(IOException e) {
       
  2944                     // what? me worry?
       
  2945                 }
       
  2946             }
  2764             }
  2947     }
  2765     }
  2948 
  2766 
  2949     public void setContentHandler(ContentHandler ch)
  2767     public void setContentHandler(ContentHandler ch)
  2950     {
  2768     {