1419 |
1419 |
1420 |
1420 |
1421 try |
1421 try |
1422 { |
1422 { |
1423 int i; |
1423 int i; |
|
1424 char ch1; |
1424 int startClean; |
1425 int startClean; |
1425 |
1426 |
1426 // skip any leading whitspace |
1427 // skip any leading whitspace |
1427 // don't go off the end and use a hand inlined version |
1428 // don't go off the end and use a hand inlined version |
1428 // of isWhitespace(ch) |
1429 // of isWhitespace(ch) |
1429 final int end = start + length; |
1430 final int end = start + length; |
1430 int lastDirtyCharProcessed = start - 1; // last non-clean character that was processed |
1431 int lastDirty = start - 1; // last character that needed processing |
1431 // that was processed |
1432 for (i = start; |
1432 final Writer writer = m_writer; |
1433 ((i < end) |
1433 boolean isAllWhitespace = true; |
1434 && ((ch1 = chars[i]) == 0x20 |
1434 |
1435 || (ch1 == 0xA && m_lineSepUse) |
1435 // process any leading whitspace |
1436 || ch1 == 0xD |
1436 i = start; |
1437 || ch1 == 0x09)); |
1437 while (i < end && isAllWhitespace) { |
1438 i++) |
1438 char ch1 = chars[i]; |
1439 { |
1439 |
1440 /* |
1440 if (m_charInfo.shouldMapTextChar(ch1)) { |
1441 * We are processing leading whitespace, but are doing the same |
1441 // The character is supposed to be replaced by a String |
1442 * processing for dirty characters here as for non-whitespace. |
1442 // so write out the clean whitespace characters accumulated |
1443 * |
1443 // so far |
1444 */ |
1444 // then the String. |
1445 if (!m_charInfo.isTextASCIIClean(ch1)) |
1445 writeOutCleanChars(chars, i, lastDirtyCharProcessed); |
1446 { |
1446 String outputStringForChar = m_charInfo |
1447 lastDirty = processDirty(chars,end, i,ch1, lastDirty, true); |
1447 .getOutputStringForChar(ch1); |
1448 i = lastDirty; |
1448 writer.write(outputStringForChar); |
|
1449 // We can't say that everything we are writing out is |
|
1450 // all whitespace, we just wrote out a String. |
|
1451 isAllWhitespace = false; |
|
1452 lastDirtyCharProcessed = i; // mark the last non-clean |
|
1453 // character processed |
|
1454 i++; |
|
1455 } else { |
|
1456 // The character is clean, but is it a whitespace ? |
|
1457 switch (ch1) { |
|
1458 // TODO: Any other whitespace to consider? |
|
1459 case CharInfo.S_SPACE: |
|
1460 // Just accumulate the clean whitespace |
|
1461 i++; |
|
1462 break; |
|
1463 case CharInfo.S_LINEFEED: |
|
1464 lastDirtyCharProcessed = processLineFeed(chars, i, |
|
1465 lastDirtyCharProcessed, writer); |
|
1466 i++; |
|
1467 break; |
|
1468 case CharInfo.S_CARRIAGERETURN: |
|
1469 writeOutCleanChars(chars, i, lastDirtyCharProcessed); |
|
1470 writer.write(" "); |
|
1471 lastDirtyCharProcessed = i; |
|
1472 i++; |
|
1473 break; |
|
1474 case CharInfo.S_HORIZONAL_TAB: |
|
1475 // Just accumulate the clean whitespace |
|
1476 i++; |
|
1477 break; |
|
1478 default: |
|
1479 // The character was clean, but not a whitespace |
|
1480 // so break the loop to continue with this character |
|
1481 // (we don't increment index i !!) |
|
1482 isAllWhitespace = false; |
|
1483 break; |
|
1484 } |
1449 } |
1485 } |
|
1486 } |
1450 } |
1487 /* If there is some non-whitespace, mark that we may need |
1451 /* If there is some non-whitespace, mark that we may need |
1488 * to preserve this. This is only important if we have indentation on. |
1452 * to preserve this. This is only important if we have indentation on. |
1489 */ |
1453 */ |
1490 if (i < end || !isAllWhitespace) |
1454 if (i < end) |
1491 m_ispreserve = true; |
1455 m_ispreserve = true; |
1492 |
1456 |
|
1457 |
|
1458 // int lengthClean; // number of clean characters in a row |
|
1459 // final boolean[] isAsciiClean = m_charInfo.getASCIIClean(); |
|
1460 |
|
1461 final boolean isXML10 = XMLVERSION10.equals(getVersion()); |
|
1462 // we've skipped the leading whitespace, now deal with the rest |
1493 for (; i < end; i++) |
1463 for (; i < end; i++) |
1494 { |
1464 { |
1495 char ch = chars[i]; |
1465 { |
1496 |
1466 // A tight loop to skip over common clean chars |
1497 if (m_charInfo.shouldMapTextChar(ch)) { |
1467 // This tight loop makes it easier for the JIT |
1498 // The character is supposed to be replaced by a String |
1468 // to optimize. |
1499 // e.g. '&' --> "&" |
1469 char ch2; |
1500 // e.g. '<' --> "<" |
1470 while (i<end |
1501 writeOutCleanChars(chars, i, lastDirtyCharProcessed); |
1471 && ((ch2 = chars[i])<127) |
1502 String outputStringForChar = m_charInfo.getOutputStringForChar(ch); |
1472 && m_charInfo.isTextASCIIClean(ch2)) |
1503 writer.write(outputStringForChar); |
1473 i++; |
1504 lastDirtyCharProcessed = i; |
1474 if (i == end) |
|
1475 break; |
1505 } |
1476 } |
1506 else { |
1477 |
1507 if (ch <= 0x1F) { |
1478 final char ch = chars[i]; |
1508 // Range 0x00 through 0x1F inclusive |
1479 /* The check for isCharacterInC0orC1Ranger and |
1509 // |
1480 * isNELorLSEPCharacter has been added |
1510 // This covers the non-whitespace control characters |
1481 * to support Control Characters in XML 1.1 |
1511 // in the range 0x1 to 0x1F inclusive. |
1482 */ |
1512 // It also covers the whitespace control characters in the same way: |
1483 if (!isCharacterInC0orC1Range(ch) && |
1513 // 0x9 TAB |
1484 (isXML10 || !isNELorLSEPCharacter(ch)) && |
1514 // 0xA NEW LINE |
1485 (escapingNotNeeded(ch) && (!m_charInfo.isSpecialTextChar(ch))) |
1515 // 0xD CARRIAGE RETURN |
1486 || ('"' == ch)) |
1516 // |
1487 { |
1517 // We also cover 0x0 ... It isn't valid |
1488 ; // a character needing no special processing |
1518 // but we will output "�" |
|
1519 |
|
1520 // The default will handle this just fine, but this |
|
1521 // is a little performance boost to handle the more |
|
1522 // common TAB, NEW-LINE, CARRIAGE-RETURN |
|
1523 switch (ch) { |
|
1524 |
|
1525 case CharInfo.S_HORIZONAL_TAB: |
|
1526 // Leave whitespace TAB as a real character |
|
1527 break; |
|
1528 case CharInfo.S_LINEFEED: |
|
1529 lastDirtyCharProcessed = processLineFeed(chars, i, lastDirtyCharProcessed, writer); |
|
1530 break; |
|
1531 case CharInfo.S_CARRIAGERETURN: |
|
1532 writeOutCleanChars(chars, i, lastDirtyCharProcessed); |
|
1533 writer.write(" "); |
|
1534 lastDirtyCharProcessed = i; |
|
1535 // Leave whitespace carriage return as a real character |
|
1536 break; |
|
1537 default: |
|
1538 writeOutCleanChars(chars, i, lastDirtyCharProcessed); |
|
1539 writer.write("&#"); |
|
1540 writer.write(Integer.toString(ch)); |
|
1541 writer.write(';'); |
|
1542 lastDirtyCharProcessed = i; |
|
1543 break; |
|
1544 |
|
1545 } |
1489 } |
1546 } |
1490 else |
1547 else if (ch < 0x7F) { |
1491 { |
1548 // Range 0x20 through 0x7E inclusive |
1492 lastDirty = processDirty(chars,end, i, ch, lastDirty, true); |
1549 // Normal ASCII chars, do nothing, just add it to |
1493 i = lastDirty; |
1550 // the clean characters |
|
1551 |
|
1552 } |
|
1553 else if (ch <= 0x9F){ |
|
1554 // Range 0x7F through 0x9F inclusive |
|
1555 // More control characters, including NEL (0x85) |
|
1556 writeOutCleanChars(chars, i, lastDirtyCharProcessed); |
|
1557 writer.write("&#"); |
|
1558 writer.write(Integer.toString(ch)); |
|
1559 writer.write(';'); |
|
1560 lastDirtyCharProcessed = i; |
|
1561 } |
|
1562 else if (ch == CharInfo.S_LINE_SEPARATOR) { |
|
1563 // LINE SEPARATOR |
|
1564 writeOutCleanChars(chars, i, lastDirtyCharProcessed); |
|
1565 writer.write("
"); |
|
1566 lastDirtyCharProcessed = i; |
|
1567 } |
|
1568 else if (m_encodingInfo.isInEncoding(ch)) { |
|
1569 // If the character is in the encoding, and |
|
1570 // not in the normal ASCII range, we also |
|
1571 // just leave it get added on to the clean characters |
|
1572 |
|
1573 } |
|
1574 else { |
|
1575 // This is a fallback plan, we should never get here |
|
1576 // but if the character wasn't previously handled |
|
1577 // (i.e. isn't in the encoding, etc.) then what |
|
1578 // should we do? We choose to write out an entity |
|
1579 writeOutCleanChars(chars, i, lastDirtyCharProcessed); |
|
1580 writer.write("&#"); |
|
1581 writer.write(Integer.toString(ch)); |
|
1582 writer.write(';'); |
|
1583 lastDirtyCharProcessed = i; |
|
1584 } |
|
1585 } |
1494 } |
1586 } |
1495 } |
1587 |
1496 |
1588 // we've reached the end. Any clean characters at the |
1497 // we've reached the end. Any clean characters at the |
1589 // end of the array than need to be written out? |
1498 // end of the array than need to be written out? |
1590 startClean = lastDirtyCharProcessed + 1; |
1499 startClean = lastDirty + 1; |
1591 if (i > startClean) |
1500 if (i > startClean) |
1592 { |
1501 { |
1593 int lengthClean = i - startClean; |
1502 int lengthClean = i - startClean; |
1594 m_writer.write(chars, startClean, lengthClean); |
1503 m_writer.write(chars, startClean, lengthClean); |
1595 } |
1504 } |
1604 |
1513 |
1605 // time to fire off characters generation event |
1514 // time to fire off characters generation event |
1606 if (m_tracer != null) |
1515 if (m_tracer != null) |
1607 super.fireCharEvent(chars, start, length); |
1516 super.fireCharEvent(chars, start, length); |
1608 } |
1517 } |
1609 |
|
1610 private int processLineFeed(final char[] chars, int i, int lastProcessed, final Writer writer) throws IOException { |
|
1611 if (!m_lineSepUse |
|
1612 || (m_lineSepLen ==1 && m_lineSep[0] == CharInfo.S_LINEFEED)){ |
|
1613 // We are leaving the new-line alone, and it is just |
|
1614 // being added to the 'clean' characters, |
|
1615 // so the last dirty character processed remains unchanged |
|
1616 } |
|
1617 else { |
|
1618 writeOutCleanChars(chars, i, lastProcessed); |
|
1619 writer.write(m_lineSep, 0, m_lineSepLen); |
|
1620 lastProcessed = i; |
|
1621 } |
|
1622 return lastProcessed; |
|
1623 } |
|
1624 |
|
1625 private void writeOutCleanChars(final char[] chars, int i, int lastProcessed) throws IOException { |
|
1626 int startClean; |
|
1627 startClean = lastProcessed + 1; |
|
1628 if (startClean < i) |
|
1629 { |
|
1630 int lengthClean = i - startClean; |
|
1631 m_writer.write(chars, startClean, lengthClean); |
|
1632 } |
|
1633 } |
|
1634 |
|
1635 /** |
1518 /** |
1636 * This method checks if a given character is between C0 or C1 range |
1519 * This method checks if a given character is between C0 or C1 range |
1637 * of Control characters. |
1520 * of Control characters. |
1638 * This method is added to support Control Characters for XML 1.1 |
1521 * This method is added to support Control Characters for XML 1.1 |
1639 * If a given character is TAB (0x09), LF (0x0A) or CR (0x0D), this method |
1522 * If a given character is TAB (0x09), LF (0x0A) or CR (0x0D), this method |
2085 m_attrBuff = new char[len*2 + 1]; |
1969 m_attrBuff = new char[len*2 + 1]; |
2086 } |
1970 } |
2087 string.getChars(0,len, m_attrBuff, 0); |
1971 string.getChars(0,len, m_attrBuff, 0); |
2088 final char[] stringChars = m_attrBuff; |
1972 final char[] stringChars = m_attrBuff; |
2089 |
1973 |
2090 for (int i = 0; i < len;) |
1974 for (int i = 0; i < len; ) |
2091 { |
1975 { |
2092 char ch = stringChars[i]; |
1976 char ch = stringChars[i]; |
2093 |
1977 if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch))) |
2094 if (m_charInfo.shouldMapAttrChar(ch) || !(escapingNotNeeded(ch))) { |
1978 { |
2095 // The character is supposed to be replaced by a String |
1979 writer.write(ch); |
2096 // e.g. '&' --> "&" |
1980 i++; |
2097 // e.g. '<' --> "<" |
1981 } |
|
1982 else |
|
1983 { // I guess the parser doesn't normalize cr/lf in attributes. -sb |
|
1984 // if ((CharInfo.S_CARRIAGERETURN == ch) |
|
1985 // && ((i + 1) < len) |
|
1986 // && (CharInfo.S_LINEFEED == stringChars[i + 1])) |
|
1987 // { |
|
1988 // i++; |
|
1989 // ch = CharInfo.S_LINEFEED; |
|
1990 // } |
|
1991 |
2098 i = accumDefaultEscape(writer, ch, i, stringChars, len, false, true); |
1992 i = accumDefaultEscape(writer, ch, i, stringChars, len, false, true); |
2099 } |
1993 } |
2100 else { |
1994 } |
2101 i++; |
1995 |
2102 if (0x0 <= ch && ch <= 0x1F) { |
|
2103 // Range 0x00 through 0x1F inclusive |
|
2104 // This covers the non-whitespace control characters |
|
2105 // in the range 0x1 to 0x1F inclusive. |
|
2106 // It also covers the whitespace control characters in the same way: |
|
2107 // 0x9 TAB |
|
2108 // 0xA NEW LINE |
|
2109 // 0xD CARRIAGE RETURN |
|
2110 // |
|
2111 // We also cover 0x0 ... It isn't valid |
|
2112 // but we will output "�" |
|
2113 |
|
2114 // The default will handle this just fine, but this |
|
2115 // is a little performance boost to handle the more |
|
2116 // common TAB, NEW-LINE, CARRIAGE-RETURN |
|
2117 switch (ch) { |
|
2118 |
|
2119 case CharInfo.S_HORIZONAL_TAB: |
|
2120 writer.write("	"); |
|
2121 break; |
|
2122 case CharInfo.S_LINEFEED: |
|
2123 writer.write(" "); |
|
2124 break; |
|
2125 case CharInfo.S_CARRIAGERETURN: |
|
2126 writer.write(" "); |
|
2127 break; |
|
2128 default: |
|
2129 writer.write("&#"); |
|
2130 writer.write(Integer.toString(ch)); |
|
2131 writer.write(';'); |
|
2132 break; |
|
2133 |
|
2134 } |
|
2135 } |
|
2136 else if (ch < 0x7F) { |
|
2137 // Range 0x20 through 0x7E inclusive |
|
2138 // Normal ASCII chars |
|
2139 writer.write(ch); |
|
2140 } |
|
2141 else if (ch <= 0x9F){ |
|
2142 // Range 0x7F through 0x9F inclusive |
|
2143 // More control characters |
|
2144 writer.write("&#"); |
|
2145 writer.write(Integer.toString(ch)); |
|
2146 writer.write(';'); |
|
2147 } |
|
2148 else if (ch == CharInfo.S_LINE_SEPARATOR) { |
|
2149 // LINE SEPARATOR |
|
2150 writer.write("
"); |
|
2151 } |
|
2152 else if (m_encodingInfo.isInEncoding(ch)) { |
|
2153 // If the character is in the encoding, and |
|
2154 // not in the normal ASCII range, we also |
|
2155 // just write it out |
|
2156 writer.write(ch); |
|
2157 } |
|
2158 else { |
|
2159 // This is a fallback plan, we should never get here |
|
2160 // but if the character wasn't previously handled |
|
2161 // (i.e. isn't in the encoding, etc.) then what |
|
2162 // should we do? We choose to write out a character ref |
|
2163 writer.write("&#"); |
|
2164 writer.write(Integer.toString(ch)); |
|
2165 writer.write(';'); |
|
2166 } |
|
2167 |
|
2168 } |
|
2169 } |
|
2170 } |
1996 } |
2171 |
1997 |
2172 /** |
1998 /** |
2173 * Receive notification of the end of an element. |
1999 * Receive notification of the end of an element. |
2174 * |
2000 * |