8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
authorjoehw
Mon, 23 Dec 2013 13:57:42 -0800
changeset 22138 069c98fc4646
parent 22137 0411a9e723b2
child 22139 f4b2aa462b46
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars Reviewed-by: dfuchs, lancea, ulfzibis
jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityScanner.java
--- a/jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityScanner.java	Fri Dec 13 09:35:23 2013 -0800
+++ b/jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityScanner.java	Mon Dec 23 13:57:42 2013 -0800
@@ -1142,7 +1142,7 @@
             int i=0;
             for ( i = offset; i < fCurrentEntity.position; i++) {
                 fCurrentEntity.ch[i] = '\n';
-                whiteSpaceLookup[whiteSpaceLen++]=i;
+                storeWhiteSpace(i);
             }
 
             int length = fCurrentEntity.position - offset;
@@ -1163,27 +1163,18 @@
         }
 
         // scan literal value
-        while (fCurrentEntity.position < fCurrentEntity.count) {
-            c = fCurrentEntity.ch[fCurrentEntity.position++];
+        for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
+            c = fCurrentEntity.ch[fCurrentEntity.position];
             if ((c == quote &&
-                 (!fCurrentEntity.literal || isExternal))
-                || c == '%' || !XMLChar.isContent(c)) {
-                fCurrentEntity.position--;
+                    (!fCurrentEntity.literal || isExternal)) ||
+                    c == '%' || !XMLChar.isContent(c)) {
                 break;
             }
-            if(whiteSpaceInfoNeeded){
-                if(c == 0x20 || c == 0x9){
-                    if(whiteSpaceLen < whiteSpaceLookup.length){
-                        whiteSpaceLookup[whiteSpaceLen++]= fCurrentEntity.position-1;
-                    }else{
-                        int [] tmp = new int[whiteSpaceLookup.length*2];
-                        System.arraycopy(whiteSpaceLookup,0,tmp,0,whiteSpaceLookup.length);
-                        whiteSpaceLookup = tmp;
-                        whiteSpaceLookup[whiteSpaceLen++]= fCurrentEntity.position - 1;
-                    }
-                }
+            if (whiteSpaceInfoNeeded && c == '\t') {
+                storeWhiteSpace(fCurrentEntity.position);
             }
         }
+
         int length = fCurrentEntity.position - offset;
         fCurrentEntity.columnNumber += length - newlines;
         content.setValues(fCurrentEntity.ch, offset, length);
@@ -1209,6 +1200,24 @@
 
     } // scanLiteral(int,XMLString):int
 
+    /**
+     * Save whitespace information. Increase the whitespace buffer by 100
+     * when needed.
+     *
+     * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR).
+     *
+     * @param whiteSpacePos position of a whitespace in the scanner entity buffer
+     */
+    private void storeWhiteSpace(int whiteSpacePos) {
+        if (whiteSpaceLen >= whiteSpaceLookup.length) {
+            int [] tmp = new int[whiteSpaceLookup.length + 100];
+            System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
+            whiteSpaceLookup = tmp;
+        }
+
+        whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos;
+    }
+
     //CHANGED:
     /**
      * Scans a range of character data up to the specified delimiter,