author | joehw |
Tue, 16 Feb 2016 10:22:22 -0800 | |
changeset 39798 | 550955727ef7 |
parent 37626 | d4fb6a5dc001 |
child 39799 | 2847de5336f2 |
permissions | -rw-r--r-- |
12005 | 1 |
/* |
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
2 |
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. |
12005 | 3 |
*/ |
4 |
||
5 |
/* |
|
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
6 |
* Licensed to the Apache Software Foundation (ASF) under one or more |
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
7 |
* contributor license agreements. See the NOTICE file distributed with |
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
8 |
* this work for additional information regarding copyright ownership. |
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
9 |
* The ASF licenses this file to You under the Apache License, Version 2.0 |
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
10 |
* (the "License"); you may not use this file except in compliance with |
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
11 |
* the License. You may obtain a copy of the License at |
12005 | 12 |
* |
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
13 |
* http://www.apache.org/licenses/LICENSE-2.0 |
12005 | 14 |
* |
15 |
* Unless required by applicable law or agreed to in writing, software |
|
16 |
* distributed under the License is distributed on an "AS IS" BASIS, |
|
17 |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
18 |
* See the License for the specific language governing permissions and |
|
19 |
* limitations under the License. |
|
20 |
*/ |
|
21 |
||
22 |
package com.sun.org.apache.xerces.internal.impl; |
|
23 |
||
24 |
import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; |
|
25 |
import com.sun.org.apache.xerces.internal.impl.io.UCSReader; |
|
26 |
import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; |
|
27 |
import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; |
|
28 |
import com.sun.org.apache.xerces.internal.util.EncodingMap; |
|
29 |
import com.sun.org.apache.xerces.internal.util.SymbolTable; |
|
30 |
import com.sun.org.apache.xerces.internal.util.XMLChar; |
|
31 |
import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; |
|
33352 | 32 |
import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; |
33 |
import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; |
|
34 |
import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
35 |
import com.sun.org.apache.xerces.internal.xni.*; |
12005 | 36 |
import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; |
37 |
import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
38 |
import com.sun.xml.internal.stream.Entity; |
33352 | 39 |
import com.sun.xml.internal.stream.Entity.ScannedEntity; |
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
40 |
import com.sun.xml.internal.stream.XMLBufferListener; |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
41 |
import java.io.EOFException; |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
42 |
import java.io.IOException; |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
43 |
import java.io.InputStream; |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
44 |
import java.io.InputStreamReader; |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
45 |
import java.io.Reader; |
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
46 |
import java.util.ArrayList; |
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
47 |
import java.util.Locale; |
12005 | 48 |
|
49 |
/** |
|
50 |
* Implements the entity scanner methods. |
|
51 |
* |
|
52 |
* @author Neeraj Bajaj, Sun Microsystems |
|
53 |
* @author Andy Clark, IBM |
|
54 |
* @author Arnaud Le Hors, IBM |
|
55 |
* @author K.Venugopal Sun Microsystems |
|
56 |
* |
|
57 |
*/ |
|
58 |
public class XMLEntityScanner implements XMLLocator { |
|
59 |
||
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
60 |
protected Entity.ScannedEntity fCurrentEntity = null; |
12005 | 61 |
protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE; |
62 |
||
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
63 |
protected XMLEntityManager fEntityManager; |
12005 | 64 |
|
33352 | 65 |
/** Security manager. */ |
66 |
protected XMLSecurityManager fSecurityManager = null; |
|
67 |
||
68 |
/** Limit analyzer. */ |
|
69 |
protected XMLLimitAnalyzer fLimitAnalyzer = null; |
|
70 |
||
12005 | 71 |
/** Debug switching readers for encodings. */ |
72 |
private static final boolean DEBUG_ENCODINGS = false; |
|
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
73 |
|
12005 | 74 |
/** Listeners which should know when load is being called */ |
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
75 |
private ArrayList<XMLBufferListener> listeners = new ArrayList<>(); |
12005 | 76 |
|
14939
2e992d2acc8b
8003260: [findbug] some fields should be package protected
joehw
parents:
12458
diff
changeset
|
77 |
private static final boolean [] VALID_NAMES = new boolean[127]; |
12005 | 78 |
|
79 |
/** |
|
80 |
* Debug printing of buffer. This debugging flag works best when you |
|
81 |
* resize the DEFAULT_BUFFER_SIZE down to something reasonable like |
|
82 |
* 64 characters. |
|
83 |
*/ |
|
84 |
private static final boolean DEBUG_BUFFER = false; |
|
85 |
private static final boolean DEBUG_SKIP_STRING = false; |
|
86 |
/** |
|
87 |
* To signal the end of the document entity, this exception will be thrown. |
|
88 |
*/ |
|
89 |
private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() { |
|
90 |
private static final long serialVersionUID = 980337771224675268L; |
|
91 |
public Throwable fillInStackTrace() { |
|
92 |
return this; |
|
93 |
} |
|
94 |
}; |
|
95 |
||
96 |
protected SymbolTable fSymbolTable = null; |
|
97 |
protected XMLErrorReporter fErrorReporter = null; |
|
98 |
int [] whiteSpaceLookup = new int[100]; |
|
99 |
int whiteSpaceLen = 0; |
|
100 |
boolean whiteSpaceInfoNeeded = true; |
|
101 |
||
102 |
/** |
|
103 |
* Allow Java encoding names. This feature identifier is: |
|
104 |
* http://apache.org/xml/features/allow-java-encodings |
|
105 |
*/ |
|
106 |
protected boolean fAllowJavaEncodings; |
|
107 |
||
108 |
//Will be used only during internal subsets. |
|
109 |
//for appending data. |
|
110 |
||
111 |
/** Property identifier: symbol table. */ |
|
112 |
protected static final String SYMBOL_TABLE = |
|
113 |
Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY; |
|
114 |
||
115 |
/** Property identifier: error reporter. */ |
|
116 |
protected static final String ERROR_REPORTER = |
|
117 |
Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY; |
|
118 |
||
119 |
/** Feature identifier: allow Java encodings. */ |
|
120 |
protected static final String ALLOW_JAVA_ENCODINGS = |
|
121 |
Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE; |
|
122 |
||
123 |
protected PropertyManager fPropertyManager = null ; |
|
124 |
||
125 |
boolean isExternal = false; |
|
126 |
// Populate the ASCII name-character table: letters, digits and - . : _
static {
    for (char upper = 'A'; upper <= 'Z'; upper++) {
        VALID_NAMES[upper] = true;
    }
    for (char lower = 'a'; lower <= 'z'; lower++) {
        VALID_NAMES[lower] = true;
    }
    for (char digit = '0'; digit <= '9'; digit++) {
        VALID_NAMES[digit] = true;
    }
    VALID_NAMES['-'] = true;
    VALID_NAMES['.'] = true;
    VALID_NAMES[':'] = true;
    VALID_NAMES['_'] = true;
}
|
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
142 |
|
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
143 |
// Remember, that the XML version has explicitly been set, |
12458 | 144 |
// so that XMLStreamReader.getVersion() can find that out. |
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
145 |
protected boolean xmlVersionSetExplicitly = false; |
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
146 |
|
12005 | 147 |
// |
148 |
// Constructors |
|
149 |
// |
|
150 |
||
151 |
/** Default constructor. */ |
|
152 |
public XMLEntityScanner() { |
|
153 |
} // <init>() |
|
154 |
||
155 |
||
156 |
/**
 * Creates a scanner bound to the given entity manager and then initialises
 * it through {@link #reset(PropertyManager)}.
 * <p>
 * NOTE(review): an earlier comment described this constructor as private and
 * pointed at {@code getEntityScanner()} factory methods; the constructor is
 * public and no such methods are visible in this part of the file.
 *
 * @param propertyManager source of the symbol table and error reporter properties
 * @param entityManager   entity manager; must be assigned before the reset because
 *                        the reset reads its limit analyzer and security manager
 */
public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) {
    fEntityManager = entityManager;
    reset(propertyManager);
} // <init>()
|
165 |
||
166 |
||
167 |
// set buffer size: |
|
168 |
public final void setBufferSize(int size) { |
|
169 |
// REVISIT: Buffer size passed to entity scanner |
|
170 |
// was not being kept in synch with the actual size |
|
171 |
// of the buffers in each scanned entity. If any |
|
172 |
// of the buffers were actually resized, it was possible |
|
173 |
// that the parser would throw an ArrayIndexOutOfBoundsException |
|
174 |
// for documents which contained names which are longer than |
|
175 |
// the current buffer size. Conceivably the buffer size passed |
|
176 |
// to entity scanner could be used to determine a minimum size |
|
177 |
// for resizing, if doubling its size is smaller than this |
|
178 |
// minimum. -- mrglavas |
|
179 |
fBufferSize = size; |
|
180 |
} |
|
181 |
||
182 |
/** |
|
183 |
* Resets the components. |
|
184 |
*/ |
|
185 |
public void reset(PropertyManager propertyManager){ |
|
186 |
fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ; |
|
187 |
fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ; |
|
33352 | 188 |
resetCommon(); |
12005 | 189 |
} |
190 |
||
191 |
/** |
|
192 |
* Resets the component. The component can query the component manager |
|
193 |
* about any features and properties that affect the operation of the |
|
194 |
* component. |
|
195 |
* |
|
196 |
* @param componentManager The component manager. |
|
197 |
* |
|
198 |
* @throws SAXException Thrown by component on initialization error. |
|
199 |
* For example, if a feature or property is |
|
200 |
* required for the operation of the component, the |
|
201 |
* component manager may throw a |
|
202 |
* SAXNotRecognizedException or a |
|
203 |
* SAXNotSupportedException. |
|
204 |
*/ |
|
205 |
public void reset(XMLComponentManager componentManager) |
|
206 |
throws XMLConfigurationException { |
|
207 |
// xerces features |
|
208 |
fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); |
|
209 |
||
210 |
//xerces properties |
|
211 |
fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); |
|
212 |
fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); |
|
33352 | 213 |
resetCommon(); |
12005 | 214 |
} // reset(XMLComponentManager) |
215 |
||
216 |
||
217 |
public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager, |
|
218 |
XMLErrorReporter reporter) { |
|
219 |
fCurrentEntity = null; |
|
220 |
fSymbolTable = symbolTable; |
|
221 |
fEntityManager = entityManager; |
|
222 |
fErrorReporter = reporter; |
|
33352 | 223 |
fLimitAnalyzer = fEntityManager.fLimitAnalyzer; |
224 |
fSecurityManager = fEntityManager.fSecurityManager; |
|
225 |
} |
|
226 |
||
227 |
private void resetCommon() { |
|
228 |
fCurrentEntity = null; |
|
229 |
whiteSpaceLen = 0; |
|
230 |
whiteSpaceInfoNeeded = true; |
|
231 |
listeners.clear(); |
|
232 |
fLimitAnalyzer = fEntityManager.fLimitAnalyzer; |
|
233 |
fSecurityManager = fEntityManager.fSecurityManager; |
|
12005 | 234 |
} |
235 |
||
236 |
/** |
|
237 |
* Returns the XML version of the current entity. This will normally be the |
|
238 |
* value from the XML or text declaration or defaulted by the parser. Note that |
|
239 |
* that this value may be different than the version of the processing rules |
|
240 |
* applied to the current entity. For instance, an XML 1.1 document may refer to |
|
241 |
* XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire |
|
242 |
* document. Also note that, for a given entity, this value can only be considered |
|
243 |
* final once the XML or text declaration has been read or once it has been |
|
244 |
* determined that there is no such declaration. |
|
245 |
*/ |
|
246 |
public final String getXMLVersion() { |
|
247 |
if (fCurrentEntity != null) { |
|
248 |
return fCurrentEntity.xmlVersion; |
|
249 |
} |
|
250 |
return null; |
|
251 |
} // getXMLVersion():String |
|
252 |
||
253 |
/** |
|
254 |
* Sets the XML version. This method is used by the |
|
255 |
* scanners to report the value of the version pseudo-attribute |
|
256 |
* in an XML or text declaration. |
|
257 |
* |
|
258 |
* @param xmlVersion the XML version of the current entity |
|
259 |
*/ |
|
260 |
public final void setXMLVersion(String xmlVersion) { |
|
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
261 |
xmlVersionSetExplicitly = true; |
12005 | 262 |
fCurrentEntity.xmlVersion = xmlVersion; |
263 |
} // setXMLVersion(String) |
|
264 |
||
265 |
||
266 |
/** set the instance of current scanned entity. |
|
267 |
* @param ScannedEntity |
|
268 |
*/ |
|
269 |
||
270 |
public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){ |
|
271 |
fCurrentEntity = scannedEntity ; |
|
272 |
if(fCurrentEntity != null){ |
|
273 |
isExternal = fCurrentEntity.isExternal(); |
|
274 |
if(DEBUG_BUFFER) |
|
275 |
System.out.println("Current Entity is "+scannedEntity.name); |
|
276 |
} |
|
277 |
} |
|
278 |
||
279 |
public Entity.ScannedEntity getCurrentEntity(){ |
|
280 |
return fCurrentEntity ; |
|
281 |
} |
|
282 |
// |
|
283 |
// XMLEntityReader methods |
|
284 |
// |
|
285 |
||
286 |
/** |
|
287 |
* Returns the base system identifier of the currently scanned |
|
288 |
* entity, or null if none is available. |
|
289 |
*/ |
|
290 |
public final String getBaseSystemId() { |
|
291 |
return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; |
|
292 |
} // getBaseSystemId():String |
|
293 |
||
294 |
/** |
|
295 |
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String) |
|
296 |
*/ |
|
297 |
public void setBaseSystemId(String systemId) { |
|
298 |
//no-op |
|
299 |
} |
|
300 |
||
301 |
///////////// Locator methods start. |
|
302 |
public final int getLineNumber(){ |
|
303 |
//if the entity is closed, we should return -1 |
|
304 |
//xxx at first place why such call should be there... |
|
305 |
return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ; |
|
306 |
} |
|
307 |
||
308 |
/** |
|
309 |
* @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int) |
|
310 |
*/ |
|
311 |
public void setLineNumber(int line) { |
|
312 |
//no-op |
|
313 |
} |
|
314 |
||
315 |
||
316 |
public final int getColumnNumber(){ |
|
317 |
//if the entity is closed, we should return -1 |
|
318 |
//xxx at first place why such call should be there... |
|
319 |
return fCurrentEntity != null ? fCurrentEntity.columnNumber : -1 ; |
|
320 |
} |
|
321 |
||
322 |
/** |
|
323 |
* @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int) |
|
324 |
*/ |
|
325 |
public void setColumnNumber(int col) { |
|
326 |
// no-op |
|
327 |
} |
|
328 |
||
329 |
||
330 |
public final int getCharacterOffset(){ |
|
331 |
return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ; |
|
332 |
} |
|
333 |
||
334 |
/** Returns the expanded system identifier. */ |
|
335 |
public final String getExpandedSystemId() { |
|
336 |
return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; |
|
337 |
} |
|
338 |
||
339 |
/** |
|
340 |
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String) |
|
341 |
*/ |
|
342 |
public void setExpandedSystemId(String systemId) { |
|
343 |
//no-op |
|
344 |
} |
|
345 |
||
346 |
/** Returns the literal system identifier. */ |
|
347 |
public final String getLiteralSystemId() { |
|
348 |
return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null; |
|
349 |
} |
|
350 |
||
351 |
/** |
|
352 |
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String) |
|
353 |
*/ |
|
354 |
public void setLiteralSystemId(String systemId) { |
|
355 |
//no-op |
|
356 |
} |
|
357 |
||
358 |
/** Returns the public identifier. */ |
|
359 |
public final String getPublicId() { |
|
360 |
return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null; |
|
361 |
} |
|
362 |
||
363 |
/** |
|
364 |
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String) |
|
365 |
*/ |
|
366 |
public void setPublicId(String publicId) { |
|
367 |
//no-op |
|
368 |
} |
|
369 |
||
370 |
///////////////// Locator methods finished. |
|
371 |
||
372 |
/** the version of the current entity being scanned */ |
|
373 |
public void setVersion(String version){ |
|
374 |
fCurrentEntity.version = version; |
|
375 |
} |
|
376 |
||
377 |
public String getVersion(){ |
|
378 |
if (fCurrentEntity != null) |
|
379 |
return fCurrentEntity.version ; |
|
380 |
return null; |
|
381 |
} |
|
382 |
||
383 |
/** |
|
384 |
* Returns the encoding of the current entity. |
|
385 |
* Note that, for a given entity, this value can only be |
|
386 |
* considered final once the encoding declaration has been read (or once it |
|
387 |
* has been determined that there is no such declaration) since, no encoding |
|
388 |
* having been specified on the XMLInputSource, the parser |
|
389 |
* will make an initial "guess" which could be in error. |
|
390 |
*/ |
|
391 |
public final String getEncoding() { |
|
392 |
if (fCurrentEntity != null) { |
|
393 |
return fCurrentEntity.encoding; |
|
394 |
} |
|
395 |
return null; |
|
396 |
} // getEncoding():String |
|
397 |
||
398 |
/** |
|
399 |
* Sets the encoding of the scanner. This method is used by the |
|
400 |
* scanners if the XMLDecl or TextDecl line contains an encoding |
|
401 |
* pseudo-attribute. |
|
402 |
* <p> |
|
403 |
* <strong>Note:</strong> The underlying character reader on the |
|
404 |
* current entity will be changed to accomodate the new encoding. |
|
405 |
* However, the new encoding is ignored if the current reader was |
|
406 |
* not constructed from an input stream (e.g. an external entity |
|
407 |
* that is resolved directly to the appropriate java.io.Reader |
|
408 |
* object). |
|
409 |
* |
|
410 |
* @param encoding The IANA encoding name of the new encoding. |
|
411 |
* |
|
412 |
* @throws IOException Thrown if the new encoding is not supported. |
|
413 |
* |
|
414 |
* @see com.sun.org.apache.xerces.internal.util.EncodingMap |
|
415 |
*/ |
|
416 |
public final void setEncoding(String encoding) throws IOException { |
|
417 |
||
418 |
if (DEBUG_ENCODINGS) { |
|
419 |
System.out.println("$$$ setEncoding: "+encoding); |
|
420 |
} |
|
421 |
||
422 |
if (fCurrentEntity.stream != null) { |
|
423 |
// if the encoding is the same, don't change the reader and |
|
424 |
// re-use the original reader used by the OneCharReader |
|
425 |
// NOTE: Besides saving an object, this overcomes deficiencies |
|
426 |
// in the UTF-16 reader supplied with the standard Java |
|
427 |
// distribution (up to and including 1.3). The UTF-16 |
|
428 |
// decoder buffers 8K blocks even when only asked to read |
|
429 |
// a single char! -Ac |
|
430 |
if (fCurrentEntity.encoding == null || |
|
431 |
!fCurrentEntity.encoding.equals(encoding)) { |
|
432 |
// UTF-16 is a bit of a special case. If the encoding is UTF-16, |
|
433 |
// and we know the endian-ness, we shouldn't change readers. |
|
434 |
// If it's ISO-10646-UCS-(2|4), then we'll have to deduce |
|
435 |
// the endian-ness from the encoding we presently have. |
|
436 |
if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) { |
|
437 |
String ENCODING = encoding.toUpperCase(Locale.ENGLISH); |
|
438 |
if(ENCODING.equals("UTF-16")) return; |
|
439 |
if(ENCODING.equals("ISO-10646-UCS-4")) { |
|
440 |
if(fCurrentEntity.encoding.equals("UTF-16BE")) { |
|
441 |
fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE); |
|
442 |
} else { |
|
443 |
fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE); |
|
444 |
} |
|
445 |
return; |
|
446 |
} |
|
447 |
if(ENCODING.equals("ISO-10646-UCS-2")) { |
|
448 |
if(fCurrentEntity.encoding.equals("UTF-16BE")) { |
|
449 |
fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE); |
|
450 |
} else { |
|
451 |
fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE); |
|
452 |
} |
|
453 |
return; |
|
454 |
} |
|
455 |
} |
|
456 |
// wrap a new reader around the input stream, changing |
|
457 |
// the encoding |
|
458 |
if (DEBUG_ENCODINGS) { |
|
459 |
System.out.println("$$$ creating new reader from stream: "+ |
|
460 |
fCurrentEntity.stream); |
|
461 |
} |
|
462 |
//fCurrentEntity.stream.reset(); |
|
463 |
fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null); |
|
464 |
fCurrentEntity.encoding = encoding; |
|
465 |
||
466 |
} else { |
|
467 |
if (DEBUG_ENCODINGS) |
|
468 |
System.out.println("$$$ reusing old reader on stream"); |
|
469 |
} |
|
470 |
} |
|
471 |
||
472 |
} // setEncoding(String) |
|
473 |
||
474 |
/** Returns true if the current entity being scanned is external. */ |
|
475 |
public final boolean isExternal() { |
|
476 |
return fCurrentEntity.isExternal(); |
|
477 |
} // isExternal():boolean |
|
478 |
||
479 |
public int getChar(int relative) throws IOException{ |
|
480 |
if(arrangeCapacity(relative + 1, false)){ |
|
481 |
return fCurrentEntity.ch[fCurrentEntity.position + relative]; |
|
482 |
}else{ |
|
483 |
return -1; |
|
484 |
} |
|
485 |
}//getChar() |
|
486 |
||
487 |
/** |
|
488 |
* Returns the next character on the input. |
|
489 |
* <p> |
|
490 |
* <strong>Note:</strong> The character is <em>not</em> consumed. |
|
491 |
* |
|
492 |
* @throws IOException Thrown if i/o error occurs. |
|
493 |
* @throws EOFException Thrown on end of file. |
|
494 |
*/ |
|
495 |
public int peekChar() throws IOException { |
|
496 |
if (DEBUG_BUFFER) { |
|
497 |
System.out.print("(peekChar: "); |
|
498 |
print(); |
|
499 |
System.out.println(); |
|
500 |
} |
|
501 |
||
502 |
// load more characters, if needed |
|
503 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
504 |
load(0, true, true); |
12005 | 505 |
} |
506 |
||
507 |
// peek at character |
|
508 |
int c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
509 |
||
510 |
// return peeked character |
|
511 |
if (DEBUG_BUFFER) { |
|
512 |
System.out.print(")peekChar: "); |
|
513 |
print(); |
|
514 |
if (isExternal) { |
|
515 |
System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'"); |
|
516 |
} else { |
|
517 |
System.out.println(" -> '"+(char)c+"'"); |
|
518 |
} |
|
519 |
} |
|
520 |
if (isExternal) { |
|
521 |
return c != '\r' ? c : '\n'; |
|
522 |
} else { |
|
523 |
return c; |
|
524 |
} |
|
525 |
||
526 |
} // peekChar():int |
|
527 |
||
528 |
/** |
|
529 |
* Returns the next character on the input. |
|
530 |
* <p> |
|
531 |
* <strong>Note:</strong> The character is consumed. |
|
532 |
* |
|
533 |
* @throws IOException Thrown if i/o error occurs. |
|
534 |
* @throws EOFException Thrown on end of file. |
|
535 |
*/ |
|
536 |
public int scanChar() throws IOException { |
|
537 |
if (DEBUG_BUFFER) { |
|
538 |
System.out.print("(scanChar: "); |
|
539 |
print(); |
|
540 |
System.out.println(); |
|
541 |
} |
|
542 |
||
543 |
// load more characters, if needed |
|
544 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
545 |
load(0, true, true); |
12005 | 546 |
} |
547 |
||
548 |
// scan character |
|
549 |
int c = fCurrentEntity.ch[fCurrentEntity.position++]; |
|
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
550 |
if (c == '\n' || (c == '\r' && isExternal)) { |
12005 | 551 |
fCurrentEntity.lineNumber++; |
552 |
fCurrentEntity.columnNumber = 1; |
|
553 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
27536
533660d72131
8059327: XML parser returns corrupt attribute value
joehw
parents:
25868
diff
changeset
|
554 |
invokeListeners(1); |
12005 | 555 |
fCurrentEntity.ch[0] = (char)c; |
27536
533660d72131
8059327: XML parser returns corrupt attribute value
joehw
parents:
25868
diff
changeset
|
556 |
load(1, false, false); |
12005 | 557 |
} |
558 |
if (c == '\r' && isExternal) { |
|
559 |
if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') { |
|
560 |
fCurrentEntity.position--; |
|
561 |
} |
|
562 |
c = '\n'; |
|
563 |
} |
|
564 |
} |
|
565 |
||
566 |
// return character that was scanned |
|
567 |
if (DEBUG_BUFFER) { |
|
568 |
System.out.print(")scanChar: "); |
|
569 |
print(); |
|
570 |
System.out.println(" -> '"+(char)c+"'"); |
|
571 |
} |
|
572 |
fCurrentEntity.columnNumber++; |
|
573 |
return c; |
|
574 |
||
575 |
} // scanChar():int |
|
576 |
||
577 |
/** |
|
578 |
* Returns a string matching the NMTOKEN production appearing immediately |
|
579 |
* on the input as a symbol, or null if NMTOKEN Name string is present. |
|
580 |
* <p> |
|
581 |
* <strong>Note:</strong> The NMTOKEN characters are consumed. |
|
582 |
* <p> |
|
583 |
* <strong>Note:</strong> The string returned must be a symbol. The |
|
584 |
* SymbolTable can be used for this purpose. |
|
585 |
* |
|
586 |
* @throws IOException Thrown if i/o error occurs. |
|
587 |
* @throws EOFException Thrown on end of file. |
|
588 |
* |
|
589 |
* @see com.sun.org.apache.xerces.internal.util.SymbolTable |
|
590 |
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isName |
|
591 |
*/ |
|
592 |
public String scanNmtoken() throws IOException { |
|
593 |
if (DEBUG_BUFFER) { |
|
594 |
System.out.print("(scanNmtoken: "); |
|
595 |
print(); |
|
596 |
System.out.println(); |
|
597 |
} |
|
598 |
||
599 |
// load more characters, if needed |
|
600 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
601 |
load(0, true, true); |
12005 | 602 |
} |
603 |
||
604 |
// scan nmtoken |
|
605 |
int offset = fCurrentEntity.position; |
|
606 |
boolean vc = false; |
|
607 |
char c; |
|
608 |
while (true){ |
|
609 |
//while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) { |
|
610 |
c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
611 |
if(c < 127){ |
|
612 |
vc = VALID_NAMES[c]; |
|
613 |
}else{ |
|
614 |
vc = XMLChar.isName(c); |
|
615 |
} |
|
616 |
if(!vc)break; |
|
617 |
||
618 |
if (++fCurrentEntity.position == fCurrentEntity.count) { |
|
619 |
int length = fCurrentEntity.position - offset; |
|
620 |
invokeListeners(length); |
|
621 |
if (length == fCurrentEntity.fBufferSize) { |
|
622 |
// bad luck we have to resize our buffer |
|
623 |
char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; |
|
624 |
System.arraycopy(fCurrentEntity.ch, offset, |
|
625 |
tmp, 0, length); |
|
626 |
fCurrentEntity.ch = tmp; |
|
627 |
fCurrentEntity.fBufferSize *= 2; |
|
628 |
} else { |
|
629 |
System.arraycopy(fCurrentEntity.ch, offset, |
|
630 |
fCurrentEntity.ch, 0, length); |
|
631 |
} |
|
632 |
offset = 0; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
633 |
if (load(length, false, false)) { |
12005 | 634 |
break; |
635 |
} |
|
636 |
} |
|
637 |
} |
|
638 |
int length = fCurrentEntity.position - offset; |
|
639 |
fCurrentEntity.columnNumber += length; |
|
640 |
||
641 |
// return nmtoken |
|
642 |
String symbol = null; |
|
643 |
if (length > 0) { |
|
644 |
symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); |
|
645 |
} |
|
646 |
if (DEBUG_BUFFER) { |
|
647 |
System.out.print(")scanNmtoken: "); |
|
648 |
print(); |
|
649 |
System.out.println(" -> "+String.valueOf(symbol)); |
|
650 |
} |
|
651 |
return symbol; |
|
652 |
||
653 |
} // scanNmtoken():String |
|
654 |
||
655 |
/** |
|
656 |
* Returns a string matching the Name production appearing immediately |
|
657 |
* on the input as a symbol, or null if no Name string is present. |
|
658 |
* <p> |
|
659 |
* <strong>Note:</strong> The Name characters are consumed. |
|
660 |
* <p> |
|
661 |
* <strong>Note:</strong> The string returned must be a symbol. The |
|
662 |
* SymbolTable can be used for this purpose. |
|
663 |
* |
|
664 |
* @throws IOException Thrown if i/o error occurs. |
|
665 |
* @throws EOFException Thrown on end of file. |
|
666 |
* |
|
667 |
* @see com.sun.org.apache.xerces.internal.util.SymbolTable |
|
668 |
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isName |
|
669 |
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart |
|
670 |
*/ |
|
671 |
public String scanName() throws IOException { |
|
672 |
if (DEBUG_BUFFER) { |
|
673 |
System.out.print("(scanName: "); |
|
674 |
print(); |
|
675 |
System.out.println(); |
|
676 |
} |
|
677 |
||
678 |
// load more characters, if needed |
|
679 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
680 |
load(0, true, true); |
12005 | 681 |
} |
682 |
||
683 |
// scan name |
|
684 |
int offset = fCurrentEntity.position; |
|
39798 | 685 |
int length; |
12005 | 686 |
if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { |
687 |
if (++fCurrentEntity.position == fCurrentEntity.count) { |
|
27536
533660d72131
8059327: XML parser returns corrupt attribute value
joehw
parents:
25868
diff
changeset
|
688 |
invokeListeners(1); |
12005 | 689 |
fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; |
690 |
offset = 0; |
|
27536
533660d72131
8059327: XML parser returns corrupt attribute value
joehw
parents:
25868
diff
changeset
|
691 |
if (load(1, false, false)) { |
12005 | 692 |
fCurrentEntity.columnNumber++; |
693 |
String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); |
|
694 |
||
695 |
if (DEBUG_BUFFER) { |
|
696 |
System.out.print(")scanName: "); |
|
697 |
print(); |
|
698 |
System.out.println(" -> "+String.valueOf(symbol)); |
|
699 |
} |
|
700 |
return symbol; |
|
701 |
} |
|
702 |
} |
|
703 |
boolean vc =false; |
|
704 |
while (true ){ |
|
705 |
//XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; |
|
706 |
char c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
707 |
if(c < 127){ |
|
708 |
vc = VALID_NAMES[c]; |
|
709 |
}else{ |
|
710 |
vc = XMLChar.isName(c); |
|
711 |
} |
|
712 |
if(!vc)break; |
|
39798 | 713 |
if ((length = checkBeforeLoad(fCurrentEntity, offset, offset)) > 0) { |
12005 | 714 |
offset = 0; |
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
715 |
if (load(length, false, false)) { |
12005 | 716 |
break; |
717 |
} |
|
718 |
} |
|
719 |
} |
|
720 |
} |
|
39798 | 721 |
length = fCurrentEntity.position - offset; |
12005 | 722 |
fCurrentEntity.columnNumber += length; |
723 |
||
724 |
// return name |
|
725 |
String symbol; |
|
726 |
if (length > 0) { |
|
39798 | 727 |
checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length); |
12005 | 728 |
symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); |
729 |
} else |
|
730 |
symbol = null; |
|
731 |
if (DEBUG_BUFFER) { |
|
732 |
System.out.print(")scanName: "); |
|
733 |
print(); |
|
734 |
System.out.println(" -> "+String.valueOf(symbol)); |
|
735 |
} |
|
736 |
return symbol; |
|
737 |
||
738 |
} // scanName():String |
|
739 |
||
740 |
/** |
|
741 |
* Scans a qualified name from the input, setting the fields of the |
|
742 |
* QName structure appropriately. |
|
743 |
* <p> |
|
744 |
* <strong>Note:</strong> The qualified name characters are consumed. |
|
745 |
* <p> |
|
746 |
* <strong>Note:</strong> The strings used to set the values of the |
|
747 |
* QName structure must be symbols. The SymbolTable can be used for |
|
748 |
* this purpose. |
|
749 |
* |
|
750 |
* @param qname The qualified name structure to fill. |
|
751 |
* |
|
752 |
* @return Returns true if a qualified name appeared immediately on |
|
753 |
* the input and was scanned, false otherwise. |
|
754 |
* |
|
755 |
* @throws IOException Thrown if i/o error occurs. |
|
756 |
* @throws EOFException Thrown on end of file. |
|
757 |
* |
|
758 |
* @see com.sun.org.apache.xerces.internal.util.SymbolTable |
|
759 |
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isName |
|
760 |
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart |
|
761 |
*/ |
|
762 |
public boolean scanQName(QName qname) throws IOException { |
|
763 |
if (DEBUG_BUFFER) { |
|
764 |
System.out.print("(scanQName, "+qname+": "); |
|
765 |
print(); |
|
766 |
System.out.println(); |
|
767 |
} |
|
768 |
||
769 |
// load more characters, if needed |
|
770 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
771 |
load(0, true, true); |
12005 | 772 |
} |
773 |
||
774 |
// scan qualified name |
|
775 |
int offset = fCurrentEntity.position; |
|
776 |
||
777 |
//making a check if if the specified character is a valid name start character |
|
778 |
//as defined by production [5] in the XML 1.0 specification. |
|
779 |
// Name ::= (Letter | '_' | ':') (NameChar)* |
|
780 |
||
781 |
if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { |
|
782 |
if (++fCurrentEntity.position == fCurrentEntity.count) { |
|
27536
533660d72131
8059327: XML parser returns corrupt attribute value
joehw
parents:
25868
diff
changeset
|
783 |
invokeListeners(1); |
12005 | 784 |
fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; |
785 |
offset = 0; |
|
786 |
||
27536
533660d72131
8059327: XML parser returns corrupt attribute value
joehw
parents:
25868
diff
changeset
|
787 |
if (load(1, false, false)) { |
12005 | 788 |
fCurrentEntity.columnNumber++; |
789 |
//adding into symbol table. |
|
790 |
//XXX We are trying to add single character in SymbolTable?????? |
|
791 |
String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); |
|
792 |
qname.setValues(null, name, name, null); |
|
793 |
if (DEBUG_BUFFER) { |
|
794 |
System.out.print(")scanQName, "+qname+": "); |
|
795 |
print(); |
|
796 |
System.out.println(" -> true"); |
|
797 |
} |
|
798 |
return true; |
|
799 |
} |
|
800 |
} |
|
801 |
int index = -1; |
|
802 |
boolean vc = false; |
|
39798 | 803 |
int length; |
12005 | 804 |
while ( true){ |
805 |
||
806 |
//XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; |
|
807 |
char c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
808 |
if(c < 127){ |
|
809 |
vc = VALID_NAMES[c]; |
|
810 |
}else{ |
|
811 |
vc = XMLChar.isName(c); |
|
812 |
} |
|
813 |
if(!vc)break; |
|
814 |
if (c == ':') { |
|
815 |
if (index != -1) { |
|
816 |
break; |
|
817 |
} |
|
818 |
index = fCurrentEntity.position; |
|
33352 | 819 |
//check prefix before further read |
820 |
checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, index - offset); |
|
12005 | 821 |
} |
39798 | 822 |
if ((length = checkBeforeLoad(fCurrentEntity, offset, index)) > 0) { |
12005 | 823 |
if (index != -1) { |
824 |
index = index - offset; |
|
825 |
} |
|
826 |
offset = 0; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
827 |
if (load(length, false, false)) { |
12005 | 828 |
break; |
829 |
} |
|
830 |
} |
|
831 |
} |
|
39798 | 832 |
length = fCurrentEntity.position - offset; |
12005 | 833 |
fCurrentEntity.columnNumber += length; |
834 |
if (length > 0) { |
|
835 |
String prefix = null; |
|
836 |
String localpart = null; |
|
837 |
String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch, |
|
838 |
offset, length); |
|
839 |
||
840 |
if (index != -1) { |
|
841 |
int prefixLength = index - offset; |
|
33352 | 842 |
//check the result: prefix |
843 |
checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, prefixLength); |
|
12005 | 844 |
prefix = fSymbolTable.addSymbol(fCurrentEntity.ch, |
845 |
offset, prefixLength); |
|
846 |
int len = length - prefixLength - 1; |
|
33352 | 847 |
//check the result: localpart |
848 |
checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, index + 1, len); |
|
12005 | 849 |
localpart = fSymbolTable.addSymbol(fCurrentEntity.ch, |
850 |
index + 1, len); |
|
851 |
||
852 |
} else { |
|
853 |
localpart = rawname; |
|
33352 | 854 |
//check the result: localpart |
855 |
checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length); |
|
12005 | 856 |
} |
857 |
qname.setValues(prefix, localpart, rawname, null); |
|
858 |
if (DEBUG_BUFFER) { |
|
859 |
System.out.print(")scanQName, "+qname+": "); |
|
860 |
print(); |
|
861 |
System.out.println(" -> true"); |
|
862 |
} |
|
863 |
return true; |
|
864 |
} |
|
865 |
} |
|
866 |
||
867 |
// no qualified name found |
|
868 |
if (DEBUG_BUFFER) { |
|
869 |
System.out.print(")scanQName, "+qname+": "); |
|
870 |
print(); |
|
871 |
System.out.println(" -> false"); |
|
872 |
} |
|
873 |
return false; |
|
874 |
||
875 |
} // scanQName(QName):boolean |
|
876 |
||
877 |
/** |
|
39798 | 878 |
* Checks whether the end of the entity buffer has been reached. If yes, |
879 |
* checks against the limit and buffer size before loading more characters. |
|
880 |
* |
|
881 |
* @param entity the current entity |
|
882 |
* @param offset the offset from which the current read was started |
|
883 |
* @param nameOffset the offset from which the current name starts |
|
884 |
* @return the length of characters scanned before the end of the buffer, |
|
885 |
* zero if there is more to be read in the buffer |
|
886 |
*/ |
|
887 |
protected int checkBeforeLoad(Entity.ScannedEntity entity, int offset, |
|
888 |
int nameOffset) throws IOException { |
|
889 |
int length = 0; |
|
890 |
if (++entity.position == entity.count) { |
|
891 |
length = entity.position - offset; |
|
892 |
int nameLength = length; |
|
893 |
if (nameOffset != -1) { |
|
894 |
nameOffset = nameOffset - offset; |
|
895 |
nameLength = length - nameOffset - 1; |
|
896 |
} else { |
|
897 |
nameOffset = offset; |
|
898 |
} |
|
899 |
//check limit before loading more data |
|
900 |
checkLimit(Limit.MAX_NAME_LIMIT, entity, nameOffset, nameLength); |
|
901 |
invokeListeners(length); |
|
902 |
if (length == entity.ch.length) { |
|
903 |
// bad luck we have to resize our buffer |
|
904 |
char[] tmp = new char[entity.fBufferSize * 2]; |
|
905 |
System.arraycopy(entity.ch, offset, tmp, 0, length); |
|
906 |
entity.ch = tmp; |
|
907 |
entity.fBufferSize *= 2; |
|
908 |
} |
|
909 |
else { |
|
910 |
System.arraycopy(entity.ch, offset, entity.ch, 0, length); |
|
911 |
} |
|
912 |
} |
|
913 |
return length; |
|
914 |
} |
|
915 |
||
916 |
/** |
|
33352 | 917 |
* Checks whether the value of the specified Limit exceeds its limit |
918 |
* |
|
919 |
* @param limit The Limit to be checked. |
|
920 |
* @param entity The current entity. |
|
921 |
* @param offset The index of the first byte |
|
922 |
* @param length The length of the entity scanned. |
|
923 |
*/ |
|
924 |
protected void checkLimit(Limit limit, ScannedEntity entity, int offset, int length) { |
|
925 |
fLimitAnalyzer.addValue(limit, null, length); |
|
926 |
if (fSecurityManager.isOverLimit(limit, fLimitAnalyzer)) { |
|
927 |
fSecurityManager.debugPrint(fLimitAnalyzer); |
|
928 |
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, limit.key(), |
|
929 |
new Object[]{new String(entity.ch, offset, length), |
|
930 |
fLimitAnalyzer.getTotalValue(limit), |
|
931 |
fSecurityManager.getLimit(limit), |
|
932 |
fSecurityManager.getStateLiteral(limit)}, |
|
933 |
XMLErrorReporter.SEVERITY_FATAL_ERROR); |
|
934 |
} |
|
935 |
} |
|
936 |
||
937 |
/** |
|
12005 | 938 |
* CHANGED: |
939 |
* Scans a range of parsed character data, This function appends the character data to |
|
940 |
* the supplied buffer. |
|
941 |
* <p> |
|
942 |
* <strong>Note:</strong> The characters are consumed. |
|
943 |
* <p> |
|
944 |
* <strong>Note:</strong> This method does not guarantee to return |
|
945 |
* the longest run of parsed character data. This method may return |
|
946 |
* before markup due to reaching the end of the input buffer or any |
|
947 |
* other reason. |
|
948 |
* <p> |
|
949 |
* |
|
950 |
* @param content The content structure to fill. |
|
951 |
* |
|
952 |
* @return Returns the next character on the input, if known. This |
|
953 |
* value may be -1 but this does <em>note</em> designate |
|
954 |
* end of file. |
|
955 |
* |
|
956 |
* @throws IOException Thrown if i/o error occurs. |
|
957 |
* @throws EOFException Thrown on end of file. |
|
958 |
*/ |
|
959 |
public int scanContent(XMLString content) throws IOException { |
|
960 |
if (DEBUG_BUFFER) { |
|
961 |
System.out.print("(scanContent: "); |
|
962 |
print(); |
|
963 |
System.out.println(); |
|
964 |
} |
|
965 |
||
966 |
// load more characters, if needed |
|
967 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
968 |
load(0, true, true); |
12005 | 969 |
} else if (fCurrentEntity.position == fCurrentEntity.count - 1) { |
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
970 |
invokeListeners(1); |
12005 | 971 |
fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; |
27536
533660d72131
8059327: XML parser returns corrupt attribute value
joehw
parents:
25868
diff
changeset
|
972 |
load(1, false, false); |
12005 | 973 |
fCurrentEntity.position = 0; |
974 |
} |
|
975 |
||
976 |
// normalize newlines |
|
977 |
int offset = fCurrentEntity.position; |
|
978 |
int c = fCurrentEntity.ch[offset]; |
|
979 |
int newlines = 0; |
|
980 |
if (c == '\n' || (c == '\r' && isExternal)) { |
|
981 |
if (DEBUG_BUFFER) { |
|
982 |
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); |
|
983 |
print(); |
|
984 |
System.out.println(); |
|
985 |
} |
|
986 |
do { |
|
987 |
c = fCurrentEntity.ch[fCurrentEntity.position++]; |
|
988 |
if (c == '\r' && isExternal) { |
|
989 |
newlines++; |
|
990 |
fCurrentEntity.lineNumber++; |
|
991 |
fCurrentEntity.columnNumber = 1; |
|
992 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
993 |
offset = 0; |
|
994 |
fCurrentEntity.position = newlines; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
995 |
if (load(newlines, false, true)) { |
12005 | 996 |
break; |
997 |
} |
|
998 |
} |
|
999 |
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { |
|
1000 |
fCurrentEntity.position++; |
|
1001 |
offset++; |
|
1002 |
} |
|
1003 |
/*** NEWLINE NORMALIZATION ***/ |
|
1004 |
else { |
|
1005 |
newlines++; |
|
1006 |
} |
|
1007 |
} else if (c == '\n') { |
|
1008 |
newlines++; |
|
1009 |
fCurrentEntity.lineNumber++; |
|
1010 |
fCurrentEntity.columnNumber = 1; |
|
1011 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
1012 |
offset = 0; |
|
1013 |
fCurrentEntity.position = newlines; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1014 |
if (load(newlines, false, true)) { |
12005 | 1015 |
break; |
1016 |
} |
|
1017 |
} |
|
1018 |
} else { |
|
1019 |
fCurrentEntity.position--; |
|
1020 |
break; |
|
1021 |
} |
|
1022 |
} while (fCurrentEntity.position < fCurrentEntity.count - 1); |
|
1023 |
for (int i = offset; i < fCurrentEntity.position; i++) { |
|
1024 |
fCurrentEntity.ch[i] = '\n'; |
|
1025 |
} |
|
1026 |
int length = fCurrentEntity.position - offset; |
|
1027 |
if (fCurrentEntity.position == fCurrentEntity.count - 1) { |
|
1028 |
//CHANGED: dont replace the value.. append to the buffer. This gives control to the callee |
|
1029 |
//on buffering the data.. |
|
1030 |
content.setValues(fCurrentEntity.ch, offset, length); |
|
1031 |
//content.append(fCurrentEntity.ch, offset, length); |
|
1032 |
if (DEBUG_BUFFER) { |
|
1033 |
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); |
|
1034 |
print(); |
|
1035 |
System.out.println(); |
|
1036 |
} |
|
1037 |
return -1; |
|
1038 |
} |
|
1039 |
if (DEBUG_BUFFER) { |
|
1040 |
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); |
|
1041 |
print(); |
|
1042 |
System.out.println(); |
|
1043 |
} |
|
1044 |
} |
|
1045 |
||
1046 |
while (fCurrentEntity.position < fCurrentEntity.count) { |
|
1047 |
c = fCurrentEntity.ch[fCurrentEntity.position++]; |
|
1048 |
if (!XMLChar.isContent(c)) { |
|
1049 |
fCurrentEntity.position--; |
|
1050 |
break; |
|
1051 |
} |
|
1052 |
} |
|
1053 |
int length = fCurrentEntity.position - offset; |
|
1054 |
fCurrentEntity.columnNumber += length - newlines; |
|
35334 | 1055 |
if (fCurrentEntity.isGE) { |
33352 | 1056 |
checkLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fCurrentEntity, offset, length); |
1057 |
} |
|
12005 | 1058 |
|
1059 |
//CHANGED: dont replace the value.. append to the buffer. This gives control to the callee |
|
1060 |
//on buffering the data.. |
|
1061 |
content.setValues(fCurrentEntity.ch, offset, length); |
|
1062 |
//content.append(fCurrentEntity.ch, offset, length); |
|
1063 |
// return next character |
|
1064 |
if (fCurrentEntity.position != fCurrentEntity.count) { |
|
1065 |
c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
1066 |
// REVISIT: Does this need to be updated to fix the |
|
1067 |
// #x0D ^#x0A newline normalization problem? -Ac |
|
1068 |
if (c == '\r' && isExternal) { |
|
1069 |
c = '\n'; |
|
1070 |
} |
|
1071 |
} else { |
|
1072 |
c = -1; |
|
1073 |
} |
|
1074 |
if (DEBUG_BUFFER) { |
|
1075 |
System.out.print(")scanContent: "); |
|
1076 |
print(); |
|
1077 |
System.out.println(" -> '"+(char)c+"'"); |
|
1078 |
} |
|
1079 |
return c; |
|
1080 |
||
1081 |
} // scanContent(XMLString):int |
|
1082 |
||
1083 |
/** |
|
1084 |
* Scans a range of attribute value data, setting the fields of the |
|
1085 |
* XMLString structure, appropriately. |
|
1086 |
* <p> |
|
1087 |
* <strong>Note:</strong> The characters are consumed. |
|
1088 |
* <p> |
|
1089 |
* <strong>Note:</strong> This method does not guarantee to return |
|
1090 |
* the longest run of attribute value data. This method may return |
|
1091 |
* before the quote character due to reaching the end of the input |
|
1092 |
* buffer or any other reason. |
|
1093 |
* <p> |
|
1094 |
* <strong>Note:</strong> The fields contained in the XMLString |
|
1095 |
* structure are not guaranteed to remain valid upon subsequent calls |
|
1096 |
* to the entity scanner. Therefore, the caller is responsible for |
|
1097 |
* immediately using the returned character data or making a copy of |
|
1098 |
* the character data. |
|
1099 |
* |
|
1100 |
* @param quote The quote character that signifies the end of the |
|
1101 |
* attribute value data. |
|
1102 |
* @param content The content structure to fill. |
|
39798 | 1103 |
* @param isNSURI a flag indicating whether the content is a Namespace URI |
12005 | 1104 |
* |
1105 |
* @return Returns the next character on the input, if known. This |
|
1106 |
* value may be -1 but this does <em>note</em> designate |
|
1107 |
* end of file. |
|
1108 |
* |
|
1109 |
* @throws IOException Thrown if i/o error occurs. |
|
1110 |
* @throws EOFException Thrown on end of file. |
|
1111 |
*/ |
|
39798 | 1112 |
public int scanLiteral(int quote, XMLString content, boolean isNSURI) |
12005 | 1113 |
throws IOException { |
1114 |
if (DEBUG_BUFFER) { |
|
1115 |
System.out.print("(scanLiteral, '"+(char)quote+"': "); |
|
1116 |
print(); |
|
1117 |
System.out.println(); |
|
1118 |
} |
|
1119 |
// load more characters, if needed |
|
1120 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1121 |
load(0, true, true); |
12005 | 1122 |
} else if (fCurrentEntity.position == fCurrentEntity.count - 1) { |
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
1123 |
invokeListeners(1); |
12005 | 1124 |
fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; |
27536
533660d72131
8059327: XML parser returns corrupt attribute value
joehw
parents:
25868
diff
changeset
|
1125 |
load(1, false, false); |
12005 | 1126 |
fCurrentEntity.position = 0; |
1127 |
} |
|
1128 |
||
1129 |
// normalize newlines |
|
1130 |
int offset = fCurrentEntity.position; |
|
1131 |
int c = fCurrentEntity.ch[offset]; |
|
1132 |
int newlines = 0; |
|
1133 |
if(whiteSpaceInfoNeeded) |
|
1134 |
whiteSpaceLen=0; |
|
1135 |
if (c == '\n' || (c == '\r' && isExternal)) { |
|
1136 |
if (DEBUG_BUFFER) { |
|
1137 |
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); |
|
1138 |
print(); |
|
1139 |
System.out.println(); |
|
1140 |
} |
|
1141 |
do { |
|
1142 |
c = fCurrentEntity.ch[fCurrentEntity.position++]; |
|
1143 |
if (c == '\r' && isExternal) { |
|
1144 |
newlines++; |
|
1145 |
fCurrentEntity.lineNumber++; |
|
1146 |
fCurrentEntity.columnNumber = 1; |
|
1147 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
1148 |
offset = 0; |
|
1149 |
fCurrentEntity.position = newlines; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1150 |
if (load(newlines, false, true)) { |
12005 | 1151 |
break; |
1152 |
} |
|
1153 |
} |
|
1154 |
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { |
|
1155 |
fCurrentEntity.position++; |
|
1156 |
offset++; |
|
1157 |
} |
|
1158 |
/*** NEWLINE NORMALIZATION ***/ |
|
1159 |
else { |
|
1160 |
newlines++; |
|
1161 |
} |
|
1162 |
/***/ |
|
1163 |
} else if (c == '\n') { |
|
1164 |
newlines++; |
|
1165 |
fCurrentEntity.lineNumber++; |
|
1166 |
fCurrentEntity.columnNumber = 1; |
|
1167 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
1168 |
offset = 0; |
|
1169 |
fCurrentEntity.position = newlines; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1170 |
if (load(newlines, false, true)) { |
12005 | 1171 |
break; |
1172 |
} |
|
1173 |
} |
|
1174 |
/*** NEWLINE NORMALIZATION *** |
|
1175 |
* if (fCurrentEntity.ch[fCurrentEntity.position] == '\r' |
|
1176 |
* && external) { |
|
1177 |
* fCurrentEntity.position++; |
|
1178 |
* offset++; |
|
1179 |
* } |
|
1180 |
* /***/ |
|
1181 |
} else { |
|
1182 |
fCurrentEntity.position--; |
|
1183 |
break; |
|
1184 |
} |
|
1185 |
} while (fCurrentEntity.position < fCurrentEntity.count - 1); |
|
1186 |
int i=0; |
|
1187 |
for ( i = offset; i < fCurrentEntity.position; i++) { |
|
1188 |
fCurrentEntity.ch[i] = '\n'; |
|
22138
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1189 |
storeWhiteSpace(i); |
12005 | 1190 |
} |
1191 |
||
1192 |
int length = fCurrentEntity.position - offset; |
|
1193 |
if (fCurrentEntity.position == fCurrentEntity.count - 1) { |
|
1194 |
content.setValues(fCurrentEntity.ch, offset, length); |
|
1195 |
if (DEBUG_BUFFER) { |
|
1196 |
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); |
|
1197 |
print(); |
|
1198 |
System.out.println(); |
|
1199 |
} |
|
1200 |
return -1; |
|
1201 |
} |
|
1202 |
if (DEBUG_BUFFER) { |
|
1203 |
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); |
|
1204 |
print(); |
|
1205 |
System.out.println(); |
|
1206 |
} |
|
1207 |
} |
|
1208 |
||
1209 |
// scan literal value |
|
22138
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1210 |
for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) { |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1211 |
c = fCurrentEntity.ch[fCurrentEntity.position]; |
12005 | 1212 |
if ((c == quote && |
22138
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1213 |
(!fCurrentEntity.literal || isExternal)) || |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1214 |
c == '%' || !XMLChar.isContent(c)) { |
12005 | 1215 |
break; |
1216 |
} |
|
22138
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1217 |
if (whiteSpaceInfoNeeded && c == '\t') { |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1218 |
storeWhiteSpace(fCurrentEntity.position); |
12005 | 1219 |
} |
1220 |
} |
|
1221 |
int length = fCurrentEntity.position - offset; |
|
1222 |
fCurrentEntity.columnNumber += length - newlines; |
|
35334 | 1223 |
if (fCurrentEntity.isGE) { |
1224 |
checkLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fCurrentEntity, offset, length); |
|
1225 |
} |
|
39798 | 1226 |
if (isNSURI) { |
1227 |
checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length); |
|
1228 |
} |
|
12005 | 1229 |
content.setValues(fCurrentEntity.ch, offset, length); |
1230 |
||
1231 |
// return next character |
|
1232 |
if (fCurrentEntity.position != fCurrentEntity.count) { |
|
1233 |
c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
1234 |
// NOTE: We don't want to accidentally signal the |
|
1235 |
// end of the literal if we're expanding an |
|
1236 |
// entity appearing in the literal. -Ac |
|
1237 |
if (c == quote && fCurrentEntity.literal) { |
|
1238 |
c = -1; |
|
1239 |
} |
|
1240 |
} else { |
|
1241 |
c = -1; |
|
1242 |
} |
|
1243 |
if (DEBUG_BUFFER) { |
|
1244 |
System.out.print(")scanLiteral, '"+(char)quote+"': "); |
|
1245 |
print(); |
|
1246 |
System.out.println(" -> '"+(char)c+"'"); |
|
1247 |
} |
|
1248 |
return c; |
|
1249 |
||
1250 |
} // scanLiteral(int,XMLString):int |
|
1251 |
||
22138
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1252 |
/** |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1253 |
* Save whitespace information. Increase the whitespace buffer by 100 |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1254 |
* when needed. |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1255 |
* |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1256 |
* For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1257 |
* |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1258 |
* @param whiteSpacePos position of a whitespace in the scanner entity buffer |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1259 |
*/ |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1260 |
private void storeWhiteSpace(int whiteSpacePos) { |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1261 |
if (whiteSpaceLen >= whiteSpaceLookup.length) { |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1262 |
int [] tmp = new int[whiteSpaceLookup.length + 100]; |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1263 |
System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length); |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1264 |
whiteSpaceLookup = tmp; |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1265 |
} |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1266 |
|
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1267 |
whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos; |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1268 |
} |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1269 |
|
12005 | 1270 |
//CHANGED: |
1271 |
/** |
|
1272 |
* Scans a range of character data up to the specified delimiter, |
|
1273 |
* setting the fields of the XMLString structure, appropriately. |
|
1274 |
* <p> |
|
1275 |
* <strong>Note:</strong> The characters are consumed. |
|
1276 |
* <p> |
|
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
1277 |
* <strong>Note:</strong> This assumes that the delimiter contains at |
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
1278 |
* least one character. |
12005 | 1279 |
* <p> |
1280 |
* <strong>Note:</strong> This method does not guarantee to return |
|
1281 |
* the longest run of character data. This method may return before |
|
1282 |
* the delimiter due to reaching the end of the input buffer or any |
|
1283 |
* other reason. |
|
1284 |
* <p> |
|
1285 |
* @param delimiter The string that signifies the end of the character |
|
1286 |
* data to be scanned. |
|
1287 |
* @param buffer The XMLStringBuffer to fill. |
|
1288 |
* |
|
1289 |
* @return Returns true if there is more data to scan, false otherwise. |
|
1290 |
* |
|
1291 |
* @throws IOException Thrown if i/o error occurs. |
|
1292 |
* @throws EOFException Thrown on end of file. |
|
1293 |
*/ |
|
1294 |
public boolean scanData(String delimiter, XMLStringBuffer buffer) |
|
1295 |
throws IOException { |
|
1296 |
||
1297 |
boolean done = false; |
|
1298 |
int delimLen = delimiter.length(); |
|
1299 |
char charAt0 = delimiter.charAt(0); |
|
1300 |
do { |
|
1301 |
if (DEBUG_BUFFER) { |
|
1302 |
System.out.print("(scanData: "); |
|
1303 |
print(); |
|
1304 |
System.out.println(); |
|
1305 |
} |
|
1306 |
||
1307 |
// load more characters, if needed |
|
1308 |
||
1309 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1310 |
load(0, true, false); |
12005 | 1311 |
} |
1312 |
||
1313 |
boolean bNextEntity = false; |
|
1314 |
||
1315 |
while ((fCurrentEntity.position > fCurrentEntity.count - delimLen) |
|
1316 |
&& (!bNextEntity)) |
|
1317 |
{ |
|
1318 |
System.arraycopy(fCurrentEntity.ch, |
|
1319 |
fCurrentEntity.position, |
|
1320 |
fCurrentEntity.ch, |
|
1321 |
0, |
|
1322 |
fCurrentEntity.count - fCurrentEntity.position); |
|
1323 |
||
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1324 |
bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false); |
12005 | 1325 |
fCurrentEntity.position = 0; |
1326 |
fCurrentEntity.startPosition = 0; |
|
1327 |
} |
|
1328 |
||
1329 |
if (fCurrentEntity.position > fCurrentEntity.count - delimLen) { |
|
1330 |
// something must be wrong with the input: e.g., file ends in an unterminated comment |
|
1331 |
int length = fCurrentEntity.count - fCurrentEntity.position; |
|
1332 |
buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length); |
|
1333 |
fCurrentEntity.columnNumber += fCurrentEntity.count; |
|
1334 |
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition); |
|
1335 |
fCurrentEntity.position = fCurrentEntity.count; |
|
1336 |
fCurrentEntity.startPosition = fCurrentEntity.count; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1337 |
load(0, true, false); |
12005 | 1338 |
return false; |
1339 |
} |
|
1340 |
||
1341 |
// normalize newlines |
|
1342 |
int offset = fCurrentEntity.position; |
|
1343 |
int c = fCurrentEntity.ch[offset]; |
|
1344 |
int newlines = 0; |
|
1345 |
if (c == '\n' || (c == '\r' && isExternal)) { |
|
1346 |
if (DEBUG_BUFFER) { |
|
1347 |
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); |
|
1348 |
print(); |
|
1349 |
System.out.println(); |
|
1350 |
} |
|
1351 |
do { |
|
1352 |
c = fCurrentEntity.ch[fCurrentEntity.position++]; |
|
1353 |
if (c == '\r' && isExternal) { |
|
1354 |
newlines++; |
|
1355 |
fCurrentEntity.lineNumber++; |
|
1356 |
fCurrentEntity.columnNumber = 1; |
|
1357 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
1358 |
offset = 0; |
|
1359 |
fCurrentEntity.position = newlines; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1360 |
if (load(newlines, false, true)) { |
12005 | 1361 |
break; |
1362 |
} |
|
1363 |
} |
|
1364 |
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { |
|
1365 |
fCurrentEntity.position++; |
|
1366 |
offset++; |
|
1367 |
} |
|
1368 |
/*** NEWLINE NORMALIZATION ***/ |
|
1369 |
else { |
|
1370 |
newlines++; |
|
1371 |
} |
|
1372 |
} else if (c == '\n') { |
|
1373 |
newlines++; |
|
1374 |
fCurrentEntity.lineNumber++; |
|
1375 |
fCurrentEntity.columnNumber = 1; |
|
1376 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
1377 |
offset = 0; |
|
1378 |
fCurrentEntity.position = newlines; |
|
1379 |
fCurrentEntity.count = newlines; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1380 |
if (load(newlines, false, true)) { |
12005 | 1381 |
break; |
1382 |
} |
|
1383 |
} |
|
1384 |
} else { |
|
1385 |
fCurrentEntity.position--; |
|
1386 |
break; |
|
1387 |
} |
|
1388 |
} while (fCurrentEntity.position < fCurrentEntity.count - 1); |
|
1389 |
for (int i = offset; i < fCurrentEntity.position; i++) { |
|
1390 |
fCurrentEntity.ch[i] = '\n'; |
|
1391 |
} |
|
1392 |
int length = fCurrentEntity.position - offset; |
|
1393 |
if (fCurrentEntity.position == fCurrentEntity.count - 1) { |
|
1394 |
buffer.append(fCurrentEntity.ch, offset, length); |
|
1395 |
if (DEBUG_BUFFER) { |
|
1396 |
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); |
|
1397 |
print(); |
|
1398 |
System.out.println(); |
|
1399 |
} |
|
1400 |
return true; |
|
1401 |
} |
|
1402 |
if (DEBUG_BUFFER) { |
|
1403 |
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); |
|
1404 |
print(); |
|
1405 |
System.out.println(); |
|
1406 |
} |
|
1407 |
} |
|
1408 |
||
1409 |
// iterate over buffer looking for delimiter |
|
1410 |
OUTER: while (fCurrentEntity.position < fCurrentEntity.count) { |
|
1411 |
c = fCurrentEntity.ch[fCurrentEntity.position++]; |
|
1412 |
if (c == charAt0) { |
|
1413 |
// looks like we just hit the delimiter |
|
1414 |
int delimOffset = fCurrentEntity.position - 1; |
|
1415 |
for (int i = 1; i < delimLen; i++) { |
|
1416 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
1417 |
fCurrentEntity.position -= i; |
|
1418 |
break OUTER; |
|
1419 |
} |
|
1420 |
c = fCurrentEntity.ch[fCurrentEntity.position++]; |
|
1421 |
if (delimiter.charAt(i) != c) { |
|
1422 |
fCurrentEntity.position -= i; |
|
1423 |
break; |
|
1424 |
} |
|
1425 |
} |
|
1426 |
if (fCurrentEntity.position == delimOffset + delimLen) { |
|
1427 |
done = true; |
|
1428 |
break; |
|
1429 |
} |
|
1430 |
} else if (c == '\n' || (isExternal && c == '\r')) { |
|
1431 |
fCurrentEntity.position--; |
|
1432 |
break; |
|
1433 |
} else if (XMLChar.isInvalid(c)) { |
|
1434 |
fCurrentEntity.position--; |
|
1435 |
int length = fCurrentEntity.position - offset; |
|
1436 |
fCurrentEntity.columnNumber += length - newlines; |
|
1437 |
buffer.append(fCurrentEntity.ch, offset, length); |
|
1438 |
return true; |
|
1439 |
} |
|
1440 |
} |
|
1441 |
int length = fCurrentEntity.position - offset; |
|
1442 |
fCurrentEntity.columnNumber += length - newlines; |
|
1443 |
if (done) { |
|
1444 |
length -= delimLen; |
|
1445 |
} |
|
1446 |
buffer.append(fCurrentEntity.ch, offset, length); |
|
1447 |
||
1448 |
// return true if string was skipped |
|
1449 |
if (DEBUG_BUFFER) { |
|
1450 |
System.out.print(")scanData: "); |
|
1451 |
print(); |
|
1452 |
System.out.println(" -> " + done); |
|
1453 |
} |
|
1454 |
} while (!done); |
|
1455 |
return !done; |
|
1456 |
||
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
1457 |
} // scanData(String, XMLStringBuffer) |
12005 | 1458 |
|
1459 |
/** |
|
1460 |
* Skips a character appearing immediately on the input. |
|
1461 |
* <p> |
|
1462 |
* <strong>Note:</strong> The character is consumed only if it matches |
|
1463 |
* the specified character. |
|
1464 |
* |
|
1465 |
* @param c The character to skip. |
|
1466 |
* |
|
1467 |
* @return Returns true if the character was skipped. |
|
1468 |
* |
|
1469 |
* @throws IOException Thrown if i/o error occurs. |
|
1470 |
* @throws EOFException Thrown on end of file. |
|
1471 |
*/ |
|
1472 |
public boolean skipChar(int c) throws IOException { |
|
1473 |
if (DEBUG_BUFFER) { |
|
1474 |
System.out.print("(skipChar, '"+(char)c+"': "); |
|
1475 |
print(); |
|
1476 |
System.out.println(); |
|
1477 |
} |
|
1478 |
||
1479 |
// load more characters, if needed |
|
1480 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1481 |
load(0, true, true); |
12005 | 1482 |
} |
1483 |
||
1484 |
// skip character |
|
1485 |
int cc = fCurrentEntity.ch[fCurrentEntity.position]; |
|
1486 |
if (cc == c) { |
|
1487 |
fCurrentEntity.position++; |
|
1488 |
if (c == '\n') { |
|
1489 |
fCurrentEntity.lineNumber++; |
|
1490 |
fCurrentEntity.columnNumber = 1; |
|
1491 |
} else { |
|
1492 |
fCurrentEntity.columnNumber++; |
|
1493 |
} |
|
1494 |
if (DEBUG_BUFFER) { |
|
1495 |
System.out.print(")skipChar, '"+(char)c+"': "); |
|
1496 |
print(); |
|
1497 |
System.out.println(" -> true"); |
|
1498 |
} |
|
1499 |
return true; |
|
1500 |
} else if (c == '\n' && cc == '\r' && isExternal) { |
|
1501 |
// handle newlines |
|
1502 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
27536
533660d72131
8059327: XML parser returns corrupt attribute value
joehw
parents:
25868
diff
changeset
|
1503 |
invokeListeners(1); |
12005 | 1504 |
fCurrentEntity.ch[0] = (char)cc; |
27536
533660d72131
8059327: XML parser returns corrupt attribute value
joehw
parents:
25868
diff
changeset
|
1505 |
load(1, false, false); |
12005 | 1506 |
} |
1507 |
fCurrentEntity.position++; |
|
1508 |
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { |
|
1509 |
fCurrentEntity.position++; |
|
1510 |
} |
|
1511 |
fCurrentEntity.lineNumber++; |
|
1512 |
fCurrentEntity.columnNumber = 1; |
|
1513 |
if (DEBUG_BUFFER) { |
|
1514 |
System.out.print(")skipChar, '"+(char)c+"': "); |
|
1515 |
print(); |
|
1516 |
System.out.println(" -> true"); |
|
1517 |
} |
|
1518 |
return true; |
|
1519 |
} |
|
1520 |
||
1521 |
// character was not skipped |
|
1522 |
if (DEBUG_BUFFER) { |
|
1523 |
System.out.print(")skipChar, '"+(char)c+"': "); |
|
1524 |
print(); |
|
1525 |
System.out.println(" -> false"); |
|
1526 |
} |
|
1527 |
return false; |
|
1528 |
||
1529 |
} // skipChar(int):boolean |
|
1530 |
||
1531 |
public boolean isSpace(char ch){ |
|
1532 |
return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r'); |
|
1533 |
} |
|
1534 |
/** |
|
1535 |
* Skips space characters appearing immediately on the input. |
|
1536 |
* <p> |
|
1537 |
* <strong>Note:</strong> The characters are consumed only if they are |
|
1538 |
* space characters. |
|
1539 |
* |
|
1540 |
* @return Returns true if at least one space character was skipped. |
|
1541 |
* |
|
1542 |
* @throws IOException Thrown if i/o error occurs. |
|
1543 |
* @throws EOFException Thrown on end of file. |
|
1544 |
* |
|
1545 |
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace |
|
1546 |
*/ |
|
1547 |
public boolean skipSpaces() throws IOException { |
|
1548 |
if (DEBUG_BUFFER) { |
|
1549 |
System.out.print("(skipSpaces: "); |
|
1550 |
print(); |
|
1551 |
System.out.println(); |
|
1552 |
} |
|
1553 |
//boolean entityChanged = false; |
|
1554 |
// load more characters, if needed |
|
1555 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1556 |
load(0, true, true); |
12005 | 1557 |
} |
1558 |
||
1559 |
//we are doing this check only in skipSpace() because it is called by |
|
1560 |
//fMiscDispatcher and we want the parser to exit gracefully when document |
|
1561 |
//is well-formed. |
|
1562 |
//it is possible that end of document is reached and |
|
1563 |
//fCurrentEntity becomes null |
|
1564 |
//nothing was read so entity changed 'false' should be returned. |
|
1565 |
if(fCurrentEntity == null){ |
|
1566 |
return false ; |
|
1567 |
} |
|
1568 |
||
1569 |
// skip spaces |
|
1570 |
int c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
1571 |
if (XMLChar.isSpace(c)) { |
|
1572 |
do { |
|
1573 |
boolean entityChanged = false; |
|
1574 |
// handle newlines |
|
1575 |
if (c == '\n' || (isExternal && c == '\r')) { |
|
1576 |
fCurrentEntity.lineNumber++; |
|
1577 |
fCurrentEntity.columnNumber = 1; |
|
1578 |
if (fCurrentEntity.position == fCurrentEntity.count - 1) { |
|
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
1579 |
invokeListeners(1); |
12005 | 1580 |
fCurrentEntity.ch[0] = (char)c; |
27536
533660d72131
8059327: XML parser returns corrupt attribute value
joehw
parents:
25868
diff
changeset
|
1581 |
entityChanged = load(1, true, false); |
12005 | 1582 |
if (!entityChanged){ |
1583 |
// the load change the position to be 1, |
|
1584 |
// need to restore it when entity not changed |
|
1585 |
fCurrentEntity.position = 0; |
|
1586 |
}else if(fCurrentEntity == null){ |
|
1587 |
return true ; |
|
1588 |
} |
|
1589 |
} |
|
1590 |
if (c == '\r' && isExternal) { |
|
1591 |
// REVISIT: Does this need to be updated to fix the |
|
1592 |
// #x0D ^#x0A newline normalization problem? -Ac |
|
1593 |
if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { |
|
1594 |
fCurrentEntity.position--; |
|
1595 |
} |
|
1596 |
} |
|
1597 |
} else { |
|
1598 |
fCurrentEntity.columnNumber++; |
|
1599 |
} |
|
1600 |
// load more characters, if needed |
|
1601 |
if (!entityChanged){ |
|
1602 |
fCurrentEntity.position++; |
|
1603 |
} |
|
1604 |
||
1605 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1606 |
load(0, true, true); |
12005 | 1607 |
|
1608 |
//we are doing this check only in skipSpace() because it is called by |
|
1609 |
//fMiscDispatcher and we want the parser to exit gracefully when document |
|
1610 |
//is well-formed. |
|
1611 |
||
1612 |
//it is possible that end of document is reached and |
|
1613 |
//fCurrentEntity becomes null |
|
1614 |
//nothing was read so entity changed 'false' should be returned. |
|
1615 |
if(fCurrentEntity == null){ |
|
1616 |
return true ; |
|
1617 |
} |
|
1618 |
||
1619 |
} |
|
1620 |
} while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); |
|
1621 |
if (DEBUG_BUFFER) { |
|
1622 |
System.out.print(")skipSpaces: "); |
|
1623 |
print(); |
|
1624 |
System.out.println(" -> true"); |
|
1625 |
} |
|
1626 |
return true; |
|
1627 |
} |
|
1628 |
||
1629 |
// no spaces were found |
|
1630 |
if (DEBUG_BUFFER) { |
|
1631 |
System.out.print(")skipSpaces: "); |
|
1632 |
print(); |
|
1633 |
System.out.println(" -> false"); |
|
1634 |
} |
|
1635 |
return false; |
|
1636 |
||
1637 |
} // skipSpaces():boolean |
|
1638 |
||
1639 |
||
1640 |
/** |
|
1641 |
* @param legnth This function checks that following number of characters are available. |
|
1642 |
* to the underlying buffer. |
|
1643 |
* @return This function returns true if capacity asked is available. |
|
1644 |
*/ |
|
1645 |
public boolean arrangeCapacity(int length) throws IOException{ |
|
1646 |
return arrangeCapacity(length, false); |
|
1647 |
} |
|
1648 |
||
1649 |
/** |
|
1650 |
* @param legnth This function checks that following number of characters are available. |
|
1651 |
* to the underlying buffer. |
|
1652 |
* @param if the underlying function should change the entity |
|
1653 |
* @return This function returns true if capacity asked is available. |
|
1654 |
* |
|
1655 |
*/ |
|
1656 |
public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{ |
|
1657 |
//check if the capacity is availble in the current buffer |
|
1658 |
//count is no. of characters in the buffer [x][m][l] |
|
1659 |
//position is '0' based |
|
1660 |
//System.out.println("fCurrent Entity " + fCurrentEntity); |
|
1661 |
if((fCurrentEntity.count - fCurrentEntity.position) >= length) { |
|
1662 |
return true; |
|
1663 |
} |
|
1664 |
if(DEBUG_SKIP_STRING){ |
|
1665 |
System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); |
|
1666 |
System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); |
|
1667 |
System.out.println("length = " + length); |
|
1668 |
} |
|
1669 |
boolean entityChanged = false; |
|
1670 |
//load more characters -- this function shouldn't change the entity |
|
1671 |
while((fCurrentEntity.count - fCurrentEntity.position) < length){ |
|
1672 |
if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){ |
|
1673 |
invokeListeners(0); |
|
1674 |
System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position); |
|
1675 |
fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position; |
|
1676 |
fCurrentEntity.position = 0; |
|
1677 |
} |
|
1678 |
||
1679 |
if((fCurrentEntity.count - fCurrentEntity.position) < length){ |
|
1680 |
int pos = fCurrentEntity.position; |
|
1681 |
invokeListeners(pos); |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1682 |
entityChanged = load(fCurrentEntity.count, changeEntity, false); |
12005 | 1683 |
fCurrentEntity.position = pos; |
1684 |
if(entityChanged)break; |
|
1685 |
} |
|
1686 |
if(DEBUG_SKIP_STRING){ |
|
1687 |
System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); |
|
1688 |
System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); |
|
1689 |
System.out.println("length = " + length); |
|
1690 |
} |
|
1691 |
} |
|
1692 |
//load changes the position.. set it back to the point where we started. |
|
1693 |
||
1694 |
//after loading check again. |
|
1695 |
if((fCurrentEntity.count - fCurrentEntity.position) >= length) { |
|
1696 |
return true; |
|
1697 |
} else { |
|
1698 |
return false; |
|
1699 |
} |
|
1700 |
} |
|
1701 |
||
1702 |
/** |
|
1703 |
* Skips the specified string appearing immediately on the input. |
|
1704 |
* <p> |
|
1705 |
* <strong>Note:</strong> The characters are consumed only if all |
|
1706 |
* the characters are skipped. |
|
1707 |
* |
|
1708 |
* @param s The string to skip. |
|
1709 |
* |
|
1710 |
* @return Returns true if the string was skipped. |
|
1711 |
* |
|
1712 |
* @throws IOException Thrown if i/o error occurs. |
|
1713 |
* @throws EOFException Thrown on end of file. |
|
1714 |
*/ |
|
1715 |
public boolean skipString(String s) throws IOException { |
|
1716 |
||
1717 |
final int length = s.length(); |
|
1718 |
||
1719 |
//first make sure that required capacity is avaible |
|
1720 |
if(arrangeCapacity(length, false)){ |
|
1721 |
final int beforeSkip = fCurrentEntity.position ; |
|
1722 |
int afterSkip = fCurrentEntity.position + length - 1 ; |
|
1723 |
if(DEBUG_SKIP_STRING){ |
|
1724 |
System.out.println("skipString,length = " + s + "," + length); |
|
1725 |
System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip, length)); |
|
1726 |
} |
|
1727 |
||
1728 |
//s.charAt() indexes are 0 to 'Length -1' based. |
|
1729 |
int i = length - 1 ; |
|
1730 |
//check from reverse |
|
1731 |
while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){ |
|
1732 |
if(afterSkip-- == beforeSkip){ |
|
1733 |
fCurrentEntity.position = fCurrentEntity.position + length ; |
|
1734 |
fCurrentEntity.columnNumber += length; |
|
1735 |
return true; |
|
1736 |
} |
|
1737 |
} |
|
1738 |
} |
|
1739 |
||
1740 |
return false; |
|
1741 |
} // skipString(String):boolean |
|
1742 |
||
1743 |
public boolean skipString(char [] s) throws IOException { |
|
1744 |
||
1745 |
final int length = s.length; |
|
1746 |
//first make sure that required capacity is avaible |
|
1747 |
if(arrangeCapacity(length, false)){ |
|
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
1748 |
int beforeSkip = fCurrentEntity.position; |
12005 | 1749 |
|
1750 |
if(DEBUG_SKIP_STRING){ |
|
1751 |
System.out.println("skipString,length = " + new String(s) + "," + length); |
|
1752 |
System.out.println("skipString,length = " + new String(s) + "," + length); |
|
1753 |
} |
|
1754 |
||
1755 |
for(int i=0;i<length;i++){ |
|
1756 |
if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){ |
|
1757 |
return false; |
|
1758 |
} |
|
1759 |
} |
|
1760 |
fCurrentEntity.position = fCurrentEntity.position + length ; |
|
1761 |
fCurrentEntity.columnNumber += length; |
|
1762 |
return true; |
|
1763 |
||
1764 |
} |
|
1765 |
||
1766 |
return false; |
|
1767 |
} |
|
1768 |
||
1769 |
// |
|
1770 |
// Locator methods |
|
1771 |
// |
|
1772 |
// |
|
1773 |
// Private methods |
|
1774 |
// |
|
1775 |
||
1776 |
/** |
|
1777 |
* Loads a chunk of text. |
|
1778 |
* |
|
1779 |
* @param offset The offset into the character buffer to |
|
1780 |
* read the next batch of characters. |
|
1781 |
* @param changeEntity True if the load should change entities |
|
1782 |
* at the end of the entity, otherwise leave |
|
1783 |
* the current entity in place and the entity |
|
1784 |
* boundary will be signaled by the return |
|
1785 |
* value. |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1786 |
* @param notify Determine whether to notify listeners of |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1787 |
* the event |
12005 | 1788 |
* |
1789 |
* @returns Returns true if the entity changed as a result of this |
|
1790 |
* load operation. |
|
1791 |
*/ |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1792 |
final boolean load(int offset, boolean changeEntity, boolean notify) |
12005 | 1793 |
throws IOException { |
1794 |
if (DEBUG_BUFFER) { |
|
1795 |
System.out.print("(load, "+offset+": "); |
|
1796 |
print(); |
|
1797 |
System.out.println(); |
|
1798 |
} |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1799 |
if (notify) { |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1800 |
invokeListeners(offset); |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1801 |
} |
12005 | 1802 |
//maintaing the count till last load |
1803 |
fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ; |
|
1804 |
// read characters |
|
1805 |
int length = fCurrentEntity.ch.length - offset; |
|
1806 |
if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) { |
|
1807 |
length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE; |
|
1808 |
} |
|
1809 |
if (DEBUG_BUFFER) System.out.println(" length to try to read: "+length); |
|
1810 |
int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length); |
|
1811 |
if (DEBUG_BUFFER) System.out.println(" length actually read: "+count); |
|
1812 |
||
1813 |
// reset count and position |
|
1814 |
boolean entityChanged = false; |
|
1815 |
if (count != -1) { |
|
1816 |
if (count != 0) { |
|
1817 |
// record the last count |
|
1818 |
fCurrentEntity.fLastCount = count; |
|
1819 |
fCurrentEntity.count = count + offset; |
|
1820 |
fCurrentEntity.position = offset; |
|
1821 |
} |
|
1822 |
} |
|
1823 |
// end of this entity |
|
1824 |
else { |
|
1825 |
fCurrentEntity.count = offset; |
|
1826 |
fCurrentEntity.position = offset; |
|
1827 |
entityChanged = true; |
|
1828 |
||
1829 |
if (changeEntity) { |
|
1830 |
//notify the entity manager about the end of entity |
|
1831 |
fEntityManager.endEntity(); |
|
1832 |
//return if the current entity becomes null |
|
1833 |
if(fCurrentEntity == null){ |
|
1834 |
throw END_OF_DOCUMENT_ENTITY; |
|
1835 |
} |
|
1836 |
// handle the trailing edges |
|
1837 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1838 |
load(0, true, false); |
12005 | 1839 |
} |
1840 |
} |
|
1841 |
||
1842 |
} |
|
1843 |
if (DEBUG_BUFFER) { |
|
1844 |
System.out.print(")load, "+offset+": "); |
|
1845 |
print(); |
|
1846 |
System.out.println(); |
|
1847 |
} |
|
1848 |
||
1849 |
return entityChanged; |
|
1850 |
||
1851 |
} // load(int, boolean):boolean |
|
1852 |
||
1853 |
/** |
|
1854 |
* Creates a reader capable of reading the given input stream in |
|
1855 |
* the specified encoding. |
|
1856 |
* |
|
1857 |
* @param inputStream The input stream. |
|
1858 |
* @param encoding The encoding name that the input stream is |
|
1859 |
* encoded using. If the user has specified that |
|
1860 |
* Java encoding names are allowed, then the |
|
1861 |
* encoding name may be a Java encoding name; |
|
1862 |
* otherwise, it is an ianaEncoding name. |
|
1863 |
* @param isBigEndian For encodings (like uCS-4), whose names cannot |
|
1864 |
* specify a byte order, this tells whether the order is bigEndian. null menas |
|
1865 |
* unknown or not relevant. |
|
1866 |
* |
|
1867 |
* @return Returns a reader. |
|
1868 |
*/ |
|
1869 |
protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) |
|
1870 |
throws IOException { |
|
1871 |
||
1872 |
// normalize encoding name |
|
1873 |
if (encoding == null) { |
|
1874 |
encoding = "UTF-8"; |
|
1875 |
} |
|
1876 |
||
1877 |
// try to use an optimized reader |
|
1878 |
String ENCODING = encoding.toUpperCase(Locale.ENGLISH); |
|
1879 |
if (ENCODING.equals("UTF-8")) { |
|
1880 |
if (DEBUG_ENCODINGS) { |
|
1881 |
System.out.println("$$$ creating UTF8Reader"); |
|
1882 |
} |
|
1883 |
return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); |
|
1884 |
} |
|
1885 |
if (ENCODING.equals("US-ASCII")) { |
|
1886 |
if (DEBUG_ENCODINGS) { |
|
1887 |
System.out.println("$$$ creating ASCIIReader"); |
|
1888 |
} |
|
1889 |
return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); |
|
1890 |
} |
|
1891 |
if(ENCODING.equals("ISO-10646-UCS-4")) { |
|
1892 |
if(isBigEndian != null) { |
|
1893 |
boolean isBE = isBigEndian.booleanValue(); |
|
1894 |
if(isBE) { |
|
1895 |
return new UCSReader(inputStream, UCSReader.UCS4BE); |
|
1896 |
} else { |
|
1897 |
return new UCSReader(inputStream, UCSReader.UCS4LE); |
|
1898 |
} |
|
1899 |
} else { |
|
1900 |
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, |
|
1901 |
"EncodingByteOrderUnsupported", |
|
1902 |
new Object[] { encoding }, |
|
1903 |
XMLErrorReporter.SEVERITY_FATAL_ERROR); |
|
1904 |
} |
|
1905 |
} |
|
1906 |
if(ENCODING.equals("ISO-10646-UCS-2")) { |
|
1907 |
if(isBigEndian != null) { // sould never happen with this encoding... |
|
1908 |
boolean isBE = isBigEndian.booleanValue(); |
|
1909 |
if(isBE) { |
|
1910 |
return new UCSReader(inputStream, UCSReader.UCS2BE); |
|
1911 |
} else { |
|
1912 |
return new UCSReader(inputStream, UCSReader.UCS2LE); |
|
1913 |
} |
|
1914 |
} else { |
|
1915 |
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, |
|
1916 |
"EncodingByteOrderUnsupported", |
|
1917 |
new Object[] { encoding }, |
|
1918 |
XMLErrorReporter.SEVERITY_FATAL_ERROR); |
|
1919 |
} |
|
1920 |
} |
|
1921 |
||
1922 |
// check for valid name |
|
1923 |
boolean validIANA = XMLChar.isValidIANAEncoding(encoding); |
|
1924 |
boolean validJava = XMLChar.isValidJavaEncoding(encoding); |
|
1925 |
if (!validIANA || (fAllowJavaEncodings && !validJava)) { |
|
1926 |
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, |
|
1927 |
"EncodingDeclInvalid", |
|
1928 |
new Object[] { encoding }, |
|
1929 |
XMLErrorReporter.SEVERITY_FATAL_ERROR); |
|
1930 |
// NOTE: AndyH suggested that, on failure, we use ISO Latin 1 |
|
1931 |
// because every byte is a valid ISO Latin 1 character. |
|
1932 |
// It may not translate correctly but if we failed on |
|
1933 |
// the encoding anyway, then we're expecting the content |
|
1934 |
// of the document to be bad. This will just prevent an |
|
1935 |
// invalid UTF-8 sequence to be detected. This is only |
|
1936 |
// important when continue-after-fatal-error is turned |
|
1937 |
// on. -Ac |
|
1938 |
encoding = "ISO-8859-1"; |
|
1939 |
} |
|
1940 |
||
1941 |
// try to use a Java reader |
|
1942 |
String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); |
|
1943 |
if (javaEncoding == null) { |
|
1944 |
if(fAllowJavaEncodings) { |
|
1945 |
javaEncoding = encoding; |
|
1946 |
} else { |
|
1947 |
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, |
|
1948 |
"EncodingDeclInvalid", |
|
1949 |
new Object[] { encoding }, |
|
1950 |
XMLErrorReporter.SEVERITY_FATAL_ERROR); |
|
1951 |
// see comment above. |
|
1952 |
javaEncoding = "ISO8859_1"; |
|
1953 |
} |
|
1954 |
} |
|
1955 |
else if (javaEncoding.equals("ASCII")) { |
|
1956 |
if (DEBUG_ENCODINGS) { |
|
1957 |
System.out.println("$$$ creating ASCIIReader"); |
|
1958 |
} |
|
1959 |
return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); |
|
1960 |
} |
|
1961 |
||
1962 |
if (DEBUG_ENCODINGS) { |
|
1963 |
System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); |
|
1964 |
if (javaEncoding == encoding) { |
|
1965 |
System.out.print(" (IANA encoding)"); |
|
1966 |
} |
|
1967 |
System.out.println(); |
|
1968 |
} |
|
1969 |
return new InputStreamReader(inputStream, javaEncoding); |
|
1970 |
||
1971 |
} // createReader(InputStream,String, Boolean): Reader |
|
1972 |
||
1973 |
/** |
|
1974 |
* Returns the IANA encoding name that is auto-detected from |
|
1975 |
* the bytes specified, with the endian-ness of that encoding where appropriate. |
|
1976 |
* |
|
1977 |
* @param b4 The first four bytes of the input. |
|
1978 |
* @param count The number of bytes actually read. |
|
1979 |
* @return a 2-element array: the first element, an IANA-encoding string, |
|
1980 |
* the second element a Boolean which is true iff the document is big endian, false |
|
1981 |
* if it's little-endian, and null if the distinction isn't relevant. |
|
1982 |
*/ |
|
1983 |
protected Object[] getEncodingName(byte[] b4, int count) { |
|
1984 |
||
1985 |
if (count < 2) { |
|
1986 |
return new Object[]{"UTF-8", null}; |
|
1987 |
} |
|
1988 |
||
1989 |
// UTF-16, with BOM |
|
1990 |
int b0 = b4[0] & 0xFF; |
|
1991 |
int b1 = b4[1] & 0xFF; |
|
1992 |
if (b0 == 0xFE && b1 == 0xFF) { |
|
1993 |
// UTF-16, big-endian |
|
1994 |
return new Object [] {"UTF-16BE", new Boolean(true)}; |
|
1995 |
} |
|
1996 |
if (b0 == 0xFF && b1 == 0xFE) { |
|
1997 |
// UTF-16, little-endian |
|
1998 |
return new Object [] {"UTF-16LE", new Boolean(false)}; |
|
1999 |
} |
|
2000 |
||
2001 |
// default to UTF-8 if we don't have enough bytes to make a |
|
2002 |
// good determination of the encoding |
|
2003 |
if (count < 3) { |
|
2004 |
return new Object [] {"UTF-8", null}; |
|
2005 |
} |
|
2006 |
||
2007 |
// UTF-8 with a BOM |
|
2008 |
int b2 = b4[2] & 0xFF; |
|
2009 |
if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { |
|
2010 |
return new Object [] {"UTF-8", null}; |
|
2011 |
} |
|
2012 |
||
2013 |
// default to UTF-8 if we don't have enough bytes to make a |
|
2014 |
// good determination of the encoding |
|
2015 |
if (count < 4) { |
|
2016 |
return new Object [] {"UTF-8", null}; |
|
2017 |
} |
|
2018 |
||
2019 |
// other encodings |
|
2020 |
int b3 = b4[3] & 0xFF; |
|
2021 |
if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { |
|
2022 |
// UCS-4, big endian (1234) |
|
2023 |
return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; |
|
2024 |
} |
|
2025 |
if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { |
|
2026 |
// UCS-4, little endian (4321) |
|
2027 |
return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; |
|
2028 |
} |
|
2029 |
if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { |
|
2030 |
// UCS-4, unusual octet order (2143) |
|
2031 |
// REVISIT: What should this be? |
|
2032 |
return new Object [] {"ISO-10646-UCS-4", null}; |
|
2033 |
} |
|
2034 |
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { |
|
2035 |
// UCS-4, unusual octect order (3412) |
|
2036 |
// REVISIT: What should this be? |
|
2037 |
return new Object [] {"ISO-10646-UCS-4", null}; |
|
2038 |
} |
|
2039 |
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { |
|
2040 |
// UTF-16, big-endian, no BOM |
|
2041 |
// (or could turn out to be UCS-2... |
|
2042 |
// REVISIT: What should this be? |
|
2043 |
return new Object [] {"UTF-16BE", new Boolean(true)}; |
|
2044 |
} |
|
2045 |
if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { |
|
2046 |
// UTF-16, little-endian, no BOM |
|
2047 |
// (or could turn out to be UCS-2... |
|
2048 |
return new Object [] {"UTF-16LE", new Boolean(false)}; |
|
2049 |
} |
|
2050 |
if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { |
|
2051 |
// EBCDIC |
|
2052 |
// a la xerces1, return CP037 instead of EBCDIC here |
|
2053 |
return new Object [] {"CP037", null}; |
|
2054 |
} |
|
2055 |
||
2056 |
// default encoding |
|
2057 |
return new Object [] {"UTF-8", null}; |
|
2058 |
||
2059 |
} // getEncodingName(byte[],int):Object[] |
|
2060 |
||
2061 |
/** |
|
2062 |
* xxx not removing endEntity() so that i remember that we need to implement it. |
|
2063 |
* Ends an entity. |
|
2064 |
* |
|
2065 |
* @throws XNIException Thrown by entity handler to signal an error. |
|
2066 |
*/ |
|
2067 |
// |
|
2068 |
/** Prints the contents of the buffer. */ |
|
2069 |
final void print() { |
|
2070 |
if (DEBUG_BUFFER) { |
|
2071 |
if (fCurrentEntity != null) { |
|
2072 |
System.out.print('['); |
|
2073 |
System.out.print(fCurrentEntity.count); |
|
2074 |
System.out.print(' '); |
|
2075 |
System.out.print(fCurrentEntity.position); |
|
2076 |
if (fCurrentEntity.count > 0) { |
|
2077 |
System.out.print(" \""); |
|
2078 |
for (int i = 0; i < fCurrentEntity.count; i++) { |
|
2079 |
if (i == fCurrentEntity.position) { |
|
2080 |
System.out.print('^'); |
|
2081 |
} |
|
2082 |
char c = fCurrentEntity.ch[i]; |
|
2083 |
switch (c) { |
|
2084 |
case '\n': { |
|
2085 |
System.out.print("\\n"); |
|
2086 |
break; |
|
2087 |
} |
|
2088 |
case '\r': { |
|
2089 |
System.out.print("\\r"); |
|
2090 |
break; |
|
2091 |
} |
|
2092 |
case '\t': { |
|
2093 |
System.out.print("\\t"); |
|
2094 |
break; |
|
2095 |
} |
|
2096 |
case '\\': { |
|
2097 |
System.out.print("\\\\"); |
|
2098 |
break; |
|
2099 |
} |
|
2100 |
default: { |
|
2101 |
System.out.print(c); |
|
2102 |
} |
|
2103 |
} |
|
2104 |
} |
|
2105 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
2106 |
System.out.print('^'); |
|
2107 |
} |
|
2108 |
System.out.print('"'); |
|
2109 |
} |
|
2110 |
System.out.print(']'); |
|
2111 |
System.out.print(" @ "); |
|
2112 |
System.out.print(fCurrentEntity.lineNumber); |
|
2113 |
System.out.print(','); |
|
2114 |
System.out.print(fCurrentEntity.columnNumber); |
|
2115 |
} else { |
|
2116 |
System.out.print("*NO CURRENT ENTITY*"); |
|
2117 |
} |
|
2118 |
} |
|
2119 |
} |
|
2120 |
||
2121 |
/** |
|
2122 |
* Registers the listener object and provides callback. |
|
2123 |
* @param listener listener to which call back should be provided when scanner buffer |
|
2124 |
* is being changed. |
|
2125 |
*/ |
|
2126 |
public void registerListener(XMLBufferListener listener) { |
|
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
2127 |
if (!listeners.contains(listener)) { |
12005 | 2128 |
listeners.add(listener); |
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
2129 |
} |
12005 | 2130 |
} |
2131 |
||
2132 |
/** |
|
2133 |
* |
|
2134 |
* @param loadPos Starting position from which new data is being loaded into scanner buffer. |
|
2135 |
*/ |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
2136 |
public void invokeListeners(int loadPos){ |
37626
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
2137 |
for (int i=0; i<listeners.size(); i++) { |
d4fb6a5dc001
8153781: Issue in XMLScanner: EXPECTED_SQUARE_BRACKET_TO_CLOSE_INTERNAL_SUBSET when skipping large DOCTYPE section with CRLF at wrong place
joehw
parents:
35334
diff
changeset
|
2138 |
listeners.get(i).refresh(loadPos); |
12005 | 2139 |
} |
2140 |
} |
|
2141 |
||
2142 |
/** |
|
2143 |
* Skips space characters appearing immediately on the input that would |
|
2144 |
* match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line |
|
2145 |
* normalization is performed. This is useful when scanning structures |
|
2146 |
* such as the XMLDecl and TextDecl that can only contain US-ASCII |
|
2147 |
* characters. |
|
2148 |
* <p> |
|
2149 |
* <strong>Note:</strong> The characters are consumed only if they would |
|
2150 |
* match non-terminal S before end of line normalization is performed. |
|
2151 |
* |
|
2152 |
* @return Returns true if at least one space character was skipped. |
|
2153 |
* |
|
2154 |
* @throws IOException Thrown if i/o error occurs. |
|
2155 |
* @throws EOFException Thrown on end of file. |
|
2156 |
* |
|
2157 |
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace |
|
2158 |
*/ |
|
2159 |
public final boolean skipDeclSpaces() throws IOException { |
|
2160 |
if (DEBUG_BUFFER) { |
|
2161 |
System.out.print("(skipDeclSpaces: "); |
|
2162 |
//XMLEntityManager.print(fCurrentEntity); |
|
2163 |
System.out.println(); |
|
2164 |
} |
|
2165 |
||
2166 |
// load more characters, if needed |
|
2167 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
2168 |
load(0, true, false); |
12005 | 2169 |
} |
2170 |
||
2171 |
// skip spaces |
|
2172 |
int c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
2173 |
if (XMLChar.isSpace(c)) { |
|
2174 |
boolean external = fCurrentEntity.isExternal(); |
|
2175 |
do { |
|
2176 |
boolean entityChanged = false; |
|
2177 |
// handle newlines |
|
2178 |
if (c == '\n' || (external && c == '\r')) { |
|
2179 |
fCurrentEntity.lineNumber++; |
|
2180 |
fCurrentEntity.columnNumber = 1; |
|
2181 |
if (fCurrentEntity.position == fCurrentEntity.count - 1) { |
|
2182 |
fCurrentEntity.ch[0] = (char)c; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
2183 |
entityChanged = load(1, true, false); |
12005 | 2184 |
if (!entityChanged) |
2185 |
// the load change the position to be 1, |
|
2186 |
// need to restore it when entity not changed |
|
2187 |
fCurrentEntity.position = 0; |
|
2188 |
} |
|
2189 |
if (c == '\r' && external) { |
|
2190 |
// REVISIT: Does this need to be updated to fix the |
|
2191 |
// #x0D ^#x0A newline normalization problem? -Ac |
|
2192 |
if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { |
|
2193 |
fCurrentEntity.position--; |
|
2194 |
} |
|
2195 |
} |
|
2196 |
/*** NEWLINE NORMALIZATION *** |
|
2197 |
* else { |
|
2198 |
* if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r' |
|
2199 |
* && external) { |
|
2200 |
* fCurrentEntity.position++; |
|
2201 |
* } |
|
2202 |
* } |
|
2203 |
* /***/ |
|
2204 |
} else { |
|
2205 |
fCurrentEntity.columnNumber++; |
|
2206 |
} |
|
2207 |
// load more characters, if needed |
|
2208 |
if (!entityChanged) |
|
2209 |
fCurrentEntity.position++; |
|
2210 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
2211 |
load(0, true, false); |
12005 | 2212 |
} |
2213 |
} while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); |
|
2214 |
if (DEBUG_BUFFER) { |
|
2215 |
System.out.print(")skipDeclSpaces: "); |
|
2216 |
// XMLEntityManager.print(fCurrentEntity); |
|
2217 |
System.out.println(" -> true"); |
|
2218 |
} |
|
2219 |
return true; |
|
2220 |
} |
|
2221 |
||
2222 |
// no spaces were found |
|
2223 |
if (DEBUG_BUFFER) { |
|
2224 |
System.out.print(")skipDeclSpaces: "); |
|
2225 |
//XMLEntityManager.print(fCurrentEntity); |
|
2226 |
System.out.println(" -> false"); |
|
2227 |
} |
|
2228 |
return false; |
|
2229 |
||
2230 |
} // skipDeclSpaces():boolean |
|
2231 |
||
2232 |
||
2233 |
} // class XMLEntityScanner |