author | joehw |
Sun, 05 Jan 2014 21:00:00 -0800 | |
changeset 22140 | f2634f2bc36c |
parent 22138 | 069c98fc4646 |
permissions | -rw-r--r-- |
12005 | 1 |
/* |
2 |
* Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. |
|
3 |
*/ |
|
4 |
||
5 |
/* |
|
6 |
* Copyright 2005 The Apache Software Foundation. |
|
7 |
* |
|
8 |
* Licensed under the Apache License, Version 2.0 (the "License"); |
|
9 |
* you may not use this file except in compliance with the License. |
|
10 |
* You may obtain a copy of the License at |
|
11 |
* |
|
12 |
* http://www.apache.org/licenses/LICENSE-2.0 |
|
13 |
* |
|
14 |
* Unless required by applicable law or agreed to in writing, software |
|
15 |
* distributed under the License is distributed on an "AS IS" BASIS, |
|
16 |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
17 |
* See the License for the specific language governing permissions and |
|
18 |
* limitations under the License. |
|
19 |
*/ |
|
20 |
||
21 |
package com.sun.org.apache.xerces.internal.impl; |
|
22 |
||
23 |
||
24 |
||
25 |
import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; |
|
26 |
import com.sun.org.apache.xerces.internal.impl.io.UCSReader; |
|
27 |
import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; |
|
28 |
import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; |
|
29 |
import com.sun.org.apache.xerces.internal.util.EncodingMap; |
|
30 |
import com.sun.org.apache.xerces.internal.util.SymbolTable; |
|
31 |
import com.sun.org.apache.xerces.internal.util.XMLChar; |
|
32 |
import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
33 |
import com.sun.org.apache.xerces.internal.xni.*; |
12005 | 34 |
import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; |
35 |
import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
36 |
import com.sun.xml.internal.stream.Entity; |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
37 |
import com.sun.xml.internal.stream.XMLBufferListener; |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
38 |
import java.io.EOFException; |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
39 |
import java.io.IOException; |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
40 |
import java.io.InputStream; |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
41 |
import java.io.InputStreamReader; |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
42 |
import java.io.Reader; |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
43 |
import java.util.Locale; |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
44 |
import java.util.Vector; |
12005 | 45 |
|
46 |
/** |
|
47 |
* Implements the entity scanner methods. |
|
48 |
* |
|
49 |
* @author Neeraj Bajaj, Sun Microsystems |
|
50 |
* @author Andy Clark, IBM |
|
51 |
* @author Arnaud Le Hors, IBM |
|
52 |
* @author K.Venugopal Sun Microsystems |
|
53 |
* |
|
54 |
*/ |
|
55 |
public class XMLEntityScanner implements XMLLocator { |
|
56 |
||
57 |
||
58 |
protected Entity.ScannedEntity fCurrentEntity = null ; |
|
59 |
protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE; |
|
60 |
||
61 |
protected XMLEntityManager fEntityManager ; |
|
62 |
||
63 |
/** Debug switching readers for encodings. */ |
|
64 |
private static final boolean DEBUG_ENCODINGS = false; |
|
65 |
/** Listeners which should know when load is being called */ |
|
66 |
private Vector listeners = new Vector(); |
|
67 |
||
14939
2e992d2acc8b
8003260: [findbug] some fields should be package protected
joehw
parents:
12458
diff
changeset
|
68 |
private static final boolean [] VALID_NAMES = new boolean[127]; |
12005 | 69 |
|
70 |
/** |
|
71 |
* Debug printing of buffer. This debugging flag works best when you |
|
72 |
* resize the DEFAULT_BUFFER_SIZE down to something reasonable like |
|
73 |
* 64 characters. |
|
74 |
*/ |
|
75 |
private static final boolean DEBUG_BUFFER = false; |
|
76 |
private static final boolean DEBUG_SKIP_STRING = false; |
|
77 |
/** |
|
78 |
* To signal the end of the document entity, this exception will be thrown. |
|
79 |
*/ |
|
80 |
private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() { |
|
81 |
private static final long serialVersionUID = 980337771224675268L; |
|
82 |
public Throwable fillInStackTrace() { |
|
83 |
return this; |
|
84 |
} |
|
85 |
}; |
|
86 |
||
87 |
protected SymbolTable fSymbolTable = null; |
|
88 |
protected XMLErrorReporter fErrorReporter = null; |
|
89 |
int [] whiteSpaceLookup = new int[100]; |
|
90 |
int whiteSpaceLen = 0; |
|
91 |
boolean whiteSpaceInfoNeeded = true; |
|
92 |
||
93 |
/** |
|
94 |
* Allow Java encoding names. This feature identifier is: |
|
95 |
* http://apache.org/xml/features/allow-java-encodings |
|
96 |
*/ |
|
97 |
protected boolean fAllowJavaEncodings; |
|
98 |
||
99 |
//Will be used only during internal subsets. |
|
100 |
//for appending data. |
|
101 |
||
102 |
/** Property identifier: symbol table. */ |
|
103 |
protected static final String SYMBOL_TABLE = |
|
104 |
Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY; |
|
105 |
||
106 |
/** Property identifier: error reporter. */ |
|
107 |
protected static final String ERROR_REPORTER = |
|
108 |
Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY; |
|
109 |
||
110 |
/** Feature identifier: allow Java encodings. */ |
|
111 |
protected static final String ALLOW_JAVA_ENCODINGS = |
|
112 |
Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE; |
|
113 |
||
114 |
protected PropertyManager fPropertyManager = null ; |
|
115 |
||
116 |
boolean isExternal = false; |
|
117 |
static {
    // Mark the ASCII characters accepted by the fast path of the name
    // scanners: letters, digits and the punctuation '-', '.', ':' and '_'.
    for (char upper = 'A'; upper <= 'Z'; upper++) {
        VALID_NAMES[upper] = true;
    }
    for (char lower = 'a'; lower <= 'z'; lower++) {
        VALID_NAMES[lower] = true;
    }
    for (char digit = '0'; digit <= '9'; digit++) {
        VALID_NAMES[digit] = true;
    }
    VALID_NAMES['-'] = true; // 45
    VALID_NAMES['.'] = true; // 46
    VALID_NAMES[':'] = true; // 58
    VALID_NAMES['_'] = true; // 95
}
|
12458 | 133 |
// SAPJVM: Remember, that the XML version has explicitly been set, |
134 |
// so that XMLStreamReader.getVersion() can find that out. |
|
135 |
boolean xmlVersionSetExplicitly = false; |
|
12005 | 136 |
// |
137 |
// Constructors |
|
138 |
// |
|
139 |
||
140 |
/** Default constructor. */ |
|
141 |
public XMLEntityScanner() {
    // Intentionally empty: every field keeps its declared initial value.
} // <init>()
|
143 |
||
144 |
||
145 |
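/** Creates an entity scanner that uses the given entity manager and is initialized from the given property manager. |

146 |
* NOTE(review): this constructor is public, although this comment previously described it as private and referred callers to getEntityScanner() or getEntityScanner(ScannedEntity scannedEntity). |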
|
147 |
* @see getEntityScanner() |
|
148 |
* @see getEntityScanner(ScannedEntity) |
|
149 |
*/ |
|
150 |
public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) {
    this.fEntityManager = entityManager;
    // Fetch the symbol table and error reporter from the property manager
    // and clear any scanning state.
    this.reset(propertyManager);
} // <init>(PropertyManager, XMLEntityManager)
|
154 |
||
155 |
||
156 |
// set buffer size: |
|
157 |
public final void setBufferSize(int size) { |
|
158 |
// REVISIT: Buffer size passed to entity scanner |
|
159 |
// was not being kept in synch with the actual size |
|
160 |
// of the buffers in each scanned entity. If any |
|
161 |
// of the buffers were actually resized, it was possible |
|
162 |
// that the parser would throw an ArrayIndexOutOfBoundsException |
|
163 |
// for documents which contained names which are longer than |
|
164 |
// the current buffer size. Conceivably the buffer size passed |
|
165 |
// to entity scanner could be used to determine a minimum size |
|
166 |
// for resizing, if doubling its size is smaller than this |
|
167 |
// minimum. -- mrglavas |
|
168 |
fBufferSize = size; |
|
169 |
} |
|
170 |
||
171 |
/** |
|
172 |
* Resets the components. |
|
173 |
*/ |
|
174 |
public void reset(PropertyManager propertyManager){ |
|
175 |
fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ; |
|
176 |
fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ; |
|
177 |
fCurrentEntity = null; |
|
178 |
whiteSpaceLen = 0; |
|
179 |
whiteSpaceInfoNeeded = true; |
|
180 |
listeners.clear(); |
|
181 |
} |
|
182 |
||
183 |
/** |
|
184 |
* Resets the component. The component can query the component manager |
|
185 |
* about any features and properties that affect the operation of the |
|
186 |
* component. |
|
187 |
* |
|
188 |
* @param componentManager The component manager. |
|
189 |
* |
|
190 |
* @throws XMLConfigurationException Thrown by component on initialization error. |

191 |
* For example, if a feature or property is |

192 |
* required for the operation of the component, the |

193 |
* component manager may throw an |

194 |
* XMLConfigurationException reporting that the feature |

195 |
* or property is not recognized or not supported. |
|
196 |
*/ |
|
197 |
public void reset(XMLComponentManager componentManager) |
|
198 |
throws XMLConfigurationException { |
|
199 |
||
200 |
//System.out.println(" this is being called"); |
|
201 |
// xerces features |
|
202 |
fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); |
|
203 |
||
204 |
//xerces properties |
|
205 |
fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); |
|
206 |
fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); |
|
207 |
fCurrentEntity = null; |
|
208 |
whiteSpaceLen = 0; |
|
209 |
whiteSpaceInfoNeeded = true; |
|
210 |
listeners.clear(); |
|
211 |
} // reset(XMLComponentManager) |
|
212 |
||
213 |
||
214 |
public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager, |
|
215 |
XMLErrorReporter reporter) { |
|
216 |
fCurrentEntity = null; |
|
217 |
fSymbolTable = symbolTable; |
|
218 |
fEntityManager = entityManager; |
|
219 |
fErrorReporter = reporter; |
|
220 |
} |
|
221 |
||
222 |
/** |
|
223 |
* Returns the XML version of the current entity. This will normally be the |
|
224 |
* value from the XML or text declaration or defaulted by the parser. Note that |
|
225 |
* that this value may be different than the version of the processing rules |
|
226 |
* applied to the current entity. For instance, an XML 1.1 document may refer to |
|
227 |
* XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire |
|
228 |
* document. Also note that, for a given entity, this value can only be considered |
|
229 |
* final once the XML or text declaration has been read or once it has been |
|
230 |
* determined that there is no such declaration. |
|
231 |
*/ |
|
232 |
public final String getXMLVersion() { |
|
233 |
if (fCurrentEntity != null) { |
|
234 |
return fCurrentEntity.xmlVersion; |
|
235 |
} |
|
236 |
return null; |
|
237 |
} // getXMLVersion():String |
|
238 |
||
239 |
/** |
|
240 |
* Sets the XML version. This method is used by the |
|
241 |
* scanners to report the value of the version pseudo-attribute |
|
242 |
* in an XML or text declaration. |
|
243 |
* |
|
244 |
* @param xmlVersion the XML version of the current entity |
|
245 |
*/ |
|
246 |
public final void setXMLVersion(String xmlVersion) { |
|
12458 | 247 |
xmlVersionSetExplicitly = true; // SAPJVM |
12005 | 248 |
fCurrentEntity.xmlVersion = xmlVersion; |
249 |
} // setXMLVersion(String) |
|
250 |
||
251 |
||
252 |
/** set the instance of current scanned entity. |
|
253 |
* @param scannedEntity the entity to make current; may be null |
|
254 |
*/ |
|
255 |
||
256 |
public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){ |
|
257 |
fCurrentEntity = scannedEntity ; |
|
258 |
if(fCurrentEntity != null){ |
|
259 |
isExternal = fCurrentEntity.isExternal(); |
|
260 |
if(DEBUG_BUFFER) |
|
261 |
System.out.println("Current Entity is "+scannedEntity.name); |
|
262 |
} |
|
263 |
} |
|
264 |
||
265 |
public Entity.ScannedEntity getCurrentEntity(){ |
|
266 |
return fCurrentEntity ; |
|
267 |
} |
|
268 |
// |
|
269 |
// XMLEntityReader methods |
|
270 |
// |
|
271 |
||
272 |
/** |
|
273 |
* Returns the base system identifier of the currently scanned |
|
274 |
* entity, or null if none is available. |
|
275 |
*/ |
|
276 |
public final String getBaseSystemId() { |
|
277 |
return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; |
|
278 |
} // getBaseSystemId():String |
|
279 |
||
280 |
/** |
|
281 |
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String) |
|
282 |
*/ |
|
283 |
public void setBaseSystemId(String systemId) { |
|
284 |
//no-op |
|
285 |
} |
|
286 |
||
287 |
///////////// Locator methods start. |
|
288 |
public final int getLineNumber(){ |
|
289 |
//if the entity is closed, we should return -1 |
|
290 |
//xxx at first place why such call should be there... |
|
291 |
return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ; |
|
292 |
} |
|
293 |
||
294 |
/** |
|
295 |
* @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int) |
|
296 |
*/ |
|
297 |
public void setLineNumber(int line) { |
|
298 |
//no-op |
|
299 |
} |
|
300 |
||
301 |
||
302 |
public final int getColumnNumber(){ |
|
303 |
//if the entity is closed, we should return -1 |
|
304 |
//xxx at first place why such call should be there... |
|
305 |
return fCurrentEntity != null ? fCurrentEntity.columnNumber : -1 ; |
|
306 |
} |
|
307 |
||
308 |
/** |
|
309 |
* @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int) |
|
310 |
*/ |
|
311 |
public void setColumnNumber(int col) { |
|
312 |
// no-op |
|
313 |
} |
|
314 |
||
315 |
||
316 |
public final int getCharacterOffset(){ |
|
317 |
return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ; |
|
318 |
} |
|
319 |
||
320 |
/** Returns the expanded system identifier. */ |
|
321 |
public final String getExpandedSystemId() { |
|
322 |
return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; |
|
323 |
} |
|
324 |
||
325 |
/** |
|
326 |
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String) |
|
327 |
*/ |
|
328 |
public void setExpandedSystemId(String systemId) { |
|
329 |
//no-op |
|
330 |
} |
|
331 |
||
332 |
/** Returns the literal system identifier. */ |
|
333 |
public final String getLiteralSystemId() { |
|
334 |
return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null; |
|
335 |
} |
|
336 |
||
337 |
/** |
|
338 |
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String) |
|
339 |
*/ |
|
340 |
public void setLiteralSystemId(String systemId) { |
|
341 |
//no-op |
|
342 |
} |
|
343 |
||
344 |
/** Returns the public identifier. */ |
|
345 |
public final String getPublicId() { |
|
346 |
return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null; |
|
347 |
} |
|
348 |
||
349 |
/** |
|
350 |
* @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String) |
|
351 |
*/ |
|
352 |
public void setPublicId(String publicId) { |
|
353 |
//no-op |
|
354 |
} |
|
355 |
||
356 |
///////////////// Locator methods finished. |
|
357 |
||
358 |
/** the version of the current entity being scanned */ |
|
359 |
public void setVersion(String version){ |
|
360 |
fCurrentEntity.version = version; |
|
361 |
} |
|
362 |
||
363 |
public String getVersion(){ |
|
364 |
if (fCurrentEntity != null) |
|
365 |
return fCurrentEntity.version ; |
|
366 |
return null; |
|
367 |
} |
|
368 |
||
369 |
/** |
|
370 |
* Returns the encoding of the current entity. |
|
371 |
* Note that, for a given entity, this value can only be |
|
372 |
* considered final once the encoding declaration has been read (or once it |
|
373 |
* has been determined that there is no such declaration) since, no encoding |
|
374 |
* having been specified on the XMLInputSource, the parser |
|
375 |
* will make an initial "guess" which could be in error. |
|
376 |
*/ |
|
377 |
public final String getEncoding() { |
|
378 |
if (fCurrentEntity != null) { |
|
379 |
return fCurrentEntity.encoding; |
|
380 |
} |
|
381 |
return null; |
|
382 |
} // getEncoding():String |
|
383 |
||
384 |
/** |
|
385 |
* Sets the encoding of the scanner. This method is used by the |
|
386 |
* scanners if the XMLDecl or TextDecl line contains an encoding |
|
387 |
* pseudo-attribute. |
|
388 |
* <p> |
|
389 |
* <strong>Note:</strong> The underlying character reader on the |
|
390 |
* current entity will be changed to accommodate the new encoding. |
|
391 |
* However, the new encoding is ignored if the current reader was |
|
392 |
* not constructed from an input stream (e.g. an external entity |
|
393 |
* that is resolved directly to the appropriate java.io.Reader |
|
394 |
* object). |
|
395 |
* |
|
396 |
* @param encoding The IANA encoding name of the new encoding. |
|
397 |
* |
|
398 |
* @throws IOException Thrown if the new encoding is not supported. |
|
399 |
* |
|
400 |
* @see com.sun.org.apache.xerces.internal.util.EncodingMap |
|
401 |
*/ |
|
402 |
public final void setEncoding(String encoding) throws IOException { |
|
403 |
||
404 |
if (DEBUG_ENCODINGS) { |
|
405 |
System.out.println("$$$ setEncoding: "+encoding); |
|
406 |
} |
|
407 |
||
408 |
if (fCurrentEntity.stream != null) { |
|
409 |
// if the encoding is the same, don't change the reader and |
|
410 |
// re-use the original reader used by the OneCharReader |
|
411 |
// NOTE: Besides saving an object, this overcomes deficiencies |
|
412 |
// in the UTF-16 reader supplied with the standard Java |
|
413 |
// distribution (up to and including 1.3). The UTF-16 |
|
414 |
// decoder buffers 8K blocks even when only asked to read |
|
415 |
// a single char! -Ac |
|
416 |
if (fCurrentEntity.encoding == null || |
|
417 |
!fCurrentEntity.encoding.equals(encoding)) { |
|
418 |
// UTF-16 is a bit of a special case. If the encoding is UTF-16, |
|
419 |
// and we know the endian-ness, we shouldn't change readers. |
|
420 |
// If it's ISO-10646-UCS-(2|4), then we'll have to deduce |
|
421 |
// the endian-ness from the encoding we presently have. |
|
422 |
if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) { |
|
423 |
String ENCODING = encoding.toUpperCase(Locale.ENGLISH); |
|
424 |
if(ENCODING.equals("UTF-16")) return; |
|
425 |
if(ENCODING.equals("ISO-10646-UCS-4")) { |
|
426 |
if(fCurrentEntity.encoding.equals("UTF-16BE")) { |
|
427 |
fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE); |
|
428 |
} else { |
|
429 |
fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE); |
|
430 |
} |
|
431 |
return; |
|
432 |
} |
|
433 |
if(ENCODING.equals("ISO-10646-UCS-2")) { |
|
434 |
if(fCurrentEntity.encoding.equals("UTF-16BE")) { |
|
435 |
fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE); |
|
436 |
} else { |
|
437 |
fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE); |
|
438 |
} |
|
439 |
return; |
|
440 |
} |
|
441 |
} |
|
442 |
// wrap a new reader around the input stream, changing |
|
443 |
// the encoding |
|
444 |
if (DEBUG_ENCODINGS) { |
|
445 |
System.out.println("$$$ creating new reader from stream: "+ |
|
446 |
fCurrentEntity.stream); |
|
447 |
} |
|
448 |
//fCurrentEntity.stream.reset(); |
|
449 |
fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null); |
|
450 |
fCurrentEntity.encoding = encoding; |
|
451 |
||
452 |
} else { |
|
453 |
if (DEBUG_ENCODINGS) |
|
454 |
System.out.println("$$$ reusing old reader on stream"); |
|
455 |
} |
|
456 |
} |
|
457 |
||
458 |
} // setEncoding(String) |
|
459 |
||
460 |
/** Returns true if the current entity being scanned is external. */ |
|
461 |
public final boolean isExternal() { |
|
462 |
return fCurrentEntity.isExternal(); |
|
463 |
} // isExternal():boolean |
|
464 |
||
465 |
public int getChar(int relative) throws IOException{ |
|
466 |
if(arrangeCapacity(relative + 1, false)){ |
|
467 |
return fCurrentEntity.ch[fCurrentEntity.position + relative]; |
|
468 |
}else{ |
|
469 |
return -1; |
|
470 |
} |
|
471 |
}//getChar() |
|
472 |
||
473 |
/** |
|
474 |
* Returns the next character on the input. |
|
475 |
* <p> |
|
476 |
* <strong>Note:</strong> The character is <em>not</em> consumed. |
|
477 |
* |
|
478 |
* @throws IOException Thrown if i/o error occurs. |
|
479 |
* @throws EOFException Thrown on end of file. |
|
480 |
*/ |
|
481 |
public int peekChar() throws IOException { |
|
482 |
if (DEBUG_BUFFER) { |
|
483 |
System.out.print("(peekChar: "); |
|
484 |
print(); |
|
485 |
System.out.println(); |
|
486 |
} |
|
487 |
||
488 |
// load more characters, if needed |
|
489 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
490 |
load(0, true, true); |
12005 | 491 |
} |
492 |
||
493 |
// peek at character |
|
494 |
int c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
495 |
||
496 |
// return peeked character |
|
497 |
if (DEBUG_BUFFER) { |
|
498 |
System.out.print(")peekChar: "); |
|
499 |
print(); |
|
500 |
if (isExternal) { |
|
501 |
System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'"); |
|
502 |
} else { |
|
503 |
System.out.println(" -> '"+(char)c+"'"); |
|
504 |
} |
|
505 |
} |
|
506 |
if (isExternal) { |
|
507 |
return c != '\r' ? c : '\n'; |
|
508 |
} else { |
|
509 |
return c; |
|
510 |
} |
|
511 |
||
512 |
} // peekChar():int |
|
513 |
||
514 |
/** |
|
515 |
* Returns the next character on the input. |
|
516 |
* <p> |
|
517 |
* <strong>Note:</strong> The character is consumed. |
|
518 |
* |
|
519 |
* @throws IOException Thrown if i/o error occurs. |
|
520 |
* @throws EOFException Thrown on end of file. |
|
521 |
*/ |
|
522 |
public int scanChar() throws IOException { |
|
523 |
if (DEBUG_BUFFER) { |
|
524 |
System.out.print("(scanChar: "); |
|
525 |
print(); |
|
526 |
System.out.println(); |
|
527 |
} |
|
528 |
||
529 |
// load more characters, if needed |
|
530 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
531 |
load(0, true, true); |
12005 | 532 |
} |
533 |
||
534 |
// scan character |
|
535 |
int c = fCurrentEntity.ch[fCurrentEntity.position++]; |
|
536 |
if (c == '\n' || |
|
537 |
(c == '\r' && isExternal)) { |
|
538 |
fCurrentEntity.lineNumber++; |
|
539 |
fCurrentEntity.columnNumber = 1; |
|
540 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
541 |
fCurrentEntity.ch[0] = (char)c; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
542 |
load(1, false, true); |
12005 | 543 |
} |
544 |
if (c == '\r' && isExternal) { |
|
545 |
if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') { |
|
546 |
fCurrentEntity.position--; |
|
547 |
} |
|
548 |
c = '\n'; |
|
549 |
} |
|
550 |
} |
|
551 |
||
552 |
// return character that was scanned |
|
553 |
if (DEBUG_BUFFER) { |
|
554 |
System.out.print(")scanChar: "); |
|
555 |
print(); |
|
556 |
System.out.println(" -> '"+(char)c+"'"); |
|
557 |
} |
|
558 |
fCurrentEntity.columnNumber++; |
|
559 |
return c; |
|
560 |
||
561 |
} // scanChar():int |
|
562 |
||
563 |
/** |
|
564 |
* Returns a string matching the NMTOKEN production appearing immediately |
|
565 |
* on the input as a symbol, or null if no NMTOKEN string is present. |
|
566 |
* <p> |
|
567 |
* <strong>Note:</strong> The NMTOKEN characters are consumed. |
|
568 |
* <p> |
|
569 |
* <strong>Note:</strong> The string returned must be a symbol. The |
|
570 |
* SymbolTable can be used for this purpose. |
|
571 |
* |
|
572 |
* @throws IOException Thrown if i/o error occurs. |
|
573 |
* @throws EOFException Thrown on end of file. |
|
574 |
* |
|
575 |
* @see com.sun.org.apache.xerces.internal.util.SymbolTable |
|
576 |
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isName |
|
577 |
*/ |
|
578 |
public String scanNmtoken() throws IOException { |
|
579 |
if (DEBUG_BUFFER) { |
|
580 |
System.out.print("(scanNmtoken: "); |
|
581 |
print(); |
|
582 |
System.out.println(); |
|
583 |
} |
|
584 |
||
585 |
// load more characters, if needed |
|
586 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
587 |
load(0, true, true); |
12005 | 588 |
} |
589 |
||
590 |
// scan nmtoken |
|
591 |
int offset = fCurrentEntity.position; |
|
592 |
boolean vc = false; |
|
593 |
char c; |
|
594 |
while (true){ |
|
595 |
//while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) { |
|
596 |
c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
597 |
if(c < 127){ |
|
598 |
vc = VALID_NAMES[c]; |
|
599 |
}else{ |
|
600 |
vc = XMLChar.isName(c); |
|
601 |
} |
|
602 |
if(!vc)break; |
|
603 |
||
604 |
if (++fCurrentEntity.position == fCurrentEntity.count) { |
|
605 |
int length = fCurrentEntity.position - offset; |
|
606 |
invokeListeners(length); |
|
607 |
if (length == fCurrentEntity.fBufferSize) { |
|
608 |
// bad luck we have to resize our buffer |
|
609 |
char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; |
|
610 |
System.arraycopy(fCurrentEntity.ch, offset, |
|
611 |
tmp, 0, length); |
|
612 |
fCurrentEntity.ch = tmp; |
|
613 |
fCurrentEntity.fBufferSize *= 2; |
|
614 |
} else { |
|
615 |
System.arraycopy(fCurrentEntity.ch, offset, |
|
616 |
fCurrentEntity.ch, 0, length); |
|
617 |
} |
|
618 |
offset = 0; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
619 |
if (load(length, false, false)) { |
12005 | 620 |
break; |
621 |
} |
|
622 |
} |
|
623 |
} |
|
624 |
int length = fCurrentEntity.position - offset; |
|
625 |
fCurrentEntity.columnNumber += length; |
|
626 |
||
627 |
// return nmtoken |
|
628 |
String symbol = null; |
|
629 |
if (length > 0) { |
|
630 |
symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); |
|
631 |
} |
|
632 |
if (DEBUG_BUFFER) { |
|
633 |
System.out.print(")scanNmtoken: "); |
|
634 |
print(); |
|
635 |
System.out.println(" -> "+String.valueOf(symbol)); |
|
636 |
} |
|
637 |
return symbol; |
|
638 |
||
639 |
} // scanNmtoken():String |
|
640 |
||
641 |
/** |
|
642 |
* Returns a string matching the Name production appearing immediately |
|
643 |
* on the input as a symbol, or null if no Name string is present. |
|
644 |
* <p> |
|
645 |
* <strong>Note:</strong> The Name characters are consumed. |
|
646 |
* <p> |
|
647 |
* <strong>Note:</strong> The string returned must be a symbol. The |
|
648 |
* SymbolTable can be used for this purpose. |
|
649 |
* |
|
650 |
* @throws IOException Thrown if i/o error occurs. |
|
651 |
* @throws EOFException Thrown on end of file. |
|
652 |
* |
|
653 |
* @see com.sun.org.apache.xerces.internal.util.SymbolTable |
|
654 |
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isName |
|
655 |
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart |
|
656 |
*/ |
|
657 |
public String scanName() throws IOException { |
|
658 |
if (DEBUG_BUFFER) { |
|
659 |
System.out.print("(scanName: "); |
|
660 |
print(); |
|
661 |
System.out.println(); |
|
662 |
} |
|
663 |
||
664 |
// load more characters, if needed |
|
665 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
666 |
load(0, true, true); |
12005 | 667 |
} |
668 |
||
669 |
// scan name |
|
670 |
int offset = fCurrentEntity.position; |
|
671 |
if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { |
|
672 |
if (++fCurrentEntity.position == fCurrentEntity.count) { |
|
673 |
fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; |
|
674 |
offset = 0; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
675 |
if (load(1, false, true)) { |
12005 | 676 |
fCurrentEntity.columnNumber++; |
677 |
String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); |
|
678 |
||
679 |
if (DEBUG_BUFFER) { |
|
680 |
System.out.print(")scanName: "); |
|
681 |
print(); |
|
682 |
System.out.println(" -> "+String.valueOf(symbol)); |
|
683 |
} |
|
684 |
return symbol; |
|
685 |
} |
|
686 |
} |
|
687 |
boolean vc =false; |
|
688 |
while (true ){ |
|
689 |
//XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; |
|
690 |
char c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
691 |
if(c < 127){ |
|
692 |
vc = VALID_NAMES[c]; |
|
693 |
}else{ |
|
694 |
vc = XMLChar.isName(c); |
|
695 |
} |
|
696 |
if(!vc)break; |
|
697 |
if (++fCurrentEntity.position == fCurrentEntity.count) { |
|
698 |
int length = fCurrentEntity.position - offset; |
|
699 |
invokeListeners(length); |
|
700 |
if (length == fCurrentEntity.fBufferSize) { |
|
701 |
// bad luck we have to resize our buffer |
|
702 |
char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; |
|
703 |
System.arraycopy(fCurrentEntity.ch, offset, |
|
704 |
tmp, 0, length); |
|
705 |
fCurrentEntity.ch = tmp; |
|
706 |
fCurrentEntity.fBufferSize *= 2; |
|
707 |
} else { |
|
708 |
System.arraycopy(fCurrentEntity.ch, offset, |
|
709 |
fCurrentEntity.ch, 0, length); |
|
710 |
} |
|
711 |
offset = 0; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
712 |
if (load(length, false, false)) { |
12005 | 713 |
break; |
714 |
} |
|
715 |
} |
|
716 |
} |
|
717 |
} |
|
718 |
int length = fCurrentEntity.position - offset; |
|
719 |
fCurrentEntity.columnNumber += length; |
|
720 |
||
721 |
// return name |
|
722 |
String symbol; |
|
723 |
if (length > 0) { |
|
724 |
symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); |
|
725 |
} else |
|
726 |
symbol = null; |
|
727 |
if (DEBUG_BUFFER) { |
|
728 |
System.out.print(")scanName: "); |
|
729 |
print(); |
|
730 |
System.out.println(" -> "+String.valueOf(symbol)); |
|
731 |
} |
|
732 |
return symbol; |
|
733 |
||
734 |
} // scanName():String |
|
735 |
||
736 |
/** |
|
737 |
* Scans a qualified name from the input, setting the fields of the |
|
738 |
* QName structure appropriately. |
|
739 |
* <p> |
|
740 |
* <strong>Note:</strong> The qualified name characters are consumed. |
|
741 |
* <p> |
|
742 |
* <strong>Note:</strong> The strings used to set the values of the |
|
743 |
* QName structure must be symbols. The SymbolTable can be used for |
|
744 |
* this purpose. |
|
745 |
* |
|
746 |
* @param qname The qualified name structure to fill. |
|
747 |
* |
|
748 |
* @return Returns true if a qualified name appeared immediately on |
|
749 |
* the input and was scanned, false otherwise. |
|
750 |
* |
|
751 |
* @throws IOException Thrown if i/o error occurs. |
|
752 |
* @throws EOFException Thrown on end of file. |
|
753 |
* |
|
754 |
* @see com.sun.org.apache.xerces.internal.util.SymbolTable |
|
755 |
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isName |
|
756 |
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart |
|
757 |
*/ |
|
758 |
public boolean scanQName(QName qname) throws IOException { |
|
759 |
if (DEBUG_BUFFER) { |
|
760 |
System.out.print("(scanQName, "+qname+": "); |
|
761 |
print(); |
|
762 |
System.out.println(); |
|
763 |
} |
|
764 |
||
765 |
// load more characters, if needed |
|
766 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
767 |
load(0, true, true); |
12005 | 768 |
} |
769 |
||
770 |
// scan qualified name |
|
771 |
int offset = fCurrentEntity.position; |
|
772 |
||
773 |
//making a check if if the specified character is a valid name start character |
|
774 |
//as defined by production [5] in the XML 1.0 specification. |
|
775 |
// Name ::= (Letter | '_' | ':') (NameChar)* |
|
776 |
||
777 |
if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { |
|
778 |
if (++fCurrentEntity.position == fCurrentEntity.count) { |
|
779 |
fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; |
|
780 |
offset = 0; |
|
781 |
||
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
782 |
if (load(1, false, true)) { |
12005 | 783 |
fCurrentEntity.columnNumber++; |
784 |
//adding into symbol table. |
|
785 |
//XXX We are trying to add single character in SymbolTable?????? |
|
786 |
String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); |
|
787 |
qname.setValues(null, name, name, null); |
|
788 |
if (DEBUG_BUFFER) { |
|
789 |
System.out.print(")scanQName, "+qname+": "); |
|
790 |
print(); |
|
791 |
System.out.println(" -> true"); |
|
792 |
} |
|
793 |
return true; |
|
794 |
} |
|
795 |
} |
|
796 |
int index = -1; |
|
797 |
boolean vc = false; |
|
798 |
while ( true){ |
|
799 |
||
800 |
//XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; |
|
801 |
char c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
802 |
if(c < 127){ |
|
803 |
vc = VALID_NAMES[c]; |
|
804 |
}else{ |
|
805 |
vc = XMLChar.isName(c); |
|
806 |
} |
|
807 |
if(!vc)break; |
|
808 |
if (c == ':') { |
|
809 |
if (index != -1) { |
|
810 |
break; |
|
811 |
} |
|
812 |
index = fCurrentEntity.position; |
|
813 |
} |
|
814 |
if (++fCurrentEntity.position == fCurrentEntity.count) { |
|
815 |
int length = fCurrentEntity.position - offset; |
|
816 |
invokeListeners(length); |
|
817 |
if (length == fCurrentEntity.fBufferSize) { |
|
818 |
// bad luck we have to resize our buffer |
|
819 |
char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; |
|
820 |
System.arraycopy(fCurrentEntity.ch, offset, |
|
821 |
tmp, 0, length); |
|
822 |
fCurrentEntity.ch = tmp; |
|
823 |
fCurrentEntity.fBufferSize *= 2; |
|
824 |
} else { |
|
825 |
System.arraycopy(fCurrentEntity.ch, offset, |
|
826 |
fCurrentEntity.ch, 0, length); |
|
827 |
} |
|
828 |
if (index != -1) { |
|
829 |
index = index - offset; |
|
830 |
} |
|
831 |
offset = 0; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
832 |
if (load(length, false, false)) { |
12005 | 833 |
break; |
834 |
} |
|
835 |
} |
|
836 |
} |
|
837 |
int length = fCurrentEntity.position - offset; |
|
838 |
fCurrentEntity.columnNumber += length; |
|
839 |
if (length > 0) { |
|
840 |
String prefix = null; |
|
841 |
String localpart = null; |
|
842 |
String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch, |
|
843 |
offset, length); |
|
844 |
||
845 |
if (index != -1) { |
|
846 |
int prefixLength = index - offset; |
|
847 |
prefix = fSymbolTable.addSymbol(fCurrentEntity.ch, |
|
848 |
offset, prefixLength); |
|
849 |
int len = length - prefixLength - 1; |
|
850 |
localpart = fSymbolTable.addSymbol(fCurrentEntity.ch, |
|
851 |
index + 1, len); |
|
852 |
||
853 |
} else { |
|
854 |
localpart = rawname; |
|
855 |
} |
|
856 |
qname.setValues(prefix, localpart, rawname, null); |
|
857 |
if (DEBUG_BUFFER) { |
|
858 |
System.out.print(")scanQName, "+qname+": "); |
|
859 |
print(); |
|
860 |
System.out.println(" -> true"); |
|
861 |
} |
|
862 |
return true; |
|
863 |
} |
|
864 |
} |
|
865 |
||
866 |
// no qualified name found |
|
867 |
if (DEBUG_BUFFER) { |
|
868 |
System.out.print(")scanQName, "+qname+": "); |
|
869 |
print(); |
|
870 |
System.out.println(" -> false"); |
|
871 |
} |
|
872 |
return false; |
|
873 |
||
874 |
} // scanQName(QName):boolean |
|
875 |
||
876 |
/** |
|
877 |
* CHANGED: |
|
878 |
* Scans a range of parsed character data, This function appends the character data to |
|
879 |
* the supplied buffer. |
|
880 |
* <p> |
|
881 |
* <strong>Note:</strong> The characters are consumed. |
|
882 |
* <p> |
|
883 |
* <strong>Note:</strong> This method does not guarantee to return |
|
884 |
* the longest run of parsed character data. This method may return |
|
885 |
* before markup due to reaching the end of the input buffer or any |
|
886 |
* other reason. |
|
887 |
* <p> |
|
888 |
* |
|
889 |
* @param content The content structure to fill. |
|
890 |
* |
|
891 |
* @return Returns the next character on the input, if known. This |
|
892 |
* value may be -1 but this does <em>note</em> designate |
|
893 |
* end of file. |
|
894 |
* |
|
895 |
* @throws IOException Thrown if i/o error occurs. |
|
896 |
* @throws EOFException Thrown on end of file. |
|
897 |
*/ |
|
898 |
public int scanContent(XMLString content) throws IOException { |
|
899 |
if (DEBUG_BUFFER) { |
|
900 |
System.out.print("(scanContent: "); |
|
901 |
print(); |
|
902 |
System.out.println(); |
|
903 |
} |
|
904 |
||
905 |
// load more characters, if needed |
|
906 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
907 |
load(0, true, true); |
12005 | 908 |
} else if (fCurrentEntity.position == fCurrentEntity.count - 1) { |
909 |
fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
910 |
load(1, false, true); |
12005 | 911 |
fCurrentEntity.position = 0; |
912 |
} |
|
913 |
||
914 |
// normalize newlines |
|
915 |
int offset = fCurrentEntity.position; |
|
916 |
int c = fCurrentEntity.ch[offset]; |
|
917 |
int newlines = 0; |
|
918 |
if (c == '\n' || (c == '\r' && isExternal)) { |
|
919 |
if (DEBUG_BUFFER) { |
|
920 |
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); |
|
921 |
print(); |
|
922 |
System.out.println(); |
|
923 |
} |
|
924 |
do { |
|
925 |
c = fCurrentEntity.ch[fCurrentEntity.position++]; |
|
926 |
if (c == '\r' && isExternal) { |
|
927 |
newlines++; |
|
928 |
fCurrentEntity.lineNumber++; |
|
929 |
fCurrentEntity.columnNumber = 1; |
|
930 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
931 |
offset = 0; |
|
932 |
fCurrentEntity.position = newlines; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
933 |
if (load(newlines, false, true)) { |
12005 | 934 |
break; |
935 |
} |
|
936 |
} |
|
937 |
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { |
|
938 |
fCurrentEntity.position++; |
|
939 |
offset++; |
|
940 |
} |
|
941 |
/*** NEWLINE NORMALIZATION ***/ |
|
942 |
else { |
|
943 |
newlines++; |
|
944 |
} |
|
945 |
} else if (c == '\n') { |
|
946 |
newlines++; |
|
947 |
fCurrentEntity.lineNumber++; |
|
948 |
fCurrentEntity.columnNumber = 1; |
|
949 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
950 |
offset = 0; |
|
951 |
fCurrentEntity.position = newlines; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
952 |
if (load(newlines, false, true)) { |
12005 | 953 |
break; |
954 |
} |
|
955 |
} |
|
956 |
} else { |
|
957 |
fCurrentEntity.position--; |
|
958 |
break; |
|
959 |
} |
|
960 |
} while (fCurrentEntity.position < fCurrentEntity.count - 1); |
|
961 |
for (int i = offset; i < fCurrentEntity.position; i++) { |
|
962 |
fCurrentEntity.ch[i] = '\n'; |
|
963 |
} |
|
964 |
int length = fCurrentEntity.position - offset; |
|
965 |
if (fCurrentEntity.position == fCurrentEntity.count - 1) { |
|
966 |
//CHANGED: dont replace the value.. append to the buffer. This gives control to the callee |
|
967 |
//on buffering the data.. |
|
968 |
content.setValues(fCurrentEntity.ch, offset, length); |
|
969 |
//content.append(fCurrentEntity.ch, offset, length); |
|
970 |
if (DEBUG_BUFFER) { |
|
971 |
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); |
|
972 |
print(); |
|
973 |
System.out.println(); |
|
974 |
} |
|
975 |
return -1; |
|
976 |
} |
|
977 |
if (DEBUG_BUFFER) { |
|
978 |
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); |
|
979 |
print(); |
|
980 |
System.out.println(); |
|
981 |
} |
|
982 |
} |
|
983 |
||
984 |
while (fCurrentEntity.position < fCurrentEntity.count) { |
|
985 |
c = fCurrentEntity.ch[fCurrentEntity.position++]; |
|
986 |
if (!XMLChar.isContent(c)) { |
|
987 |
fCurrentEntity.position--; |
|
988 |
break; |
|
989 |
} |
|
990 |
} |
|
991 |
int length = fCurrentEntity.position - offset; |
|
992 |
fCurrentEntity.columnNumber += length - newlines; |
|
993 |
||
994 |
//CHANGED: dont replace the value.. append to the buffer. This gives control to the callee |
|
995 |
//on buffering the data.. |
|
996 |
content.setValues(fCurrentEntity.ch, offset, length); |
|
997 |
//content.append(fCurrentEntity.ch, offset, length); |
|
998 |
// return next character |
|
999 |
if (fCurrentEntity.position != fCurrentEntity.count) { |
|
1000 |
c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
1001 |
// REVISIT: Does this need to be updated to fix the |
|
1002 |
// #x0D ^#x0A newline normalization problem? -Ac |
|
1003 |
if (c == '\r' && isExternal) { |
|
1004 |
c = '\n'; |
|
1005 |
} |
|
1006 |
} else { |
|
1007 |
c = -1; |
|
1008 |
} |
|
1009 |
if (DEBUG_BUFFER) { |
|
1010 |
System.out.print(")scanContent: "); |
|
1011 |
print(); |
|
1012 |
System.out.println(" -> '"+(char)c+"'"); |
|
1013 |
} |
|
1014 |
return c; |
|
1015 |
||
1016 |
} // scanContent(XMLString):int |
|
1017 |
||
1018 |
/** |
|
1019 |
* Scans a range of attribute value data, setting the fields of the |
|
1020 |
* XMLString structure, appropriately. |
|
1021 |
* <p> |
|
1022 |
* <strong>Note:</strong> The characters are consumed. |
|
1023 |
* <p> |
|
1024 |
* <strong>Note:</strong> This method does not guarantee to return |
|
1025 |
* the longest run of attribute value data. This method may return |
|
1026 |
* before the quote character due to reaching the end of the input |
|
1027 |
* buffer or any other reason. |
|
1028 |
* <p> |
|
1029 |
* <strong>Note:</strong> The fields contained in the XMLString |
|
1030 |
* structure are not guaranteed to remain valid upon subsequent calls |
|
1031 |
* to the entity scanner. Therefore, the caller is responsible for |
|
1032 |
* immediately using the returned character data or making a copy of |
|
1033 |
* the character data. |
|
1034 |
* |
|
1035 |
* @param quote The quote character that signifies the end of the |
|
1036 |
* attribute value data. |
|
1037 |
* @param content The content structure to fill. |
|
1038 |
* |
|
1039 |
* @return Returns the next character on the input, if known. This |
|
1040 |
* value may be -1 but this does <em>note</em> designate |
|
1041 |
* end of file. |
|
1042 |
* |
|
1043 |
* @throws IOException Thrown if i/o error occurs. |
|
1044 |
* @throws EOFException Thrown on end of file. |
|
1045 |
*/ |
|
1046 |
public int scanLiteral(int quote, XMLString content) |
|
1047 |
throws IOException { |
|
1048 |
if (DEBUG_BUFFER) { |
|
1049 |
System.out.print("(scanLiteral, '"+(char)quote+"': "); |
|
1050 |
print(); |
|
1051 |
System.out.println(); |
|
1052 |
} |
|
1053 |
// load more characters, if needed |
|
1054 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1055 |
load(0, true, true); |
12005 | 1056 |
} else if (fCurrentEntity.position == fCurrentEntity.count - 1) { |
1057 |
fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1058 |
load(1, false, true); |
12005 | 1059 |
fCurrentEntity.position = 0; |
1060 |
} |
|
1061 |
||
1062 |
// normalize newlines |
|
1063 |
int offset = fCurrentEntity.position; |
|
1064 |
int c = fCurrentEntity.ch[offset]; |
|
1065 |
int newlines = 0; |
|
1066 |
if(whiteSpaceInfoNeeded) |
|
1067 |
whiteSpaceLen=0; |
|
1068 |
if (c == '\n' || (c == '\r' && isExternal)) { |
|
1069 |
if (DEBUG_BUFFER) { |
|
1070 |
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); |
|
1071 |
print(); |
|
1072 |
System.out.println(); |
|
1073 |
} |
|
1074 |
do { |
|
1075 |
c = fCurrentEntity.ch[fCurrentEntity.position++]; |
|
1076 |
if (c == '\r' && isExternal) { |
|
1077 |
newlines++; |
|
1078 |
fCurrentEntity.lineNumber++; |
|
1079 |
fCurrentEntity.columnNumber = 1; |
|
1080 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
1081 |
offset = 0; |
|
1082 |
fCurrentEntity.position = newlines; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1083 |
if (load(newlines, false, true)) { |
12005 | 1084 |
break; |
1085 |
} |
|
1086 |
} |
|
1087 |
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { |
|
1088 |
fCurrentEntity.position++; |
|
1089 |
offset++; |
|
1090 |
} |
|
1091 |
/*** NEWLINE NORMALIZATION ***/ |
|
1092 |
else { |
|
1093 |
newlines++; |
|
1094 |
} |
|
1095 |
/***/ |
|
1096 |
} else if (c == '\n') { |
|
1097 |
newlines++; |
|
1098 |
fCurrentEntity.lineNumber++; |
|
1099 |
fCurrentEntity.columnNumber = 1; |
|
1100 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
1101 |
offset = 0; |
|
1102 |
fCurrentEntity.position = newlines; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1103 |
if (load(newlines, false, true)) { |
12005 | 1104 |
break; |
1105 |
} |
|
1106 |
} |
|
1107 |
/*** NEWLINE NORMALIZATION *** |
|
1108 |
* if (fCurrentEntity.ch[fCurrentEntity.position] == '\r' |
|
1109 |
* && external) { |
|
1110 |
* fCurrentEntity.position++; |
|
1111 |
* offset++; |
|
1112 |
* } |
|
1113 |
* /***/ |
|
1114 |
} else { |
|
1115 |
fCurrentEntity.position--; |
|
1116 |
break; |
|
1117 |
} |
|
1118 |
} while (fCurrentEntity.position < fCurrentEntity.count - 1); |
|
1119 |
int i=0; |
|
1120 |
for ( i = offset; i < fCurrentEntity.position; i++) { |
|
1121 |
fCurrentEntity.ch[i] = '\n'; |
|
22138
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1122 |
storeWhiteSpace(i); |
12005 | 1123 |
} |
1124 |
||
1125 |
int length = fCurrentEntity.position - offset; |
|
1126 |
if (fCurrentEntity.position == fCurrentEntity.count - 1) { |
|
1127 |
content.setValues(fCurrentEntity.ch, offset, length); |
|
1128 |
if (DEBUG_BUFFER) { |
|
1129 |
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); |
|
1130 |
print(); |
|
1131 |
System.out.println(); |
|
1132 |
} |
|
1133 |
return -1; |
|
1134 |
} |
|
1135 |
if (DEBUG_BUFFER) { |
|
1136 |
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); |
|
1137 |
print(); |
|
1138 |
System.out.println(); |
|
1139 |
} |
|
1140 |
} |
|
1141 |
||
1142 |
// scan literal value |
|
22138
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1143 |
for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) { |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1144 |
c = fCurrentEntity.ch[fCurrentEntity.position]; |
12005 | 1145 |
if ((c == quote && |
22138
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1146 |
(!fCurrentEntity.literal || isExternal)) || |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1147 |
c == '%' || !XMLChar.isContent(c)) { |
12005 | 1148 |
break; |
1149 |
} |
|
22138
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1150 |
if (whiteSpaceInfoNeeded && c == '\t') { |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1151 |
storeWhiteSpace(fCurrentEntity.position); |
12005 | 1152 |
} |
1153 |
} |
|
1154 |
int length = fCurrentEntity.position - offset; |
|
1155 |
fCurrentEntity.columnNumber += length - newlines; |
|
1156 |
content.setValues(fCurrentEntity.ch, offset, length); |
|
1157 |
||
1158 |
// return next character |
|
1159 |
if (fCurrentEntity.position != fCurrentEntity.count) { |
|
1160 |
c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
1161 |
// NOTE: We don't want to accidentally signal the |
|
1162 |
// end of the literal if we're expanding an |
|
1163 |
// entity appearing in the literal. -Ac |
|
1164 |
if (c == quote && fCurrentEntity.literal) { |
|
1165 |
c = -1; |
|
1166 |
} |
|
1167 |
} else { |
|
1168 |
c = -1; |
|
1169 |
} |
|
1170 |
if (DEBUG_BUFFER) { |
|
1171 |
System.out.print(")scanLiteral, '"+(char)quote+"': "); |
|
1172 |
print(); |
|
1173 |
System.out.println(" -> '"+(char)c+"'"); |
|
1174 |
} |
|
1175 |
return c; |
|
1176 |
||
1177 |
} // scanLiteral(int,XMLString):int |
|
1178 |
||
22138
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1179 |
/** |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1180 |
* Save whitespace information. Increase the whitespace buffer by 100 |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1181 |
* when needed. |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1182 |
* |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1183 |
* For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1184 |
* |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1185 |
* @param whiteSpacePos position of a whitespace in the scanner entity buffer |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1186 |
*/ |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1187 |
private void storeWhiteSpace(int whiteSpacePos) { |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1188 |
if (whiteSpaceLen >= whiteSpaceLookup.length) { |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1189 |
int [] tmp = new int[whiteSpaceLookup.length + 100]; |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1190 |
System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length); |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1191 |
whiteSpaceLookup = tmp; |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1192 |
} |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1193 |
|
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1194 |
whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos; |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1195 |
} |
069c98fc4646
8029955: AIOB in XMLEntityScanner.scanLiteral upon parsing literals with > 100 LF chars
joehw
parents:
14939
diff
changeset
|
1196 |
|
12005 | 1197 |
//CHANGED: |
1198 |
/** |
|
1199 |
* Scans a range of character data up to the specified delimiter, |
|
1200 |
* setting the fields of the XMLString structure, appropriately. |
|
1201 |
* <p> |
|
1202 |
* <strong>Note:</strong> The characters are consumed. |
|
1203 |
* <p> |
|
1204 |
* <strong>Note:</strong> This assumes that the length of the delimiter |
|
1205 |
* and that the delimiter contains at least one character. |
|
1206 |
* <p> |
|
1207 |
* <strong>Note:</strong> This method does not guarantee to return |
|
1208 |
* the longest run of character data. This method may return before |
|
1209 |
* the delimiter due to reaching the end of the input buffer or any |
|
1210 |
* other reason. |
|
1211 |
* <p> |
|
1212 |
* @param delimiter The string that signifies the end of the character |
|
1213 |
* data to be scanned. |
|
1214 |
* @param buffer The XMLStringBuffer to fill. |
|
1215 |
* |
|
1216 |
* @return Returns true if there is more data to scan, false otherwise. |
|
1217 |
* |
|
1218 |
* @throws IOException Thrown if i/o error occurs. |
|
1219 |
* @throws EOFException Thrown on end of file. |
|
1220 |
*/ |
|
1221 |
public boolean scanData(String delimiter, XMLStringBuffer buffer) |
|
1222 |
throws IOException { |
|
1223 |
||
1224 |
boolean done = false; |
|
1225 |
int delimLen = delimiter.length(); |
|
1226 |
char charAt0 = delimiter.charAt(0); |
|
1227 |
do { |
|
1228 |
if (DEBUG_BUFFER) { |
|
1229 |
System.out.print("(scanData: "); |
|
1230 |
print(); |
|
1231 |
System.out.println(); |
|
1232 |
} |
|
1233 |
||
1234 |
// load more characters, if needed |
|
1235 |
||
1236 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1237 |
load(0, true, false); |
12005 | 1238 |
} |
1239 |
||
1240 |
boolean bNextEntity = false; |
|
1241 |
||
1242 |
while ((fCurrentEntity.position > fCurrentEntity.count - delimLen) |
|
1243 |
&& (!bNextEntity)) |
|
1244 |
{ |
|
1245 |
System.arraycopy(fCurrentEntity.ch, |
|
1246 |
fCurrentEntity.position, |
|
1247 |
fCurrentEntity.ch, |
|
1248 |
0, |
|
1249 |
fCurrentEntity.count - fCurrentEntity.position); |
|
1250 |
||
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1251 |
bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false); |
12005 | 1252 |
fCurrentEntity.position = 0; |
1253 |
fCurrentEntity.startPosition = 0; |
|
1254 |
} |
|
1255 |
||
1256 |
if (fCurrentEntity.position > fCurrentEntity.count - delimLen) { |
|
1257 |
// something must be wrong with the input: e.g., file ends in an unterminated comment |
|
1258 |
int length = fCurrentEntity.count - fCurrentEntity.position; |
|
1259 |
buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length); |
|
1260 |
fCurrentEntity.columnNumber += fCurrentEntity.count; |
|
1261 |
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition); |
|
1262 |
fCurrentEntity.position = fCurrentEntity.count; |
|
1263 |
fCurrentEntity.startPosition = fCurrentEntity.count; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1264 |
load(0, true, false); |
12005 | 1265 |
return false; |
1266 |
} |
|
1267 |
||
1268 |
// normalize newlines |
|
1269 |
int offset = fCurrentEntity.position; |
|
1270 |
int c = fCurrentEntity.ch[offset]; |
|
1271 |
int newlines = 0; |
|
1272 |
if (c == '\n' || (c == '\r' && isExternal)) { |
|
1273 |
if (DEBUG_BUFFER) { |
|
1274 |
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); |
|
1275 |
print(); |
|
1276 |
System.out.println(); |
|
1277 |
} |
|
1278 |
do { |
|
1279 |
c = fCurrentEntity.ch[fCurrentEntity.position++]; |
|
1280 |
if (c == '\r' && isExternal) { |
|
1281 |
newlines++; |
|
1282 |
fCurrentEntity.lineNumber++; |
|
1283 |
fCurrentEntity.columnNumber = 1; |
|
1284 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
1285 |
offset = 0; |
|
1286 |
fCurrentEntity.position = newlines; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1287 |
if (load(newlines, false, true)) { |
12005 | 1288 |
break; |
1289 |
} |
|
1290 |
} |
|
1291 |
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { |
|
1292 |
fCurrentEntity.position++; |
|
1293 |
offset++; |
|
1294 |
} |
|
1295 |
/*** NEWLINE NORMALIZATION ***/ |
|
1296 |
else { |
|
1297 |
newlines++; |
|
1298 |
} |
|
1299 |
} else if (c == '\n') { |
|
1300 |
newlines++; |
|
1301 |
fCurrentEntity.lineNumber++; |
|
1302 |
fCurrentEntity.columnNumber = 1; |
|
1303 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
1304 |
offset = 0; |
|
1305 |
fCurrentEntity.position = newlines; |
|
1306 |
fCurrentEntity.count = newlines; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1307 |
if (load(newlines, false, true)) { |
12005 | 1308 |
break; |
1309 |
} |
|
1310 |
} |
|
1311 |
} else { |
|
1312 |
fCurrentEntity.position--; |
|
1313 |
break; |
|
1314 |
} |
|
1315 |
} while (fCurrentEntity.position < fCurrentEntity.count - 1); |
|
1316 |
for (int i = offset; i < fCurrentEntity.position; i++) { |
|
1317 |
fCurrentEntity.ch[i] = '\n'; |
|
1318 |
} |
|
1319 |
int length = fCurrentEntity.position - offset; |
|
1320 |
if (fCurrentEntity.position == fCurrentEntity.count - 1) { |
|
1321 |
buffer.append(fCurrentEntity.ch, offset, length); |
|
1322 |
if (DEBUG_BUFFER) { |
|
1323 |
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); |
|
1324 |
print(); |
|
1325 |
System.out.println(); |
|
1326 |
} |
|
1327 |
return true; |
|
1328 |
} |
|
1329 |
if (DEBUG_BUFFER) { |
|
1330 |
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); |
|
1331 |
print(); |
|
1332 |
System.out.println(); |
|
1333 |
} |
|
1334 |
} |
|
1335 |
||
1336 |
// iterate over buffer looking for delimiter |
|
1337 |
OUTER: while (fCurrentEntity.position < fCurrentEntity.count) { |
|
1338 |
c = fCurrentEntity.ch[fCurrentEntity.position++]; |
|
1339 |
if (c == charAt0) { |
|
1340 |
// looks like we just hit the delimiter |
|
1341 |
int delimOffset = fCurrentEntity.position - 1; |
|
1342 |
for (int i = 1; i < delimLen; i++) { |
|
1343 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
1344 |
fCurrentEntity.position -= i; |
|
1345 |
break OUTER; |
|
1346 |
} |
|
1347 |
c = fCurrentEntity.ch[fCurrentEntity.position++]; |
|
1348 |
if (delimiter.charAt(i) != c) { |
|
1349 |
fCurrentEntity.position -= i; |
|
1350 |
break; |
|
1351 |
} |
|
1352 |
} |
|
1353 |
if (fCurrentEntity.position == delimOffset + delimLen) { |
|
1354 |
done = true; |
|
1355 |
break; |
|
1356 |
} |
|
1357 |
} else if (c == '\n' || (isExternal && c == '\r')) { |
|
1358 |
fCurrentEntity.position--; |
|
1359 |
break; |
|
1360 |
} else if (XMLChar.isInvalid(c)) { |
|
1361 |
fCurrentEntity.position--; |
|
1362 |
int length = fCurrentEntity.position - offset; |
|
1363 |
fCurrentEntity.columnNumber += length - newlines; |
|
1364 |
buffer.append(fCurrentEntity.ch, offset, length); |
|
1365 |
return true; |
|
1366 |
} |
|
1367 |
} |
|
1368 |
int length = fCurrentEntity.position - offset; |
|
1369 |
fCurrentEntity.columnNumber += length - newlines; |
|
1370 |
if (done) { |
|
1371 |
length -= delimLen; |
|
1372 |
} |
|
1373 |
buffer.append(fCurrentEntity.ch, offset, length); |
|
1374 |
||
1375 |
// return true if string was skipped |
|
1376 |
if (DEBUG_BUFFER) { |
|
1377 |
System.out.print(")scanData: "); |
|
1378 |
print(); |
|
1379 |
System.out.println(" -> " + done); |
|
1380 |
} |
|
1381 |
} while (!done); |
|
1382 |
return !done; |
|
1383 |
||
1384 |
} // scanData(String,XMLString) |
|
1385 |
||
1386 |
/** |
|
1387 |
* Skips a character appearing immediately on the input. |
|
1388 |
* <p> |
|
1389 |
* <strong>Note:</strong> The character is consumed only if it matches |
|
1390 |
* the specified character. |
|
1391 |
* |
|
1392 |
* @param c The character to skip. |
|
1393 |
* |
|
1394 |
* @return Returns true if the character was skipped. |
|
1395 |
* |
|
1396 |
* @throws IOException Thrown if i/o error occurs. |
|
1397 |
* @throws EOFException Thrown on end of file. |
|
1398 |
*/ |
|
1399 |
public boolean skipChar(int c) throws IOException { |
|
1400 |
if (DEBUG_BUFFER) { |
|
1401 |
System.out.print("(skipChar, '"+(char)c+"': "); |
|
1402 |
print(); |
|
1403 |
System.out.println(); |
|
1404 |
} |
|
1405 |
||
1406 |
// load more characters, if needed |
|
1407 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1408 |
load(0, true, true); |
12005 | 1409 |
} |
1410 |
||
1411 |
// skip character |
|
1412 |
int cc = fCurrentEntity.ch[fCurrentEntity.position]; |
|
1413 |
if (cc == c) { |
|
1414 |
fCurrentEntity.position++; |
|
1415 |
if (c == '\n') { |
|
1416 |
fCurrentEntity.lineNumber++; |
|
1417 |
fCurrentEntity.columnNumber = 1; |
|
1418 |
} else { |
|
1419 |
fCurrentEntity.columnNumber++; |
|
1420 |
} |
|
1421 |
if (DEBUG_BUFFER) { |
|
1422 |
System.out.print(")skipChar, '"+(char)c+"': "); |
|
1423 |
print(); |
|
1424 |
System.out.println(" -> true"); |
|
1425 |
} |
|
1426 |
return true; |
|
1427 |
} else if (c == '\n' && cc == '\r' && isExternal) { |
|
1428 |
// handle newlines |
|
1429 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
1430 |
fCurrentEntity.ch[0] = (char)cc; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1431 |
load(1, false, true); |
12005 | 1432 |
} |
1433 |
fCurrentEntity.position++; |
|
1434 |
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { |
|
1435 |
fCurrentEntity.position++; |
|
1436 |
} |
|
1437 |
fCurrentEntity.lineNumber++; |
|
1438 |
fCurrentEntity.columnNumber = 1; |
|
1439 |
if (DEBUG_BUFFER) { |
|
1440 |
System.out.print(")skipChar, '"+(char)c+"': "); |
|
1441 |
print(); |
|
1442 |
System.out.println(" -> true"); |
|
1443 |
} |
|
1444 |
return true; |
|
1445 |
} |
|
1446 |
||
1447 |
// character was not skipped |
|
1448 |
if (DEBUG_BUFFER) { |
|
1449 |
System.out.print(")skipChar, '"+(char)c+"': "); |
|
1450 |
print(); |
|
1451 |
System.out.println(" -> false"); |
|
1452 |
} |
|
1453 |
return false; |
|
1454 |
||
1455 |
} // skipChar(int):boolean |
|
1456 |
||
1457 |
public boolean isSpace(char ch){ |
|
1458 |
return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r'); |
|
1459 |
} |
|
1460 |
/** |
|
1461 |
* Skips space characters appearing immediately on the input. |
|
1462 |
* <p> |
|
1463 |
* <strong>Note:</strong> The characters are consumed only if they are |
|
1464 |
* space characters. |
|
1465 |
* |
|
1466 |
* @return Returns true if at least one space character was skipped. |
|
1467 |
* |
|
1468 |
* @throws IOException Thrown if i/o error occurs. |
|
1469 |
* @throws EOFException Thrown on end of file. |
|
1470 |
* |
|
1471 |
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace |
|
1472 |
*/ |
|
1473 |
public boolean skipSpaces() throws IOException { |
|
1474 |
if (DEBUG_BUFFER) { |
|
1475 |
System.out.print("(skipSpaces: "); |
|
1476 |
print(); |
|
1477 |
System.out.println(); |
|
1478 |
} |
|
1479 |
//boolean entityChanged = false; |
|
1480 |
// load more characters, if needed |
|
1481 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1482 |
load(0, true, true); |
12005 | 1483 |
} |
1484 |
||
1485 |
//we are doing this check only in skipSpace() because it is called by |
|
1486 |
//fMiscDispatcher and we want the parser to exit gracefully when document |
|
1487 |
//is well-formed. |
|
1488 |
//it is possible that end of document is reached and |
|
1489 |
//fCurrentEntity becomes null |
|
1490 |
//nothing was read so entity changed 'false' should be returned. |
|
1491 |
if(fCurrentEntity == null){ |
|
1492 |
return false ; |
|
1493 |
} |
|
1494 |
||
1495 |
// skip spaces |
|
1496 |
int c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
1497 |
if (XMLChar.isSpace(c)) { |
|
1498 |
do { |
|
1499 |
boolean entityChanged = false; |
|
1500 |
// handle newlines |
|
1501 |
if (c == '\n' || (isExternal && c == '\r')) { |
|
1502 |
fCurrentEntity.lineNumber++; |
|
1503 |
fCurrentEntity.columnNumber = 1; |
|
1504 |
if (fCurrentEntity.position == fCurrentEntity.count - 1) { |
|
1505 |
fCurrentEntity.ch[0] = (char)c; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1506 |
entityChanged = load(1, true, true); |
12005 | 1507 |
if (!entityChanged){ |
1508 |
// the load change the position to be 1, |
|
1509 |
// need to restore it when entity not changed |
|
1510 |
fCurrentEntity.position = 0; |
|
1511 |
}else if(fCurrentEntity == null){ |
|
1512 |
return true ; |
|
1513 |
} |
|
1514 |
} |
|
1515 |
if (c == '\r' && isExternal) { |
|
1516 |
// REVISIT: Does this need to be updated to fix the |
|
1517 |
// #x0D ^#x0A newline normalization problem? -Ac |
|
1518 |
if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { |
|
1519 |
fCurrentEntity.position--; |
|
1520 |
} |
|
1521 |
} |
|
1522 |
} else { |
|
1523 |
fCurrentEntity.columnNumber++; |
|
1524 |
} |
|
1525 |
// load more characters, if needed |
|
1526 |
if (!entityChanged){ |
|
1527 |
fCurrentEntity.position++; |
|
1528 |
} |
|
1529 |
||
1530 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1531 |
load(0, true, true); |
12005 | 1532 |
|
1533 |
//we are doing this check only in skipSpace() because it is called by |
|
1534 |
//fMiscDispatcher and we want the parser to exit gracefully when document |
|
1535 |
//is well-formed. |
|
1536 |
||
1537 |
//it is possible that end of document is reached and |
|
1538 |
//fCurrentEntity becomes null |
|
1539 |
//nothing was read so entity changed 'false' should be returned. |
|
1540 |
if(fCurrentEntity == null){ |
|
1541 |
return true ; |
|
1542 |
} |
|
1543 |
||
1544 |
} |
|
1545 |
} while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); |
|
1546 |
if (DEBUG_BUFFER) { |
|
1547 |
System.out.print(")skipSpaces: "); |
|
1548 |
print(); |
|
1549 |
System.out.println(" -> true"); |
|
1550 |
} |
|
1551 |
return true; |
|
1552 |
} |
|
1553 |
||
1554 |
// no spaces were found |
|
1555 |
if (DEBUG_BUFFER) { |
|
1556 |
System.out.print(")skipSpaces: "); |
|
1557 |
print(); |
|
1558 |
System.out.println(" -> false"); |
|
1559 |
} |
|
1560 |
return false; |
|
1561 |
||
1562 |
} // skipSpaces():boolean |
|
1563 |
||
1564 |
||
1565 |
/** |
|
1566 |
* @param legnth This function checks that following number of characters are available. |
|
1567 |
* to the underlying buffer. |
|
1568 |
* @return This function returns true if capacity asked is available. |
|
1569 |
*/ |
|
1570 |
public boolean arrangeCapacity(int length) throws IOException{ |
|
1571 |
return arrangeCapacity(length, false); |
|
1572 |
} |
|
1573 |
||
1574 |
/** |
|
1575 |
* @param legnth This function checks that following number of characters are available. |
|
1576 |
* to the underlying buffer. |
|
1577 |
* @param if the underlying function should change the entity |
|
1578 |
* @return This function returns true if capacity asked is available. |
|
1579 |
* |
|
1580 |
*/ |
|
1581 |
public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{ |
|
1582 |
//check if the capacity is availble in the current buffer |
|
1583 |
//count is no. of characters in the buffer [x][m][l] |
|
1584 |
//position is '0' based |
|
1585 |
//System.out.println("fCurrent Entity " + fCurrentEntity); |
|
1586 |
if((fCurrentEntity.count - fCurrentEntity.position) >= length) { |
|
1587 |
return true; |
|
1588 |
} |
|
1589 |
if(DEBUG_SKIP_STRING){ |
|
1590 |
System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); |
|
1591 |
System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); |
|
1592 |
System.out.println("length = " + length); |
|
1593 |
} |
|
1594 |
boolean entityChanged = false; |
|
1595 |
//load more characters -- this function shouldn't change the entity |
|
1596 |
while((fCurrentEntity.count - fCurrentEntity.position) < length){ |
|
1597 |
if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){ |
|
1598 |
invokeListeners(0); |
|
1599 |
System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position); |
|
1600 |
fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position; |
|
1601 |
fCurrentEntity.position = 0; |
|
1602 |
} |
|
1603 |
||
1604 |
if((fCurrentEntity.count - fCurrentEntity.position) < length){ |
|
1605 |
int pos = fCurrentEntity.position; |
|
1606 |
invokeListeners(pos); |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1607 |
entityChanged = load(fCurrentEntity.count, changeEntity, false); |
12005 | 1608 |
fCurrentEntity.position = pos; |
1609 |
if(entityChanged)break; |
|
1610 |
} |
|
1611 |
if(DEBUG_SKIP_STRING){ |
|
1612 |
System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); |
|
1613 |
System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); |
|
1614 |
System.out.println("length = " + length); |
|
1615 |
} |
|
1616 |
} |
|
1617 |
//load changes the position.. set it back to the point where we started. |
|
1618 |
||
1619 |
//after loading check again. |
|
1620 |
if((fCurrentEntity.count - fCurrentEntity.position) >= length) { |
|
1621 |
return true; |
|
1622 |
} else { |
|
1623 |
return false; |
|
1624 |
} |
|
1625 |
} |
|
1626 |
||
1627 |
/** |
|
1628 |
* Skips the specified string appearing immediately on the input. |
|
1629 |
* <p> |
|
1630 |
* <strong>Note:</strong> The characters are consumed only if all |
|
1631 |
* the characters are skipped. |
|
1632 |
* |
|
1633 |
* @param s The string to skip. |
|
1634 |
* |
|
1635 |
* @return Returns true if the string was skipped. |
|
1636 |
* |
|
1637 |
* @throws IOException Thrown if i/o error occurs. |
|
1638 |
* @throws EOFException Thrown on end of file. |
|
1639 |
*/ |
|
1640 |
public boolean skipString(String s) throws IOException { |
|
1641 |
||
1642 |
final int length = s.length(); |
|
1643 |
||
1644 |
//first make sure that required capacity is avaible |
|
1645 |
if(arrangeCapacity(length, false)){ |
|
1646 |
final int beforeSkip = fCurrentEntity.position ; |
|
1647 |
int afterSkip = fCurrentEntity.position + length - 1 ; |
|
1648 |
if(DEBUG_SKIP_STRING){ |
|
1649 |
System.out.println("skipString,length = " + s + "," + length); |
|
1650 |
System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip, length)); |
|
1651 |
} |
|
1652 |
||
1653 |
//s.charAt() indexes are 0 to 'Length -1' based. |
|
1654 |
int i = length - 1 ; |
|
1655 |
//check from reverse |
|
1656 |
while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){ |
|
1657 |
if(afterSkip-- == beforeSkip){ |
|
1658 |
fCurrentEntity.position = fCurrentEntity.position + length ; |
|
1659 |
fCurrentEntity.columnNumber += length; |
|
1660 |
return true; |
|
1661 |
} |
|
1662 |
} |
|
1663 |
} |
|
1664 |
||
1665 |
return false; |
|
1666 |
} // skipString(String):boolean |
|
1667 |
||
1668 |
public boolean skipString(char [] s) throws IOException { |
|
1669 |
||
1670 |
final int length = s.length; |
|
1671 |
//first make sure that required capacity is avaible |
|
1672 |
if(arrangeCapacity(length, false)){ |
|
1673 |
int beforeSkip = fCurrentEntity.position ; |
|
1674 |
int afterSkip = fCurrentEntity.position + length ; |
|
1675 |
||
1676 |
if(DEBUG_SKIP_STRING){ |
|
1677 |
System.out.println("skipString,length = " + new String(s) + "," + length); |
|
1678 |
System.out.println("skipString,length = " + new String(s) + "," + length); |
|
1679 |
} |
|
1680 |
||
1681 |
for(int i=0;i<length;i++){ |
|
1682 |
if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){ |
|
1683 |
return false; |
|
1684 |
} |
|
1685 |
} |
|
1686 |
fCurrentEntity.position = fCurrentEntity.position + length ; |
|
1687 |
fCurrentEntity.columnNumber += length; |
|
1688 |
return true; |
|
1689 |
||
1690 |
} |
|
1691 |
||
1692 |
return false; |
|
1693 |
} |
|
1694 |
||
1695 |
// |
|
1696 |
// Locator methods |
|
1697 |
// |
|
1698 |
// |
|
1699 |
// Private methods |
|
1700 |
// |
|
1701 |
||
1702 |
/** |
|
1703 |
* Loads a chunk of text. |
|
1704 |
* |
|
1705 |
* @param offset The offset into the character buffer to |
|
1706 |
* read the next batch of characters. |
|
1707 |
* @param changeEntity True if the load should change entities |
|
1708 |
* at the end of the entity, otherwise leave |
|
1709 |
* the current entity in place and the entity |
|
1710 |
* boundary will be signaled by the return |
|
1711 |
* value. |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1712 |
* @param notify Determine whether to notify listeners of |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1713 |
* the event |
12005 | 1714 |
* |
1715 |
* @returns Returns true if the entity changed as a result of this |
|
1716 |
* load operation. |
|
1717 |
*/ |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1718 |
final boolean load(int offset, boolean changeEntity, boolean notify) |
12005 | 1719 |
throws IOException { |
1720 |
if (DEBUG_BUFFER) { |
|
1721 |
System.out.print("(load, "+offset+": "); |
|
1722 |
print(); |
|
1723 |
System.out.println(); |
|
1724 |
} |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1725 |
if (notify) { |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1726 |
invokeListeners(offset); |
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1727 |
} |
12005 | 1728 |
//maintaing the count till last load |
1729 |
fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ; |
|
1730 |
// read characters |
|
1731 |
int length = fCurrentEntity.ch.length - offset; |
|
1732 |
if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) { |
|
1733 |
length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE; |
|
1734 |
} |
|
1735 |
if (DEBUG_BUFFER) System.out.println(" length to try to read: "+length); |
|
1736 |
int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length); |
|
1737 |
if (DEBUG_BUFFER) System.out.println(" length actually read: "+count); |
|
1738 |
||
1739 |
// reset count and position |
|
1740 |
boolean entityChanged = false; |
|
1741 |
if (count != -1) { |
|
1742 |
if (count != 0) { |
|
1743 |
// record the last count |
|
1744 |
fCurrentEntity.fLastCount = count; |
|
1745 |
fCurrentEntity.count = count + offset; |
|
1746 |
fCurrentEntity.position = offset; |
|
1747 |
} |
|
1748 |
} |
|
1749 |
// end of this entity |
|
1750 |
else { |
|
1751 |
fCurrentEntity.count = offset; |
|
1752 |
fCurrentEntity.position = offset; |
|
1753 |
entityChanged = true; |
|
1754 |
||
1755 |
if (changeEntity) { |
|
1756 |
//notify the entity manager about the end of entity |
|
1757 |
fEntityManager.endEntity(); |
|
1758 |
//return if the current entity becomes null |
|
1759 |
if(fCurrentEntity == null){ |
|
1760 |
throw END_OF_DOCUMENT_ENTITY; |
|
1761 |
} |
|
1762 |
// handle the trailing edges |
|
1763 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
1764 |
load(0, true, false); |
12005 | 1765 |
} |
1766 |
} |
|
1767 |
||
1768 |
} |
|
1769 |
if (DEBUG_BUFFER) { |
|
1770 |
System.out.print(")load, "+offset+": "); |
|
1771 |
print(); |
|
1772 |
System.out.println(); |
|
1773 |
} |
|
1774 |
||
1775 |
return entityChanged; |
|
1776 |
||
1777 |
} // load(int, boolean):boolean |
|
1778 |
||
1779 |
/** |
|
1780 |
* Creates a reader capable of reading the given input stream in |
|
1781 |
* the specified encoding. |
|
1782 |
* |
|
1783 |
* @param inputStream The input stream. |
|
1784 |
* @param encoding The encoding name that the input stream is |
|
1785 |
* encoded using. If the user has specified that |
|
1786 |
* Java encoding names are allowed, then the |
|
1787 |
* encoding name may be a Java encoding name; |
|
1788 |
* otherwise, it is an ianaEncoding name. |
|
1789 |
* @param isBigEndian For encodings (like uCS-4), whose names cannot |
|
1790 |
* specify a byte order, this tells whether the order is bigEndian. null menas |
|
1791 |
* unknown or not relevant. |
|
1792 |
* |
|
1793 |
* @return Returns a reader. |
|
1794 |
*/ |
|
1795 |
protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) |
|
1796 |
throws IOException { |
|
1797 |
||
1798 |
// normalize encoding name |
|
1799 |
if (encoding == null) { |
|
1800 |
encoding = "UTF-8"; |
|
1801 |
} |
|
1802 |
||
1803 |
// try to use an optimized reader |
|
1804 |
String ENCODING = encoding.toUpperCase(Locale.ENGLISH); |
|
1805 |
if (ENCODING.equals("UTF-8")) { |
|
1806 |
if (DEBUG_ENCODINGS) { |
|
1807 |
System.out.println("$$$ creating UTF8Reader"); |
|
1808 |
} |
|
1809 |
return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); |
|
1810 |
} |
|
1811 |
if (ENCODING.equals("US-ASCII")) { |
|
1812 |
if (DEBUG_ENCODINGS) { |
|
1813 |
System.out.println("$$$ creating ASCIIReader"); |
|
1814 |
} |
|
1815 |
return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); |
|
1816 |
} |
|
1817 |
if(ENCODING.equals("ISO-10646-UCS-4")) { |
|
1818 |
if(isBigEndian != null) { |
|
1819 |
boolean isBE = isBigEndian.booleanValue(); |
|
1820 |
if(isBE) { |
|
1821 |
return new UCSReader(inputStream, UCSReader.UCS4BE); |
|
1822 |
} else { |
|
1823 |
return new UCSReader(inputStream, UCSReader.UCS4LE); |
|
1824 |
} |
|
1825 |
} else { |
|
1826 |
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, |
|
1827 |
"EncodingByteOrderUnsupported", |
|
1828 |
new Object[] { encoding }, |
|
1829 |
XMLErrorReporter.SEVERITY_FATAL_ERROR); |
|
1830 |
} |
|
1831 |
} |
|
1832 |
if(ENCODING.equals("ISO-10646-UCS-2")) { |
|
1833 |
if(isBigEndian != null) { // sould never happen with this encoding... |
|
1834 |
boolean isBE = isBigEndian.booleanValue(); |
|
1835 |
if(isBE) { |
|
1836 |
return new UCSReader(inputStream, UCSReader.UCS2BE); |
|
1837 |
} else { |
|
1838 |
return new UCSReader(inputStream, UCSReader.UCS2LE); |
|
1839 |
} |
|
1840 |
} else { |
|
1841 |
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, |
|
1842 |
"EncodingByteOrderUnsupported", |
|
1843 |
new Object[] { encoding }, |
|
1844 |
XMLErrorReporter.SEVERITY_FATAL_ERROR); |
|
1845 |
} |
|
1846 |
} |
|
1847 |
||
1848 |
// check for valid name |
|
1849 |
boolean validIANA = XMLChar.isValidIANAEncoding(encoding); |
|
1850 |
boolean validJava = XMLChar.isValidJavaEncoding(encoding); |
|
1851 |
if (!validIANA || (fAllowJavaEncodings && !validJava)) { |
|
1852 |
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, |
|
1853 |
"EncodingDeclInvalid", |
|
1854 |
new Object[] { encoding }, |
|
1855 |
XMLErrorReporter.SEVERITY_FATAL_ERROR); |
|
1856 |
// NOTE: AndyH suggested that, on failure, we use ISO Latin 1 |
|
1857 |
// because every byte is a valid ISO Latin 1 character. |
|
1858 |
// It may not translate correctly but if we failed on |
|
1859 |
// the encoding anyway, then we're expecting the content |
|
1860 |
// of the document to be bad. This will just prevent an |
|
1861 |
// invalid UTF-8 sequence to be detected. This is only |
|
1862 |
// important when continue-after-fatal-error is turned |
|
1863 |
// on. -Ac |
|
1864 |
encoding = "ISO-8859-1"; |
|
1865 |
} |
|
1866 |
||
1867 |
// try to use a Java reader |
|
1868 |
String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); |
|
1869 |
if (javaEncoding == null) { |
|
1870 |
if(fAllowJavaEncodings) { |
|
1871 |
javaEncoding = encoding; |
|
1872 |
} else { |
|
1873 |
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, |
|
1874 |
"EncodingDeclInvalid", |
|
1875 |
new Object[] { encoding }, |
|
1876 |
XMLErrorReporter.SEVERITY_FATAL_ERROR); |
|
1877 |
// see comment above. |
|
1878 |
javaEncoding = "ISO8859_1"; |
|
1879 |
} |
|
1880 |
} |
|
1881 |
else if (javaEncoding.equals("ASCII")) { |
|
1882 |
if (DEBUG_ENCODINGS) { |
|
1883 |
System.out.println("$$$ creating ASCIIReader"); |
|
1884 |
} |
|
1885 |
return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); |
|
1886 |
} |
|
1887 |
||
1888 |
if (DEBUG_ENCODINGS) { |
|
1889 |
System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); |
|
1890 |
if (javaEncoding == encoding) { |
|
1891 |
System.out.print(" (IANA encoding)"); |
|
1892 |
} |
|
1893 |
System.out.println(); |
|
1894 |
} |
|
1895 |
return new InputStreamReader(inputStream, javaEncoding); |
|
1896 |
||
1897 |
} // createReader(InputStream,String, Boolean): Reader |
|
1898 |
||
1899 |
/** |
|
1900 |
* Returns the IANA encoding name that is auto-detected from |
|
1901 |
* the bytes specified, with the endian-ness of that encoding where appropriate. |
|
1902 |
* |
|
1903 |
* @param b4 The first four bytes of the input. |
|
1904 |
* @param count The number of bytes actually read. |
|
1905 |
* @return a 2-element array: the first element, an IANA-encoding string, |
|
1906 |
* the second element a Boolean which is true iff the document is big endian, false |
|
1907 |
* if it's little-endian, and null if the distinction isn't relevant. |
|
1908 |
*/ |
|
1909 |
protected Object[] getEncodingName(byte[] b4, int count) { |
|
1910 |
||
1911 |
if (count < 2) { |
|
1912 |
return new Object[]{"UTF-8", null}; |
|
1913 |
} |
|
1914 |
||
1915 |
// UTF-16, with BOM |
|
1916 |
int b0 = b4[0] & 0xFF; |
|
1917 |
int b1 = b4[1] & 0xFF; |
|
1918 |
if (b0 == 0xFE && b1 == 0xFF) { |
|
1919 |
// UTF-16, big-endian |
|
1920 |
return new Object [] {"UTF-16BE", new Boolean(true)}; |
|
1921 |
} |
|
1922 |
if (b0 == 0xFF && b1 == 0xFE) { |
|
1923 |
// UTF-16, little-endian |
|
1924 |
return new Object [] {"UTF-16LE", new Boolean(false)}; |
|
1925 |
} |
|
1926 |
||
1927 |
// default to UTF-8 if we don't have enough bytes to make a |
|
1928 |
// good determination of the encoding |
|
1929 |
if (count < 3) { |
|
1930 |
return new Object [] {"UTF-8", null}; |
|
1931 |
} |
|
1932 |
||
1933 |
// UTF-8 with a BOM |
|
1934 |
int b2 = b4[2] & 0xFF; |
|
1935 |
if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { |
|
1936 |
return new Object [] {"UTF-8", null}; |
|
1937 |
} |
|
1938 |
||
1939 |
// default to UTF-8 if we don't have enough bytes to make a |
|
1940 |
// good determination of the encoding |
|
1941 |
if (count < 4) { |
|
1942 |
return new Object [] {"UTF-8", null}; |
|
1943 |
} |
|
1944 |
||
1945 |
// other encodings |
|
1946 |
int b3 = b4[3] & 0xFF; |
|
1947 |
if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { |
|
1948 |
// UCS-4, big endian (1234) |
|
1949 |
return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; |
|
1950 |
} |
|
1951 |
if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { |
|
1952 |
// UCS-4, little endian (4321) |
|
1953 |
return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; |
|
1954 |
} |
|
1955 |
if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { |
|
1956 |
// UCS-4, unusual octet order (2143) |
|
1957 |
// REVISIT: What should this be? |
|
1958 |
return new Object [] {"ISO-10646-UCS-4", null}; |
|
1959 |
} |
|
1960 |
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { |
|
1961 |
// UCS-4, unusual octect order (3412) |
|
1962 |
// REVISIT: What should this be? |
|
1963 |
return new Object [] {"ISO-10646-UCS-4", null}; |
|
1964 |
} |
|
1965 |
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { |
|
1966 |
// UTF-16, big-endian, no BOM |
|
1967 |
// (or could turn out to be UCS-2... |
|
1968 |
// REVISIT: What should this be? |
|
1969 |
return new Object [] {"UTF-16BE", new Boolean(true)}; |
|
1970 |
} |
|
1971 |
if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { |
|
1972 |
// UTF-16, little-endian, no BOM |
|
1973 |
// (or could turn out to be UCS-2... |
|
1974 |
return new Object [] {"UTF-16LE", new Boolean(false)}; |
|
1975 |
} |
|
1976 |
if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { |
|
1977 |
// EBCDIC |
|
1978 |
// a la xerces1, return CP037 instead of EBCDIC here |
|
1979 |
return new Object [] {"CP037", null}; |
|
1980 |
} |
|
1981 |
||
1982 |
// default encoding |
|
1983 |
return new Object [] {"UTF-8", null}; |
|
1984 |
||
1985 |
} // getEncodingName(byte[],int):Object[] |
|
1986 |
||
1987 |
/** |
|
1988 |
* xxx not removing endEntity() so that i remember that we need to implement it. |
|
1989 |
* Ends an entity. |
|
1990 |
* |
|
1991 |
* @throws XNIException Thrown by entity handler to signal an error. |
|
1992 |
*/ |
|
1993 |
// |
|
1994 |
/** Prints the contents of the buffer. */ |
|
1995 |
final void print() { |
|
1996 |
if (DEBUG_BUFFER) { |
|
1997 |
if (fCurrentEntity != null) { |
|
1998 |
System.out.print('['); |
|
1999 |
System.out.print(fCurrentEntity.count); |
|
2000 |
System.out.print(' '); |
|
2001 |
System.out.print(fCurrentEntity.position); |
|
2002 |
if (fCurrentEntity.count > 0) { |
|
2003 |
System.out.print(" \""); |
|
2004 |
for (int i = 0; i < fCurrentEntity.count; i++) { |
|
2005 |
if (i == fCurrentEntity.position) { |
|
2006 |
System.out.print('^'); |
|
2007 |
} |
|
2008 |
char c = fCurrentEntity.ch[i]; |
|
2009 |
switch (c) { |
|
2010 |
case '\n': { |
|
2011 |
System.out.print("\\n"); |
|
2012 |
break; |
|
2013 |
} |
|
2014 |
case '\r': { |
|
2015 |
System.out.print("\\r"); |
|
2016 |
break; |
|
2017 |
} |
|
2018 |
case '\t': { |
|
2019 |
System.out.print("\\t"); |
|
2020 |
break; |
|
2021 |
} |
|
2022 |
case '\\': { |
|
2023 |
System.out.print("\\\\"); |
|
2024 |
break; |
|
2025 |
} |
|
2026 |
default: { |
|
2027 |
System.out.print(c); |
|
2028 |
} |
|
2029 |
} |
|
2030 |
} |
|
2031 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
2032 |
System.out.print('^'); |
|
2033 |
} |
|
2034 |
System.out.print('"'); |
|
2035 |
} |
|
2036 |
System.out.print(']'); |
|
2037 |
System.out.print(" @ "); |
|
2038 |
System.out.print(fCurrentEntity.lineNumber); |
|
2039 |
System.out.print(','); |
|
2040 |
System.out.print(fCurrentEntity.columnNumber); |
|
2041 |
} else { |
|
2042 |
System.out.print("*NO CURRENT ENTITY*"); |
|
2043 |
} |
|
2044 |
} |
|
2045 |
} |
|
2046 |
||
2047 |
/** |
|
2048 |
* Registers the listener object and provides callback. |
|
2049 |
* @param listener listener to which call back should be provided when scanner buffer |
|
2050 |
* is being changed. |
|
2051 |
*/ |
|
2052 |
public void registerListener(XMLBufferListener listener) { |
|
2053 |
if(!listeners.contains(listener)) |
|
2054 |
listeners.add(listener); |
|
2055 |
} |
|
2056 |
||
2057 |
/** |
|
2058 |
* |
|
2059 |
* @param loadPos Starting position from which new data is being loaded into scanner buffer. |
|
2060 |
*/ |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
2061 |
public void invokeListeners(int loadPos){ |
12005 | 2062 |
for(int i=0;i<listeners.size();i++){ |
2063 |
XMLBufferListener listener =(XMLBufferListener) listeners.get(i); |
|
2064 |
listener.refresh(loadPos); |
|
2065 |
} |
|
2066 |
} |
|
2067 |
||
2068 |
/** |
|
2069 |
* Skips space characters appearing immediately on the input that would |
|
2070 |
* match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line |
|
2071 |
* normalization is performed. This is useful when scanning structures |
|
2072 |
* such as the XMLDecl and TextDecl that can only contain US-ASCII |
|
2073 |
* characters. |
|
2074 |
* <p> |
|
2075 |
* <strong>Note:</strong> The characters are consumed only if they would |
|
2076 |
* match non-terminal S before end of line normalization is performed. |
|
2077 |
* |
|
2078 |
* @return Returns true if at least one space character was skipped. |
|
2079 |
* |
|
2080 |
* @throws IOException Thrown if i/o error occurs. |
|
2081 |
* @throws EOFException Thrown on end of file. |
|
2082 |
* |
|
2083 |
* @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace |
|
2084 |
*/ |
|
2085 |
public final boolean skipDeclSpaces() throws IOException { |
|
2086 |
if (DEBUG_BUFFER) { |
|
2087 |
System.out.print("(skipDeclSpaces: "); |
|
2088 |
//XMLEntityManager.print(fCurrentEntity); |
|
2089 |
System.out.println(); |
|
2090 |
} |
|
2091 |
||
2092 |
// load more characters, if needed |
|
2093 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
2094 |
load(0, true, false); |
12005 | 2095 |
} |
2096 |
||
2097 |
// skip spaces |
|
2098 |
int c = fCurrentEntity.ch[fCurrentEntity.position]; |
|
2099 |
if (XMLChar.isSpace(c)) { |
|
2100 |
boolean external = fCurrentEntity.isExternal(); |
|
2101 |
do { |
|
2102 |
boolean entityChanged = false; |
|
2103 |
// handle newlines |
|
2104 |
if (c == '\n' || (external && c == '\r')) { |
|
2105 |
fCurrentEntity.lineNumber++; |
|
2106 |
fCurrentEntity.columnNumber = 1; |
|
2107 |
if (fCurrentEntity.position == fCurrentEntity.count - 1) { |
|
2108 |
fCurrentEntity.ch[0] = (char)c; |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
2109 |
entityChanged = load(1, true, false); |
12005 | 2110 |
if (!entityChanged) |
2111 |
// the load change the position to be 1, |
|
2112 |
// need to restore it when entity not changed |
|
2113 |
fCurrentEntity.position = 0; |
|
2114 |
} |
|
2115 |
if (c == '\r' && external) { |
|
2116 |
// REVISIT: Does this need to be updated to fix the |
|
2117 |
// #x0D ^#x0A newline normalization problem? -Ac |
|
2118 |
if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { |
|
2119 |
fCurrentEntity.position--; |
|
2120 |
} |
|
2121 |
} |
|
2122 |
/*** NEWLINE NORMALIZATION *** |
|
2123 |
* else { |
|
2124 |
* if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r' |
|
2125 |
* && external) { |
|
2126 |
* fCurrentEntity.position++; |
|
2127 |
* } |
|
2128 |
* } |
|
2129 |
* /***/ |
|
2130 |
} else { |
|
2131 |
fCurrentEntity.columnNumber++; |
|
2132 |
} |
|
2133 |
// load more characters, if needed |
|
2134 |
if (!entityChanged) |
|
2135 |
fCurrentEntity.position++; |
|
2136 |
if (fCurrentEntity.position == fCurrentEntity.count) { |
|
22140
f2634f2bc36c
8027359: XML parser returns incorrect parsing results
joehw
parents:
22138
diff
changeset
|
2137 |
load(0, true, false); |
12005 | 2138 |
} |
2139 |
} while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); |
|
2140 |
if (DEBUG_BUFFER) { |
|
2141 |
System.out.print(")skipDeclSpaces: "); |
|
2142 |
// XMLEntityManager.print(fCurrentEntity); |
|
2143 |
System.out.println(" -> true"); |
|
2144 |
} |
|
2145 |
return true; |
|
2146 |
} |
|
2147 |
||
2148 |
// no spaces were found |
|
2149 |
if (DEBUG_BUFFER) { |
|
2150 |
System.out.print(")skipDeclSpaces: "); |
|
2151 |
//XMLEntityManager.print(fCurrentEntity); |
|
2152 |
System.out.println(" -> false"); |
|
2153 |
} |
|
2154 |
return false; |
|
2155 |
||
2156 |
} // skipDeclSpaces():boolean |
|
2157 |
||
2158 |
||
2159 |
} // class XMLEntityScanner |