author | joehw |
Wed, 18 Oct 2017 13:25:49 -0700 | |
changeset 47359 | e1a6c0168741 |
parent 47216 | 71c04702a3d5 |
child 47477 | 115ed64c7822 |
permissions | -rw-r--r-- |
6 | 1 |
/* |
47359
e1a6c0168741
8181150: Fix lint warnings in JAXP repo: rawtypes and unchecked
joehw
parents:
47216
diff
changeset
|
2 |
* Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. |
e1a6c0168741
8181150: Fix lint warnings in JAXP repo: rawtypes and unchecked
joehw
parents:
47216
diff
changeset
|
3 |
* @LastModified: Oct 2017 |
6 | 4 |
*/ |
5 |
/* |
|
44797
8b3b3b911b8a
8162572: Update License Header for all JAXP sources
joehw
parents:
25868
diff
changeset
|
6 |
* Licensed to the Apache Software Foundation (ASF) under one or more |
8b3b3b911b8a
8162572: Update License Header for all JAXP sources
joehw
parents:
25868
diff
changeset
|
7 |
* contributor license agreements. See the NOTICE file distributed with |
8b3b3b911b8a
8162572: Update License Header for all JAXP sources
joehw
parents:
25868
diff
changeset
|
8 |
* this work for additional information regarding copyright ownership. |
8b3b3b911b8a
8162572: Update License Header for all JAXP sources
joehw
parents:
25868
diff
changeset
|
9 |
* The ASF licenses this file to You under the Apache License, Version 2.0 |
8b3b3b911b8a
8162572: Update License Header for all JAXP sources
joehw
parents:
25868
diff
changeset
|
10 |
* (the "License"); you may not use this file except in compliance with |
8b3b3b911b8a
8162572: Update License Header for all JAXP sources
joehw
parents:
25868
diff
changeset
|
11 |
* the License. You may obtain a copy of the License at |
6 | 12 |
* |
44797
8b3b3b911b8a
8162572: Update License Header for all JAXP sources
joehw
parents:
25868
diff
changeset
|
13 |
* http://www.apache.org/licenses/LICENSE-2.0 |
6 | 14 |
* |
15 |
* Unless required by applicable law or agreed to in writing, software |
|
16 |
* distributed under the License is distributed on an "AS IS" BASIS, |
|
17 |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
18 |
* See the License for the specific language governing permissions and |
|
19 |
* limitations under the License. |
|
20 |
*/ |
|
44797
8b3b3b911b8a
8162572: Update License Header for all JAXP sources
joehw
parents:
25868
diff
changeset
|
21 |
|
6 | 22 |
package com.sun.org.apache.xml.internal.dtm.ref.dom2dtm; |
23 |
||
24 |
import com.sun.org.apache.xml.internal.dtm.DTM; |
|
25 |
import com.sun.org.apache.xml.internal.dtm.DTMManager; |
|
26 |
import com.sun.org.apache.xml.internal.dtm.DTMWSFilter; |
|
27 |
import com.sun.org.apache.xml.internal.dtm.ref.DTMDefaultBaseIterators; |
|
28 |
import com.sun.org.apache.xml.internal.dtm.ref.DTMManagerDefault; |
|
29 |
import com.sun.org.apache.xml.internal.dtm.ref.ExpandedNameTable; |
|
30 |
import com.sun.org.apache.xml.internal.dtm.ref.IncrementalSAXSource; |
|
31 |
import com.sun.org.apache.xml.internal.res.XMLErrorResources; |
|
32 |
import com.sun.org.apache.xml.internal.res.XMLMessages; |
|
33 |
import com.sun.org.apache.xml.internal.utils.FastStringBuffer; |
|
34 |
import com.sun.org.apache.xml.internal.utils.QName; |
|
35 |
import com.sun.org.apache.xml.internal.utils.StringBufferPool; |
|
36 |
import com.sun.org.apache.xml.internal.utils.TreeWalker; |
|
37 |
import com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer; |
|
38 |
import com.sun.org.apache.xml.internal.utils.XMLString; |
|
39 |
import com.sun.org.apache.xml.internal.utils.XMLStringFactory; |
|
47359
e1a6c0168741
8181150: Fix lint warnings in JAXP repo: rawtypes and unchecked
joehw
parents:
47216
diff
changeset
|
40 |
import java.util.ArrayList; |
e1a6c0168741
8181150: Fix lint warnings in JAXP repo: rawtypes and unchecked
joehw
parents:
47216
diff
changeset
|
41 |
import java.util.List; |
e1a6c0168741
8181150: Fix lint warnings in JAXP repo: rawtypes and unchecked
joehw
parents:
47216
diff
changeset
|
42 |
import javax.xml.transform.SourceLocator; |
e1a6c0168741
8181150: Fix lint warnings in JAXP repo: rawtypes and unchecked
joehw
parents:
47216
diff
changeset
|
43 |
import javax.xml.transform.dom.DOMSource; |
6 | 44 |
import org.w3c.dom.Attr; |
45 |
import org.w3c.dom.Document; |
|
46 |
import org.w3c.dom.DocumentType; |
|
47 |
import org.w3c.dom.Element; |
|
48 |
import org.w3c.dom.Entity; |
|
49 |
import org.w3c.dom.NamedNodeMap; |
|
50 |
import org.w3c.dom.Node; |
|
51 |
import org.xml.sax.ContentHandler; |
|
52 |
||
53 |
/** The <code>DOM2DTM</code> class serves up a DOM's contents via the |
|
54 |
* DTM API. |
|
55 |
* |
|
56 |
* Note that it doesn't necessarily represent a full Document |
|
57 |
* tree. You can wrap a DOM2DTM around a specific node and its subtree |
|
58 |
* and the right things should happen. (I don't _think_ we currently |
|
59 |
* support DocumentFragment nodes as roots, though that might be worth |
|
60 |
* considering.) |
|
61 |
* |
|
62 |
* Note too that we do not currently attempt to track document |
|
63 |
* mutation. If you alter the DOM after wrapping DOM2DTM around it, |
|
64 |
* all bets are off. |
|
65 |
* */ |
|
66 |
public class DOM2DTM extends DTMDefaultBaseIterators |
|
67 |
{ |
|
68 |
static final boolean JJK_DEBUG=false; |
|
69 |
static final boolean JJK_NEWCODE=true; |
|
70 |
||
71 |
/** Manefest constant |
|
72 |
*/ |
|
73 |
static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace"; |
|
74 |
||
75 |
/** The current position in the DOM tree. Last node examined for |
|
76 |
* possible copying to DTM. */ |
|
77 |
transient private Node m_pos; |
|
78 |
/** The current position in the DTM tree. Who children get appended to. */ |
|
79 |
private int m_last_parent=0; |
|
80 |
/** The current position in the DTM tree. Who children reference as their |
|
81 |
* previous sib. */ |
|
82 |
private int m_last_kid=NULL; |
|
83 |
||
84 |
/** The top of the subtree. |
|
85 |
* %REVIEW%: 'may not be the same as m_context if "//foo" pattern.' |
|
86 |
* */ |
|
87 |
transient private Node m_root; |
|
88 |
||
89 |
/** True iff the first element has been processed. This is used to control |
|
90 |
synthesis of the implied xml: namespace declaration node. */ |
|
91 |
boolean m_processedFirstElement=false; |
|
92 |
||
93 |
/** true if ALL the nodes in the m_root subtree have been processed; |
|
94 |
* false if our incremental build has not yet finished scanning the |
|
95 |
* DOM tree. */ |
|
96 |
transient private boolean m_nodesAreProcessed; |
|
97 |
||
98 |
/** The node objects. The instance part of the handle indexes |
|
99 |
* directly into this vector. Each DTM node may actually be |
|
100 |
* composed of several DOM nodes (for example, if logically-adjacent |
|
101 |
* Text/CDATASection nodes in the DOM have been coalesced into a |
|
102 |
* single DTM Text node); this table points only to the first in |
|
103 |
* that sequence. */ |
|
47359
e1a6c0168741
8181150: Fix lint warnings in JAXP repo: rawtypes and unchecked
joehw
parents:
47216
diff
changeset
|
104 |
protected List<Node> m_nodes = new ArrayList<>(); |
6 | 105 |
|
106 |
/** |
|
107 |
* Construct a DOM2DTM object from a DOM node. |
|
108 |
* |
|
109 |
* @param mgr The DTMManager who owns this DTM. |
|
110 |
* @param domSource the DOM source that this DTM will wrap. |
|
111 |
* @param dtmIdentity The DTM identity ID for this DTM. |
|
112 |
* @param whiteSpaceFilter The white space filter for this DTM, which may |
|
113 |
* be null. |
|
114 |
* @param xstringfactory XMLString factory for creating character content. |
|
115 |
* @param doIndexing true if the caller considers it worth it to use |
|
116 |
* indexing schemes. |
|
117 |
*/ |
|
118 |
public DOM2DTM(DTMManager mgr, DOMSource domSource, |
|
119 |
int dtmIdentity, DTMWSFilter whiteSpaceFilter, |
|
120 |
XMLStringFactory xstringfactory, |
|
121 |
boolean doIndexing) |
|
122 |
{ |
|
123 |
super(mgr, domSource, dtmIdentity, whiteSpaceFilter, |
|
124 |
xstringfactory, doIndexing); |
|
125 |
||
126 |
// Initialize DOM navigation |
|
127 |
m_pos=m_root = domSource.getNode(); |
|
128 |
// Initialize DTM navigation |
|
129 |
m_last_parent=m_last_kid=NULL; |
|
130 |
m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL); |
|
131 |
||
132 |
// Apparently the domSource root may not actually be the |
|
133 |
// Document node. If it's an Element node, we need to immediately |
|
134 |
// add its attributes. Adapted from nextNode(). |
|
135 |
// %REVIEW% Move this logic into addNode and recurse? Cleaner! |
|
136 |
// |
|
137 |
// (If it's an EntityReference node, we're probably scrod. For now |
|
138 |
// I'm just hoping nobody is ever quite that foolish... %REVIEW%) |
|
139 |
// |
|
140 |
// %ISSUE% What about inherited namespaces in this case? |
|
141 |
// Do we need to special-case initialize them into the DTM model? |
|
142 |
if(ELEMENT_NODE == m_root.getNodeType()) |
|
143 |
{ |
|
144 |
NamedNodeMap attrs=m_root.getAttributes(); |
|
145 |
int attrsize=(attrs==null) ? 0 : attrs.getLength(); |
|
146 |
if(attrsize>0) |
|
147 |
{ |
|
148 |
int attrIndex=NULL; // start with no previous sib |
|
149 |
for(int i=0;i<attrsize;++i) |
|
150 |
{ |
|
151 |
// No need to force nodetype in this case; |
|
152 |
// addNode() will take care of switching it from |
|
153 |
// Attr to Namespace if necessary. |
|
154 |
attrIndex=addNode(attrs.item(i),0,attrIndex,NULL); |
|
155 |
m_firstch.setElementAt(DTM.NULL,attrIndex); |
|
156 |
} |
|
157 |
// Terminate list of attrs, and make sure they aren't |
|
158 |
// considered children of the element |
|
159 |
m_nextsib.setElementAt(DTM.NULL,attrIndex); |
|
160 |
||
161 |
// IMPORTANT: This does NOT change m_last_parent or m_last_kid! |
|
162 |
} // if attrs exist |
|
163 |
} //if(ELEMENT_NODE) |
|
164 |
||
165 |
// Initialize DTM-completed status |
|
166 |
m_nodesAreProcessed = false; |
|
167 |
} |
|
168 |
||
169 |
/** |
|
170 |
* Construct the node map from the node. |
|
171 |
* |
|
172 |
* @param node The node that is to be added to the DTM. |
|
173 |
* @param parentIndex The current parent index. |
|
174 |
* @param previousSibling The previous sibling index. |
|
175 |
* @param forceNodeType If not DTM.NULL, overrides the DOM node type. |
|
176 |
* Used to force nodes to Text rather than CDATASection when their |
|
177 |
* coalesced value includes ordinary Text nodes (current DTM behavior). |
|
178 |
* |
|
179 |
* @return The index identity of the node that was added. |
|
180 |
*/ |
|
181 |
protected int addNode(Node node, int parentIndex, |
|
182 |
int previousSibling, int forceNodeType) |
|
183 |
{ |
|
184 |
int nodeIndex = m_nodes.size(); |
|
185 |
||
186 |
// Have we overflowed a DTM Identity's addressing range? |
|
187 |
if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS)) |
|
188 |
{ |
|
189 |
try |
|
190 |
{ |
|
191 |
if(m_mgr==null) |
|
192 |
throw new ClassCastException(); |
|
193 |
||
194 |
// Handle as Extended Addressing |
|
195 |
DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr; |
|
196 |
int id=mgrD.getFirstFreeDTMID(); |
|
197 |
mgrD.addDTM(this,id,nodeIndex); |
|
198 |
m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS); |
|
199 |
} |
|
200 |
catch(ClassCastException e) |
|
201 |
{ |
|
202 |
// %REVIEW% Wrong error message, but I've been told we're trying |
|
203 |
// not to add messages right not for I18N reasons. |
|
204 |
// %REVIEW% Should this be a Fatal Error? |
|
205 |
error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available"; |
|
206 |
} |
|
207 |
} |
|
208 |
||
209 |
m_size++; |
|
210 |
// ensureSize(nodeIndex); |
|
211 |
||
212 |
int type; |
|
213 |
if(NULL==forceNodeType) |
|
214 |
type = node.getNodeType(); |
|
215 |
else |
|
216 |
type=forceNodeType; |
|
217 |
||
218 |
// %REVIEW% The Namespace Spec currently says that Namespaces are |
|
219 |
// processed in a non-namespace-aware manner, by matching the |
|
220 |
// QName, even though there is in fact a namespace assigned to |
|
221 |
// these nodes in the DOM. If and when that changes, we will have |
|
222 |
// to consider whether we check the namespace-for-namespaces |
|
223 |
// rather than the node name. |
|
224 |
// |
|
225 |
// %TBD% Note that the DOM does not necessarily explicitly declare |
|
226 |
// all the namespaces it uses. DOM Level 3 will introduce a |
|
227 |
// namespace-normalization operation which reconciles that, and we |
|
228 |
// can request that users invoke it or otherwise ensure that the |
|
229 |
// tree is namespace-well-formed before passing the DOM to Xalan. |
|
230 |
// But if they don't, what should we do about it? We probably |
|
231 |
// don't want to alter the source DOM (and may not be able to do |
|
232 |
// so if it's read-only). The best available answer might be to |
|
233 |
// synthesize additional DTM Namespace Nodes that don't correspond |
|
234 |
// to DOM Attr Nodes. |
|
235 |
if (Node.ATTRIBUTE_NODE == type) |
|
236 |
{ |
|
237 |
String name = node.getNodeName(); |
|
238 |
||
239 |
if (name.startsWith("xmlns:") || name.equals("xmlns")) |
|
240 |
{ |
|
241 |
type = DTM.NAMESPACE_NODE; |
|
242 |
} |
|
243 |
} |
|
244 |
||
47359
e1a6c0168741
8181150: Fix lint warnings in JAXP repo: rawtypes and unchecked
joehw
parents:
47216
diff
changeset
|
245 |
m_nodes.add(node); |
6 | 246 |
|
247 |
m_firstch.setElementAt(NOTPROCESSED,nodeIndex); |
|
248 |
m_nextsib.setElementAt(NOTPROCESSED,nodeIndex); |
|
249 |
m_prevsib.setElementAt(previousSibling,nodeIndex); |
|
250 |
m_parent.setElementAt(parentIndex,nodeIndex); |
|
251 |
||
252 |
if(DTM.NULL != parentIndex && |
|
253 |
type != DTM.ATTRIBUTE_NODE && |
|
254 |
type != DTM.NAMESPACE_NODE) |
|
255 |
{ |
|
256 |
// If the DTM parent had no children, this becomes its first child. |
|
257 |
if(NOTPROCESSED == m_firstch.elementAt(parentIndex)) |
|
258 |
m_firstch.setElementAt(nodeIndex,parentIndex); |
|
259 |
} |
|
260 |
||
261 |
String nsURI = node.getNamespaceURI(); |
|
262 |
||
263 |
// Deal with the difference between Namespace spec and XSLT |
|
264 |
// definitions of local name. (The former says PIs don't have |
|
265 |
// localnames; the latter says they do.) |
|
266 |
String localName = (type == Node.PROCESSING_INSTRUCTION_NODE) ? |
|
267 |
node.getNodeName() : |
|
268 |
node.getLocalName(); |
|
269 |
||
270 |
// Hack to make DOM1 sort of work... |
|
271 |
if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE)) |
|
272 |
&& null == localName) |
|
273 |
localName = node.getNodeName(); // -sb |
|
274 |
||
275 |
ExpandedNameTable exnt = m_expandedNameTable; |
|
276 |
||
277 |
// %TBD% Nodes created with the old non-namespace-aware DOM |
|
278 |
// calls createElement() and createAttribute() will never have a |
|
279 |
// localname. That will cause their expandedNameID to be just the |
|
280 |
// nodeType... which will keep them from being matched |
|
281 |
// successfully by name. Since the DOM makes no promise that |
|
282 |
// those will participate in namespace processing, this is |
|
283 |
// officially accepted as Not Our Fault. But it might be nice to |
|
284 |
// issue a diagnostic message! |
|
285 |
if(node.getLocalName()==null && |
|
286 |
(type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE)) |
|
287 |
{ |
|
288 |
// warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM."); |
|
289 |
} |
|
290 |
||
291 |
int expandedNameID = (null != localName) |
|
292 |
? exnt.getExpandedTypeID(nsURI, localName, type) : |
|
293 |
exnt.getExpandedTypeID(type); |
|
294 |
||
295 |
m_exptype.setElementAt(expandedNameID,nodeIndex); |
|
296 |
||
297 |
indexNode(expandedNameID, nodeIndex); |
|
298 |
||
299 |
if (DTM.NULL != previousSibling) |
|
300 |
m_nextsib.setElementAt(nodeIndex,previousSibling); |
|
301 |
||
302 |
// This should be done after m_exptype has been set, and probably should |
|
303 |
// always be the last thing we do |
|
304 |
if (type == DTM.NAMESPACE_NODE) |
|
305 |
declareNamespaceInContext(parentIndex,nodeIndex); |
|
306 |
||
307 |
return nodeIndex; |
|
308 |
} |
|
309 |
||
310 |
/** |
|
311 |
* Get the number of nodes that have been added. |
|
312 |
*/ |
|
313 |
public int getNumberOfNodes() |
|
314 |
{ |
|
315 |
return m_nodes.size(); |
|
316 |
} |
|
317 |
||
318 |
/** |
|
319 |
* This method iterates to the next node that will be added to the table. |
|
320 |
* Each call to this method adds a new node to the table, unless the end |
|
321 |
* is reached, in which case it returns null. |
|
322 |
* |
|
323 |
* @return The true if a next node is found or false if |
|
324 |
* there are no more nodes. |
|
325 |
*/ |
|
326 |
protected boolean nextNode() |
|
327 |
{ |
|
328 |
// Non-recursive one-fetch-at-a-time depth-first traversal with |
|
329 |
// attribute/namespace nodes and white-space stripping. |
|
330 |
// Navigating the DOM is simple, navigating the DTM is simple; |
|
331 |
// keeping track of both at once is a trifle baroque but at least |
|
332 |
// we've avoided most of the special cases. |
|
333 |
if (m_nodesAreProcessed) |
|
334 |
return false; |
|
335 |
||
336 |
// %REVIEW% Is this local copy Really Useful from a performance |
|
337 |
// point of view? Or is this a false microoptimization? |
|
338 |
Node pos=m_pos; |
|
339 |
Node next=null; |
|
340 |
int nexttype=NULL; |
|
341 |
||
342 |
// Navigate DOM tree |
|
343 |
do |
|
344 |
{ |
|
345 |
// Look down to first child. |
|
346 |
if (pos.hasChildNodes()) |
|
347 |
{ |
|
348 |
next = pos.getFirstChild(); |
|
349 |
||
350 |
// %REVIEW% There's probably a more elegant way to skip |
|
351 |
// the doctype. (Just let it go and Suppress it? |
|
352 |
if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType()) |
|
353 |
next=next.getNextSibling(); |
|
354 |
||
355 |
// Push DTM context -- except for children of Entity References, |
|
356 |
// which have no DTM equivalent and cause no DTM navigation. |
|
357 |
if(ENTITY_REFERENCE_NODE!=pos.getNodeType()) |
|
358 |
{ |
|
359 |
m_last_parent=m_last_kid; |
|
360 |
m_last_kid=NULL; |
|
361 |
// Whitespace-handler context stacking |
|
362 |
if(null != m_wsfilter) |
|
363 |
{ |
|
364 |
short wsv = |
|
365 |
m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this); |
|
366 |
boolean shouldStrip = (DTMWSFilter.INHERIT == wsv) |
|
367 |
? getShouldStripWhitespace() |
|
368 |
: (DTMWSFilter.STRIP == wsv); |
|
369 |
pushShouldStripWhitespace(shouldStrip); |
|
370 |
} // if(m_wsfilter) |
|
371 |
} |
|
372 |
} |
|
373 |
||
374 |
// If that fails, look up and right (but not past root!) |
|
375 |
else |
|
376 |
{ |
|
377 |
if(m_last_kid!=NULL) |
|
378 |
{ |
|
379 |
// Last node posted at this level had no more children |
|
380 |
// If it has _no_ children, we need to record that. |
|
381 |
if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED) |
|
382 |
m_firstch.setElementAt(NULL,m_last_kid); |
|
383 |
} |
|
384 |
||
385 |
while(m_last_parent != NULL) |
|
386 |
{ |
|
387 |
// %REVIEW% There's probably a more elegant way to |
|
388 |
// skip the doctype. (Just let it go and Suppress it? |
|
389 |
next = pos.getNextSibling(); |
|
390 |
if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType()) |
|
391 |
next=next.getNextSibling(); |
|
392 |
||
393 |
if(next!=null) |
|
394 |
break; // Found it! |
|
395 |
||
396 |
// No next-sibling found. Pop the DOM. |
|
397 |
pos=pos.getParentNode(); |
|
398 |
if(pos==null) |
|
399 |
{ |
|
400 |
// %TBD% Should never arise, but I want to be sure of that... |
|
401 |
if(JJK_DEBUG) |
|
402 |
{ |
|
403 |
System.out.println("***** DOM2DTM Pop Control Flow problem"); |
|
404 |
for(;;); // Freeze right here! |
|
405 |
} |
|
406 |
} |
|
407 |
||
408 |
// The only parents in the DTM are Elements. However, |
|
409 |
// the DOM could contain EntityReferences. If we |
|
410 |
// encounter one, pop it _without_ popping DTM. |
|
411 |
if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType()) |
|
412 |
{ |
|
413 |
// Nothing needs doing |
|
414 |
if(JJK_DEBUG) |
|
415 |
System.out.println("***** DOM2DTM popping EntRef"); |
|
416 |
} |
|
417 |
else |
|
418 |
{ |
|
419 |
popShouldStripWhitespace(); |
|
420 |
// Fix and pop DTM |
|
421 |
if(m_last_kid==NULL) |
|
422 |
m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element |
|
423 |
else |
|
424 |
m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else |
|
425 |
m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent); |
|
426 |
} |
|
427 |
} |
|
428 |
if(m_last_parent==NULL) |
|
429 |
next=null; |
|
430 |
} |
|
431 |
||
432 |
if(next!=null) |
|
433 |
nexttype=next.getNodeType(); |
|
434 |
||
435 |
// If it's an entity ref, advance past it. |
|
436 |
// |
|
437 |
// %REVIEW% Should we let this out the door and just suppress it? |
|
438 |
// More work, but simpler code, more likely to be correct, and |
|
439 |
// it doesn't happen very often. We'd get rid of the loop too. |
|
440 |
if (ENTITY_REFERENCE_NODE == nexttype) |
|
441 |
pos=next; |
|
442 |
} |
|
443 |
while (ENTITY_REFERENCE_NODE == nexttype); |
|
444 |
||
445 |
// Did we run out of the tree? |
|
446 |
if(next==null) |
|
447 |
{ |
|
448 |
m_nextsib.setElementAt(NULL,0); |
|
449 |
m_nodesAreProcessed = true; |
|
450 |
m_pos=null; |
|
451 |
||
452 |
if(JJK_DEBUG) |
|
453 |
{ |
|
454 |
System.out.println("***** DOM2DTM Crosscheck:"); |
|
455 |
for(int i=0;i<m_nodes.size();++i) |
|
456 |
System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i)); |
|
457 |
} |
|
458 |
||
459 |
return false; |
|
460 |
} |
|
461 |
||
462 |
// Text needs some special handling: |
|
463 |
// |
|
464 |
// DTM may skip whitespace. This is handled by the suppressNode flag, which |
|
465 |
// when true will keep the DTM node from being created. |
|
466 |
// |
|
467 |
// DTM only directly records the first DOM node of any logically-contiguous |
|
468 |
// sequence. The lastTextNode value will be set to the last node in the |
|
469 |
// contiguous sequence, and -- AFTER the DTM addNode -- can be used to |
|
470 |
// advance next over this whole block. Should be simpler than special-casing |
|
471 |
// the above loop for "Was the logically-preceeding sibling a text node". |
|
472 |
// |
|
473 |
// Finally, a DTM node should be considered a CDATASection only if all the |
|
474 |
// contiguous text it covers is CDATASections. The first Text should |
|
475 |
// force DTM to Text. |
|
476 |
||
477 |
boolean suppressNode=false; |
|
478 |
Node lastTextNode=null; |
|
479 |
||
480 |
nexttype=next.getNodeType(); |
|
481 |
||
482 |
// nexttype=pos.getNodeType(); |
|
483 |
if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype) |
|
484 |
{ |
|
485 |
// If filtering, initially assume we're going to suppress the node |
|
486 |
suppressNode=((null != m_wsfilter) && getShouldStripWhitespace()); |
|
487 |
||
488 |
// Scan logically contiguous text (siblings, plus "flattening" |
|
489 |
// of entity reference boundaries). |
|
490 |
Node n=next; |
|
491 |
while(n!=null) |
|
492 |
{ |
|
493 |
lastTextNode=n; |
|
494 |
// Any Text node means DTM considers it all Text |
|
495 |
if(TEXT_NODE == n.getNodeType()) |
|
496 |
nexttype=TEXT_NODE; |
|
497 |
// Any non-whitespace in this sequence blocks whitespace |
|
498 |
// suppression |
|
499 |
suppressNode &= |
|
500 |
XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue()); |
|
501 |
||
502 |
n=logicalNextDOMTextNode(n); |
|
503 |
} |
|
504 |
} |
|
505 |
||
506 |
// Special handling for PIs: Some DOMs represent the XML |
|
507 |
// Declaration as a PI. This is officially incorrect, per the DOM |
|
508 |
// spec, but is considered a "wrong but tolerable" temporary |
|
509 |
// workaround pending proper handling of these fields in DOM Level |
|
510 |
// 3. We want to recognize and reject that case. |
|
511 |
else if(PROCESSING_INSTRUCTION_NODE==nexttype) |
|
512 |
{ |
|
513 |
suppressNode = (pos.getNodeName().toLowerCase().equals("xml")); |
|
514 |
} |
|
515 |
||
516 |
||
517 |
if(!suppressNode) |
|
518 |
{ |
|
519 |
// Inserting next. NOTE that we force the node type; for |
|
520 |
// coalesced Text, this records CDATASections adjacent to |
|
521 |
// ordinary Text as Text. |
|
522 |
int nextindex=addNode(next,m_last_parent,m_last_kid, |
|
523 |
nexttype); |
|
524 |
||
525 |
m_last_kid=nextindex; |
|
526 |
||
527 |
if(ELEMENT_NODE == nexttype) |
|
528 |
{ |
|
529 |
int attrIndex=NULL; // start with no previous sib |
|
530 |
// Process attributes _now_, rather than waiting. |
|
531 |
// Simpler control flow, makes NS cache available immediately. |
|
532 |
NamedNodeMap attrs=next.getAttributes(); |
|
533 |
int attrsize=(attrs==null) ? 0 : attrs.getLength(); |
|
534 |
if(attrsize>0) |
|
535 |
{ |
|
536 |
for(int i=0;i<attrsize;++i) |
|
537 |
{ |
|
538 |
// No need to force nodetype in this case; |
|
539 |
// addNode() will take care of switching it from |
|
540 |
// Attr to Namespace if necessary. |
|
541 |
attrIndex=addNode(attrs.item(i), |
|
542 |
nextindex,attrIndex,NULL); |
|
543 |
m_firstch.setElementAt(DTM.NULL,attrIndex); |
|
544 |
||
545 |
// If the xml: prefix is explicitly declared |
|
546 |
// we don't need to synthesize one. |
|
547 |
// |
|
548 |
// NOTE that XML Namespaces were not originally |
|
549 |
// defined as being namespace-aware (grrr), and |
|
550 |
// while the W3C is planning to fix this it's |
|
551 |
// safer for now to test the QName and trust the |
|
552 |
// parsers to prevent anyone from redefining the |
|
553 |
// reserved xmlns: prefix |
|
554 |
if(!m_processedFirstElement |
|
555 |
&& "xmlns:xml".equals(attrs.item(i).getNodeName())) |
|
556 |
m_processedFirstElement=true; |
|
557 |
} |
|
558 |
// Terminate list of attrs, and make sure they aren't |
|
559 |
// considered children of the element |
|
560 |
} // if attrs exist |
|
561 |
if(!m_processedFirstElement) |
|
562 |
{ |
|
563 |
// The DOM might not have an explicit declaration for the |
|
564 |
// implicit "xml:" prefix, but the XPath data model |
|
565 |
// requires that this appear as a Namespace Node so we |
|
566 |
// have to synthesize one. You can think of this as |
|
567 |
// being a default attribute defined by the XML |
|
568 |
// Namespaces spec rather than by the DTD. |
|
569 |
attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode( |
|
570 |
(Element)next,"xml",NAMESPACE_DECL_NS, |
|
571 |
makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1) |
|
572 |
), |
|
573 |
nextindex,attrIndex,NULL); |
|
574 |
m_firstch.setElementAt(DTM.NULL,attrIndex); |
|
575 |
m_processedFirstElement=true; |
|
576 |
} |
|
577 |
if(attrIndex!=NULL) |
|
578 |
m_nextsib.setElementAt(DTM.NULL,attrIndex); |
|
579 |
} //if(ELEMENT_NODE) |
|
580 |
} // (if !suppressNode) |
|
581 |
||
582 |
// Text postprocessing: Act on values stored above |
|
583 |
if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype) |
|
584 |
{ |
|
585 |
// %TBD% If nexttype was forced to TEXT, patch the DTM node |
|
586 |
||
587 |
next=lastTextNode; // Advance the DOM cursor over contiguous text |
|
588 |
} |
|
589 |
||
590 |
// Remember where we left off. |
|
591 |
m_pos=next; |
|
592 |
return true; |
|
593 |
} |
|
594 |
||
595 |
||
596 |
/** |
|
597 |
* Return an DOM node for the given node. |
|
598 |
* |
|
599 |
* @param nodeHandle The node ID. |
|
600 |
* |
|
601 |
* @return A node representation of the DTM node. |
|
602 |
*/ |
|
603 |
public Node getNode(int nodeHandle) |
|
604 |
{ |
|
605 |
||
606 |
int identity = makeNodeIdentity(nodeHandle); |
|
607 |
||
47359
e1a6c0168741
8181150: Fix lint warnings in JAXP repo: rawtypes and unchecked
joehw
parents:
47216
diff
changeset
|
608 |
return m_nodes.get(identity); |
6 | 609 |
} |
610 |
||
611 |
/** |
|
612 |
* Get a Node from an identity index. |
|
613 |
* |
|
614 |
* NEEDSDOC @param nodeIdentity |
|
615 |
* |
|
616 |
* NEEDSDOC ($objectName$) @return |
|
617 |
*/ |
|
618 |
protected Node lookupNode(int nodeIdentity) |
|
619 |
{ |
|
47359
e1a6c0168741
8181150: Fix lint warnings in JAXP repo: rawtypes and unchecked
joehw
parents:
47216
diff
changeset
|
620 |
return m_nodes.get(nodeIdentity); |
6 | 621 |
} |
622 |
||
623 |
/** |
|
624 |
* Get the next node identity value in the list, and call the iterator |
|
625 |
* if it hasn't been added yet. |
|
626 |
* |
|
627 |
* @param identity The node identity (index). |
|
628 |
* @return identity+1, or DTM.NULL. |
|
629 |
*/ |
|
630 |
protected int getNextNodeIdentity(int identity) |
|
631 |
{ |
|
632 |
||
633 |
identity += 1; |
|
634 |
||
635 |
if (identity >= m_nodes.size()) |
|
636 |
{ |
|
637 |
if (!nextNode()) |
|
638 |
identity = DTM.NULL; |
|
639 |
} |
|
640 |
||
641 |
return identity; |
|
642 |
} |
|
643 |
||
644 |
/** |
|
645 |
* Get the handle from a Node. |
|
646 |
* <p>%OPT% This will be pretty slow.</p> |
|
647 |
* |
|
648 |
* <p>%OPT% An XPath-like search (walk up DOM to root, tracking path; |
|
649 |
* walk down DTM reconstructing path) might be considerably faster |
|
650 |
* on later nodes in large documents. That might also imply improving |
|
651 |
* this call to handle nodes which would be in this DTM but |
|
652 |
* have not yet been built, which might or might not be a Good Thing.</p> |
|
653 |
* |
|
654 |
* %REVIEW% This relies on being able to test node-identity via |
|
655 |
* object-identity. DTM2DOM proxying is a great example of a case where |
|
656 |
* that doesn't work. DOM Level 3 will provide the isSameNode() method |
|
657 |
* to fix that, but until then this is going to be flaky. |
|
658 |
* |
|
659 |
* @param node A node, which may be null. |
|
660 |
* |
|
661 |
* @return The node handle or <code>DTM.NULL</code>. |
|
662 |
*/ |
|
663 |
private int getHandleFromNode(Node node) |
|
664 |
{ |
|
665 |
if (null != node) |
|
666 |
{ |
|
667 |
int len = m_nodes.size(); |
|
668 |
boolean isMore; |
|
669 |
int i = 0; |
|
670 |
do |
|
671 |
{ |
|
672 |
for (; i < len; i++) |
|
673 |
{ |
|
47359
e1a6c0168741
8181150: Fix lint warnings in JAXP repo: rawtypes and unchecked
joehw
parents:
47216
diff
changeset
|
674 |
if (m_nodes.get(i) == node) |
6 | 675 |
return makeNodeHandle(i); |
676 |
} |
|
677 |
||
678 |
isMore = nextNode(); |
|
679 |
||
680 |
len = m_nodes.size(); |
|
681 |
||
682 |
} |
|
683 |
while(isMore || i < len); |
|
684 |
} |
|
685 |
||
686 |
return DTM.NULL; |
|
687 |
} |
|
688 |
||
689 |
/** Get the handle from a Node. This is a more robust version of |
|
690 |
* getHandleFromNode, intended to be usable by the public. |
|
691 |
* |
|
692 |
* <p>%OPT% This will be pretty slow.</p> |
|
693 |
* |
|
694 |
* %REVIEW% This relies on being able to test node-identity via |
|
695 |
* object-identity. DTM2DOM proxying is a great example of a case where |
|
696 |
* that doesn't work. DOM Level 3 will provide the isSameNode() method |
|
697 |
* to fix that, but until then this is going to be flaky. |
|
698 |
* |
|
699 |
* @param node A node, which may be null. |
|
700 |
* |
|
701 |
* @return The node handle or <code>DTM.NULL</code>. */ |
|
702 |
public int getHandleOfNode(Node node) |
|
703 |
{ |
|
704 |
if (null != node) |
|
705 |
{ |
|
706 |
// Is Node actually within the same document? If not, don't search! |
|
707 |
// This would be easier if m_root was always the Document node, but |
|
708 |
// we decided to allow wrapping a DTM around a subtree. |
|
709 |
if((m_root==node) || |
|
710 |
(m_root.getNodeType()==DOCUMENT_NODE && |
|
711 |
m_root==node.getOwnerDocument()) || |
|
712 |
(m_root.getNodeType()!=DOCUMENT_NODE && |
|
713 |
m_root.getOwnerDocument()==node.getOwnerDocument()) |
|
714 |
) |
|
715 |
{ |
|
716 |
// If node _is_ in m_root's tree, find its handle |
|
717 |
// |
|
718 |
// %OPT% This check may be improved significantly when DOM |
|
719 |
// Level 3 nodeKey and relative-order tests become |
|
720 |
// available! |
|
721 |
for(Node cursor=node; |
|
722 |
cursor!=null; |
|
723 |
cursor= |
|
724 |
(cursor.getNodeType()!=ATTRIBUTE_NODE) |
|
725 |
? cursor.getParentNode() |
|
726 |
: ((org.w3c.dom.Attr)cursor).getOwnerElement()) |
|
727 |
{ |
|
728 |
if(cursor==m_root) |
|
729 |
// We know this node; find its handle. |
|
730 |
return getHandleFromNode(node); |
|
731 |
} // for ancestors of node |
|
732 |
} // if node and m_root in same Document |
|
733 |
} // if node!=null |
|
734 |
||
735 |
return DTM.NULL; |
|
736 |
} |
|
737 |
||
738 |
/** |
|
739 |
* Retrieves an attribute node by by qualified name and namespace URI. |
|
740 |
* |
|
741 |
* @param nodeHandle int Handle of the node upon which to look up this attribute.. |
|
742 |
* @param namespaceURI The namespace URI of the attribute to |
|
743 |
* retrieve, or null. |
|
744 |
* @param name The local name of the attribute to |
|
745 |
* retrieve. |
|
746 |
* @return The attribute node handle with the specified name ( |
|
747 |
* <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such |
|
748 |
* attribute. |
|
749 |
*/ |
|
750 |
public int getAttributeNode(int nodeHandle, String namespaceURI, |
|
751 |
String name) |
|
752 |
{ |
|
753 |
||
754 |
// %OPT% This is probably slower than it needs to be. |
|
755 |
if (null == namespaceURI) |
|
756 |
namespaceURI = ""; |
|
757 |
||
758 |
int type = getNodeType(nodeHandle); |
|
759 |
||
760 |
if (DTM.ELEMENT_NODE == type) |
|
761 |
{ |
|
762 |
||
763 |
// Assume that attributes immediately follow the element. |
|
764 |
int identity = makeNodeIdentity(nodeHandle); |
|
765 |
||
766 |
while (DTM.NULL != (identity = getNextNodeIdentity(identity))) |
|
767 |
{ |
|
768 |
// Assume this can not be null. |
|
769 |
type = _type(identity); |
|
770 |
||
771 |
// %REVIEW% |
|
772 |
// Should namespace nodes be retrievable DOM-style as attrs? |
|
773 |
// If not we need a separate function... which may be desirable |
|
774 |
// architecturally, but which is ugly from a code point of view. |
|
775 |
// (If we REALLY insist on it, this code should become a subroutine |
|
776 |
// of both -- retrieve the node, then test if the type matches |
|
777 |
// what you're looking for.) |
|
778 |
if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE) |
|
779 |
{ |
|
780 |
Node node = lookupNode(identity); |
|
781 |
String nodeuri = node.getNamespaceURI(); |
|
782 |
||
783 |
if (null == nodeuri) |
|
784 |
nodeuri = ""; |
|
785 |
||
786 |
String nodelocalname = node.getLocalName(); |
|
787 |
||
788 |
if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname)) |
|
789 |
return makeNodeHandle(identity); |
|
790 |
} |
|
791 |
||
792 |
else // if (DTM.NAMESPACE_NODE != type) |
|
793 |
{ |
|
794 |
break; |
|
795 |
} |
|
796 |
} |
|
797 |
} |
|
798 |
||
799 |
return DTM.NULL; |
|
800 |
} |
|
801 |
||
802 |
/** |
|
803 |
* Get the string-value of a node as a String object |
|
804 |
* (see http://www.w3.org/TR/xpath#data-model |
|
805 |
* for the definition of a node's string-value). |
|
806 |
* |
|
807 |
* @param nodeHandle The node ID. |
|
808 |
* |
|
809 |
* @return A string object that represents the string-value of the given node. |
|
810 |
*/ |
|
811 |
public XMLString getStringValue(int nodeHandle) |
|
812 |
{ |
|
813 |
||
814 |
int type = getNodeType(nodeHandle); |
|
815 |
Node node = getNode(nodeHandle); |
|
816 |
// %TBD% If an element only has one text node, we should just use it |
|
817 |
// directly. |
|
818 |
if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type |
|
819 |
|| DTM.DOCUMENT_FRAGMENT_NODE == type) |
|
820 |
{ |
|
821 |
FastStringBuffer buf = StringBufferPool.get(); |
|
822 |
String s; |
|
823 |
||
824 |
try |
|
825 |
{ |
|
826 |
getNodeData(node, buf); |
|
827 |
||
828 |
s = (buf.length() > 0) ? buf.toString() : ""; |
|
829 |
} |
|
830 |
finally |
|
831 |
{ |
|
832 |
StringBufferPool.free(buf); |
|
833 |
} |
|
834 |
||
835 |
return m_xstrf.newstr( s ); |
|
836 |
} |
|
837 |
else if(TEXT_NODE == type || CDATA_SECTION_NODE == type) |
|
838 |
{ |
|
839 |
// If this is a DTM text node, it may be made of multiple DOM text |
|
840 |
// nodes -- including navigating into Entity References. DOM2DTM |
|
841 |
// records the first node in the sequence and requires that we |
|
842 |
// pick up the others when we retrieve the DTM node's value. |
|
843 |
// |
|
844 |
// %REVIEW% DOM Level 3 is expected to add a "whole text" |
|
845 |
// retrieval method which performs this function for us. |
|
846 |
FastStringBuffer buf = StringBufferPool.get(); |
|
847 |
while(node!=null) |
|
848 |
{ |
|
849 |
buf.append(node.getNodeValue()); |
|
850 |
node=logicalNextDOMTextNode(node); |
|
851 |
} |
|
852 |
String s=(buf.length() > 0) ? buf.toString() : ""; |
|
853 |
StringBufferPool.free(buf); |
|
854 |
return m_xstrf.newstr( s ); |
|
855 |
} |
|
856 |
else |
|
857 |
return m_xstrf.newstr( node.getNodeValue() ); |
|
858 |
} |
|
859 |
||
860 |
/** |
|
861 |
* Determine if the string-value of a node is whitespace |
|
862 |
* |
|
863 |
* @param nodeHandle The node Handle. |
|
864 |
* |
|
865 |
* @return Return true if the given node is whitespace. |
|
866 |
*/ |
|
867 |
public boolean isWhitespace(int nodeHandle) |
|
868 |
{ |
|
869 |
int type = getNodeType(nodeHandle); |
|
870 |
Node node = getNode(nodeHandle); |
|
871 |
if(TEXT_NODE == type || CDATA_SECTION_NODE == type) |
|
872 |
{ |
|
873 |
// If this is a DTM text node, it may be made of multiple DOM text |
|
874 |
// nodes -- including navigating into Entity References. DOM2DTM |
|
875 |
// records the first node in the sequence and requires that we |
|
876 |
// pick up the others when we retrieve the DTM node's value. |
|
877 |
// |
|
878 |
// %REVIEW% DOM Level 3 is expected to add a "whole text" |
|
879 |
// retrieval method which performs this function for us. |
|
880 |
FastStringBuffer buf = StringBufferPool.get(); |
|
881 |
while(node!=null) |
|
882 |
{ |
|
883 |
buf.append(node.getNodeValue()); |
|
884 |
node=logicalNextDOMTextNode(node); |
|
885 |
} |
|
886 |
boolean b = buf.isWhitespace(0, buf.length()); |
|
887 |
StringBufferPool.free(buf); |
|
888 |
return b; |
|
889 |
} |
|
890 |
return false; |
|
891 |
} |
|
892 |
||
893 |
/** |
|
894 |
* Retrieve the text content of a DOM subtree, appending it into a |
|
895 |
* user-supplied FastStringBuffer object. Note that attributes are |
|
896 |
* not considered part of the content of an element. |
|
897 |
* <p> |
|
898 |
* There are open questions regarding whitespace stripping. |
|
899 |
* Currently we make no special effort in that regard, since the standard |
|
900 |
* DOM doesn't yet provide DTD-based information to distinguish |
|
901 |
* whitespace-in-element-context from genuine #PCDATA. Note that we |
|
902 |
* should probably also consider xml:space if/when we address this. |
|
903 |
* DOM Level 3 may solve the problem for us. |
|
904 |
* <p> |
|
905 |
* %REVIEW% Actually, since this method operates on the DOM side of the |
|
906 |
* fence rather than the DTM side, it SHOULDN'T do |
|
907 |
* any special handling. The DOM does what the DOM does; if you want |
|
908 |
* DTM-level abstractions, use DTM-level methods. |
|
909 |
* |
|
910 |
* @param node Node whose subtree is to be walked, gathering the |
|
911 |
* contents of all Text or CDATASection nodes. |
|
912 |
* @param buf FastStringBuffer into which the contents of the text |
|
913 |
* nodes are to be concatenated. |
|
914 |
*/ |
|
915 |
protected static void getNodeData(Node node, FastStringBuffer buf) |
|
916 |
{ |
|
917 |
||
918 |
switch (node.getNodeType()) |
|
919 |
{ |
|
920 |
case Node.DOCUMENT_FRAGMENT_NODE : |
|
921 |
case Node.DOCUMENT_NODE : |
|
922 |
case Node.ELEMENT_NODE : |
|
923 |
{ |
|
924 |
for (Node child = node.getFirstChild(); null != child; |
|
925 |
child = child.getNextSibling()) |
|
926 |
{ |
|
927 |
getNodeData(child, buf); |
|
928 |
} |
|
929 |
} |
|
930 |
break; |
|
931 |
case Node.TEXT_NODE : |
|
932 |
case Node.CDATA_SECTION_NODE : |
|
933 |
case Node.ATTRIBUTE_NODE : // Never a child but might be our starting node |
|
934 |
buf.append(node.getNodeValue()); |
|
935 |
break; |
|
936 |
case Node.PROCESSING_INSTRUCTION_NODE : |
|
937 |
// warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); |
|
938 |
break; |
|
939 |
default : |
|
940 |
// ignore |
|
941 |
break; |
|
942 |
} |
|
943 |
} |
|
944 |
||
945 |
/** |
|
946 |
* Given a node handle, return its DOM-style node name. This will |
|
947 |
* include names such as #text or #document. |
|
948 |
* |
|
949 |
* @param nodeHandle the id of the node. |
|
950 |
* @return String Name of this node, which may be an empty string. |
|
951 |
* %REVIEW% Document when empty string is possible... |
|
952 |
* %REVIEW-COMMENT% It should never be empty, should it? |
|
953 |
*/ |
|
954 |
public String getNodeName(int nodeHandle) |
|
955 |
{ |
|
956 |
||
957 |
Node node = getNode(nodeHandle); |
|
958 |
||
959 |
// Assume non-null. |
|
960 |
return node.getNodeName(); |
|
961 |
} |
|
962 |
||
963 |
/** |
|
964 |
* Given a node handle, return the XPath node name. This should be |
|
965 |
* the name as described by the XPath data model, NOT the DOM-style |
|
966 |
* name. |
|
967 |
* |
|
968 |
* @param nodeHandle the id of the node. |
|
969 |
* @return String Name of this node, which may be an empty string. |
|
970 |
*/ |
|
971 |
public String getNodeNameX(int nodeHandle) |
|
972 |
{ |
|
973 |
||
974 |
String name; |
|
975 |
short type = getNodeType(nodeHandle); |
|
976 |
||
977 |
switch (type) |
|
978 |
{ |
|
979 |
case DTM.NAMESPACE_NODE : |
|
980 |
{ |
|
981 |
Node node = getNode(nodeHandle); |
|
982 |
||
983 |
// assume not null. |
|
984 |
name = node.getNodeName(); |
|
985 |
if(name.startsWith("xmlns:")) |
|
986 |
{ |
|
987 |
name = QName.getLocalPart(name); |
|
988 |
} |
|
989 |
else if(name.equals("xmlns")) |
|
990 |
{ |
|
991 |
name = ""; |
|
992 |
} |
|
993 |
} |
|
994 |
break; |
|
995 |
case DTM.ATTRIBUTE_NODE : |
|
996 |
case DTM.ELEMENT_NODE : |
|
997 |
case DTM.ENTITY_REFERENCE_NODE : |
|
998 |
case DTM.PROCESSING_INSTRUCTION_NODE : |
|
999 |
{ |
|
1000 |
Node node = getNode(nodeHandle); |
|
1001 |
||
1002 |
// assume not null. |
|
1003 |
name = node.getNodeName(); |
|
1004 |
} |
|
1005 |
break; |
|
1006 |
default : |
|
1007 |
name = ""; |
|
1008 |
} |
|
1009 |
||
1010 |
return name; |
|
1011 |
} |
|
1012 |
||
1013 |
/** |
|
1014 |
* Given a node handle, return its XPath-style localname. |
|
1015 |
* (As defined in Namespaces, this is the portion of the name after any |
|
1016 |
* colon character). |
|
1017 |
* |
|
1018 |
* @param nodeHandle the id of the node. |
|
1019 |
* @return String Local name of this node. |
|
1020 |
*/ |
|
1021 |
public String getLocalName(int nodeHandle) |
|
1022 |
{ |
|
1023 |
if(JJK_NEWCODE) |
|
1024 |
{ |
|
1025 |
int id=makeNodeIdentity(nodeHandle); |
|
1026 |
if(NULL==id) return null; |
|
47359
e1a6c0168741
8181150: Fix lint warnings in JAXP repo: rawtypes and unchecked
joehw
parents:
47216
diff
changeset
|
1027 |
Node newnode=m_nodes.get(id); |
6 | 1028 |
String newname=newnode.getLocalName(); |
1029 |
if (null == newname) |
|
1030 |
{ |
|
1031 |
// XSLT treats PIs, and possibly other things, as having QNames. |
|
1032 |
String qname = newnode.getNodeName(); |
|
1033 |
if('#'==qname.charAt(0)) |
|
1034 |
{ |
|
1035 |
// Match old default for this function |
|
1036 |
// This conversion may or may not be necessary |
|
1037 |
newname=""; |
|
1038 |
} |
|
1039 |
else |
|
1040 |
{ |
|
1041 |
int index = qname.indexOf(':'); |
|
1042 |
newname = (index < 0) ? qname : qname.substring(index + 1); |
|
1043 |
} |
|
1044 |
} |
|
1045 |
return newname; |
|
1046 |
} |
|
1047 |
else |
|
1048 |
{ |
|
1049 |
String name; |
|
1050 |
short type = getNodeType(nodeHandle); |
|
1051 |
switch (type) |
|
1052 |
{ |
|
1053 |
case DTM.ATTRIBUTE_NODE : |
|
1054 |
case DTM.ELEMENT_NODE : |
|
1055 |
case DTM.ENTITY_REFERENCE_NODE : |
|
1056 |
case DTM.NAMESPACE_NODE : |
|
1057 |
case DTM.PROCESSING_INSTRUCTION_NODE : |
|
1058 |
{ |
|
1059 |
Node node = getNode(nodeHandle); |
|
1060 |
||
1061 |
// assume not null. |
|
1062 |
name = node.getLocalName(); |
|
1063 |
||
1064 |
if (null == name) |
|
1065 |
{ |
|
1066 |
String qname = node.getNodeName(); |
|
1067 |
int index = qname.indexOf(':'); |
|
1068 |
||
1069 |
name = (index < 0) ? qname : qname.substring(index + 1); |
|
1070 |
} |
|
1071 |
} |
|
1072 |
break; |
|
1073 |
default : |
|
1074 |
name = ""; |
|
1075 |
} |
|
1076 |
return name; |
|
1077 |
} |
|
1078 |
} |
|
1079 |
||
1080 |
/** |
|
1081 |
* Given a namespace handle, return the prefix that the namespace decl is |
|
1082 |
* mapping. |
|
1083 |
* Given a node handle, return the prefix used to map to the namespace. |
|
1084 |
* |
|
1085 |
* <p> %REVIEW% Are you sure you want "" for no prefix? </p> |
|
1086 |
* <p> %REVIEW-COMMENT% I think so... not totally sure. -sb </p> |
|
1087 |
* |
|
1088 |
* @param nodeHandle the id of the node. |
|
1089 |
* @return String prefix of this node's name, or "" if no explicit |
|
1090 |
* namespace prefix was given. |
|
1091 |
*/ |
|
1092 |
public String getPrefix(int nodeHandle) |
|
1093 |
{ |
|
1094 |
||
1095 |
String prefix; |
|
1096 |
short type = getNodeType(nodeHandle); |
|
1097 |
||
1098 |
switch (type) |
|
1099 |
{ |
|
1100 |
case DTM.NAMESPACE_NODE : |
|
1101 |
{ |
|
1102 |
Node node = getNode(nodeHandle); |
|
1103 |
||
1104 |
// assume not null. |
|
1105 |
String qname = node.getNodeName(); |
|
1106 |
int index = qname.indexOf(':'); |
|
1107 |
||
1108 |
prefix = (index < 0) ? "" : qname.substring(index + 1); |
|
1109 |
} |
|
1110 |
break; |
|
1111 |
case DTM.ATTRIBUTE_NODE : |
|
1112 |
case DTM.ELEMENT_NODE : |
|
1113 |
{ |
|
1114 |
Node node = getNode(nodeHandle); |
|
1115 |
||
1116 |
// assume not null. |
|
1117 |
String qname = node.getNodeName(); |
|
1118 |
int index = qname.indexOf(':'); |
|
1119 |
||
1120 |
prefix = (index < 0) ? "" : qname.substring(0, index); |
|
1121 |
} |
|
1122 |
break; |
|
1123 |
default : |
|
1124 |
prefix = ""; |
|
1125 |
} |
|
1126 |
||
1127 |
return prefix; |
|
1128 |
} |
|
1129 |
||
1130 |
/** |
|
1131 |
* Given a node handle, return its DOM-style namespace URI |
|
1132 |
* (As defined in Namespaces, this is the declared URI which this node's |
|
1133 |
* prefix -- or default in lieu thereof -- was mapped to.) |
|
1134 |
* |
|
1135 |
* <p>%REVIEW% Null or ""? -sb</p> |
|
1136 |
* |
|
1137 |
* @param nodeHandle the id of the node. |
|
1138 |
* @return String URI value of this node's namespace, or null if no |
|
1139 |
* namespace was resolved. |
|
1140 |
*/ |
|
1141 |
public String getNamespaceURI(int nodeHandle) |
|
1142 |
{ |
|
1143 |
if(JJK_NEWCODE) |
|
1144 |
{ |
|
1145 |
int id=makeNodeIdentity(nodeHandle); |
|
1146 |
if(id==NULL) return null; |
|
47359
e1a6c0168741
8181150: Fix lint warnings in JAXP repo: rawtypes and unchecked
joehw
parents:
47216
diff
changeset
|
1147 |
Node node=m_nodes.get(id); |
6 | 1148 |
return node.getNamespaceURI(); |
1149 |
} |
|
1150 |
else |
|
1151 |
{ |
|
1152 |
String nsuri; |
|
1153 |
short type = getNodeType(nodeHandle); |
|
1154 |
||
1155 |
switch (type) |
|
1156 |
{ |
|
1157 |
case DTM.ATTRIBUTE_NODE : |
|
1158 |
case DTM.ELEMENT_NODE : |
|
1159 |
case DTM.ENTITY_REFERENCE_NODE : |
|
1160 |
case DTM.NAMESPACE_NODE : |
|
1161 |
case DTM.PROCESSING_INSTRUCTION_NODE : |
|
1162 |
{ |
|
1163 |
Node node = getNode(nodeHandle); |
|
1164 |
||
1165 |
// assume not null. |
|
1166 |
nsuri = node.getNamespaceURI(); |
|
1167 |
||
1168 |
// %TBD% Handle DOM1? |
|
1169 |
} |
|
1170 |
break; |
|
1171 |
default : |
|
1172 |
nsuri = null; |
|
1173 |
} |
|
1174 |
||
1175 |
return nsuri; |
|
1176 |
} |
|
1177 |
||
1178 |
} |
|
1179 |
||
1180 |
/** Utility function: Given a DOM Text node, determine whether it is |
|
1181 |
* logically followed by another Text or CDATASection node. This may |
|
1182 |
* involve traversing into Entity References. |
|
1183 |
* |
|
1184 |
* %REVIEW% DOM Level 3 is expected to add functionality which may |
|
1185 |
* allow us to retire this. |
|
1186 |
*/ |
|
1187 |
private Node logicalNextDOMTextNode(Node n) |
|
1188 |
{ |
|
1189 |
Node p=n.getNextSibling(); |
|
1190 |
if(p==null) |
|
1191 |
{ |
|
1192 |
// Walk out of any EntityReferenceNodes that ended with text |
|
1193 |
for(n=n.getParentNode(); |
|
1194 |
n!=null && ENTITY_REFERENCE_NODE == n.getNodeType(); |
|
1195 |
n=n.getParentNode()) |
|
1196 |
{ |
|
1197 |
p=n.getNextSibling(); |
|
1198 |
if(p!=null) |
|
1199 |
break; |
|
1200 |
} |
|
1201 |
} |
|
1202 |
n=p; |
|
1203 |
while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType()) |
|
1204 |
{ |
|
1205 |
// Walk into any EntityReferenceNodes that start with text |
|
1206 |
if(n.hasChildNodes()) |
|
1207 |
n=n.getFirstChild(); |
|
1208 |
else |
|
1209 |
n=n.getNextSibling(); |
|
1210 |
} |
|
1211 |
if(n!=null) |
|
1212 |
{ |
|
1213 |
// Found a logical next sibling. Is it text? |
|
1214 |
int ntype=n.getNodeType(); |
|
1215 |
if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype) |
|
1216 |
n=null; |
|
1217 |
} |
|
1218 |
return n; |
|
1219 |
} |
|
1220 |
||
1221 |
/** |
|
1222 |
* Given a node handle, return its node value. This is mostly |
|
1223 |
* as defined by the DOM, but may ignore some conveniences. |
|
1224 |
* <p> |
|
1225 |
* |
|
1226 |
* @param nodeHandle The node id. |
|
1227 |
* @return String Value of this node, or null if not |
|
1228 |
* meaningful for this node type. |
|
1229 |
*/ |
|
1230 |
public String getNodeValue(int nodeHandle) |
|
1231 |
{ |
|
1232 |
// The _type(nodeHandle) call was taking the lion's share of our |
|
1233 |
// time, and was wrong anyway since it wasn't coverting handle to |
|
1234 |
// identity. Inlined it. |
|
1235 |
int type = _exptype(makeNodeIdentity(nodeHandle)); |
|
1236 |
type=(NULL != type) ? getNodeType(nodeHandle) : NULL; |
|
1237 |
||
1238 |
if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type) |
|
1239 |
return getNode(nodeHandle).getNodeValue(); |
|
1240 |
||
1241 |
// If this is a DTM text node, it may be made of multiple DOM text |
|
1242 |
// nodes -- including navigating into Entity References. DOM2DTM |
|
1243 |
// records the first node in the sequence and requires that we |
|
1244 |
// pick up the others when we retrieve the DTM node's value. |
|
1245 |
// |
|
1246 |
// %REVIEW% DOM Level 3 is expected to add a "whole text" |
|
1247 |
// retrieval method which performs this function for us. |
|
1248 |
Node node = getNode(nodeHandle); |
|
1249 |
Node n=logicalNextDOMTextNode(node); |
|
1250 |
if(n==null) |
|
1251 |
return node.getNodeValue(); |
|
1252 |
||
1253 |
FastStringBuffer buf = StringBufferPool.get(); |
|
1254 |
buf.append(node.getNodeValue()); |
|
1255 |
while(n!=null) |
|
1256 |
{ |
|
1257 |
buf.append(n.getNodeValue()); |
|
1258 |
n=logicalNextDOMTextNode(n); |
|
1259 |
} |
|
1260 |
String s = (buf.length() > 0) ? buf.toString() : ""; |
|
1261 |
StringBufferPool.free(buf); |
|
1262 |
return s; |
|
1263 |
} |
|
1264 |
||
1265 |
/** |
|
1266 |
* A document type declaration information item has the following properties: |
|
1267 |
* |
|
1268 |
* 1. [system identifier] The system identifier of the external subset, if |
|
1269 |
* it exists. Otherwise this property has no value. |
|
1270 |
* |
|
1271 |
* @return the system identifier String object, or null if there is none. |
|
1272 |
*/ |
|
1273 |
public String getDocumentTypeDeclarationSystemIdentifier() |
|
1274 |
{ |
|
1275 |
||
1276 |
Document doc; |
|
1277 |
||
1278 |
if (m_root.getNodeType() == Node.DOCUMENT_NODE) |
|
1279 |
doc = (Document) m_root; |
|
1280 |
else |
|
1281 |
doc = m_root.getOwnerDocument(); |
|
1282 |
||
1283 |
if (null != doc) |
|
1284 |
{ |
|
1285 |
DocumentType dtd = doc.getDoctype(); |
|
1286 |
||
1287 |
if (null != dtd) |
|
1288 |
{ |
|
1289 |
return dtd.getSystemId(); |
|
1290 |
} |
|
1291 |
} |
|
1292 |
||
1293 |
return null; |
|
1294 |
} |
|
1295 |
||
1296 |
/** |
|
1297 |
* Return the public identifier of the external subset, |
|
1298 |
* normalized as described in 4.2.2 External Entities [XML]. If there is |
|
1299 |
* no external subset or if it has no public identifier, this property |
|
1300 |
* has no value. |
|
1301 |
* |
|
1302 |
* @return the public identifier String object, or null if there is none. |
|
1303 |
*/ |
|
1304 |
public String getDocumentTypeDeclarationPublicIdentifier() |
|
1305 |
{ |
|
1306 |
||
1307 |
Document doc; |
|
1308 |
||
1309 |
if (m_root.getNodeType() == Node.DOCUMENT_NODE) |
|
1310 |
doc = (Document) m_root; |
|
1311 |
else |
|
1312 |
doc = m_root.getOwnerDocument(); |
|
1313 |
||
1314 |
if (null != doc) |
|
1315 |
{ |
|
1316 |
DocumentType dtd = doc.getDoctype(); |
|
1317 |
||
1318 |
if (null != dtd) |
|
1319 |
{ |
|
1320 |
return dtd.getPublicId(); |
|
1321 |
} |
|
1322 |
} |
|
1323 |
||
1324 |
return null; |
|
1325 |
} |
|
1326 |
||
1327 |
/** |
|
1328 |
* Returns the <code>Element</code> whose <code>ID</code> is given by |
|
1329 |
* <code>elementId</code>. If no such element exists, returns |
|
1330 |
* <code>DTM.NULL</code>. Behavior is not defined if more than one element |
|
1331 |
* has this <code>ID</code>. Attributes (including those |
|
1332 |
* with the name "ID") are not of type ID unless so defined by DTD/Schema |
|
1333 |
* information available to the DTM implementation. |
|
1334 |
* Implementations that do not know whether attributes are of type ID or |
|
1335 |
* not are expected to return <code>DTM.NULL</code>. |
|
1336 |
* |
|
1337 |
* <p>%REVIEW% Presumably IDs are still scoped to a single document, |
|
1338 |
* and this operation searches only within a single document, right? |
|
1339 |
* Wouldn't want collisions between DTMs in the same process.</p> |
|
1340 |
* |
|
1341 |
* @param elementId The unique <code>id</code> value for an element. |
|
1342 |
* @return The handle of the matching element. |
|
1343 |
*/ |
|
1344 |
public int getElementById(String elementId) |
|
1345 |
{ |
|
1346 |
||
1347 |
Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) |
|
1348 |
? (Document) m_root : m_root.getOwnerDocument(); |
|
1349 |
||
1350 |
if(null != doc) |
|
1351 |
{ |
|
1352 |
Node elem = doc.getElementById(elementId); |
|
1353 |
if(null != elem) |
|
1354 |
{ |
|
1355 |
int elemHandle = getHandleFromNode(elem); |
|
1356 |
||
1357 |
if(DTM.NULL == elemHandle) |
|
1358 |
{ |
|
1359 |
int identity = m_nodes.size()-1; |
|
1360 |
while (DTM.NULL != (identity = getNextNodeIdentity(identity))) |
|
1361 |
{ |
|
1362 |
Node node = getNode(identity); |
|
1363 |
if(node == elem) |
|
1364 |
{ |
|
1365 |
elemHandle = getHandleFromNode(elem); |
|
1366 |
break; |
|
1367 |
} |
|
1368 |
} |
|
1369 |
} |
|
1370 |
||
1371 |
return elemHandle; |
|
1372 |
} |
|
1373 |
||
1374 |
} |
|
1375 |
return DTM.NULL; |
|
1376 |
} |
|
1377 |
||
1378 |
/** |
|
1379 |
* The getUnparsedEntityURI function returns the URI of the unparsed |
|
1380 |
* entity with the specified name in the same document as the context |
|
1381 |
* node (see [3.3 Unparsed Entities]). It returns the empty string if |
|
1382 |
* there is no such entity. |
|
1383 |
* <p> |
|
1384 |
* XML processors may choose to use the System Identifier (if one |
|
1385 |
* is provided) to resolve the entity, rather than the URI in the |
|
1386 |
* Public Identifier. The details are dependent on the processor, and |
|
1387 |
* we would have to support some form of plug-in resolver to handle |
|
1388 |
* this properly. Currently, we simply return the System Identifier if |
|
1389 |
* present, and hope that it a usable URI or that our caller can |
|
1390 |
* map it to one. |
|
1391 |
* TODO: Resolve Public Identifiers... or consider changing function name. |
|
1392 |
* <p> |
|
1393 |
* If we find a relative URI |
|
1394 |
* reference, XML expects it to be resolved in terms of the base URI |
|
1395 |
* of the document. The DOM doesn't do that for us, and it isn't |
|
1396 |
* entirely clear whether that should be done here; currently that's |
|
1397 |
* pushed up to a higher level of our application. (Note that DOM Level |
|
1398 |
* 1 didn't store the document's base URI.) |
|
1399 |
* TODO: Consider resolving Relative URIs. |
|
1400 |
* <p> |
|
1401 |
* (The DOM's statement that "An XML processor may choose to |
|
1402 |
* completely expand entities before the structure model is passed |
|
1403 |
* to the DOM" refers only to parsed entities, not unparsed, and hence |
|
1404 |
* doesn't affect this function.) |
|
1405 |
* |
|
1406 |
* @param name A string containing the Entity Name of the unparsed |
|
1407 |
* entity. |
|
1408 |
* |
|
1409 |
* @return String containing the URI of the Unparsed Entity, or an |
|
1410 |
* empty string if no such entity exists. |
|
1411 |
*/ |
|
1412 |
public String getUnparsedEntityURI(String name) |
|
1413 |
{ |
|
1414 |
||
1415 |
String url = ""; |
|
1416 |
Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) |
|
1417 |
? (Document) m_root : m_root.getOwnerDocument(); |
|
1418 |
||
1419 |
if (null != doc) |
|
1420 |
{ |
|
1421 |
DocumentType doctype = doc.getDoctype(); |
|
1422 |
||
1423 |
if (null != doctype) |
|
1424 |
{ |
|
1425 |
NamedNodeMap entities = doctype.getEntities(); |
|
1426 |
if(null == entities) |
|
1427 |
return url; |
|
1428 |
Entity entity = (Entity) entities.getNamedItem(name); |
|
1429 |
if(null == entity) |
|
1430 |
return url; |
|
1431 |
||
1432 |
String notationName = entity.getNotationName(); |
|
1433 |
||
1434 |
if (null != notationName) // then it's unparsed |
|
1435 |
{ |
|
1436 |
// The draft says: "The XSLT processor may use the public |
|
1437 |
// identifier to generate a URI for the entity instead of the URI |
|
1438 |
// specified in the system identifier. If the XSLT processor does |
|
1439 |
// not use the public identifier to generate the URI, it must use |
|
1440 |
// the system identifier; if the system identifier is a relative |
|
1441 |
// URI, it must be resolved into an absolute URI using the URI of |
|
1442 |
// the resource containing the entity declaration as the base |
|
1443 |
// URI [RFC2396]." |
|
1444 |
// So I'm falling a bit short here. |
|
1445 |
url = entity.getSystemId(); |
|
1446 |
||
1447 |
if (null == url) |
|
1448 |
{ |
|
1449 |
url = entity.getPublicId(); |
|
1450 |
} |
|
1451 |
else |
|
1452 |
{ |
|
1453 |
// This should be resolved to an absolute URL, but that's hard |
|
1454 |
// to do from here. |
|
1455 |
} |
|
1456 |
} |
|
1457 |
} |
|
1458 |
} |
|
1459 |
||
1460 |
return url; |
|
1461 |
} |
|
1462 |
||
1463 |
/** |
|
1464 |
* 5. [specified] A flag indicating whether this attribute was actually |
|
1465 |
* specified in the start-tag of its element, or was defaulted from the |
|
1466 |
* DTD. |
|
1467 |
* |
|
1468 |
* @param attributeHandle the attribute handle |
|
1469 |
* @return <code>true</code> if the attribute was specified; |
|
1470 |
* <code>false</code> if it was defaulted. |
|
1471 |
*/ |
|
1472 |
public boolean isAttributeSpecified(int attributeHandle) |
|
1473 |
{ |
|
1474 |
int type = getNodeType(attributeHandle); |
|
1475 |
||
1476 |
if (DTM.ATTRIBUTE_NODE == type) |
|
1477 |
{ |
|
1478 |
Attr attr = (Attr)getNode(attributeHandle); |
|
1479 |
return attr.getSpecified(); |
|
1480 |
} |
|
1481 |
return false; |
|
1482 |
} |
|
1483 |
||
1484 |
/** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since |
|
1485 |
* we're wrapped around an existing DOM. |
|
1486 |
* |
|
1487 |
* @param source The IncrementalSAXSource that we want to recieve events from |
|
1488 |
* on demand. |
|
1489 |
*/ |
|
1490 |
public void setIncrementalSAXSource(IncrementalSAXSource source) |
|
1491 |
{ |
|
1492 |
} |
|
1493 |
||
1494 |
/** getContentHandler returns "our SAX builder" -- the thing that |
|
1495 |
* someone else should send SAX events to in order to extend this |
|
1496 |
* DTM model. |
|
1497 |
* |
|
1498 |
* @return null if this model doesn't respond to SAX events, |
|
1499 |
* "this" if the DTM object has a built-in SAX ContentHandler, |
|
1500 |
* the IncrmentalSAXSource if we're bound to one and should receive |
|
1501 |
* the SAX stream via it for incremental build purposes... |
|
1502 |
* */ |
|
1503 |
public org.xml.sax.ContentHandler getContentHandler() |
|
1504 |
{ |
|
1505 |
return null; |
|
1506 |
} |
|
1507 |
||
1508 |
/** |
|
1509 |
* Return this DTM's lexical handler. |
|
1510 |
* |
|
1511 |
* %REVIEW% Should this return null if constrution already done/begun? |
|
1512 |
* |
|
1513 |
* @return null if this model doesn't respond to lexical SAX events, |
|
1514 |
* "this" if the DTM object has a built-in SAX ContentHandler, |
|
1515 |
* the IncrementalSAXSource if we're bound to one and should receive |
|
1516 |
* the SAX stream via it for incremental build purposes... |
|
1517 |
*/ |
|
1518 |
public org.xml.sax.ext.LexicalHandler getLexicalHandler() |
|
1519 |
{ |
|
1520 |
||
1521 |
return null; |
|
1522 |
} |
|
1523 |
||
1524 |
||
1525 |
/** |
|
1526 |
* Return this DTM's EntityResolver. |
|
1527 |
* |
|
1528 |
* @return null if this model doesn't respond to SAX entity ref events. |
|
1529 |
*/ |
|
1530 |
public org.xml.sax.EntityResolver getEntityResolver() |
|
1531 |
{ |
|
1532 |
||
1533 |
return null; |
|
1534 |
} |
|
1535 |
||
1536 |
/** |
|
1537 |
* Return this DTM's DTDHandler. |
|
1538 |
* |
|
1539 |
* @return null if this model doesn't respond to SAX dtd events. |
|
1540 |
*/ |
|
1541 |
public org.xml.sax.DTDHandler getDTDHandler() |
|
1542 |
{ |
|
1543 |
||
1544 |
return null; |
|
1545 |
} |
|
1546 |
||
1547 |
/** |
|
1548 |
* Return this DTM's ErrorHandler. |
|
1549 |
* |
|
1550 |
* @return null if this model doesn't respond to SAX error events. |
|
1551 |
*/ |
|
1552 |
public org.xml.sax.ErrorHandler getErrorHandler() |
|
1553 |
{ |
|
1554 |
||
1555 |
return null; |
|
1556 |
} |
|
1557 |
||
1558 |
/** |
|
1559 |
* Return this DTM's DeclHandler. |
|
1560 |
* |
|
1561 |
* @return null if this model doesn't respond to SAX Decl events. |
|
1562 |
*/ |
|
1563 |
public org.xml.sax.ext.DeclHandler getDeclHandler() |
|
1564 |
{ |
|
1565 |
||
1566 |
return null; |
|
1567 |
} |
|
1568 |
||
1569 |
/** @return true iff we're building this model incrementally (eg |
|
1570 |
* we're partnered with a IncrementalSAXSource) and thus require that the |
|
1571 |
* transformation and the parse run simultaneously. Guidance to the |
|
1572 |
* DTMManager. |
|
1573 |
* */ |
|
1574 |
public boolean needsTwoThreads() |
|
1575 |
{ |
|
1576 |
return false; |
|
1577 |
} |
|
1578 |
||
1579 |
// ========== Direct SAX Dispatch, for optimization purposes ======== |
|
1580 |
||
1581 |
/** |
|
1582 |
* Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition |
|
1583 |
* of whitespace. Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S"> |
|
1584 |
* the definition of <CODE>S</CODE></A> for details. |
|
1585 |
* @param ch Character to check as XML whitespace. |
|
1586 |
* @return =true if <var>ch</var> is XML whitespace; otherwise =false. |
|
1587 |
*/ |
|
1588 |
private static boolean isSpace(char ch) |
|
1589 |
{ |
|
1590 |
return XMLCharacterRecognizer.isWhiteSpace(ch); // Take the easy way out for now. |
|
1591 |
} |
|
1592 |
||
1593 |
/** |
|
1594 |
* Directly call the |
|
1595 |
* characters method on the passed ContentHandler for the |
|
1596 |
* string-value of the given node (see http://www.w3.org/TR/xpath#data-model |
|
1597 |
* for the definition of a node's string-value). Multiple calls to the |
|
1598 |
* ContentHandler's characters methods may well occur for a single call to |
|
1599 |
* this method. |
|
1600 |
* |
|
1601 |
* @param nodeHandle The node ID. |
|
1602 |
* @param ch A non-null reference to a ContentHandler. |
|
1603 |
* |
|
1604 |
* @throws org.xml.sax.SAXException |
|
1605 |
*/ |
|
1606 |
public void dispatchCharactersEvents( |
|
1607 |
int nodeHandle, org.xml.sax.ContentHandler ch, |
|
1608 |
boolean normalize) |
|
1609 |
throws org.xml.sax.SAXException |
|
1610 |
{ |
|
1611 |
if(normalize) |
|
1612 |
{ |
|
1613 |
XMLString str = getStringValue(nodeHandle); |
|
1614 |
str = str.fixWhiteSpace(true, true, false); |
|
1615 |
str.dispatchCharactersEvents(ch); |
|
1616 |
} |
|
1617 |
else |
|
1618 |
{ |
|
1619 |
int type = getNodeType(nodeHandle); |
|
1620 |
Node node = getNode(nodeHandle); |
|
1621 |
dispatchNodeData(node, ch, 0); |
|
1622 |
// Text coalition -- a DTM text node may represent multiple |
|
1623 |
// DOM nodes. |
|
1624 |
if(TEXT_NODE == type || CDATA_SECTION_NODE == type) |
|
1625 |
{ |
|
1626 |
while( null != (node=logicalNextDOMTextNode(node)) ) |
|
1627 |
{ |
|
1628 |
dispatchNodeData(node, ch, 0); |
|
1629 |
} |
|
1630 |
} |
|
1631 |
} |
|
1632 |
} |
|
1633 |
||
1634 |
/** |
|
1635 |
* Retrieve the text content of a DOM subtree, appending it into a |
|
1636 |
* user-supplied FastStringBuffer object. Note that attributes are |
|
1637 |
* not considered part of the content of an element. |
|
1638 |
* <p> |
|
1639 |
* There are open questions regarding whitespace stripping. |
|
1640 |
* Currently we make no special effort in that regard, since the standard |
|
1641 |
* DOM doesn't yet provide DTD-based information to distinguish |
|
1642 |
* whitespace-in-element-context from genuine #PCDATA. Note that we |
|
1643 |
* should probably also consider xml:space if/when we address this. |
|
1644 |
* DOM Level 3 may solve the problem for us. |
|
1645 |
* <p> |
|
1646 |
* %REVIEW% Note that as a DOM-level operation, it can be argued that this |
|
1647 |
* routine _shouldn't_ perform any processing beyond what the DOM already |
|
1648 |
* does, and that whitespace stripping and so on belong at the DTM level. |
|
1649 |
* If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM. |
|
1650 |
* |
|
1651 |
* @param node Node whose subtree is to be walked, gathering the |
|
1652 |
* contents of all Text or CDATASection nodes. |
|
1653 |
*/ |
|
1654 |
protected static void dispatchNodeData(Node node, |
|
1655 |
org.xml.sax.ContentHandler ch, |
|
1656 |
int depth) |
|
1657 |
throws org.xml.sax.SAXException |
|
1658 |
{ |
|
1659 |
||
1660 |
switch (node.getNodeType()) |
|
1661 |
{ |
|
1662 |
case Node.DOCUMENT_FRAGMENT_NODE : |
|
1663 |
case Node.DOCUMENT_NODE : |
|
1664 |
case Node.ELEMENT_NODE : |
|
1665 |
{ |
|
1666 |
for (Node child = node.getFirstChild(); null != child; |
|
1667 |
child = child.getNextSibling()) |
|
1668 |
{ |
|
1669 |
dispatchNodeData(child, ch, depth+1); |
|
1670 |
} |
|
1671 |
} |
|
1672 |
break; |
|
1673 |
case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW% |
|
1674 |
case Node.COMMENT_NODE : |
|
1675 |
if(0 != depth) |
|
1676 |
break; |
|
1677 |
// NOTE: Because this operation works in the DOM space, it does _not_ attempt |
|
1678 |
// to perform Text Coalition. That should only be done in DTM space. |
|
1679 |
case Node.TEXT_NODE : |
|
1680 |
case Node.CDATA_SECTION_NODE : |
|
1681 |
case Node.ATTRIBUTE_NODE : |
|
1682 |
String str = node.getNodeValue(); |
|
1683 |
if(ch instanceof CharacterNodeHandler) |
|
1684 |
{ |
|
1685 |
((CharacterNodeHandler)ch).characters(node); |
|
1686 |
} |
|
1687 |
else |
|
1688 |
{ |
|
1689 |
ch.characters(str.toCharArray(), 0, str.length()); |
|
1690 |
} |
|
1691 |
break; |
|
1692 |
// /* case Node.PROCESSING_INSTRUCTION_NODE : |
|
1693 |
// // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); |
|
1694 |
// break; */ |
|
1695 |
default : |
|
1696 |
// ignore |
|
1697 |
break; |
|
1698 |
} |
|
1699 |
} |
|
1700 |
||
1701 |
TreeWalker m_walker = new TreeWalker(null); |
|
1702 |
||
1703 |
/** |
|
1704 |
* Directly create SAX parser events from a subtree. |
|
1705 |
* |
|
1706 |
* @param nodeHandle The node ID. |
|
1707 |
* @param ch A non-null reference to a ContentHandler. |
|
1708 |
* |
|
1709 |
* @throws org.xml.sax.SAXException |
|
1710 |
*/ |
|
1711 |
public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch) |
|
1712 |
throws org.xml.sax.SAXException |
|
1713 |
{ |
|
1714 |
TreeWalker treeWalker = m_walker; |
|
1715 |
ContentHandler prevCH = treeWalker.getContentHandler(); |
|
1716 |
||
1717 |
if(null != prevCH) |
|
1718 |
{ |
|
1719 |
treeWalker = new TreeWalker(null); |
|
1720 |
} |
|
1721 |
treeWalker.setContentHandler(ch); |
|
1722 |
||
1723 |
try |
|
1724 |
{ |
|
1725 |
Node node = getNode(nodeHandle); |
|
12458 | 1726 |
treeWalker.traverseFragment(node); |
6 | 1727 |
} |
1728 |
finally |
|
1729 |
{ |
|
1730 |
treeWalker.setContentHandler(null); |
|
1731 |
} |
|
1732 |
} |
|
1733 |
||
1734 |
/**
 * Callback for handlers that want to receive a character-bearing DOM node
 * itself rather than a copy of its text as a char array.
 */
public interface CharacterNodeHandler
{
  /**
   * Receives a DOM node whose character data is being dispatched.
   *
   * @param node the DOM node being dispatched.
   *
   * @throws org.xml.sax.SAXException if the handler cannot process the node.
   */
  void characters(Node node)
    throws org.xml.sax.SAXException;
}
|
1739 |
||
1740 |
/** |
|
1741 |
* For the moment all the run time properties are ignored by this |
|
1742 |
* class. |
|
1743 |
* |
|
1744 |
* @param property a <code>String</code> value |
|
1745 |
* @param value an <code>Object</code> value |
|
1746 |
*/ |
|
1747 |
public void setProperty(String property, Object value) |
|
1748 |
{ |
|
1749 |
} |
|
1750 |
||
1751 |
/** |
|
1752 |
* No source information is available for DOM2DTM, so return |
|
1753 |
* <code>null</code> here. |
|
1754 |
* |
|
1755 |
* @param node an <code>int</code> value |
|
1756 |
* @return null |
|
1757 |
*/ |
|
1758 |
public SourceLocator getSourceLocatorFor(int node) |
|
1759 |
{ |
|
1760 |
return null; |
|
1761 |
} |
|
1762 |
||
1763 |
} |