author | joehw |
Mon, 13 Oct 2014 14:09:03 -0700 | |
changeset 27111 | 7a491d709b83 |
parent 25868 | 686eef1e7a79 |
child 34461 | f346a0b2b7f9 |
permissions | -rw-r--r-- |
12005 | 1 |
/* |
27111
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
2 |
* Copyright (c) 2006, 2009, Oracle and/or its affiliates. All rights reserved. |
12005 | 3 |
*/ |
4 |
/* |
|
27111
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
5 |
* Licensed to the Apache Software Foundation (ASF) under one or more |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
6 |
* contributor license agreements. See the NOTICE file distributed with |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
7 |
* this work for additional information regarding copyright ownership. |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
8 |
* The ASF licenses this file to You under the Apache License, Version 2.0 |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
9 |
* (the "License"); you may not use this file except in compliance with |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
10 |
* the License. You may obtain a copy of the License at |
12005 | 11 |
* |
12 |
* http://www.apache.org/licenses/LICENSE-2.0 |
|
13 |
* |
|
14 |
* Unless required by applicable law or agreed to in writing, software |
|
15 |
* distributed under the License is distributed on an "AS IS" BASIS, |
|
16 |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
17 |
* See the License for the specific language governing permissions and |
|
18 |
* limitations under the License. |
|
19 |
*/ |
|
20 |
||
21 |
package com.sun.org.apache.xerces.internal.impl.xs.models; |
|
22 |
||
23 |
import com.sun.org.apache.xerces.internal.xni.QName; |
|
24 |
import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode; |
|
25 |
import com.sun.org.apache.xerces.internal.impl.dtd.models.CMStateSet; |
|
26 |
import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols; |
|
27 |
import com.sun.org.apache.xerces.internal.impl.xs.SubstitutionGroupHandler; |
|
28 |
import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl; |
|
29 |
import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl; |
|
30 |
import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl; |
|
31 |
import com.sun.org.apache.xerces.internal.impl.xs.XSWildcardDecl; |
|
32 |
import com.sun.org.apache.xerces.internal.impl.xs.XMLSchemaException; |
|
33 |
import com.sun.org.apache.xerces.internal.impl.xs.XSConstraints; |
|
34 |
import java.util.ArrayList; |
|
35 |
import java.util.HashMap; |
|
36 |
||
37 |
/** |
|
38 |
* DFAContentModel is the implementation of XSCMValidator that does |
|
39 |
* all of the non-trivial element content validation. This class does |
|
40 |
* the conversion from the regular expression to the DFA that |
|
41 |
* it then uses in its validation algorithm. |
|
42 |
* |
|
43 |
* @xerces.internal |
|
44 |
* |
|
45 |
* @author Neil Graham, IBM |
|
46 |
*/ |
|
47 |
public class XSDFACM |
|
48 |
implements XSCMValidator { |
|
49 |
||
50 |
// |
|
51 |
// Constants |
|
52 |
// |
|
53 |
private static final boolean DEBUG = false; |
|
54 |
||
55 |
// special strings |
|
56 |
||
57 |
// debugging |
|
58 |
||
59 |
/** Set to true to debug content model validation. */ |
|
60 |
private static final boolean DEBUG_VALIDATE_CONTENT = false; |
|
61 |
||
62 |
// |
|
63 |
// Data |
|
64 |
// |
|
65 |
||
66 |
/** |
|
67 |
* This is the map of unique input symbol elements to indices into |
|
68 |
* each state's per-input symbol transition table entry. This is part |
|
69 |
* of the built DFA information that must be kept around to do the |
|
70 |
* actual validation. Note tat since either XSElementDecl or XSParticleDecl object |
|
71 |
* can live here, we've got to use an Object. |
|
72 |
*/ |
|
73 |
private Object fElemMap[] = null; |
|
74 |
||
75 |
/** |
|
76 |
* This is a map of whether the element map contains information |
|
77 |
* related to ANY models. |
|
78 |
*/ |
|
79 |
private int fElemMapType[] = null; |
|
80 |
||
81 |
/** |
|
82 |
* id of the unique input symbol |
|
83 |
*/ |
|
84 |
private int fElemMapId[] = null; |
|
85 |
||
86 |
/** The element map size. */ |
|
87 |
private int fElemMapSize = 0; |
|
88 |
||
89 |
/** |
|
90 |
* This is an array of booleans, one per state (there are |
|
91 |
* fTransTableSize states in the DFA) that indicates whether that |
|
92 |
* state is a final state. |
|
93 |
*/ |
|
94 |
private boolean fFinalStateFlags[] = null; |
|
95 |
||
96 |
/** |
|
97 |
* The list of follow positions for each NFA position (i.e. for each |
|
98 |
* non-epsilon leaf node.) This is only used during the building of |
|
99 |
* the DFA, and is let go afterwards. |
|
100 |
*/ |
|
101 |
private CMStateSet fFollowList[] = null; |
|
102 |
||
103 |
/** |
|
104 |
* This is the head node of our intermediate representation. It is |
|
105 |
* only non-null during the building of the DFA (just so that it |
|
106 |
* does not have to be passed all around.) Once the DFA is built, |
|
107 |
* this is no longer required so its nulled out. |
|
108 |
*/ |
|
109 |
private CMNode fHeadNode = null; |
|
110 |
||
111 |
/** |
|
112 |
* The count of leaf nodes. This is an important number that set some |
|
113 |
* limits on the sizes of data structures in the DFA process. |
|
114 |
*/ |
|
115 |
private int fLeafCount = 0; |
|
116 |
||
117 |
/** |
|
118 |
* An array of non-epsilon leaf nodes, which is used during the DFA |
|
119 |
* build operation, then dropped. |
|
120 |
*/ |
|
121 |
private XSCMLeaf fLeafList[] = null; |
|
122 |
||
123 |
/** Array mapping ANY types to the leaf list. */ |
|
124 |
private int fLeafListType[] = null; |
|
125 |
||
126 |
/** |
|
127 |
* This is the transition table that is the main by product of all |
|
128 |
* of the effort here. It is an array of arrays of ints. The first |
|
129 |
* dimension is the number of states we end up with in the DFA. The |
|
130 |
* second dimensions is the number of unique elements in the content |
|
131 |
* model (fElemMapSize). Each entry in the second dimension indicates |
|
132 |
* the new state given that input for the first dimension's start |
|
133 |
* state. |
|
134 |
* <p> |
|
135 |
* The fElemMap array handles mapping from element indexes to |
|
136 |
* positions in the second dimension of the transition table. |
|
137 |
*/ |
|
138 |
private int fTransTable[][] = null; |
|
139 |
/** |
|
140 |
* Array containing occurence information for looping states |
|
141 |
* which use counters to check minOccurs/maxOccurs. |
|
142 |
*/ |
|
143 |
private Occurence [] fCountingStates = null; |
|
144 |
static final class Occurence { |
|
145 |
final int minOccurs; |
|
146 |
final int maxOccurs; |
|
147 |
final int elemIndex; |
|
148 |
public Occurence (XSCMRepeatingLeaf leaf, int elemIndex) { |
|
149 |
minOccurs = leaf.getMinOccurs(); |
|
150 |
maxOccurs = leaf.getMaxOccurs(); |
|
151 |
this.elemIndex = elemIndex; |
|
152 |
} |
|
153 |
public String toString() { |
|
154 |
return "minOccurs=" + minOccurs |
|
155 |
+ ";maxOccurs=" + |
|
156 |
((maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) |
|
157 |
? Integer.toString(maxOccurs) : "unbounded"); |
|
158 |
} |
|
159 |
} |
|
160 |
||
161 |
/** |
|
162 |
* The number of valid entries in the transition table, and in the other |
|
163 |
* related tables such as fFinalStateFlags. |
|
164 |
*/ |
|
165 |
private int fTransTableSize = 0; |
|
166 |
||
27111
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
167 |
private boolean fIsCompactedForUPA; |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
168 |
|
12005 | 169 |
/** |
170 |
* Array of counters for all the for elements (or wildcards) |
|
171 |
* of the form a{n,m} where n > 1 and m <= unbounded. Used |
|
172 |
* to count the a's to later check against n and m. Counter |
|
173 |
* set to -1 if element (or wildcard) not optimized by |
|
174 |
* constant space algorithm. |
|
175 |
*/ |
|
176 |
private int fElemMapCounter[]; |
|
177 |
||
178 |
/** |
|
179 |
* Array of lower bounds for all the for elements (or wildcards) |
|
180 |
* of the form a{n,m} where n > 1 and m <= unbounded. This array |
|
181 |
* stores the n's for those elements (or wildcards) for which |
|
182 |
* the constant space algorithm applies (or -1 otherwise). |
|
183 |
*/ |
|
184 |
private int fElemMapCounterLowerBound[]; |
|
185 |
||
186 |
/** |
|
187 |
* Array of upper bounds for all the for elements (or wildcards) |
|
188 |
* of the form a{n,m} where n > 1 and m <= unbounded. This array |
|
189 |
* stores the n's for those elements (or wildcards) for which |
|
190 |
* the constant space algorithm applies, or -1 if algorithm does |
|
191 |
* not apply or m = unbounded. |
|
192 |
*/ |
|
193 |
private int fElemMapCounterUpperBound[]; // -1 if no upper bound |
|
194 |
||
195 |
// temp variables |
|
196 |
||
197 |
// |
|
198 |
// Constructors |
|
199 |
// |
|
200 |
||
201 |
/** |
|
202 |
* Constructs a DFA content model. |
|
203 |
* |
|
204 |
* @param syntaxTree The syntax tree of the content model. |
|
205 |
* @param leafCount The number of leaves. |
|
206 |
* |
|
207 |
* @exception RuntimeException Thrown if DFA can't be built. |
|
208 |
*/ |
|
209 |
||
210 |
public XSDFACM(CMNode syntaxTree, int leafCount) { |
|
211 |
||
212 |
// Store away our index and pools in members |
|
213 |
fLeafCount = leafCount; |
|
214 |
||
215 |
// |
|
216 |
// Create some string pool indexes that represent the names of some |
|
217 |
// magical nodes in the syntax tree. |
|
218 |
// (already done in static initialization... |
|
219 |
// |
|
220 |
||
221 |
// |
|
222 |
// Ok, so lets grind through the building of the DFA. This method |
|
223 |
// handles the high level logic of the algorithm, but it uses a |
|
224 |
// number of helper classes to do its thing. |
|
225 |
// |
|
226 |
// In order to avoid having hundreds of references to the error and |
|
227 |
// string handlers around, this guy and all of his helper classes |
|
228 |
// just throw a simple exception and we then pass it along. |
|
229 |
// |
|
230 |
||
231 |
if(DEBUG_VALIDATE_CONTENT) { |
|
232 |
XSDFACM.time -= System.currentTimeMillis(); |
|
233 |
} |
|
234 |
||
235 |
buildDFA(syntaxTree); |
|
236 |
||
237 |
if(DEBUG_VALIDATE_CONTENT) { |
|
238 |
XSDFACM.time += System.currentTimeMillis(); |
|
239 |
System.out.println("DFA build: " + XSDFACM.time + "ms"); |
|
240 |
} |
|
241 |
} |
|
242 |
||
243 |
private static long time = 0; |
|
244 |
||
245 |
// |
|
246 |
// XSCMValidator methods |
|
247 |
// |
|
248 |
||
249 |
/** |
|
250 |
* check whether the given state is one of the final states |
|
251 |
* |
|
252 |
* @param state the state to check |
|
253 |
* |
|
254 |
* @return whether it's a final state |
|
255 |
*/ |
|
256 |
public boolean isFinalState (int state) { |
|
257 |
return (state < 0)? false : |
|
258 |
fFinalStateFlags[state]; |
|
259 |
} |
|
260 |
||
261 |
/** |
|
262 |
* one transition only |
|
263 |
* |
|
264 |
* @param curElem The current element's QName |
|
265 |
* @param state stack to store the previous state |
|
266 |
* @param subGroupHandler the substitution group handler |
|
267 |
* |
|
268 |
* @return null if transition is invalid; otherwise the Object corresponding to the |
|
269 |
* XSElementDecl or XSWildcardDecl identified. Also, the |
|
270 |
* state array will be modified to include the new state; this so that the validator can |
|
271 |
* store it away. |
|
272 |
* |
|
273 |
* @exception RuntimeException thrown on error |
|
274 |
*/ |
|
275 |
public Object oneTransition(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler) { |
|
276 |
int curState = state[0]; |
|
277 |
||
278 |
if(curState == XSCMValidator.FIRST_ERROR || curState == XSCMValidator.SUBSEQUENT_ERROR) { |
|
279 |
// there was an error last time; so just go find correct Object in fElemmMap. |
|
280 |
// ... after resetting state[0]. |
|
281 |
if(curState == XSCMValidator.FIRST_ERROR) |
|
282 |
state[0] = XSCMValidator.SUBSEQUENT_ERROR; |
|
283 |
||
284 |
return findMatchingDecl(curElem, subGroupHandler); |
|
285 |
} |
|
286 |
||
287 |
int nextState = 0; |
|
288 |
int elemIndex = 0; |
|
289 |
Object matchingDecl = null; |
|
290 |
||
291 |
for (; elemIndex < fElemMapSize; elemIndex++) { |
|
292 |
nextState = fTransTable[curState][elemIndex]; |
|
293 |
if (nextState == -1) |
|
294 |
continue; |
|
295 |
int type = fElemMapType[elemIndex] ; |
|
296 |
if (type == XSParticleDecl.PARTICLE_ELEMENT) { |
|
297 |
matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); |
|
298 |
if (matchingDecl != null) { |
|
299 |
// Increment counter if constant space algorithm applies |
|
300 |
if (fElemMapCounter[elemIndex] >= 0) { |
|
301 |
fElemMapCounter[elemIndex]++; |
|
302 |
} |
|
303 |
break; |
|
304 |
} |
|
305 |
} |
|
306 |
else if (type == XSParticleDecl.PARTICLE_WILDCARD) { |
|
307 |
if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) { |
|
308 |
matchingDecl = fElemMap[elemIndex]; |
|
309 |
// Increment counter if constant space algorithm applies |
|
310 |
if (fElemMapCounter[elemIndex] >= 0) { |
|
311 |
fElemMapCounter[elemIndex]++; |
|
312 |
} |
|
313 |
break; |
|
314 |
} |
|
315 |
} |
|
316 |
} |
|
317 |
||
318 |
// if we still can't find a match, set the state to first_error |
|
319 |
// and return null |
|
320 |
if (elemIndex == fElemMapSize) { |
|
321 |
state[1] = state[0]; |
|
322 |
state[0] = XSCMValidator.FIRST_ERROR; |
|
323 |
return findMatchingDecl(curElem, subGroupHandler); |
|
324 |
} |
|
325 |
||
326 |
if (fCountingStates != null) { |
|
327 |
Occurence o = fCountingStates[curState]; |
|
328 |
if (o != null) { |
|
329 |
if (curState == nextState) { |
|
330 |
if (++state[2] > o.maxOccurs && |
|
331 |
o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) { |
|
332 |
// It's likely that we looped too many times on the current state |
|
333 |
// however it's possible that we actually matched another particle |
|
334 |
// which allows the same name. |
|
335 |
// |
|
336 |
// Consider: |
|
337 |
// |
|
338 |
// <xs:sequence> |
|
339 |
// <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/> |
|
340 |
// <xs:element name="foo" type="xs:string" fixed="bar"/> |
|
341 |
// </xs:sequence> |
|
342 |
// |
|
343 |
// and |
|
344 |
// |
|
345 |
// <xs:sequence> |
|
346 |
// <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/> |
|
347 |
// <xs:any namespace="##any" processContents="skip"/> |
|
348 |
// </xs:sequence> |
|
349 |
// |
|
350 |
// In the DFA there will be two transitions from the current state which |
|
351 |
// allow "foo". Note that this is not a UPA violation. The ambiguity of which |
|
352 |
// transition to take is resolved by the current value of the counter. Since |
|
353 |
// we've already seen enough instances of the first "foo" perhaps there is |
|
354 |
// another element declaration or wildcard deeper in the element map which |
|
355 |
// matches. |
|
356 |
return findMatchingDecl(curElem, state, subGroupHandler, elemIndex); |
|
357 |
} |
|
358 |
} |
|
359 |
else if (state[2] < o.minOccurs) { |
|
360 |
// not enough loops on the current state. |
|
361 |
state[1] = state[0]; |
|
362 |
state[0] = XSCMValidator.FIRST_ERROR; |
|
363 |
return findMatchingDecl(curElem, subGroupHandler); |
|
364 |
} |
|
365 |
else { |
|
366 |
// Exiting a counting state. If we're entering a new |
|
367 |
// counting state, reset the counter. |
|
368 |
o = fCountingStates[nextState]; |
|
369 |
if (o != null) { |
|
370 |
state[2] = (elemIndex == o.elemIndex) ? 1 : 0; |
|
371 |
} |
|
372 |
} |
|
373 |
} |
|
374 |
else { |
|
375 |
o = fCountingStates[nextState]; |
|
376 |
if (o != null) { |
|
377 |
// Entering a new counting state. Reset the counter. |
|
378 |
// If we've already seen one instance of the looping |
|
379 |
// particle set the counter to 1, otherwise set it |
|
380 |
// to 0. |
|
381 |
state[2] = (elemIndex == o.elemIndex) ? 1 : 0; |
|
382 |
} |
|
383 |
} |
|
384 |
} |
|
385 |
||
386 |
state[0] = nextState; |
|
387 |
return matchingDecl; |
|
388 |
} // oneTransition(QName, int[], SubstitutionGroupHandler): Object |
|
389 |
||
390 |
Object findMatchingDecl(QName curElem, SubstitutionGroupHandler subGroupHandler) { |
|
391 |
Object matchingDecl = null; |
|
392 |
||
393 |
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { |
|
394 |
int type = fElemMapType[elemIndex] ; |
|
395 |
if (type == XSParticleDecl.PARTICLE_ELEMENT) { |
|
396 |
matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); |
|
397 |
if (matchingDecl != null) { |
|
398 |
return matchingDecl; |
|
399 |
} |
|
400 |
} |
|
401 |
else if (type == XSParticleDecl.PARTICLE_WILDCARD) { |
|
402 |
if(((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) |
|
403 |
return fElemMap[elemIndex]; |
|
404 |
} |
|
405 |
} |
|
406 |
||
407 |
return null; |
|
408 |
} // findMatchingDecl(QName, SubstitutionGroupHandler): Object |
|
409 |
||
410 |
Object findMatchingDecl(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler, int elemIndex) { |
|
411 |
||
412 |
int curState = state[0]; |
|
413 |
int nextState = 0; |
|
414 |
Object matchingDecl = null; |
|
415 |
||
416 |
while (++elemIndex < fElemMapSize) { |
|
417 |
nextState = fTransTable[curState][elemIndex]; |
|
418 |
if (nextState == -1) |
|
419 |
continue; |
|
420 |
int type = fElemMapType[elemIndex] ; |
|
421 |
if (type == XSParticleDecl.PARTICLE_ELEMENT) { |
|
422 |
matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); |
|
423 |
if (matchingDecl != null) { |
|
424 |
break; |
|
425 |
} |
|
426 |
} |
|
427 |
else if (type == XSParticleDecl.PARTICLE_WILDCARD) { |
|
428 |
if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) { |
|
429 |
matchingDecl = fElemMap[elemIndex]; |
|
430 |
break; |
|
431 |
} |
|
432 |
} |
|
433 |
} |
|
434 |
||
435 |
// if we still can't find a match, set the state to FIRST_ERROR and return null |
|
436 |
if (elemIndex == fElemMapSize) { |
|
437 |
state[1] = state[0]; |
|
438 |
state[0] = XSCMValidator.FIRST_ERROR; |
|
439 |
return findMatchingDecl(curElem, subGroupHandler); |
|
440 |
} |
|
441 |
||
442 |
// if we found a match, set the next state and reset the |
|
443 |
// counter if the next state is a counting state. |
|
444 |
state[0] = nextState; |
|
445 |
final Occurence o = fCountingStates[nextState]; |
|
446 |
if (o != null) { |
|
447 |
state[2] = (elemIndex == o.elemIndex) ? 1 : 0; |
|
448 |
} |
|
449 |
return matchingDecl; |
|
450 |
} // findMatchingDecl(QName, int[], SubstitutionGroupHandler, int): Object |
|
451 |
||
452 |
// This method returns the start states of the content model. |
|
453 |
public int[] startContentModel() { |
|
454 |
// Clear all constant space algorithm counters in use |
|
455 |
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { |
|
456 |
if (fElemMapCounter[elemIndex] != -1) { |
|
457 |
fElemMapCounter[elemIndex] = 0; |
|
458 |
} |
|
459 |
} |
|
460 |
// [0] : the current state |
|
461 |
// [1] : if [0] is an error state then the |
|
462 |
// last valid state before the error |
|
463 |
// [2] : occurence counter for counting states |
|
464 |
return new int [3]; |
|
465 |
} // startContentModel():int[] |
|
466 |
||
467 |
// this method returns whether the last state was a valid final state |
|
468 |
public boolean endContentModel(int[] state) { |
|
469 |
final int curState = state[0]; |
|
470 |
if (fFinalStateFlags[curState]) { |
|
471 |
if (fCountingStates != null) { |
|
472 |
Occurence o = fCountingStates[curState]; |
|
473 |
if (o != null && state[2] < o.minOccurs) { |
|
474 |
// not enough loops on the current state to be considered final. |
|
475 |
return false; |
|
476 |
} |
|
477 |
} |
|
478 |
return true; |
|
479 |
} |
|
480 |
return false; |
|
481 |
} // endContentModel(int[]): boolean |
|
482 |
||
483 |
// Killed off whatCanGoHere; we may need it for DOM canInsert(...) etc., |
|
484 |
// but we can put it back later. |
|
485 |
||
486 |
// |
|
487 |
// Private methods |
|
488 |
// |
|
489 |
||
490 |
/** |
|
491 |
* Builds the internal DFA transition table from the given syntax tree. |
|
492 |
* |
|
493 |
* @param syntaxTree The syntax tree. |
|
494 |
* |
|
495 |
* @exception RuntimeException Thrown if DFA cannot be built. |
|
496 |
*/ |
|
497 |
private void buildDFA(CMNode syntaxTree) { |
|
498 |
// |
|
499 |
// The first step we need to take is to rewrite the content model |
|
500 |
// using our CMNode objects, and in the process get rid of any |
|
501 |
// repetition short cuts, converting them into '*' style repetitions |
|
502 |
// or getting rid of repetitions altogether. |
|
503 |
// |
|
504 |
// The conversions done are: |
|
505 |
// |
|
506 |
// x+ -> (x|x*) |
|
507 |
// x? -> (x|epsilon) |
|
508 |
// |
|
509 |
// This is a relatively complex scenario. What is happening is that |
|
510 |
// we create a top level binary node of which the special EOC value |
|
511 |
// is set as the right side node. The the left side is set to the |
|
512 |
// rewritten syntax tree. The source is the original content model |
|
513 |
// info from the decl pool. The rewrite is done by buildSyntaxTree() |
|
514 |
// which recurses the decl pool's content of the element and builds |
|
515 |
// a new tree in the process. |
|
516 |
// |
|
517 |
// Note that, during this operation, we set each non-epsilon leaf |
|
518 |
// node's DFA state position and count the number of such leafs, which |
|
519 |
// is left in the fLeafCount member. |
|
520 |
// |
|
521 |
// The nodeTmp object is passed in just as a temp node to use during |
|
522 |
// the recursion. Otherwise, we'd have to create a new node on every |
|
523 |
// level of recursion, which would be piggy in Java (as is everything |
|
524 |
// for that matter.) |
|
525 |
// |
|
526 |
||
527 |
/* MODIFIED (Jan, 2001) |
|
528 |
* |
|
529 |
* Use following rules. |
|
530 |
* nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x) |
|
531 |
* nullable(x?) := true, first(x?) := first(x), last(x?) := last(x) |
|
532 |
* |
|
533 |
* The same computation of follow as x* is applied to x+ |
|
534 |
* |
|
535 |
* The modification drastically reduces computation time of |
|
536 |
* "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+" |
|
537 |
*/ |
|
538 |
||
539 |
// |
|
540 |
// And handle specially the EOC node, which also must be numbered |
|
541 |
// and counted as a non-epsilon leaf node. It could not be handled |
|
542 |
// in the above tree build because it was created before all that |
|
543 |
// started. We save the EOC position since its used during the DFA |
|
544 |
// building loop. |
|
545 |
// |
|
546 |
int EOCPos = fLeafCount; |
|
547 |
XSCMLeaf nodeEOC = new XSCMLeaf(XSParticleDecl.PARTICLE_ELEMENT, null, -1, fLeafCount++); |
|
548 |
fHeadNode = new XSCMBinOp( |
|
549 |
XSModelGroupImpl.MODELGROUP_SEQUENCE, |
|
550 |
syntaxTree, |
|
551 |
nodeEOC |
|
552 |
); |
|
553 |
||
554 |
// |
|
555 |
// Ok, so now we have to iterate the new tree and do a little more |
|
556 |
// work now that we know the leaf count. One thing we need to do is |
|
557 |
// to calculate the first and last position sets of each node. This |
|
558 |
// is cached away in each of the nodes. |
|
559 |
// |
|
560 |
// Along the way we also set the leaf count in each node as the |
|
561 |
// maximum state count. They must know this in order to create their |
|
562 |
// first/last pos sets. |
|
563 |
// |
|
564 |
// We also need to build an array of references to the non-epsilon |
|
565 |
// leaf nodes. Since we iterate it in the same way as before, this |
|
566 |
// will put them in the array according to their position values. |
|
567 |
// |
|
568 |
fLeafList = new XSCMLeaf[fLeafCount]; |
|
569 |
fLeafListType = new int[fLeafCount]; |
|
570 |
postTreeBuildInit(fHeadNode); |
|
571 |
||
572 |
// |
|
573 |
// And, moving onward... We now need to build the follow position |
|
574 |
// sets for all the nodes. So we allocate an array of state sets, |
|
575 |
// one for each leaf node (i.e. each DFA position.) |
|
576 |
// |
|
577 |
fFollowList = new CMStateSet[fLeafCount]; |
|
578 |
for (int index = 0; index < fLeafCount; index++) |
|
579 |
fFollowList[index] = new CMStateSet(fLeafCount); |
|
580 |
calcFollowList(fHeadNode); |
|
581 |
// |
|
582 |
// And finally the big push... Now we build the DFA using all the |
|
583 |
// states and the tree we've built up. First we set up the various |
|
584 |
// data structures we are going to use while we do this. |
|
585 |
// |
|
586 |
// First of all we need an array of unique element names in our |
|
587 |
// content model. For each transition table entry, we need a set of |
|
588 |
// contiguous indices to represent the transitions for a particular |
|
589 |
// input element. So we need to a zero based range of indexes that |
|
590 |
// map to element types. This element map provides that mapping. |
|
591 |
// |
|
592 |
fElemMap = new Object[fLeafCount]; |
|
593 |
fElemMapType = new int[fLeafCount]; |
|
594 |
fElemMapId = new int[fLeafCount]; |
|
595 |
||
596 |
fElemMapCounter = new int[fLeafCount]; |
|
597 |
fElemMapCounterLowerBound = new int[fLeafCount]; |
|
598 |
fElemMapCounterUpperBound = new int[fLeafCount]; |
|
599 |
||
600 |
fElemMapSize = 0; |
|
601 |
Occurence [] elemOccurenceMap = null; |
|
602 |
||
603 |
for (int outIndex = 0; outIndex < fLeafCount; outIndex++) { |
|
604 |
// optimization from Henry Zongaro: |
|
605 |
//fElemMap[outIndex] = new Object (); |
|
606 |
fElemMap[outIndex] = null; |
|
607 |
||
608 |
int inIndex = 0; |
|
609 |
final int id = fLeafList[outIndex].getParticleId(); |
|
610 |
for (; inIndex < fElemMapSize; inIndex++) { |
|
611 |
if (id == fElemMapId[inIndex]) |
|
612 |
break; |
|
613 |
} |
|
614 |
||
615 |
// If it was not in the list, then add it, if not the EOC node |
|
616 |
if (inIndex == fElemMapSize) { |
|
617 |
XSCMLeaf leaf = fLeafList[outIndex]; |
|
618 |
fElemMap[fElemMapSize] = leaf.getLeaf(); |
|
619 |
if (leaf instanceof XSCMRepeatingLeaf) { |
|
620 |
if (elemOccurenceMap == null) { |
|
621 |
elemOccurenceMap = new Occurence[fLeafCount]; |
|
622 |
} |
|
623 |
elemOccurenceMap[fElemMapSize] = new Occurence((XSCMRepeatingLeaf) leaf, fElemMapSize); |
|
624 |
} |
|
625 |
||
626 |
fElemMapType[fElemMapSize] = fLeafListType[outIndex]; |
|
627 |
fElemMapId[fElemMapSize] = id; |
|
628 |
||
629 |
// Init counters and bounds for a{n,m} algorithm |
|
630 |
int[] bounds = (int[]) leaf.getUserData(); |
|
631 |
if (bounds != null) { |
|
632 |
fElemMapCounter[fElemMapSize] = 0; |
|
633 |
fElemMapCounterLowerBound[fElemMapSize] = bounds[0]; |
|
634 |
fElemMapCounterUpperBound[fElemMapSize] = bounds[1]; |
|
635 |
} else { |
|
636 |
fElemMapCounter[fElemMapSize] = -1; |
|
637 |
fElemMapCounterLowerBound[fElemMapSize] = -1; |
|
638 |
fElemMapCounterUpperBound[fElemMapSize] = -1; |
|
639 |
} |
|
640 |
||
641 |
fElemMapSize++; |
|
642 |
} |
|
643 |
} |
|
644 |
||
645 |
// the last entry in the element map must be the EOC element. |
|
646 |
// remove it from the map. |
|
647 |
if (DEBUG) { |
|
648 |
if (fElemMapId[fElemMapSize-1] != -1) |
|
649 |
System.err.println("interal error in DFA: last element is not EOC."); |
|
650 |
} |
|
651 |
fElemMapSize--; |
|
652 |
||
653 |
/*** |
|
654 |
* Optimization(Jan, 2001); We sort fLeafList according to |
|
655 |
* elemIndex which is *uniquely* associated to each leaf. |
|
656 |
* We are *assuming* that each element appears in at least one leaf. |
|
657 |
**/ |
|
658 |
||
659 |
int[] fLeafSorter = new int[fLeafCount + fElemMapSize]; |
|
660 |
int fSortCount = 0; |
|
661 |
||
662 |
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { |
|
663 |
final int id = fElemMapId[elemIndex]; |
|
664 |
for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) { |
|
665 |
if (id == fLeafList[leafIndex].getParticleId()) |
|
666 |
fLeafSorter[fSortCount++] = leafIndex; |
|
667 |
} |
|
668 |
fLeafSorter[fSortCount++] = -1; |
|
669 |
} |
|
670 |
||
671 |
/* Optimization(Jan, 2001) */ |
|
672 |
||
673 |
// |
|
674 |
// Next lets create some arrays, some that hold transient |
|
675 |
// information during the DFA build and some that are permament. |
|
676 |
// These are kind of sticky since we cannot know how big they will |
|
677 |
// get, but we don't want to use any Java collections because of |
|
678 |
// performance. |
|
679 |
// |
|
680 |
// Basically they will probably be about fLeafCount*2 on average, |
|
681 |
// but can be as large as 2^(fLeafCount*2), worst case. So we start |
|
682 |
// with fLeafCount*4 as a middle ground. This will be very unlikely |
|
683 |
// to ever have to expand, though it if does, the overhead will be |
|
684 |
// somewhat ugly. |
|
685 |
// |
|
686 |
int curArraySize = fLeafCount * 4; |
|
687 |
CMStateSet[] statesToDo = new CMStateSet[curArraySize]; |
|
688 |
fFinalStateFlags = new boolean[curArraySize]; |
|
689 |
fTransTable = new int[curArraySize][]; |
|
690 |
||
691 |
// |
|
692 |
// Ok we start with the initial set as the first pos set of the |
|
693 |
// head node (which is the seq node that holds the content model |
|
694 |
// and the EOC node.) |
|
695 |
// |
|
696 |
CMStateSet setT = fHeadNode.firstPos(); |
|
697 |
||
698 |
// |
|
699 |
// Init our two state flags. Basically the unmarked state counter |
|
700 |
// is always chasing the current state counter. When it catches up, |
|
701 |
// that means we made a pass through that did not add any new states |
|
702 |
// to the lists, at which time we are done. We could have used a |
|
703 |
// expanding array of flags which we used to mark off states as we |
|
704 |
// complete them, but this is easier though less readable maybe. |
|
705 |
// |
|
706 |
int unmarkedState = 0; |
|
707 |
int curState = 0; |
|
708 |
||
709 |
// |
|
710 |
// Init the first transition table entry, and put the initial state |
|
711 |
// into the states to do list, then bump the current state. |
|
712 |
// |
|
713 |
fTransTable[curState] = makeDefStateList(); |
|
714 |
statesToDo[curState] = setT; |
|
715 |
curState++; |
|
716 |
||
717 |
/* Optimization(Jan, 2001); This is faster for |
|
718 |
* a large content model such as, "(t001+|t002+|.... |t500+)". |
|
719 |
*/ |
|
720 |
||
721 |
HashMap stateTable = new HashMap(); |
|
722 |
||
723 |
/* Optimization(Jan, 2001) */ |
|
724 |
||
725 |
// |
|
726 |
// Ok, almost done with the algorithm... We now enter the |
|
727 |
// loop where we go until the states done counter catches up with |
|
728 |
// the states to do counter. |
|
729 |
// |
|
730 |
while (unmarkedState < curState) { |
|
731 |
// |
|
732 |
// Get the first unmarked state out of the list of states to do. |
|
733 |
// And get the associated transition table entry. |
|
734 |
// |
|
735 |
setT = statesToDo[unmarkedState]; |
|
736 |
int[] transEntry = fTransTable[unmarkedState]; |
|
737 |
||
738 |
// Mark this one final if it contains the EOC state |
|
739 |
fFinalStateFlags[unmarkedState] = setT.getBit(EOCPos); |
|
740 |
||
741 |
// Bump up the unmarked state count, marking this state done |
|
742 |
unmarkedState++; |
|
743 |
||
744 |
// Loop through each possible input symbol in the element map |
|
745 |
CMStateSet newSet = null; |
|
746 |
/* Optimization(Jan, 2001) */ |
|
747 |
int sorterIndex = 0; |
|
748 |
/* Optimization(Jan, 2001) */ |
|
749 |
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { |
|
750 |
// |
|
751 |
// Build up a set of states which is the union of all of |
|
752 |
// the follow sets of DFA positions that are in the current |
|
753 |
// state. If we gave away the new set last time through then |
|
754 |
// create a new one. Otherwise, zero out the existing one. |
|
755 |
// |
|
756 |
if (newSet == null) |
|
757 |
newSet = new CMStateSet(fLeafCount); |
|
758 |
else |
|
759 |
newSet.zeroBits(); |
|
760 |
||
761 |
/* Optimization(Jan, 2001) */ |
|
762 |
int leafIndex = fLeafSorter[sorterIndex++]; |
|
763 |
||
764 |
while (leafIndex != -1) { |
|
765 |
// If this leaf index (DFA position) is in the current set... |
|
766 |
if (setT.getBit(leafIndex)) { |
|
767 |
// |
|
768 |
// If this leaf is the current input symbol, then we |
|
769 |
// want to add its follow list to the set of states to |
|
770 |
// transition to from the current state. |
|
771 |
// |
|
772 |
newSet.union(fFollowList[leafIndex]); |
|
773 |
} |
|
774 |
||
775 |
leafIndex = fLeafSorter[sorterIndex++]; |
|
776 |
} |
|
777 |
/* Optimization(Jan, 2001) */ |
|
778 |
||
779 |
// |
|
780 |
// If this new set is not empty, then see if its in the list |
|
781 |
// of states to do. If not, then add it. |
|
782 |
// |
|
783 |
if (!newSet.isEmpty()) { |
|
784 |
// |
|
785 |
// Search the 'states to do' list to see if this new |
|
786 |
// state set is already in there. |
|
787 |
// |
|
788 |
||
789 |
/* Optimization(Jan, 2001) */ |
|
790 |
Integer stateObj = (Integer)stateTable.get(newSet); |
|
791 |
int stateIndex = (stateObj == null ? curState : stateObj.intValue()); |
|
792 |
/* Optimization(Jan, 2001) */ |
|
793 |
||
794 |
// If we did not find it, then add it |
|
795 |
if (stateIndex == curState) { |
|
796 |
// |
|
797 |
// Put this new state into the states to do and init |
|
798 |
// a new entry at the same index in the transition |
|
799 |
// table. |
|
800 |
// |
|
801 |
statesToDo[curState] = newSet; |
|
802 |
fTransTable[curState] = makeDefStateList(); |
|
803 |
||
804 |
/* Optimization(Jan, 2001) */ |
|
805 |
stateTable.put(newSet, new Integer(curState)); |
|
806 |
/* Optimization(Jan, 2001) */ |
|
807 |
||
808 |
// We now have a new state to do so bump the count |
|
809 |
curState++; |
|
810 |
||
811 |
// |
|
812 |
// Null out the new set to indicate we adopted it. |
|
813 |
// This will cause the creation of a new set on the |
|
814 |
// next time around the loop. |
|
815 |
// |
|
816 |
newSet = null; |
|
817 |
} |
|
818 |
||
819 |
// |
|
820 |
// Now set this state in the transition table's entry |
|
821 |
// for this element (using its index), with the DFA |
|
822 |
// state we will move to from the current state when we |
|
823 |
// see this input element. |
|
824 |
// |
|
825 |
transEntry[elemIndex] = stateIndex; |
|
826 |
||
827 |
// Expand the arrays if we're full |
|
828 |
if (curState == curArraySize) { |
|
829 |
// |
|
830 |
// Yikes, we overflowed the initial array size, so |
|
831 |
// we've got to expand all of these arrays. So adjust |
|
832 |
// up the size by 50% and allocate new arrays. |
|
833 |
// |
|
834 |
final int newSize = (int)(curArraySize * 1.5); |
|
835 |
CMStateSet[] newToDo = new CMStateSet[newSize]; |
|
836 |
boolean[] newFinalFlags = new boolean[newSize]; |
|
837 |
int[][] newTransTable = new int[newSize][]; |
|
838 |
||
839 |
// Copy over all of the existing content |
|
840 |
System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize); |
|
841 |
System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize); |
|
842 |
System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize); |
|
843 |
||
844 |
// Store the new array size |
|
845 |
curArraySize = newSize; |
|
846 |
statesToDo = newToDo; |
|
847 |
fFinalStateFlags = newFinalFlags; |
|
848 |
fTransTable = newTransTable; |
|
849 |
} |
|
850 |
} |
|
851 |
} |
|
852 |
} |
|
853 |
||
854 |
// |
|
855 |
// Fill in the occurence information for each looping state |
|
856 |
// if we're using counters. |
|
857 |
// |
|
858 |
if (elemOccurenceMap != null) { |
|
859 |
fCountingStates = new Occurence[curState]; |
|
860 |
for (int i = 0; i < curState; ++i) { |
|
861 |
int [] transitions = fTransTable[i]; |
|
862 |
for (int j = 0; j < transitions.length; ++j) { |
|
863 |
if (i == transitions[j]) { |
|
864 |
fCountingStates[i] = elemOccurenceMap[j]; |
|
865 |
break; |
|
866 |
} |
|
867 |
} |
|
868 |
} |
|
869 |
} |
|
870 |
||
871 |
// |
|
872 |
// And now we can say bye bye to the temp representation since we've |
|
873 |
// built the DFA. |
|
874 |
// |
|
875 |
if (DEBUG_VALIDATE_CONTENT) |
|
876 |
dumpTree(fHeadNode, 0); |
|
877 |
fHeadNode = null; |
|
878 |
fLeafList = null; |
|
879 |
fFollowList = null; |
|
880 |
fLeafListType = null; |
|
881 |
fElemMapId = null; |
|
882 |
} |
|
883 |
||
884 |
/** |
|
885 |
* Calculates the follow list of the current node. |
|
886 |
* |
|
887 |
* @param nodeCur The curent node. |
|
888 |
* |
|
889 |
* @exception RuntimeException Thrown if follow list cannot be calculated. |
|
890 |
*/ |
|
891 |
private void calcFollowList(CMNode nodeCur) { |
|
892 |
// Recurse as required |
|
893 |
if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) { |
|
894 |
// Recurse only |
|
895 |
calcFollowList(((XSCMBinOp)nodeCur).getLeft()); |
|
896 |
calcFollowList(((XSCMBinOp)nodeCur).getRight()); |
|
897 |
} |
|
898 |
else if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE) { |
|
899 |
// Recurse first |
|
900 |
calcFollowList(((XSCMBinOp)nodeCur).getLeft()); |
|
901 |
calcFollowList(((XSCMBinOp)nodeCur).getRight()); |
|
902 |
||
903 |
// |
|
904 |
// Now handle our level. We use our left child's last pos |
|
905 |
// set and our right child's first pos set, so go ahead and |
|
906 |
// get them ahead of time. |
|
907 |
// |
|
908 |
final CMStateSet last = ((XSCMBinOp)nodeCur).getLeft().lastPos(); |
|
909 |
final CMStateSet first = ((XSCMBinOp)nodeCur).getRight().firstPos(); |
|
910 |
||
911 |
// |
|
912 |
// Now, for every position which is in our left child's last set |
|
913 |
// add all of the states in our right child's first set to the |
|
914 |
// follow set for that position. |
|
915 |
// |
|
916 |
for (int index = 0; index < fLeafCount; index++) { |
|
917 |
if (last.getBit(index)) |
|
918 |
fFollowList[index].union(first); |
|
919 |
} |
|
920 |
} |
|
921 |
else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE |
|
922 |
|| nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE) { |
|
923 |
// Recurse first |
|
924 |
calcFollowList(((XSCMUniOp)nodeCur).getChild()); |
|
925 |
||
926 |
// |
|
927 |
// Now handle our level. We use our own first and last position |
|
928 |
// sets, so get them up front. |
|
929 |
// |
|
930 |
final CMStateSet first = nodeCur.firstPos(); |
|
931 |
final CMStateSet last = nodeCur.lastPos(); |
|
932 |
||
933 |
// |
|
934 |
// For every position which is in our last position set, add all |
|
935 |
// of our first position states to the follow set for that |
|
936 |
// position. |
|
937 |
// |
|
938 |
for (int index = 0; index < fLeafCount; index++) { |
|
939 |
if (last.getBit(index)) |
|
940 |
fFollowList[index].union(first); |
|
941 |
} |
|
942 |
} |
|
943 |
||
944 |
else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) { |
|
945 |
// Recurse only |
|
946 |
calcFollowList(((XSCMUniOp)nodeCur).getChild()); |
|
947 |
} |
|
948 |
||
949 |
} |
|
950 |
||
951 |
/** |
|
952 |
* Dumps the tree of the current node to standard output. |
|
953 |
* |
|
954 |
* @param nodeCur The current node. |
|
955 |
* @param level The maximum levels to output. |
|
956 |
* |
|
957 |
* @exception RuntimeException Thrown on error. |
|
958 |
*/ |
|
959 |
private void dumpTree(CMNode nodeCur, int level) { |
|
960 |
for (int index = 0; index < level; index++) |
|
961 |
System.out.print(" "); |
|
962 |
||
963 |
int type = nodeCur.type(); |
|
964 |
||
965 |
switch(type ) { |
|
966 |
||
967 |
case XSModelGroupImpl.MODELGROUP_CHOICE: |
|
968 |
case XSModelGroupImpl.MODELGROUP_SEQUENCE: { |
|
969 |
if (type == XSModelGroupImpl.MODELGROUP_CHOICE) |
|
970 |
System.out.print("Choice Node "); |
|
971 |
else |
|
972 |
System.out.print("Seq Node "); |
|
973 |
||
974 |
if (nodeCur.isNullable()) |
|
975 |
System.out.print("Nullable "); |
|
976 |
||
977 |
System.out.print("firstPos="); |
|
978 |
System.out.print(nodeCur.firstPos().toString()); |
|
979 |
System.out.print(" lastPos="); |
|
980 |
System.out.println(nodeCur.lastPos().toString()); |
|
981 |
||
982 |
dumpTree(((XSCMBinOp)nodeCur).getLeft(), level+1); |
|
983 |
dumpTree(((XSCMBinOp)nodeCur).getRight(), level+1); |
|
984 |
break; |
|
985 |
} |
|
986 |
case XSParticleDecl.PARTICLE_ZERO_OR_MORE: |
|
987 |
case XSParticleDecl.PARTICLE_ONE_OR_MORE: |
|
988 |
case XSParticleDecl.PARTICLE_ZERO_OR_ONE: { |
|
989 |
System.out.print("Rep Node "); |
|
990 |
||
991 |
if (nodeCur.isNullable()) |
|
992 |
System.out.print("Nullable "); |
|
993 |
||
994 |
System.out.print("firstPos="); |
|
995 |
System.out.print(nodeCur.firstPos().toString()); |
|
996 |
System.out.print(" lastPos="); |
|
997 |
System.out.println(nodeCur.lastPos().toString()); |
|
998 |
||
999 |
dumpTree(((XSCMUniOp)nodeCur).getChild(), level+1); |
|
1000 |
break; |
|
1001 |
} |
|
1002 |
case XSParticleDecl.PARTICLE_ELEMENT: { |
|
1003 |
System.out.print |
|
1004 |
( |
|
1005 |
"Leaf: (pos=" |
|
1006 |
+ ((XSCMLeaf)nodeCur).getPosition() |
|
1007 |
+ "), " |
|
1008 |
+ "(elemIndex=" |
|
1009 |
+ ((XSCMLeaf)nodeCur).getLeaf() |
|
1010 |
+ ") " |
|
1011 |
); |
|
1012 |
||
1013 |
if (nodeCur.isNullable()) |
|
1014 |
System.out.print(" Nullable "); |
|
1015 |
||
1016 |
System.out.print("firstPos="); |
|
1017 |
System.out.print(nodeCur.firstPos().toString()); |
|
1018 |
System.out.print(" lastPos="); |
|
1019 |
System.out.println(nodeCur.lastPos().toString()); |
|
1020 |
break; |
|
1021 |
} |
|
1022 |
case XSParticleDecl.PARTICLE_WILDCARD: |
|
1023 |
System.out.print("Any Node: "); |
|
1024 |
||
1025 |
System.out.print("firstPos="); |
|
1026 |
System.out.print(nodeCur.firstPos().toString()); |
|
1027 |
System.out.print(" lastPos="); |
|
1028 |
System.out.println(nodeCur.lastPos().toString()); |
|
1029 |
break; |
|
1030 |
default: { |
|
1031 |
throw new RuntimeException("ImplementationMessages.VAL_NIICM"); |
|
1032 |
} |
|
1033 |
} |
|
1034 |
||
1035 |
} |
|
1036 |
||
1037 |
||
1038 |
/** |
|
1039 |
* -1 is used to represent bad transitions in the transition table |
|
1040 |
* entry for each state. So each entry is initialized to an all -1 |
|
1041 |
* array. This method creates a new entry and initializes it. |
|
1042 |
*/ |
|
1043 |
private int[] makeDefStateList() |
|
1044 |
{ |
|
1045 |
int[] retArray = new int[fElemMapSize]; |
|
1046 |
for (int index = 0; index < fElemMapSize; index++) |
|
1047 |
retArray[index] = -1; |
|
1048 |
return retArray; |
|
1049 |
} |
|
1050 |
||
1051 |
/** Post tree build initialization. */ |
|
1052 |
private void postTreeBuildInit(CMNode nodeCur) throws RuntimeException { |
|
1053 |
// Set the maximum states on this node |
|
1054 |
nodeCur.setMaxStates(fLeafCount); |
|
1055 |
||
1056 |
XSCMLeaf leaf = null; |
|
1057 |
int pos = 0; |
|
1058 |
// Recurse as required |
|
1059 |
if (nodeCur.type() == XSParticleDecl.PARTICLE_WILDCARD) { |
|
1060 |
leaf = (XSCMLeaf)nodeCur; |
|
1061 |
pos = leaf.getPosition(); |
|
1062 |
fLeafList[pos] = leaf; |
|
1063 |
fLeafListType[pos] = XSParticleDecl.PARTICLE_WILDCARD; |
|
1064 |
} |
|
1065 |
else if ((nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) || |
|
1066 |
(nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE)) { |
|
1067 |
postTreeBuildInit(((XSCMBinOp)nodeCur).getLeft()); |
|
1068 |
postTreeBuildInit(((XSCMBinOp)nodeCur).getRight()); |
|
1069 |
} |
|
1070 |
else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE || |
|
1071 |
nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE || |
|
1072 |
nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) { |
|
1073 |
postTreeBuildInit(((XSCMUniOp)nodeCur).getChild()); |
|
1074 |
} |
|
1075 |
else if (nodeCur.type() == XSParticleDecl.PARTICLE_ELEMENT) { |
|
1076 |
// Put this node in the leaf list at the current index if its |
|
1077 |
// a non-epsilon leaf. |
|
1078 |
leaf = (XSCMLeaf)nodeCur; |
|
1079 |
pos = leaf.getPosition(); |
|
1080 |
fLeafList[pos] = leaf; |
|
1081 |
fLeafListType[pos] = XSParticleDecl.PARTICLE_ELEMENT; |
|
1082 |
} |
|
1083 |
else { |
|
1084 |
throw new RuntimeException("ImplementationMessages.VAL_NIICM"); |
|
1085 |
} |
|
1086 |
} |
|
1087 |
||
1088 |
/** |
|
1089 |
* check whether this content violates UPA constraint. |
|
1090 |
* |
|
1091 |
* @param subGroupHandler the substitution group handler |
|
1092 |
* @return true if this content model contains other or list wildcard |
|
1093 |
*/ |
|
1094 |
public boolean checkUniqueParticleAttribution(SubstitutionGroupHandler subGroupHandler) throws XMLSchemaException { |
|
1095 |
// Unique Particle Attribution |
|
1096 |
// store the conflict results between any two elements in fElemMap |
|
1097 |
// 0: not compared; -1: no conflict; 1: conflict |
|
1098 |
// initialize the conflict table (all 0 initially) |
|
1099 |
byte conflictTable[][] = new byte[fElemMapSize][fElemMapSize]; |
|
1100 |
||
1101 |
// for each state, check whether it has overlap transitions |
|
1102 |
for (int i = 0; i < fTransTable.length && fTransTable[i] != null; i++) { |
|
1103 |
for (int j = 0; j < fElemMapSize; j++) { |
|
1104 |
for (int k = j+1; k < fElemMapSize; k++) { |
|
1105 |
if (fTransTable[i][j] != -1 && |
|
1106 |
fTransTable[i][k] != -1) { |
|
1107 |
if (conflictTable[j][k] == 0) { |
|
1108 |
if (XSConstraints.overlapUPA |
|
1109 |
(fElemMap[j], fElemMap[k], |
|
1110 |
subGroupHandler)) { |
|
1111 |
if (fCountingStates != null) { |
|
1112 |
Occurence o = fCountingStates[i]; |
|
1113 |
// If "i" is a counting state and exactly one of the transitions |
|
1114 |
// loops back to "i" then the two particles do not overlap if |
|
1115 |
// minOccurs == maxOccurs. |
|
1116 |
if (o != null && |
|
1117 |
fTransTable[i][j] == i ^ fTransTable[i][k] == i && |
|
1118 |
o.minOccurs == o.maxOccurs) { |
|
1119 |
conflictTable[j][k] = (byte) -1; |
|
1120 |
continue; |
|
1121 |
} |
|
1122 |
} |
|
1123 |
conflictTable[j][k] = (byte) 1; |
|
1124 |
} |
|
1125 |
else { |
|
1126 |
conflictTable[j][k] = (byte) -1; |
|
1127 |
} |
|
1128 |
} |
|
1129 |
} |
|
1130 |
} |
|
1131 |
} |
|
1132 |
} |
|
1133 |
||
1134 |
// report all errors |
|
1135 |
for (int i = 0; i < fElemMapSize; i++) { |
|
1136 |
for (int j = 0; j < fElemMapSize; j++) { |
|
1137 |
if (conflictTable[i][j] == 1) { |
|
1138 |
//errors.newError("cos-nonambig", new Object[]{fElemMap[i].toString(), |
|
1139 |
// fElemMap[j].toString()}); |
|
1140 |
// REVISIT: do we want to report all errors? or just one? |
|
1141 |
throw new XMLSchemaException("cos-nonambig", new Object[]{fElemMap[i].toString(), |
|
1142 |
fElemMap[j].toString()}); |
|
1143 |
} |
|
1144 |
} |
|
1145 |
} |
|
1146 |
||
1147 |
// if there is a other or list wildcard, we need to check this CM |
|
1148 |
// again, if this grammar is cached. |
|
1149 |
for (int i = 0; i < fElemMapSize; i++) { |
|
1150 |
if (fElemMapType[i] == XSParticleDecl.PARTICLE_WILDCARD) { |
|
1151 |
XSWildcardDecl wildcard = (XSWildcardDecl)fElemMap[i]; |
|
1152 |
if (wildcard.fType == XSWildcardDecl.NSCONSTRAINT_LIST || |
|
1153 |
wildcard.fType == XSWildcardDecl.NSCONSTRAINT_NOT) { |
|
1154 |
return true; |
|
1155 |
} |
|
1156 |
} |
|
1157 |
} |
|
1158 |
||
1159 |
return false; |
|
1160 |
} |
|
1161 |
||
1162 |
/** |
|
1163 |
* Check which elements are valid to appear at this point. This method also |
|
1164 |
* works if the state is in error, in which case it returns what should |
|
1165 |
* have been seen. |
|
1166 |
* |
|
1167 |
* @param state the current state |
|
27111
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1168 |
* @return a list whose entries are instances of |
12005 | 1169 |
* either XSWildcardDecl or XSElementDecl. |
1170 |
*/ |
|
27111
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1171 |
public ArrayList whatCanGoHere(int[] state) { |
12005 | 1172 |
int curState = state[0]; |
1173 |
if (curState < 0) |
|
1174 |
curState = state[1]; |
|
1175 |
Occurence o = (fCountingStates != null) ? |
|
1176 |
fCountingStates[curState] : null; |
|
1177 |
int count = state[2]; |
|
1178 |
||
27111
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1179 |
ArrayList ret = new ArrayList(); |
12005 | 1180 |
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { |
1181 |
int nextState = fTransTable[curState][elemIndex]; |
|
1182 |
if (nextState != -1) { |
|
1183 |
if (o != null) { |
|
1184 |
if (curState == nextState) { |
|
1185 |
// Do not include transitions which loop back to the |
|
1186 |
// current state if we've looped the maximum number |
|
1187 |
// of times or greater. |
|
1188 |
if (count >= o.maxOccurs && |
|
1189 |
o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) { |
|
1190 |
continue; |
|
1191 |
} |
|
1192 |
} |
|
1193 |
// Do not include transitions which advance past the |
|
1194 |
// current state if we have not looped enough times. |
|
1195 |
else if (count < o.minOccurs) { |
|
1196 |
continue; |
|
1197 |
} |
|
1198 |
} |
|
27111
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1199 |
ret.add(fElemMap[elemIndex]); |
12005 | 1200 |
} |
1201 |
} |
|
1202 |
return ret; |
|
1203 |
} |
|
1204 |
||
1205 |
/** |
|
1206 |
* Used by constant space algorithm for a{n,m} for n > 1 and |
|
1207 |
* m <= unbounded. Called by a validator if validation of |
|
1208 |
* countent model succeeds after subsuming a{n,m} to a* |
|
1209 |
* (or a+) to check the n and m bounds. |
|
1210 |
* Returns <code>null</code> if validation of bounds is |
|
1211 |
* successful. Returns a list of strings with error info |
|
1212 |
* if not. Even entries in list returned are error codes |
|
1213 |
* (used to look up properties) and odd entries are parameters |
|
1214 |
* to be passed when formatting error message. Each parameter |
|
1215 |
* is associated with the error code that preceeds it in |
|
1216 |
* the list. |
|
1217 |
*/ |
|
1218 |
public ArrayList checkMinMaxBounds() { |
|
1219 |
ArrayList result = null; |
|
1220 |
for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { |
|
1221 |
int count = fElemMapCounter[elemIndex]; |
|
1222 |
if (count == -1) { |
|
1223 |
continue; |
|
1224 |
} |
|
1225 |
final int minOccurs = fElemMapCounterLowerBound[elemIndex]; |
|
1226 |
final int maxOccurs = fElemMapCounterUpperBound[elemIndex]; |
|
1227 |
if (count < minOccurs) { |
|
1228 |
if (result == null) result = new ArrayList(); |
|
1229 |
result.add("cvc-complex-type.2.4.b"); |
|
1230 |
result.add("{" + fElemMap[elemIndex] + "}"); |
|
1231 |
} |
|
1232 |
if (maxOccurs != -1 && count > maxOccurs) { |
|
1233 |
if (result == null) result = new ArrayList(); |
|
27111
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1234 |
result.add("cvc-complex-type.2.4.d.1"); |
12005 | 1235 |
result.add("{" + fElemMap[elemIndex] + "}"); |
1236 |
} |
|
1237 |
} |
|
1238 |
return result; |
|
1239 |
} |
|
1240 |
||
27111
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1241 |
public int [] occurenceInfo(int[] state) { |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1242 |
if (fCountingStates != null) { |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1243 |
int curState = state[0]; |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1244 |
if (curState < 0) { |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1245 |
curState = state[1]; |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1246 |
} |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1247 |
Occurence o = fCountingStates[curState]; |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1248 |
if (o != null) { |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1249 |
int [] occurenceInfo = new int[4]; |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1250 |
occurenceInfo[0] = o.minOccurs; |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1251 |
occurenceInfo[1] = o.maxOccurs; |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1252 |
occurenceInfo[2] = state[2]; |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1253 |
occurenceInfo[3] = o.elemIndex; |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1254 |
return occurenceInfo; |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1255 |
} |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1256 |
} |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1257 |
return null; |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1258 |
} |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1259 |
|
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1260 |
public String getTermName(int termId) { |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1261 |
Object term = fElemMap[termId]; |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1262 |
return (term != null) ? term.toString() : null; |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1263 |
} |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1264 |
|
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1265 |
public boolean isCompactedForUPA() { |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1266 |
return fIsCompactedForUPA; |
7a491d709b83
8036951: Xerces Update: XMLSchemaValidator.java and XMLSchemaLoader.java
joehw
parents:
25868
diff
changeset
|
1267 |
} |
12005 | 1268 |
} // class DFAContentModel |