author | chegar |
Thu, 17 Oct 2019 20:54:25 +0100 | |
branch | datagramsocketimpl-branch |
changeset 58679 | 9c3209ff7550 |
parent 58678 | 9cf78a70fa4f |
parent 58022 | 12885822f0c5 |
permissions | -rw-r--r-- |
6 | 1 |
/* |
55575
25165403c62e
8223291: Whitespace is added to CDATA tags when using OutputKeys.INDENT to format XML
joehw
parents:
51786
diff
changeset
|
2 |
* Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. |
6 | 3 |
*/ |
4 |
/* |
|
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
5 |
* Licensed to the Apache Software Foundation (ASF) under one or more |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
6 |
* contributor license agreements. See the NOTICE file distributed with |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
7 |
* this work for additional information regarding copyright ownership. |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
8 |
* The ASF licenses this file to You under the Apache License, Version 2.0 |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
9 |
* (the "License"); you may not use this file except in compliance with |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
10 |
* the License. You may obtain a copy of the License at |
6 | 11 |
* |
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
12 |
* http://www.apache.org/licenses/LICENSE-2.0 |
6 | 13 |
* |
14 |
* Unless required by applicable law or agreed to in writing, software |
|
15 |
* distributed under the License is distributed on an "AS IS" BASIS, |
|
16 |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
17 |
* See the License for the specific language governing permissions and |
|
18 |
* limitations under the License. |
|
19 |
*/ |
|
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
20 |
|
6 | 21 |
package com.sun.org.apache.xml.internal.serializer; |
22 |
||
23 |
import java.io.IOException; |
|
24 |
import java.util.Properties; |
|
25 |
||
26 |
import javax.xml.transform.Result; |
|
27 |
||
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
28 |
import org.xml.sax.Attributes; |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
29 |
import org.xml.sax.SAXException; |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
30 |
|
6 | 31 |
import com.sun.org.apache.xml.internal.serializer.utils.MsgKey; |
32 |
import com.sun.org.apache.xml.internal.serializer.utils.Utils; |
|
58022
12885822f0c5
8228854: Default ErrorListener reports warnings and errors to the console
joehw
parents:
55575
diff
changeset
|
33 |
import javax.xml.transform.ErrorListener; |
6 | 34 |
|
35 |
/** |
|
36 |
* This serializer takes a series of SAX or |
|
37 |
* SAX-like events and writes its output |
|
38 |
* to the given stream. |
|
39 |
* |
|
40 |
* This class is not a public API, it is public |
|
41 |
* because it is used from another package. |
|
42 |
* |
|
43 |
* @xsl.usage internal |
|
58022
12885822f0c5
8228854: Default ErrorListener reports warnings and errors to the console
joehw
parents:
55575
diff
changeset
|
44 |
* @LastModified: Aug 2019 |
6 | 45 |
*/ |
46 |
public final class ToHTMLStream extends ToStream |
|
47 |
{ |
|
48 |
||
49 |
/** This flag is set while receiving events from the DTD */ |
|
50 |
protected boolean m_inDTD = false; |
|
51 |
||
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
52 |
/** True if the previous element is a block element. */ |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
53 |
private boolean m_isprevblock = false; |
6 | 54 |
|
55 |
/** |
|
56 |
* Map that tells which XML characters should have special treatment, and it |
|
57 |
* provides character to entity name lookup. |
|
58 |
*/ |
|
12902
0a840d92fa30
7151118: Regressions on 7u4 b11 comp. 7u4 b06 on specjvm2008.xml.transform subbenchmark
joehw
parents:
12458
diff
changeset
|
59 |
private static final CharInfo m_htmlcharInfo = |
6 | 60 |
// new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE); |
23954 | 61 |
CharInfo.getCharInfoInternal(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML); |
6 | 62 |
|
63 |
/** A digital search trie for fast, case insensitive lookup of ElemDesc objects. */ |
|
64 |
static final Trie m_elementFlags = new Trie(); |
|
65 |
||
66 |
static {
    // Fill the shared element table once, when the class is initialized.
    initTagReference(m_elementFlags);
}

static void initTagReference(Trie m_elementFlags) {
    // Part 1: register one ElemDesc per known HTML element name.
    // Each element gets its own ElemDesc instance because attribute
    // information is attached to individual instances in part 2.

    // HTML 4.0 loose DTD
    m_elementFlags.put("BASEFONT", new ElemDesc(ElemDesc.EMPTY));
    m_elementFlags.put("FRAME", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
    m_elementFlags.put("FRAMESET", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("NOFRAMES", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("ISINDEX", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
    m_elementFlags.put("APPLET", new ElemDesc(ElemDesc.WHITESPACESENSITIVE));
    m_elementFlags.put("CENTER", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("DIR", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("MENU", new ElemDesc(ElemDesc.BLOCK));

    // HTML 4.0 strict DTD
    m_elementFlags.put("TT", new ElemDesc(ElemDesc.FONTSTYLE));
    m_elementFlags.put("I", new ElemDesc(ElemDesc.FONTSTYLE));
    m_elementFlags.put("B", new ElemDesc(ElemDesc.FONTSTYLE));
    m_elementFlags.put("BIG", new ElemDesc(ElemDesc.FONTSTYLE));
    m_elementFlags.put("SMALL", new ElemDesc(ElemDesc.FONTSTYLE));
    m_elementFlags.put("EM", new ElemDesc(ElemDesc.PHRASE));
    m_elementFlags.put("STRONG", new ElemDesc(ElemDesc.PHRASE));
    m_elementFlags.put("DFN", new ElemDesc(ElemDesc.PHRASE));
    m_elementFlags.put("CODE", new ElemDesc(ElemDesc.PHRASE));
    m_elementFlags.put("SAMP", new ElemDesc(ElemDesc.PHRASE));
    m_elementFlags.put("KBD", new ElemDesc(ElemDesc.PHRASE));
    m_elementFlags.put("VAR", new ElemDesc(ElemDesc.PHRASE));
    m_elementFlags.put("CITE", new ElemDesc(ElemDesc.PHRASE));
    m_elementFlags.put("ABBR", new ElemDesc(ElemDesc.PHRASE));
    m_elementFlags.put("ACRONYM", new ElemDesc(ElemDesc.PHRASE));
    m_elementFlags.put("SUP", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
    m_elementFlags.put("SUB", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
    m_elementFlags.put("SPAN", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
    m_elementFlags.put("BDO", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
    m_elementFlags.put(
        "BR",
        new ElemDesc(
            ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.EMPTY | ElemDesc.BLOCK));
    m_elementFlags.put("BODY", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put(
        "ADDRESS",
        new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
    m_elementFlags.put(
        "DIV",
        new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
    m_elementFlags.put("A", new ElemDesc(ElemDesc.SPECIAL));
    m_elementFlags.put(
        "MAP",
        new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
    m_elementFlags.put("AREA", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
    m_elementFlags.put(
        "LINK",
        new ElemDesc(ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
    m_elementFlags.put(
        "IMG",
        new ElemDesc(
            ElemDesc.SPECIAL
                | ElemDesc.ASPECIAL
                | ElemDesc.EMPTY
                | ElemDesc.WHITESPACESENSITIVE));
    m_elementFlags.put(
        "OBJECT",
        new ElemDesc(
            ElemDesc.SPECIAL
                | ElemDesc.ASPECIAL
                | ElemDesc.HEADMISC
                | ElemDesc.WHITESPACESENSITIVE));
    m_elementFlags.put("PARAM", new ElemDesc(ElemDesc.EMPTY));
    m_elementFlags.put(
        "HR",
        new ElemDesc(
            ElemDesc.BLOCK
                | ElemDesc.BLOCKFORM
                | ElemDesc.BLOCKFORMFIELDSET
                | ElemDesc.EMPTY));
    m_elementFlags.put(
        "P",
        new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
    m_elementFlags.put("H1", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
    m_elementFlags.put("H2", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
    m_elementFlags.put("H3", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
    m_elementFlags.put("H4", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
    m_elementFlags.put("H5", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
    m_elementFlags.put("H6", new ElemDesc(ElemDesc.HEAD | ElemDesc.BLOCK));
    m_elementFlags.put("PRE", new ElemDesc(ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
    m_elementFlags.put("Q", new ElemDesc(ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
    m_elementFlags.put(
        "BLOCKQUOTE",
        new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
    m_elementFlags.put("INS", new ElemDesc(0));
    m_elementFlags.put("DEL", new ElemDesc(0));
    m_elementFlags.put(
        "DL",
        new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
    m_elementFlags.put("DT", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("DD", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("OL", new ElemDesc(ElemDesc.LIST | ElemDesc.BLOCK));
    m_elementFlags.put("UL", new ElemDesc(ElemDesc.LIST | ElemDesc.BLOCK));
    m_elementFlags.put("LI", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("FORM", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("LABEL", new ElemDesc(ElemDesc.FORMCTRL));
    m_elementFlags.put(
        "INPUT",
        new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
    m_elementFlags.put("SELECT", new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
    m_elementFlags.put("OPTGROUP", new ElemDesc(0));
    m_elementFlags.put("OPTION", new ElemDesc(0));
    m_elementFlags.put("TEXTAREA", new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
    m_elementFlags.put("FIELDSET", new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
    m_elementFlags.put("LEGEND", new ElemDesc(0));
    m_elementFlags.put("BUTTON", new ElemDesc(ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
    m_elementFlags.put(
        "TABLE",
        new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
    m_elementFlags.put("CAPTION", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("THEAD", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("TFOOT", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("TBODY", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("COLGROUP", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("COL", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
    m_elementFlags.put("TR", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("TH", new ElemDesc(0));
    m_elementFlags.put("TD", new ElemDesc(0));
    m_elementFlags.put("HEAD", new ElemDesc(ElemDesc.BLOCK | ElemDesc.HEADELEM));
    m_elementFlags.put("TITLE", new ElemDesc(ElemDesc.BLOCK));
    m_elementFlags.put("BASE", new ElemDesc(ElemDesc.EMPTY | ElemDesc.BLOCK));
    m_elementFlags.put(
        "META",
        new ElemDesc(ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
    m_elementFlags.put(
        "STYLE",
        new ElemDesc(ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
    m_elementFlags.put(
        "SCRIPT",
        new ElemDesc(
            ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.HEADMISC | ElemDesc.RAW));
    m_elementFlags.put(
        "NOSCRIPT",
        new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
    m_elementFlags.put("HTML", new ElemDesc(ElemDesc.BLOCK));

    // Transitional (loose) DTD presentation elements.
    // FONT and NOBR contributed by "John Ky" <hand@syd.speednet.com.au>
    m_elementFlags.put("FONT", new ElemDesc(ElemDesc.FONTSTYLE));
    m_elementFlags.put("S", new ElemDesc(ElemDesc.FONTSTYLE));
    m_elementFlags.put("STRIKE", new ElemDesc(ElemDesc.FONTSTYLE));
    m_elementFlags.put("U", new ElemDesc(ElemDesc.FONTSTYLE));
    m_elementFlags.put("NOBR", new ElemDesc(ElemDesc.FONTSTYLE));

    // HTML 4.0, section 16.5
    m_elementFlags.put(
        "IFRAME",
        new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));

    // Netscape 4 extensions
    m_elementFlags.put(
        "LAYER",
        new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
    m_elementFlags.put(
        "ILAYER",
        new ElemDesc(ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));

    // Part 2: attach attribute information to the descriptors registered
    // above. Each descriptor is fetched back from the table by name and
    // its URL-valued (ATTRURL) and value-less (ATTREMPTY) attributes are
    // recorded on it.
    ElemDesc desc;

    desc = (ElemDesc) m_elementFlags.get("a");
    desc.setAttr("HREF", ElemDesc.ATTRURL);
    desc.setAttr("NAME", ElemDesc.ATTRURL);

    desc = (ElemDesc) m_elementFlags.get("area");
    desc.setAttr("HREF", ElemDesc.ATTRURL);
    desc.setAttr("NOHREF", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("base");
    desc.setAttr("HREF", ElemDesc.ATTRURL);

    desc = (ElemDesc) m_elementFlags.get("button");
    desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("blockquote");
    desc.setAttr("CITE", ElemDesc.ATTRURL);

    desc = (ElemDesc) m_elementFlags.get("del");
    desc.setAttr("CITE", ElemDesc.ATTRURL);

    desc = (ElemDesc) m_elementFlags.get("dir");
    desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("div");
    desc.setAttr("SRC", ElemDesc.ATTRURL);      // Netscape 4 extension
    desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension

    desc = (ElemDesc) m_elementFlags.get("dl");
    desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("form");
    desc.setAttr("ACTION", ElemDesc.ATTRURL);

    // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
    desc = (ElemDesc) m_elementFlags.get("frame");
    desc.setAttr("SRC", ElemDesc.ATTRURL);
    desc.setAttr("LONGDESC", ElemDesc.ATTRURL);
    desc.setAttr("NORESIZE", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("head");
    desc.setAttr("PROFILE", ElemDesc.ATTRURL);

    desc = (ElemDesc) m_elementFlags.get("hr");
    desc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);

    // HTML 4.0, section 16.5
    desc = (ElemDesc) m_elementFlags.get("iframe");
    desc.setAttr("SRC", ElemDesc.ATTRURL);
    desc.setAttr("LONGDESC", ElemDesc.ATTRURL);

    // Netscape 4 extension
    desc = (ElemDesc) m_elementFlags.get("ilayer");
    desc.setAttr("SRC", ElemDesc.ATTRURL);

    desc = (ElemDesc) m_elementFlags.get("img");
    desc.setAttr("SRC", ElemDesc.ATTRURL);
    desc.setAttr("LONGDESC", ElemDesc.ATTRURL);
    desc.setAttr("USEMAP", ElemDesc.ATTRURL);
    desc.setAttr("ISMAP", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("input");
    desc.setAttr("SRC", ElemDesc.ATTRURL);
    desc.setAttr("USEMAP", ElemDesc.ATTRURL);
    desc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
    desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
    desc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
    desc.setAttr("READONLY", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("ins");
    desc.setAttr("CITE", ElemDesc.ATTRURL);

    // Netscape 4 extension
    desc = (ElemDesc) m_elementFlags.get("layer");
    desc.setAttr("SRC", ElemDesc.ATTRURL);

    desc = (ElemDesc) m_elementFlags.get("link");
    desc.setAttr("HREF", ElemDesc.ATTRURL);

    desc = (ElemDesc) m_elementFlags.get("menu");
    desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("object");
    desc.setAttr("CLASSID", ElemDesc.ATTRURL);
    desc.setAttr("CODEBASE", ElemDesc.ATTRURL);
    desc.setAttr("DATA", ElemDesc.ATTRURL);
    desc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
    desc.setAttr("USEMAP", ElemDesc.ATTRURL);
    desc.setAttr("DECLARE", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("ol");
    desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("optgroup");
    desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("option");
    desc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
    desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("q");
    desc.setAttr("CITE", ElemDesc.ATTRURL);

    desc = (ElemDesc) m_elementFlags.get("script");
    desc.setAttr("SRC", ElemDesc.ATTRURL);
    desc.setAttr("FOR", ElemDesc.ATTRURL);
    desc.setAttr("DEFER", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("select");
    desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
    desc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("table");
    desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension

    desc = (ElemDesc) m_elementFlags.get("td");
    desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("textarea");
    desc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
    desc.setAttr("READONLY", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("th");
    desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);

    // The nowrap attribute of a tr element is both
    // a Netscape and Internet-Explorer extension
    desc = (ElemDesc) m_elementFlags.get("tr");
    desc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);

    desc = (ElemDesc) m_elementFlags.get("ul");
    desc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
}
|
516 |
||
517 |
/** |
|
518 |
* Dummy element for elements not found. |
|
519 |
*/ |
|
520 |
static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK); |
|
521 |
||
522 |
/** True if URLs should be specially escaped with the %xx form. */ |
|
523 |
private boolean m_specialEscapeURLs = true; |
|
524 |
||
525 |
/** True if the META tag should be omitted. */ |
|
526 |
private boolean m_omitMetaTag = false; |
|
527 |
||
528 |
/** |
|
529 |
* Tells if the formatter should use special URL escaping. |
|
530 |
* |
|
531 |
* @param bool True if URLs should be specially escaped with the %xx form. |
|
532 |
*/ |
|
533 |
public void setSpecialEscapeURLs(boolean bool) |
|
534 |
{ |
|
535 |
m_specialEscapeURLs = bool; |
|
536 |
} |
|
537 |
||
538 |
/** |
|
539 |
* Tells if the formatter should omit the META tag. |
|
540 |
* |
|
541 |
* @param bool True if the META tag should be omitted. |
|
542 |
*/ |
|
543 |
public void setOmitMetaTag(boolean bool) |
|
544 |
{ |
|
545 |
m_omitMetaTag = bool; |
|
546 |
} |
|
547 |
||
548 |
/** |
|
549 |
* Specifies an output format for this serializer. If the |
|
550 |
* serializer has already been associated with an output format, |
|
551 |
* it will switch to the new format. This method should not be |
|
552 |
* called while the serializer is in the process of serializing |
|
553 |
* a document. |
|
554 |
* |
|
555 |
* This method can be called multiple times before starting |
|
556 |
* the serialization of a particular result-tree. In principle |
|
557 |
* all serialization parameters can be changed, with the exception |
|
558 |
* of method="html" (it must be method="html" otherwise we |
|
559 |
* shouldn't even have a ToHTMLStream object here!) |
|
560 |
* |
|
561 |
* @param format The output format or serialization parameters |
|
562 |
* to use. |
|
563 |
*/ |
|
564 |
public void setOutputFormat(Properties format) |
|
565 |
{ |
|
566 |
||
567 |
m_specialEscapeURLs = |
|
568 |
OutputPropertyUtils.getBooleanProperty( |
|
569 |
OutputPropertiesFactory.S_USE_URL_ESCAPING, |
|
570 |
format); |
|
571 |
||
572 |
m_omitMetaTag = |
|
573 |
OutputPropertyUtils.getBooleanProperty( |
|
574 |
OutputPropertiesFactory.S_OMIT_META_TAG, |
|
575 |
format); |
|
576 |
||
577 |
super.setOutputFormat(format); |
|
578 |
} |
|
579 |
||
580 |
/** |
|
581 |
* Tells if the formatter should use special URL escaping. |
|
582 |
* |
|
583 |
* @return True if URLs should be specially escaped with the %xx form. |
|
584 |
*/ |
|
585 |
private final boolean getSpecialEscapeURLs() |
|
586 |
{ |
|
587 |
return m_specialEscapeURLs; |
|
588 |
} |
|
589 |
||
590 |
/** |
|
591 |
* Tells if the formatter should omit the META tag. |
|
592 |
* |
|
593 |
* @return True if the META tag should be omitted. |
|
594 |
*/ |
|
595 |
private final boolean getOmitMetaTag() |
|
596 |
{ |
|
597 |
return m_omitMetaTag; |
|
598 |
} |
|
599 |
||
600 |
/** |
|
601 |
* Get a description of the given element. |
|
602 |
* |
|
603 |
* @param name non-null name of element, case insensitive. |
|
604 |
* |
|
605 |
* @return non-null reference to ElemDesc, which may be m_dummy if no |
|
606 |
* element description matches the given name. |
|
607 |
*/ |
|
608 |
public static final ElemDesc getElemDesc(String name) |
|
609 |
{ |
|
610 |
/* this method used to return m_dummy when name was null |
|
611 |
* but now it doesn't check and and requires non-null name. |
|
612 |
*/ |
|
613 |
Object obj = m_elementFlags.get(name); |
|
614 |
if (null != obj) |
|
615 |
return (ElemDesc)obj; |
|
616 |
return m_dummy; |
|
617 |
} |
|
618 |
||
619 |
/** |
|
620 |
* A Trie that is just a copy of the "static" one. |
|
621 |
* We need this one to be able to use the faster, but not thread-safe |
|
622 |
* method Trie.get2(name) |
|
623 |
*/ |
|
624 |
private Trie m_htmlInfo = new Trie(m_elementFlags); |
|
625 |
/** |
|
626 |
* Calls to this method could be replaced with calls to |
|
627 |
* getElemDesc(name), but this one should be faster. |
|
628 |
*/ |
|
629 |
private ElemDesc getElemDesc2(String name) |
|
630 |
{ |
|
631 |
Object obj = m_htmlInfo.get2(name); |
|
632 |
if (null != obj) |
|
633 |
return (ElemDesc)obj; |
|
634 |
return m_dummy; |
|
635 |
} |
|
636 |
||
637 |
/**
 * Default constructor. Delegates to the ErrorListener constructor with a
 * null listener.
 */
public ToHTMLStream()
{
    this((ErrorListener) null);
}
6 | 644 |
|
58022
12885822f0c5
8228854: Default ErrorListener reports warnings and errors to the console
joehw
parents:
55575
diff
changeset
|
645 |
/**
 * Creates an HTML serializer that hands the given listener to the
 * superclass constructor.
 *
 * @param l the ErrorListener passed to the superclass; may be null (the
 *          default constructor passes null).
 */
public ToHTMLStream(ErrorListener l)
{
    super(l);

    // Point the character info at the HTML-specific instance.
    this.m_charInfo = m_htmlcharInfo;

    // initialize namespaces with a fresh mapping table
    this.m_prefixMap = new NamespaceMappings();
}
|
652 |
||
653 |
/** The name of the current element. */ |
|
654 |
// private String m_currentElementName = null; |
|
655 |
||
656 |
/** |
|
657 |
* Receive notification of the beginning of a document. |
|
658 |
* |
|
659 |
* @throws org.xml.sax.SAXException Any SAX exception, possibly |
|
660 |
* wrapping another exception. |
|
661 |
* |
|
662 |
* @throws org.xml.sax.SAXException |
|
663 |
*/ |
|
664 |
protected void startDocumentInternal() throws org.xml.sax.SAXException |
|
665 |
{ |
|
666 |
super.startDocumentInternal(); |
|
667 |
||
668 |
m_needToCallStartDocument = false; |
|
669 |
m_needToOutputDocTypeDecl = true; |
|
670 |
m_startNewLine = false; |
|
671 |
setOmitXMLDeclaration(true); |
|
672 |
||
673 |
if (true == m_needToOutputDocTypeDecl) |
|
674 |
{ |
|
675 |
String doctypeSystem = getDoctypeSystem(); |
|
676 |
String doctypePublic = getDoctypePublic(); |
|
677 |
if ((null != doctypeSystem) || (null != doctypePublic)) |
|
678 |
{ |
|
679 |
final java.io.Writer writer = m_writer; |
|
680 |
try |
|
681 |
{ |
|
682 |
writer.write("<!DOCTYPE html"); |
|
683 |
||
684 |
if (null != doctypePublic) |
|
685 |
{ |
|
686 |
writer.write(" PUBLIC \""); |
|
687 |
writer.write(doctypePublic); |
|
688 |
writer.write('"'); |
|
689 |
} |
|
690 |
||
691 |
if (null != doctypeSystem) |
|
692 |
{ |
|
693 |
if (null == doctypePublic) |
|
694 |
writer.write(" SYSTEM \""); |
|
695 |
else |
|
696 |
writer.write(" \""); |
|
697 |
||
698 |
writer.write(doctypeSystem); |
|
699 |
writer.write('"'); |
|
700 |
} |
|
701 |
||
702 |
writer.write('>'); |
|
703 |
outputLineSep(); |
|
704 |
} |
|
705 |
catch(IOException e) |
|
706 |
{ |
|
707 |
throw new SAXException(e); |
|
708 |
} |
|
709 |
} |
|
710 |
} |
|
711 |
||
712 |
m_needToOutputDocTypeDecl = false; |
|
713 |
} |
|
714 |
||
715 |
/** |
|
716 |
* Receive notification of the end of a document. |
|
717 |
* |
|
718 |
* @throws org.xml.sax.SAXException Any SAX exception, possibly |
|
719 |
* wrapping another exception. |
|
720 |
* |
|
721 |
* @throws org.xml.sax.SAXException |
|
722 |
*/ |
|
723 |
public final void endDocument() throws org.xml.sax.SAXException |
|
724 |
{ |
|
43744
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
725 |
if (m_doIndent) { |
55575
25165403c62e
8223291: Whitespace is added to CDATA tags when using OutputKeys.INDENT to format XML
joehw
parents:
51786
diff
changeset
|
726 |
flushCharactersBuffer(false); |
43744
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
727 |
} |
6 | 728 |
flushPending(); |
729 |
if (m_doIndent && !m_isprevtext) |
|
730 |
{ |
|
731 |
try |
|
732 |
{ |
|
733 |
outputLineSep(); |
|
734 |
} |
|
735 |
catch(IOException e) |
|
736 |
{ |
|
737 |
throw new SAXException(e); |
|
738 |
} |
|
739 |
} |
|
740 |
||
741 |
flushWriter(); |
|
742 |
if (m_tracer != null) |
|
743 |
super.fireEndDoc(); |
|
744 |
} |
|
745 |
||
746 |
/** |
|
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
747 |
* If the previous is an inline element, won't insert a new line before the |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
748 |
* text. |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
749 |
* |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
750 |
*/ |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
751 |
protected boolean shouldIndentForText() { |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
752 |
return super.shouldIndentForText() && m_isprevblock; |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
753 |
} |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
754 |
|
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
755 |
/** |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
756 |
* Only check m_doIndent, disregard m_ispreserveSpace. |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
757 |
* |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
758 |
* @return True if the content should be formatted. |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
759 |
*/ |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
760 |
protected boolean shouldFormatOutput() { |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
761 |
return m_doIndent; |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
762 |
} |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
763 |
|
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
764 |
/** |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
765 |
* Receive notification of the beginning of an element. |
6 | 766 |
* |
767 |
* |
|
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
768 |
* @param namespaceURI |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
769 |
* @param localName |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
770 |
* @param name |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
771 |
* The element type name. |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
772 |
* @param atts |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
773 |
* The attributes attached to the element, if any. |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
774 |
* @throws org.xml.sax.SAXException |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
775 |
* Any SAX exception, possibly wrapping another exception. |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
776 |
* @see #endElement |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
777 |
* @see org.xml.sax.AttributeList |
6 | 778 |
*/ |
779 |
public void startElement( |
|
780 |
String namespaceURI, |
|
781 |
String localName, |
|
782 |
String name, |
|
783 |
Attributes atts) |
|
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
784 |
throws SAXException |
6 | 785 |
{ |
43744
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
786 |
if (m_doIndent) { |
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
787 |
// will add extra one if having namespace but no matter |
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
788 |
m_childNodeNum++; |
55575
25165403c62e
8223291: Whitespace is added to CDATA tags when using OutputKeys.INDENT to format XML
joehw
parents:
51786
diff
changeset
|
789 |
flushCharactersBuffer(false); |
43744
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
790 |
} |
6 | 791 |
ElemContext elemContext = m_elemContext; |
792 |
||
793 |
// clean up any pending things first |
|
794 |
if (elemContext.m_startTagOpen) |
|
795 |
{ |
|
796 |
closeStartTag(); |
|
797 |
elemContext.m_startTagOpen = false; |
|
798 |
} |
|
799 |
else if (m_cdataTagOpen) |
|
800 |
{ |
|
801 |
closeCDATA(); |
|
802 |
m_cdataTagOpen = false; |
|
803 |
} |
|
804 |
else if (m_needToCallStartDocument) |
|
805 |
{ |
|
806 |
startDocumentInternal(); |
|
807 |
m_needToCallStartDocument = false; |
|
808 |
} |
|
809 |
||
810 |
||
811 |
// if this element has a namespace then treat it like XML |
|
812 |
if (null != namespaceURI && namespaceURI.length() > 0) |
|
813 |
{ |
|
814 |
super.startElement(namespaceURI, localName, name, atts); |
|
815 |
||
816 |
return; |
|
817 |
} |
|
818 |
||
819 |
try |
|
820 |
{ |
|
821 |
// getElemDesc2(name) is faster than getElemDesc(name) |
|
822 |
ElemDesc elemDesc = getElemDesc2(name); |
|
823 |
int elemFlags = elemDesc.getFlags(); |
|
824 |
||
825 |
// deal with indentation issues first |
|
826 |
if (m_doIndent) |
|
827 |
{ |
|
828 |
boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0; |
|
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
829 |
if ((elemContext.m_elementName != null) |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
830 |
// If this element is a block element, |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
831 |
// or if this is not a block element, then if the |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
832 |
// previous is neither a text nor an inline |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
833 |
&& (isBlockElement || (!(m_isprevtext || !m_isprevblock)))) |
6 | 834 |
{ |
835 |
m_startNewLine = true; |
|
836 |
||
837 |
indent(); |
|
838 |
} |
|
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
839 |
m_isprevblock = isBlockElement; |
6 | 840 |
} |
841 |
||
842 |
// save any attributes for later processing |
|
843 |
if (atts != null) |
|
844 |
addAttributes(atts); |
|
845 |
||
846 |
m_isprevtext = false; |
|
847 |
final java.io.Writer writer = m_writer; |
|
848 |
writer.write('<'); |
|
849 |
writer.write(name); |
|
850 |
||
43744
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
851 |
if (m_doIndent) { |
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
852 |
m_childNodeNumStack.add(m_childNodeNum); |
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
853 |
m_childNodeNum = 0; |
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
854 |
} |
6 | 855 |
|
856 |
if (m_tracer != null) |
|
857 |
firePseudoAttributes(); |
|
858 |
||
859 |
if ((elemFlags & ElemDesc.EMPTY) != 0) |
|
860 |
{ |
|
861 |
// an optimization for elements which are expected |
|
862 |
// to be empty. |
|
863 |
m_elemContext = elemContext.push(); |
|
864 |
/* XSLTC sometimes calls namespaceAfterStartElement() |
|
865 |
* so we need to remember the name |
|
866 |
*/ |
|
867 |
m_elemContext.m_elementName = name; |
|
868 |
m_elemContext.m_elementDesc = elemDesc; |
|
869 |
return; |
|
870 |
} |
|
871 |
else |
|
872 |
{ |
|
873 |
elemContext = elemContext.push(namespaceURI,localName,name); |
|
874 |
m_elemContext = elemContext; |
|
875 |
elemContext.m_elementDesc = elemDesc; |
|
876 |
elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0; |
|
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
877 |
|
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
878 |
// set m_startNewLine for the next element |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
879 |
if (m_doIndent) { |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
880 |
// elemFlags is equivalent to m_elemContext.m_elementDesc.getFlags(), |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
881 |
// in this branch m_elemContext.m_elementName is not null |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
882 |
boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0; |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
883 |
if (isBlockElement) |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
884 |
m_startNewLine = true; |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
885 |
} |
6 | 886 |
} |
887 |
||
888 |
||
889 |
if ((elemFlags & ElemDesc.HEADELEM) != 0) |
|
890 |
{ |
|
891 |
// This is the <HEAD> element, do some special processing |
|
892 |
closeStartTag(); |
|
893 |
elemContext.m_startTagOpen = false; |
|
894 |
if (!m_omitMetaTag) |
|
895 |
{ |
|
896 |
if (m_doIndent) |
|
897 |
indent(); |
|
898 |
writer.write( |
|
899 |
"<META http-equiv=\"Content-Type\" content=\"text/html; charset="); |
|
900 |
String encoding = getEncoding(); |
|
901 |
String encode = Encodings.getMimeEncoding(encoding); |
|
902 |
writer.write(encode); |
|
903 |
writer.write("\">"); |
|
904 |
} |
|
905 |
} |
|
906 |
} |
|
907 |
catch (IOException e) |
|
908 |
{ |
|
909 |
throw new SAXException(e); |
|
910 |
} |
|
911 |
} |
|
912 |
||
913 |
/** |
|
914 |
* Receive notification of the end of an element. |
|
915 |
* |
|
916 |
* |
|
917 |
* @param namespaceURI |
|
918 |
* @param localName |
|
919 |
* @param name The element type name |
|
920 |
* @throws org.xml.sax.SAXException Any SAX exception, possibly |
|
921 |
* wrapping another exception. |
|
922 |
*/ |
|
923 |
public final void endElement( |
|
924 |
final String namespaceURI, |
|
925 |
final String localName, |
|
926 |
final String name) |
|
927 |
throws org.xml.sax.SAXException |
|
928 |
{ |
|
43744
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
929 |
if (m_doIndent) { |
55575
25165403c62e
8223291: Whitespace is added to CDATA tags when using OutputKeys.INDENT to format XML
joehw
parents:
51786
diff
changeset
|
930 |
flushCharactersBuffer(false); |
43744
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
931 |
} |
6 | 932 |
// deal with any pending issues |
933 |
if (m_cdataTagOpen) |
|
934 |
closeCDATA(); |
|
935 |
||
936 |
// if the element has a namespace, treat it like XML, not HTML |
|
937 |
if (null != namespaceURI && namespaceURI.length() > 0) |
|
938 |
{ |
|
939 |
super.endElement(namespaceURI, localName, name); |
|
940 |
||
941 |
return; |
|
942 |
} |
|
943 |
||
944 |
try |
|
945 |
{ |
|
946 |
||
947 |
ElemContext elemContext = m_elemContext; |
|
948 |
final ElemDesc elemDesc = elemContext.m_elementDesc; |
|
949 |
final int elemFlags = elemDesc.getFlags(); |
|
950 |
final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0; |
|
951 |
||
952 |
// deal with any indentation issues |
|
953 |
if (m_doIndent) |
|
954 |
{ |
|
955 |
final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0; |
|
956 |
boolean shouldIndent = false; |
|
957 |
||
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
958 |
// If this element is a block element, |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
959 |
// or if this is not a block element, then if the previous is |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
960 |
// neither a text nor an inline |
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
961 |
if (isBlockElement || (!(m_isprevtext || !m_isprevblock))) |
6 | 962 |
{ |
963 |
m_startNewLine = true; |
|
964 |
shouldIndent = true; |
|
965 |
} |
|
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
966 |
if (!elemContext.m_startTagOpen && shouldIndent && (m_childNodeNum > 1 || !m_isprevtext)) |
6 | 967 |
indent(elemContext.m_currentElemDepth - 1); |
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
968 |
|
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
969 |
m_isprevblock = isBlockElement; |
6 | 970 |
} |
971 |
||
972 |
final java.io.Writer writer = m_writer; |
|
973 |
if (!elemContext.m_startTagOpen) |
|
974 |
{ |
|
975 |
writer.write("</"); |
|
976 |
writer.write(name); |
|
977 |
writer.write('>'); |
|
978 |
} |
|
979 |
else |
|
980 |
{ |
|
981 |
// the start-tag open when this method was called, |
|
982 |
// so we need to process it now. |
|
983 |
||
984 |
if (m_tracer != null) |
|
985 |
super.fireStartElem(name); |
|
986 |
||
987 |
// the starting tag was still open when we received this endElement() call |
|
988 |
// so we need to process any gathered attributes NOW, before they go away. |
|
989 |
int nAttrs = m_attributes.getLength(); |
|
990 |
if (nAttrs > 0) |
|
991 |
{ |
|
992 |
processAttributes(m_writer, nAttrs); |
|
993 |
// clear attributes object for re-use with next element |
|
994 |
m_attributes.clear(); |
|
995 |
} |
|
996 |
if (!elemEmpty) |
|
997 |
{ |
|
998 |
// As per Dave/Paul recommendation 12/06/2000 |
|
999 |
// if (shouldIndent) |
|
1000 |
// writer.write('>'); |
|
1001 |
// indent(m_currentIndent); |
|
1002 |
||
1003 |
writer.write("></"); |
|
1004 |
writer.write(name); |
|
1005 |
writer.write('>'); |
|
1006 |
} |
|
1007 |
else |
|
1008 |
{ |
|
1009 |
writer.write('>'); |
|
1010 |
} |
|
1011 |
} |
|
1012 |
||
43744
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
1013 |
if (m_doIndent) { |
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
1014 |
m_childNodeNum = m_childNodeNumStack.remove(m_childNodeNumStack.size() - 1); |
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
1015 |
// clean up because the element has ended |
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
1016 |
m_isprevtext = false; |
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
1017 |
} |
6 | 1018 |
// fire off the end element event |
1019 |
if (m_tracer != null) |
|
1020 |
super.fireEndElem(name); |
|
1021 |
||
1022 |
// OPTIMIZE-EMPTY |
|
1023 |
if (elemEmpty) |
|
1024 |
{ |
|
1025 |
// a quick exit if the HTML element had no children. |
|
1026 |
// This block of code can be removed if the corresponding block of code |
|
1027 |
// in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed |
|
1028 |
m_elemContext = elemContext.m_prev; |
|
1029 |
return; |
|
1030 |
} |
|
1031 |
||
1032 |
// some more clean because the element has ended. |
|
1033 |
m_elemContext = elemContext.m_prev; |
|
1034 |
// m_isRawStack.pop(); |
|
1035 |
} |
|
1036 |
catch (IOException e) |
|
1037 |
{ |
|
1038 |
throw new SAXException(e); |
|
1039 |
} |
|
1040 |
} |
|
1041 |
||
1042 |
/** |
|
1043 |
* Process an attribute. |
|
1044 |
* @param writer The writer to write the processed output to. |
|
1045 |
* @param name The name of the attribute. |
|
1046 |
* @param value The value of the attribute. |
|
1047 |
* @param elemDesc The description of the HTML element |
|
1048 |
* that has this attribute. |
|
1049 |
* |
|
1050 |
* @throws org.xml.sax.SAXException |
|
1051 |
*/ |
|
1052 |
protected void processAttribute( |
|
1053 |
java.io.Writer writer, |
|
1054 |
String name, |
|
1055 |
String value, |
|
1056 |
ElemDesc elemDesc) |
|
51786
c93f14a4ae29
8207760: SAXException: Invalid UTF-16 surrogate detected: d83c ?
joehw
parents:
47216
diff
changeset
|
1057 |
throws IOException, SAXException |
6 | 1058 |
{ |
1059 |
writer.write(' '); |
|
1060 |
||
1061 |
if ( ((value.length() == 0) || value.equalsIgnoreCase(name)) |
|
1062 |
&& elemDesc != null |
|
1063 |
&& elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY)) |
|
1064 |
{ |
|
1065 |
writer.write(name); |
|
1066 |
} |
|
1067 |
else |
|
1068 |
{ |
|
1069 |
// %REVIEW% %OPT% |
|
1070 |
// Two calls to single-char write may NOT |
|
1071 |
// be more efficient than one to string-write... |
|
1072 |
writer.write(name); |
|
1073 |
writer.write("=\""); |
|
1074 |
if ( elemDesc != null |
|
1075 |
&& elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL)) |
|
1076 |
writeAttrURI(writer, value, m_specialEscapeURLs); |
|
1077 |
else |
|
1078 |
writeAttrString(writer, value, this.getEncoding()); |
|
1079 |
writer.write('"'); |
|
1080 |
||
1081 |
} |
|
1082 |
} |
|
1083 |
||
1084 |
/** |
|
1085 |
* Tell if a character is an ASCII digit. |
|
1086 |
*/ |
|
1087 |
private boolean isASCIIDigit(char c) |
|
1088 |
{ |
|
1089 |
return (c >= '0' && c <= '9'); |
|
1090 |
} |
|
1091 |
||
1092 |
/** |
|
1093 |
* Make an integer into an HH hex value. |
|
1094 |
* Does no checking on the size of the input, since this |
|
1095 |
* is only meant to be used locally by writeAttrURI. |
|
1096 |
* |
|
1097 |
* @param i must be a value less than 255. |
|
1098 |
* |
|
1099 |
* @return should be a two character string. |
|
1100 |
*/ |
|
1101 |
private static String makeHHString(int i) |
|
1102 |
{ |
|
1103 |
String s = Integer.toHexString(i).toUpperCase(); |
|
1104 |
if (s.length() == 1) |
|
1105 |
{ |
|
1106 |
s = "0" + s; |
|
1107 |
} |
|
1108 |
return s; |
|
1109 |
} |
|
1110 |
||
1111 |
/** |
|
1112 |
* Dmitri Ilyin: Makes sure if the String is HH encoded sign. |
|
1113 |
* @param str must be 2 characters long |
|
1114 |
* |
|
1115 |
* @return true or false |
|
1116 |
*/ |
|
1117 |
private boolean isHHSign(String str) |
|
1118 |
{ |
|
1119 |
boolean sign = true; |
|
1120 |
try |
|
1121 |
{ |
|
1122 |
char r = (char) Integer.parseInt(str, 16); |
|
1123 |
} |
|
1124 |
catch (NumberFormatException e) |
|
1125 |
{ |
|
1126 |
sign = false; |
|
1127 |
} |
|
1128 |
return sign; |
|
1129 |
} |
|
1130 |
||
1131 |
/** |
|
1132 |
* Write the specified <var>string</var> after substituting non ASCII characters, |
|
1133 |
* with <CODE>%HH</CODE>, where HH is the hex of the byte value. |
|
1134 |
* |
|
1135 |
* @param string String to convert to XML format. |
|
1136 |
* @param doURLEscaping True if we should try to encode as |
|
1137 |
* per http://www.ietf.org/rfc/rfc2396.txt. |
|
1138 |
* |
|
1139 |
* @throws org.xml.sax.SAXException if a bad surrogate pair is detected. |
|
1140 |
*/ |
|
1141 |
public void writeAttrURI( |
|
1142 |
final java.io.Writer writer, String string, boolean doURLEscaping) |
|
1143 |
throws IOException |
|
1144 |
{ |
|
1145 |
// http://www.ietf.org/rfc/rfc2396.txt says: |
|
1146 |
// A URI is always in an "escaped" form, since escaping or unescaping a |
|
1147 |
// completed URI might change its semantics. Normally, the only time |
|
1148 |
// escape encodings can safely be made is when the URI is being created |
|
1149 |
// from its component parts; each component may have its own set of |
|
1150 |
// characters that are reserved, so only the mechanism responsible for |
|
1151 |
// generating or interpreting that component can determine whether or |
|
1152 |
// not escaping a character will change its semantics. Likewise, a URI |
|
1153 |
// must be separated into its components before the escaped characters |
|
1154 |
// within those components can be safely decoded. |
|
1155 |
// |
|
1156 |
// ...So we do our best to do limited escaping of the URL, without |
|
1157 |
// causing damage. If the URL is already properly escaped, in theory, this |
|
1158 |
// function should not change the string value. |
|
1159 |
||
1160 |
final int end = string.length(); |
|
1161 |
if (end > m_attrBuff.length) |
|
1162 |
{ |
|
1163 |
m_attrBuff = new char[end*2 + 1]; |
|
1164 |
} |
|
1165 |
string.getChars(0,end, m_attrBuff, 0); |
|
1166 |
final char[] chars = m_attrBuff; |
|
1167 |
||
1168 |
int cleanStart = 0; |
|
1169 |
int cleanLength = 0; |
|
1170 |
||
1171 |
||
1172 |
char ch = 0; |
|
1173 |
for (int i = 0; i < end; i++) |
|
1174 |
{ |
|
1175 |
ch = chars[i]; |
|
1176 |
||
1177 |
if ((ch < 32) || (ch > 126)) |
|
1178 |
{ |
|
1179 |
if (cleanLength > 0) |
|
1180 |
{ |
|
1181 |
writer.write(chars, cleanStart, cleanLength); |
|
1182 |
cleanLength = 0; |
|
1183 |
} |
|
1184 |
if (doURLEscaping) |
|
1185 |
{ |
|
1186 |
// Encode UTF16 to UTF8. |
|
1187 |
// Reference is Unicode, A Primer, by Tony Graham. |
|
1188 |
// Page 92. |
|
1189 |
||
1190 |
// Note that Kay doesn't escape 0x20... |
|
1191 |
// if(ch == 0x20) // Not sure about this... -sb |
|
1192 |
// { |
|
1193 |
// writer.write(ch); |
|
1194 |
// } |
|
1195 |
// else |
|
1196 |
if (ch <= 0x7F) |
|
1197 |
{ |
|
1198 |
writer.write('%'); |
|
1199 |
writer.write(makeHHString(ch)); |
|
1200 |
} |
|
1201 |
else if (ch <= 0x7FF) |
|
1202 |
{ |
|
1203 |
// Clear low 6 bits before rotate, put high 4 bits in low byte, |
|
1204 |
// and set two high bits. |
|
1205 |
int high = (ch >> 6) | 0xC0; |
|
1206 |
int low = (ch & 0x3F) | 0x80; |
|
1207 |
// First 6 bits, + high bit |
|
1208 |
writer.write('%'); |
|
1209 |
writer.write(makeHHString(high)); |
|
1210 |
writer.write('%'); |
|
1211 |
writer.write(makeHHString(low)); |
|
1212 |
} |
|
1213 |
else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate |
|
1214 |
{ |
|
1215 |
// I'm sure this can be done in 3 instructions, but I choose |
|
1216 |
// to try and do it exactly like it is done in the book, at least |
|
1217 |
// until we are sure this is totally clean. I don't think performance |
|
1218 |
// is a big issue with this particular function, though I could be |
|
1219 |
// wrong. Also, the stuff below clearly does more masking than |
|
1220 |
// it needs to do. |
|
1221 |
||
1222 |
// Clear high 6 bits. |
|
1223 |
int highSurrogate = ((int) ch) & 0x03FF; |
|
1224 |
||
1225 |
// Middle 4 bits (wwww) + 1 |
|
1226 |
// "Note that the value of wwww from the high surrogate bit pattern |
|
1227 |
// is incremented to make the uuuuu bit pattern in the scalar value |
|
1228 |
// so the surrogate pair don't address the BMP." |
|
1229 |
int wwww = ((highSurrogate & 0x03C0) >> 6); |
|
1230 |
int uuuuu = wwww + 1; |
|
1231 |
||
1232 |
// next 4 bits |
|
1233 |
int zzzz = (highSurrogate & 0x003C) >> 2; |
|
1234 |
||
1235 |
// low 2 bits |
|
1236 |
int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30; |
|
1237 |
||
1238 |
// Get low surrogate character. |
|
1239 |
ch = chars[++i]; |
|
1240 |
||
1241 |
// Clear high 6 bits. |
|
1242 |
int lowSurrogate = ((int) ch) & 0x03FF; |
|
1243 |
||
1244 |
// put the middle 4 bits into the bottom of yyyyyy (byte 3) |
|
1245 |
yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6); |
|
1246 |
||
1247 |
// bottom 6 bits. |
|
1248 |
int xxxxxx = (lowSurrogate & 0x003F); |
|
1249 |
||
1250 |
int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu |
|
1251 |
int byte2 = |
|
1252 |
0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz; |
|
1253 |
int byte3 = 0x80 | yyyyyy; |
|
1254 |
int byte4 = 0x80 | xxxxxx; |
|
1255 |
||
1256 |
writer.write('%'); |
|
1257 |
writer.write(makeHHString(byte1)); |
|
1258 |
writer.write('%'); |
|
1259 |
writer.write(makeHHString(byte2)); |
|
1260 |
writer.write('%'); |
|
1261 |
writer.write(makeHHString(byte3)); |
|
1262 |
writer.write('%'); |
|
1263 |
writer.write(makeHHString(byte4)); |
|
1264 |
} |
|
1265 |
else |
|
1266 |
{ |
|
1267 |
int high = (ch >> 12) | 0xE0; // top 4 bits |
|
1268 |
int middle = ((ch & 0x0FC0) >> 6) | 0x80; |
|
1269 |
// middle 6 bits |
|
1270 |
int low = (ch & 0x3F) | 0x80; |
|
1271 |
// First 6 bits, + high bit |
|
1272 |
writer.write('%'); |
|
1273 |
writer.write(makeHHString(high)); |
|
1274 |
writer.write('%'); |
|
1275 |
writer.write(makeHHString(middle)); |
|
1276 |
writer.write('%'); |
|
1277 |
writer.write(makeHHString(low)); |
|
1278 |
} |
|
1279 |
||
1280 |
} |
|
1281 |
else if (escapingNotNeeded(ch)) |
|
1282 |
{ |
|
1283 |
writer.write(ch); |
|
1284 |
} |
|
1285 |
else |
|
1286 |
{ |
|
1287 |
writer.write("&#"); |
|
1288 |
writer.write(Integer.toString(ch)); |
|
1289 |
writer.write(';'); |
|
1290 |
} |
|
1291 |
// In this character range we have first written out any previously accumulated |
|
1292 |
// "clean" characters, then processed the current more complicated character, |
|
1293 |
// which may have incremented "i". |
|
1294 |
// We now we reset the next possible clean character. |
|
1295 |
cleanStart = i + 1; |
|
1296 |
} |
|
1297 |
// Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as |
|
1298 |
// not allowing quotes in the URI proper syntax, nor in the fragment |
|
1299 |
// identifier, we believe that it's OK to double escape quotes. |
|
1300 |
else if (ch == '"') |
|
1301 |
{ |
|
1302 |
// If the character is a '%' number number, try to avoid double-escaping. |
|
1303 |
// There is a question if this is legal behavior. |
|
1304 |
||
1305 |
// Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded |
|
1306 |
// The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little. |
|
1307 |
||
1308 |
// if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) ) |
|
1309 |
||
1310 |
// We are no longer escaping '%' |
|
1311 |
||
1312 |
if (cleanLength > 0) |
|
1313 |
{ |
|
1314 |
writer.write(chars, cleanStart, cleanLength); |
|
1315 |
cleanLength = 0; |
|
1316 |
} |
|
1317 |
||
1318 |
||
1319 |
// Mike Kay encodes this as ", so he may know something I don't? |
|
1320 |
if (doURLEscaping) |
|
1321 |
writer.write("%22"); |
|
1322 |
else |
|
1323 |
writer.write("""); // we have to escape this, I guess. |
|
1324 |
||
1325 |
// We have written out any clean characters, then the escaped '%' and now we |
|
1326 |
// We now we reset the next possible clean character. |
|
1327 |
cleanStart = i + 1; |
|
1328 |
} |
|
1329 |
else if (ch == '&') |
|
1330 |
{ |
|
1331 |
// HTML 4.01 reads, "Authors should use "&" (ASCII decimal 38) |
|
1332 |
// instead of "&" to avoid confusion with the beginning of a character |
|
1333 |
// reference (entity reference open delimiter). |
|
1334 |
if (cleanLength > 0) |
|
1335 |
{ |
|
1336 |
writer.write(chars, cleanStart, cleanLength); |
|
1337 |
cleanLength = 0; |
|
1338 |
} |
|
1339 |
writer.write("&"); |
|
1340 |
cleanStart = i + 1; |
|
1341 |
} |
|
1342 |
else |
|
1343 |
{ |
|
1344 |
// no processing for this character, just count how |
|
1345 |
// many characters in a row that we have that need no processing |
|
1346 |
cleanLength++; |
|
1347 |
} |
|
1348 |
} |
|
1349 |
||
1350 |
// are there any clean characters at the end of the array |
|
1351 |
// that we haven't processed yet? |
|
1352 |
if (cleanLength > 1) |
|
1353 |
{ |
|
1354 |
// if the whole string can be written out as-is do so |
|
1355 |
// otherwise write out the clean chars at the end of the |
|
1356 |
// array |
|
1357 |
if (cleanStart == 0) |
|
1358 |
writer.write(string); |
|
1359 |
else |
|
1360 |
writer.write(chars, cleanStart, cleanLength); |
|
1361 |
} |
|
1362 |
else if (cleanLength == 1) |
|
1363 |
{ |
|
1364 |
// a little optimization for 1 clean character |
|
1365 |
// (we could have let the previous if(...) handle them all) |
|
1366 |
writer.write(ch); |
|
1367 |
} |
|
1368 |
} |
|
1369 |
||
1370 |
/** |
|
1371 |
* Writes the specified <var>string</var> after substituting <VAR>specials</VAR>, |
|
1372 |
* and UTF-16 surrogates for character references <CODE>&#xnn</CODE>. |
|
1373 |
* |
|
1374 |
* @param string String to convert to XML format. |
|
1375 |
* @param encoding CURRENTLY NOT IMPLEMENTED. |
|
1376 |
* |
|
1377 |
* @throws org.xml.sax.SAXException |
|
1378 |
*/ |
|
1379 |
public void writeAttrString( |
|
1380 |
final java.io.Writer writer, String string, String encoding) |
|
51786
c93f14a4ae29
8207760: SAXException: Invalid UTF-16 surrogate detected: d83c ?
joehw
parents:
47216
diff
changeset
|
1381 |
throws IOException, SAXException |
6 | 1382 |
{ |
1383 |
final int end = string.length(); |
|
1384 |
if (end > m_attrBuff.length) |
|
1385 |
{ |
|
1386 |
m_attrBuff = new char[end * 2 + 1]; |
|
1387 |
} |
|
1388 |
string.getChars(0, end, m_attrBuff, 0); |
|
1389 |
final char[] chars = m_attrBuff; |
|
1390 |
||
1391 |
||
1392 |
||
1393 |
int cleanStart = 0; |
|
1394 |
int cleanLength = 0; |
|
1395 |
||
1396 |
char ch = 0; |
|
1397 |
for (int i = 0; i < end; i++) |
|
1398 |
{ |
|
1399 |
ch = chars[i]; |
|
1400 |
||
1401 |
// System.out.println("SPECIALSSIZE: "+SPECIALSSIZE); |
|
1402 |
// System.out.println("ch: "+(int)ch); |
|
1403 |
// System.out.println("m_maxCharacter: "+(int)m_maxCharacter); |
|
1404 |
// System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]); |
|
12902
0a840d92fa30
7151118: Regressions on 7u4 b11 comp. 7u4 b06 on specjvm2008.xml.transform subbenchmark
joehw
parents:
12458
diff
changeset
|
1405 |
if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch))) |
6 | 1406 |
{ |
1407 |
cleanLength++; |
|
1408 |
} |
|
1409 |
else if ('<' == ch || '>' == ch) |
|
1410 |
{ |
|
1411 |
cleanLength++; // no escaping in this case, as specified in 15.2 |
|
1412 |
} |
|
1413 |
else if ( |
|
1414 |
('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1])) |
|
1415 |
{ |
|
1416 |
cleanLength++; // no escaping in this case, as specified in 15.2 |
|
1417 |
} |
|
1418 |
else |
|
1419 |
{ |
|
1420 |
if (cleanLength > 0) |
|
1421 |
{ |
|
1422 |
writer.write(chars,cleanStart,cleanLength); |
|
1423 |
cleanLength = 0; |
|
1424 |
} |
|
1425 |
int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true); |
|
1426 |
||
1427 |
if (i != pos) |
|
1428 |
{ |
|
1429 |
i = pos - 1; |
|
1430 |
} |
|
1431 |
else |
|
1432 |
{ |
|
51786
c93f14a4ae29
8207760: SAXException: Invalid UTF-16 surrogate detected: d83c ?
joehw
parents:
47216
diff
changeset
|
1433 |
if (Encodings.isHighUTF16Surrogate(ch) || |
c93f14a4ae29
8207760: SAXException: Invalid UTF-16 surrogate detected: d83c ?
joehw
parents:
47216
diff
changeset
|
1434 |
Encodings.isLowUTF16Surrogate(ch)) |
6 | 1435 |
{ |
51786
c93f14a4ae29
8207760: SAXException: Invalid UTF-16 surrogate detected: d83c ?
joehw
parents:
47216
diff
changeset
|
1436 |
if (writeUTF16Surrogate(ch, chars, i, end) >= 0) { |
c93f14a4ae29
8207760: SAXException: Invalid UTF-16 surrogate detected: d83c ?
joehw
parents:
47216
diff
changeset
|
1437 |
// move the index if the low surrogate is consumed |
c93f14a4ae29
8207760: SAXException: Invalid UTF-16 surrogate detected: d83c ?
joehw
parents:
47216
diff
changeset
|
1438 |
// as writeUTF16Surrogate has written the pair |
c93f14a4ae29
8207760: SAXException: Invalid UTF-16 surrogate detected: d83c ?
joehw
parents:
47216
diff
changeset
|
1439 |
if (Encodings.isHighUTF16Surrogate(ch)) { |
c93f14a4ae29
8207760: SAXException: Invalid UTF-16 surrogate detected: d83c ?
joehw
parents:
47216
diff
changeset
|
1440 |
i++; |
c93f14a4ae29
8207760: SAXException: Invalid UTF-16 surrogate detected: d83c ?
joehw
parents:
47216
diff
changeset
|
1441 |
} |
c93f14a4ae29
8207760: SAXException: Invalid UTF-16 surrogate detected: d83c ?
joehw
parents:
47216
diff
changeset
|
1442 |
} |
6 | 1443 |
} |
1444 |
||
1445 |
// The next is kind of a hack to keep from escaping in the case |
|
1446 |
// of Shift_JIS and the like. |
|
1447 |
||
1448 |
/* |
|
1449 |
else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF) |
|
1450 |
&& (ch != 160)) |
|
1451 |
{ |
|
1452 |
writer.write(ch); // no escaping in this case |
|
1453 |
} |
|
1454 |
else |
|
1455 |
*/ |
|
1456 |
String outputStringForChar = m_charInfo.getOutputStringForChar(ch); |
|
1457 |
if (null != outputStringForChar) |
|
1458 |
{ |
|
1459 |
writer.write(outputStringForChar); |
|
1460 |
} |
|
1461 |
else if (escapingNotNeeded(ch)) |
|
1462 |
{ |
|
1463 |
writer.write(ch); // no escaping in this case |
|
1464 |
} |
|
1465 |
else |
|
1466 |
{ |
|
1467 |
writer.write("&#"); |
|
1468 |
writer.write(Integer.toString(ch)); |
|
1469 |
writer.write(';'); |
|
1470 |
} |
|
1471 |
} |
|
1472 |
cleanStart = i + 1; |
|
1473 |
} |
|
1474 |
} // end of for() |
|
1475 |
||
1476 |
// are there any clean characters at the end of the array |
|
1477 |
// that we haven't processed yet? |
|
1478 |
if (cleanLength > 1) |
|
1479 |
{ |
|
1480 |
// if the whole string can be written out as-is do so |
|
1481 |
// otherwise write out the clean chars at the end of the |
|
1482 |
// array |
|
1483 |
if (cleanStart == 0) |
|
1484 |
writer.write(string); |
|
1485 |
else |
|
1486 |
writer.write(chars, cleanStart, cleanLength); |
|
1487 |
} |
|
1488 |
else if (cleanLength == 1) |
|
1489 |
{ |
|
1490 |
// a little optimization for 1 clean character |
|
1491 |
// (we could have let the previous if(...) handle them all) |
|
1492 |
writer.write(ch); |
|
1493 |
} |
|
1494 |
} |
|
1495 |
||
1496 |
||
1497 |
||
1498 |
/** |
|
1499 |
* Receive notification of character data. |
|
1500 |
* |
|
1501 |
* <p>The Parser will call this method to report each chunk of |
|
1502 |
* character data. SAX parsers may return all contiguous character |
|
1503 |
* data in a single chunk, or they may split it into several |
|
1504 |
* chunks; however, all of the characters in any single event |
|
1505 |
* must come from the same external entity, so that the Locator |
|
1506 |
* provides useful information.</p> |
|
1507 |
* |
|
1508 |
* <p>The application must not attempt to read from the array |
|
1509 |
* outside of the specified range.</p> |
|
1510 |
* |
|
1511 |
* <p>Note that some parsers will report whitespace using the |
|
1512 |
* ignorableWhitespace() method rather than this one (validating |
|
1513 |
* parsers must do so).</p> |
|
1514 |
* |
|
1515 |
* @param chars The characters from the XML document. |
|
1516 |
* @param start The start position in the array. |
|
1517 |
* @param length The number of characters to read from the array. |
|
1518 |
* @throws org.xml.sax.SAXException Any SAX exception, possibly |
|
1519 |
* wrapping another exception. |
|
1520 |
* @see #ignorableWhitespace |
|
1521 |
* @see org.xml.sax.Locator |
|
1522 |
* |
|
1523 |
* @throws org.xml.sax.SAXException |
|
1524 |
*/ |
|
1525 |
public final void characters(char chars[], int start, int length) |
|
1526 |
throws org.xml.sax.SAXException |
|
1527 |
{ |
|
1528 |
||
1529 |
if (m_elemContext.m_isRaw) |
|
1530 |
{ |
|
1531 |
try |
|
1532 |
{ |
|
1533 |
if (m_elemContext.m_startTagOpen) |
|
1534 |
{ |
|
1535 |
closeStartTag(); |
|
1536 |
m_elemContext.m_startTagOpen = false; |
|
1537 |
} |
|
1538 |
||
1539 |
// With m_ispreserve just set true it looks like shouldIndent() |
|
1540 |
// will always return false, so drop any possible indentation. |
|
1541 |
// if (shouldIndent()) |
|
1542 |
// indent(); |
|
1543 |
||
1544 |
// writer.write("<![CDATA["); |
|
1545 |
// writer.write(chars, start, length); |
|
1546 |
writeNormalizedChars(chars, start, length, false, m_lineSepUse); |
|
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
1547 |
m_isprevtext = true; |
6 | 1548 |
// writer.write("]]>"); |
1549 |
||
1550 |
// time to generate characters event |
|
1551 |
if (m_tracer != null) |
|
1552 |
super.fireCharEvent(chars, start, length); |
|
1553 |
||
1554 |
return; |
|
1555 |
} |
|
1556 |
catch (IOException ioe) |
|
1557 |
{ |
|
1558 |
throw new org.xml.sax.SAXException( |
|
1559 |
Utils.messages.createMessage( |
|
1560 |
MsgKey.ER_OIERROR, |
|
1561 |
null), |
|
1562 |
ioe); |
|
1563 |
//"IO error", ioe); |
|
1564 |
} |
|
1565 |
} |
|
1566 |
else |
|
1567 |
{ |
|
1568 |
super.characters(chars, start, length); |
|
1569 |
} |
|
1570 |
} |
|
1571 |
||
1572 |
/** |
|
1573 |
* Receive notification of cdata. |
|
1574 |
* |
|
1575 |
* <p>The Parser will call this method to report each chunk of |
|
1576 |
* character data. SAX parsers may return all contiguous character |
|
1577 |
* data in a single chunk, or they may split it into several |
|
1578 |
* chunks; however, all of the characters in any single event |
|
1579 |
* must come from the same external entity, so that the Locator |
|
1580 |
* provides useful information.</p> |
|
1581 |
* |
|
1582 |
* <p>The application must not attempt to read from the array |
|
1583 |
* outside of the specified range.</p> |
|
1584 |
* |
|
1585 |
* <p>Note that some parsers will report whitespace using the |
|
1586 |
* ignorableWhitespace() method rather than this one (validating |
|
1587 |
* parsers must do so).</p> |
|
1588 |
* |
|
1589 |
* @param ch The characters from the XML document. |
|
1590 |
* @param start The start position in the array. |
|
1591 |
* @param length The number of characters to read from the array. |
|
1592 |
* @throws org.xml.sax.SAXException Any SAX exception, possibly |
|
1593 |
* wrapping another exception. |
|
1594 |
* @see #ignorableWhitespace |
|
1595 |
* @see org.xml.sax.Locator |
|
1596 |
* |
|
1597 |
* @throws org.xml.sax.SAXException |
|
1598 |
*/ |
|
1599 |
public final void cdata(char ch[], int start, int length) |
|
1600 |
throws org.xml.sax.SAXException |
|
1601 |
{ |
|
1602 |
if ((null != m_elemContext.m_elementName) |
|
1603 |
&& (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT") |
|
1604 |
|| m_elemContext.m_elementName.equalsIgnoreCase("STYLE"))) |
|
1605 |
{ |
|
1606 |
try |
|
1607 |
{ |
|
1608 |
if (m_elemContext.m_startTagOpen) |
|
1609 |
{ |
|
1610 |
closeStartTag(); |
|
1611 |
m_elemContext.m_startTagOpen = false; |
|
1612 |
} |
|
1613 |
||
1614 |
if (shouldIndent()) |
|
1615 |
indent(); |
|
1616 |
||
1617 |
// writer.write(ch, start, length); |
|
1618 |
writeNormalizedChars(ch, start, length, true, m_lineSepUse); |
|
1619 |
} |
|
1620 |
catch (IOException ioe) |
|
1621 |
{ |
|
1622 |
throw new org.xml.sax.SAXException( |
|
1623 |
Utils.messages.createMessage( |
|
1624 |
MsgKey.ER_OIERROR, |
|
1625 |
null), |
|
1626 |
ioe); |
|
1627 |
//"IO error", ioe); |
|
1628 |
} |
|
1629 |
} |
|
1630 |
else |
|
1631 |
{ |
|
1632 |
super.cdata(ch, start, length); |
|
1633 |
} |
|
1634 |
} |
|
1635 |
||
1636 |
/** |
|
1637 |
* Receive notification of a processing instruction. |
|
1638 |
* |
|
1639 |
* @param target The processing instruction target. |
|
1640 |
* @param data The processing instruction data, or null if |
|
1641 |
* none was supplied. |
|
1642 |
* @throws org.xml.sax.SAXException Any SAX exception, possibly |
|
1643 |
* wrapping another exception. |
|
1644 |
* |
|
1645 |
* @throws org.xml.sax.SAXException |
|
1646 |
*/ |
|
1647 |
public void processingInstruction(String target, String data) |
|
1648 |
throws org.xml.sax.SAXException |
|
1649 |
{ |
|
43744
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
1650 |
if (m_doIndent) { |
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
1651 |
m_childNodeNum++; |
55575
25165403c62e
8223291: Whitespace is added to CDATA tags when using OutputKeys.INDENT to format XML
joehw
parents:
51786
diff
changeset
|
1652 |
flushCharactersBuffer(false); |
43744
5436902a27d7
8173290: 3% regression in SPECjvm2008-XML with b150
fyuan
parents:
42805
diff
changeset
|
1653 |
} |
6 | 1654 |
// Process any pending starDocument and startElement first. |
1655 |
flushPending(); |
|
1656 |
||
1657 |
// Use a fairly nasty hack to tell if the next node is supposed to be |
|
1658 |
// unescaped text. |
|
1659 |
if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING)) |
|
1660 |
{ |
|
1661 |
startNonEscaping(); |
|
1662 |
} |
|
1663 |
else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING)) |
|
1664 |
{ |
|
1665 |
endNonEscaping(); |
|
1666 |
} |
|
1667 |
else |
|
1668 |
{ |
|
1669 |
try |
|
1670 |
{ |
|
1671 |
if (m_elemContext.m_startTagOpen) |
|
1672 |
{ |
|
1673 |
closeStartTag(); |
|
1674 |
m_elemContext.m_startTagOpen = false; |
|
1675 |
} |
|
1676 |
else if (m_needToCallStartDocument) |
|
1677 |
startDocumentInternal(); |
|
1678 |
||
1679 |
if (shouldIndent()) |
|
1680 |
indent(); |
|
1681 |
||
1682 |
final java.io.Writer writer = m_writer; |
|
1683 |
//writer.write("<?" + target); |
|
1684 |
writer.write("<?"); |
|
1685 |
writer.write(target); |
|
1686 |
||
1687 |
if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0))) |
|
1688 |
writer.write(' '); |
|
1689 |
||
1690 |
//writer.write(data + ">"); // different from XML |
|
1691 |
writer.write(data); // different from XML |
|
1692 |
writer.write('>'); // different from XML |
|
1693 |
||
1694 |
// Always output a newline char if not inside of an |
|
1695 |
// element. The whitespace is not significant in that |
|
1696 |
// case. |
|
1697 |
if (m_elemContext.m_currentElemDepth <= 0) |
|
1698 |
outputLineSep(); |
|
1699 |
||
1700 |
m_startNewLine = true; |
|
1701 |
} |
|
1702 |
catch(IOException e) |
|
1703 |
{ |
|
1704 |
throw new SAXException(e); |
|
1705 |
} |
|
1706 |
} |
|
1707 |
||
1708 |
// now generate the PI event |
|
1709 |
if (m_tracer != null) |
|
1710 |
super.fireEscapingEvent(target, data); |
|
1711 |
} |
|
1712 |
||
1713 |
/** |
|
1714 |
* Receive notivication of a entityReference. |
|
1715 |
* |
|
1716 |
* @param name non-null reference to entity name string. |
|
1717 |
* |
|
1718 |
* @throws org.xml.sax.SAXException |
|
1719 |
*/ |
|
1720 |
public final void entityReference(String name) |
|
1721 |
throws org.xml.sax.SAXException |
|
1722 |
{ |
|
1723 |
try |
|
1724 |
{ |
|
1725 |
||
1726 |
final java.io.Writer writer = m_writer; |
|
1727 |
writer.write('&'); |
|
1728 |
writer.write(name); |
|
1729 |
writer.write(';'); |
|
1730 |
||
1731 |
} catch(IOException e) |
|
1732 |
{ |
|
1733 |
throw new SAXException(e); |
|
1734 |
} |
|
1735 |
} |
|
1736 |
/** |
|
1737 |
* @see ExtendedContentHandler#endElement(String) |
|
1738 |
*/ |
|
1739 |
public final void endElement(String elemName) throws SAXException |
|
1740 |
{ |
|
1741 |
endElement(null, null, elemName); |
|
1742 |
} |
|
1743 |
||
1744 |
/** |
|
1745 |
* Process the attributes, which means to write out the currently |
|
1746 |
* collected attributes to the writer. The attributes are not |
|
1747 |
* cleared by this method |
|
1748 |
* |
|
1749 |
* @param writer the writer to write processed attributes to. |
|
1750 |
* @param nAttrs the number of attributes in m_attributes |
|
1751 |
* to be processed |
|
1752 |
* |
|
1753 |
* @throws org.xml.sax.SAXException |
|
1754 |
*/ |
|
1755 |
public void processAttributes(java.io.Writer writer, int nAttrs) |
|
1756 |
throws IOException,SAXException |
|
1757 |
{ |
|
1758 |
/* |
|
1759 |
* process the collected attributes |
|
1760 |
*/ |
|
1761 |
for (int i = 0; i < nAttrs; i++) |
|
1762 |
{ |
|
1763 |
processAttribute( |
|
1764 |
writer, |
|
1765 |
m_attributes.getQName(i), |
|
1766 |
m_attributes.getValue(i), |
|
1767 |
m_elemContext.m_elementDesc); |
|
1768 |
} |
|
1769 |
} |
|
1770 |
||
1771 |
/** |
|
1772 |
* For the enclosing elements starting tag write out out any attributes |
|
1773 |
* followed by ">" |
|
1774 |
* |
|
1775 |
*@throws org.xml.sax.SAXException |
|
1776 |
*/ |
|
1777 |
protected void closeStartTag() throws SAXException |
|
1778 |
{ |
|
1779 |
try |
|
1780 |
{ |
|
1781 |
||
1782 |
// finish processing attributes, time to fire off the start element event |
|
1783 |
if (m_tracer != null) |
|
1784 |
super.fireStartElem(m_elemContext.m_elementName); |
|
1785 |
||
1786 |
int nAttrs = m_attributes.getLength(); |
|
1787 |
if (nAttrs>0) |
|
1788 |
{ |
|
1789 |
processAttributes(m_writer, nAttrs); |
|
1790 |
// clear attributes object for re-use with next element |
|
1791 |
m_attributes.clear(); |
|
1792 |
} |
|
1793 |
||
1794 |
m_writer.write('>'); |
|
1795 |
||
1796 |
/* whether Xalan or XSLTC, we have the prefix mappings now, so |
|
1797 |
* lets determine if the current element is specified in the cdata- |
|
1798 |
* section-elements list. |
|
1799 |
*/ |
|
24888
2e493ac78624
8041523: Xerces Update: Serializer improvements from Xalan
joehw
parents:
23954
diff
changeset
|
1800 |
if (m_StringOfCDATASections != null) |
6 | 1801 |
m_elemContext.m_isCdataSection = isCdataSection(); |
1802 |
||
1803 |
} |
|
1804 |
catch(IOException e) |
|
1805 |
{ |
|
1806 |
throw new SAXException(e); |
|
1807 |
} |
|
1808 |
} |
|
1809 |
||
1810 |
/** |
|
1811 |
* This method is used when a prefix/uri namespace mapping |
|
1812 |
* is indicated after the element was started with a |
|
1813 |
* startElement() and before and endElement(). |
|
1814 |
* startPrefixMapping(prefix,uri) would be used before the |
|
1815 |
* startElement() call. |
|
1816 |
* @param uri the URI of the namespace |
|
1817 |
* @param prefix the prefix associated with the given URI. |
|
1818 |
* |
|
1819 |
* @see ExtendedContentHandler#namespaceAfterStartElement(String, String) |
|
1820 |
*/ |
|
1821 |
public void namespaceAfterStartElement(String prefix, String uri) |
|
1822 |
throws SAXException |
|
1823 |
{ |
|
1824 |
// hack for XSLTC with finding URI for default namespace |
|
1825 |
if (m_elemContext.m_elementURI == null) |
|
1826 |
{ |
|
1827 |
String prefix1 = getPrefixPart(m_elemContext.m_elementName); |
|
1828 |
if (prefix1 == null && EMPTYSTRING.equals(prefix)) |
|
1829 |
{ |
|
1830 |
// the elements URI is not known yet, and it |
|
1831 |
// doesn't have a prefix, and we are currently |
|
1832 |
// setting the uri for prefix "", so we have |
|
1833 |
// the uri for the element... lets remember it |
|
1834 |
m_elemContext.m_elementURI = uri; |
|
1835 |
} |
|
1836 |
} |
|
1837 |
startPrefixMapping(prefix,uri,false); |
|
1838 |
} |
|
1839 |
||
1840 |
public void startDTD(String name, String publicId, String systemId) |
|
1841 |
throws SAXException |
|
1842 |
{ |
|
1843 |
m_inDTD = true; |
|
1844 |
super.startDTD(name, publicId, systemId); |
|
1845 |
} |
|
1846 |
||
1847 |
/** |
|
1848 |
* Report the end of DTD declarations. |
|
1849 |
* @throws org.xml.sax.SAXException The application may raise an exception. |
|
1850 |
* @see #startDTD |
|
1851 |
*/ |
|
1852 |
public void endDTD() throws org.xml.sax.SAXException |
|
1853 |
{ |
|
1854 |
m_inDTD = false; |
|
1855 |
/* for ToHTMLStream the DOCTYPE is entirely output in the |
|
1856 |
* startDocumentInternal() method, so don't do anything here |
|
1857 |
*/ |
|
1858 |
} |
|
1859 |
/** |
|
1860 |
* This method does nothing. |
|
1861 |
*/ |
|
1862 |
public void attributeDecl( |
|
1863 |
String eName, |
|
1864 |
String aName, |
|
1865 |
String type, |
|
1866 |
String valueDefault, |
|
1867 |
String value) |
|
1868 |
throws SAXException |
|
1869 |
{ |
|
1870 |
// The internal DTD subset is not serialized by the ToHTMLStream serializer |
|
1871 |
} |
|
1872 |
||
1873 |
/** |
|
1874 |
* This method does nothing. |
|
1875 |
*/ |
|
1876 |
public void elementDecl(String name, String model) throws SAXException |
|
1877 |
{ |
|
1878 |
// The internal DTD subset is not serialized by the ToHTMLStream serializer |
|
1879 |
} |
|
1880 |
/** |
|
1881 |
* This method does nothing. |
|
1882 |
*/ |
|
1883 |
public void internalEntityDecl(String name, String value) |
|
1884 |
throws SAXException |
|
1885 |
{ |
|
1886 |
// The internal DTD subset is not serialized by the ToHTMLStream serializer |
|
1887 |
} |
|
1888 |
/** |
|
1889 |
* This method does nothing. |
|
1890 |
*/ |
|
1891 |
public void externalEntityDecl( |
|
1892 |
String name, |
|
1893 |
String publicId, |
|
1894 |
String systemId) |
|
1895 |
throws SAXException |
|
1896 |
{ |
|
1897 |
// The internal DTD subset is not serialized by the ToHTMLStream serializer |
|
1898 |
} |
|
1899 |
||
1900 |
/** |
|
1901 |
* This method is used to add an attribute to the currently open element. |
|
1902 |
* The caller has guaranted that this attribute is unique, which means that it |
|
1903 |
* not been seen before and will not be seen again. |
|
1904 |
* |
|
1905 |
* @param name the qualified name of the attribute |
|
1906 |
* @param value the value of the attribute which can contain only |
|
1907 |
* ASCII printable characters characters in the range 32 to 127 inclusive. |
|
1908 |
* @param flags the bit values of this integer give optimization information. |
|
1909 |
*/ |
|
1910 |
public void addUniqueAttribute(String name, String value, int flags) |
|
1911 |
throws SAXException |
|
1912 |
{ |
|
1913 |
try |
|
1914 |
{ |
|
1915 |
final java.io.Writer writer = m_writer; |
|
1916 |
if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt) |
|
1917 |
{ |
|
1918 |
// "flags" has indicated that the characters |
|
1919 |
// '>' '<' '&' and '"' are not in the value and |
|
1920 |
// m_htmlcharInfo has recorded that there are no other |
|
1921 |
// entities in the range 0 to 127 so we write out the |
|
1922 |
// value directly |
|
1923 |
writer.write(' '); |
|
1924 |
writer.write(name); |
|
1925 |
writer.write("=\""); |
|
1926 |
writer.write(value); |
|
1927 |
writer.write('"'); |
|
1928 |
} |
|
1929 |
else if ( |
|
1930 |
(flags & HTML_ATTREMPTY) > 0 |
|
1931 |
&& (value.length() == 0 || value.equalsIgnoreCase(name))) |
|
1932 |
{ |
|
1933 |
writer.write(' '); |
|
1934 |
writer.write(name); |
|
1935 |
} |
|
1936 |
else |
|
1937 |
{ |
|
1938 |
writer.write(' '); |
|
1939 |
writer.write(name); |
|
1940 |
writer.write("=\""); |
|
1941 |
if ((flags & HTML_ATTRURL) > 0) |
|
1942 |
{ |
|
1943 |
writeAttrURI(writer, value, m_specialEscapeURLs); |
|
1944 |
} |
|
1945 |
else |
|
1946 |
{ |
|
1947 |
writeAttrString(writer, value, this.getEncoding()); |
|
1948 |
} |
|
1949 |
writer.write('"'); |
|
1950 |
} |
|
1951 |
} catch (IOException e) { |
|
1952 |
throw new SAXException(e); |
|
1953 |
} |
|
1954 |
} |
|
1955 |
||
1956 |
public void comment(char ch[], int start, int length) |
|
1957 |
throws SAXException |
|
1958 |
{ |
|
1959 |
// The internal DTD subset is not serialized by the ToHTMLStream serializer |
|
1960 |
if (m_inDTD) |
|
1961 |
return; |
|
1962 |
super.comment(ch, start, length); |
|
1963 |
} |
|
1964 |
||
1965 |
public boolean reset() |
|
1966 |
{ |
|
1967 |
boolean ret = super.reset(); |
|
1968 |
if (!ret) |
|
1969 |
return false; |
|
1970 |
initToHTMLStream(); |
|
1971 |
return true; |
|
1972 |
} |
|
1973 |
||
1974 |
private void initToHTMLStream() |
|
1975 |
{ |
|
42805
857b5e6eef37
8087303: LSSerializer pretty print does not work anymore
fyuan
parents:
25868
diff
changeset
|
1976 |
m_isprevblock = false; |
6 | 1977 |
m_inDTD = false; |
1978 |
m_omitMetaTag = false; |
|
1979 |
m_specialEscapeURLs = true; |
|
1980 |
} |
|
1981 |
||
1982 |
static class Trie |
|
1983 |
{ |
|
1984 |
/** |
|
1985 |
* A digital search trie for 7-bit ASCII text |
|
1986 |
* The API is a subset of java.util.Hashtable |
|
1987 |
* The key must be a 7-bit ASCII string |
|
1988 |
* The value may be any Java Object |
|
1989 |
* One can get an object stored in a trie from its key, |
|
1990 |
* but the search is either case sensitive or case |
|
1991 |
* insensitive to the characters in the key, and this |
|
1992 |
* choice of sensitivity or insensitivity is made when |
|
1993 |
* the Trie is created, before any objects are put in it. |
|
1994 |
* |
|
1995 |
* This class is a copy of the one in com.sun.org.apache.xml.internal.utils. |
|
1996 |
* It exists to cut the serializer's dependency on that package. |
|
1997 |
* |
|
1998 |
* @xsl.usage internal |
|
1999 |
*/ |
|
2000 |
||
2001 |
/** Size of the m_nextChar array. */ |
|
2002 |
public static final int ALPHA_SIZE = 128; |
|
2003 |
||
2004 |
/** The root node of the tree. */ |
|
2005 |
final Node m_Root; |
|
2006 |
||
2007 |
/** helper buffer to convert Strings to char arrays */ |
|
2008 |
private char[] m_charBuffer = new char[0]; |
|
2009 |
||
2010 |
/** true if the search for an object is lower case only with the key */ |
|
2011 |
private final boolean m_lowerCaseOnly; |
|
2012 |
||
2013 |
/**
 * Construct the trie that has a case insensitive search
 * (the lower-case-only flag is off).
 */
public Trie() {
    this(false);
}
|
2021 |
||
2022 |
/**
 * Construct the trie given the desired case sensitivity with the key.
 *
 * @param lowerCaseOnly true if the search keys are to be lower case only,
 * not case insensitive.
 */
public Trie(boolean lowerCaseOnly) {
    m_lowerCaseOnly = lowerCaseOnly;
    m_Root = new Node();
}
|
2032 |
||
2033 |
/** |
|
2034 |
* Put an object into the trie for lookup. |
|
2035 |
* |
|
2036 |
* @param key must be a 7-bit ASCII string |
|
2037 |
* @param value any java object. |
|
2038 |
* |
|
2039 |
* @return The old object that matched key, or null. |
|
2040 |
*/ |
|
2041 |
public Object put(String key, Object value) |
|
2042 |
{ |
|
2043 |
||
2044 |
final int len = key.length(); |
|
2045 |
if (len > m_charBuffer.length) |
|
2046 |
{ |
|
2047 |
// make the biggest buffer ever needed in get(String) |
|
2048 |
m_charBuffer = new char[len]; |
|
2049 |
} |
|
2050 |
||
2051 |
Node node = m_Root; |
|
2052 |
||
2053 |
for (int i = 0; i < len; i++) |
|
2054 |
{ |
|
2055 |
Node nextNode = |
|
2056 |
node.m_nextChar[Character.toLowerCase(key.charAt(i))]; |
|
2057 |
||
2058 |
if (nextNode != null) |
|
2059 |
{ |
|
2060 |
node = nextNode; |
|
2061 |
} |
|
2062 |
else |
|
2063 |
{ |
|
2064 |
for (; i < len; i++) |
|
2065 |
{ |
|
2066 |
Node newNode = new Node(); |
|
2067 |
if (m_lowerCaseOnly) |
|
2068 |
{ |
|
2069 |
// put this value into the tree only with a lower case key |
|
2070 |
node.m_nextChar[Character.toLowerCase( |
|
2071 |
key.charAt(i))] = |
|
2072 |
newNode; |
|
2073 |
} |
|
2074 |
else |
|
2075 |
{ |
|
2076 |
// put this value into the tree with a case insensitive key |
|
2077 |
node.m_nextChar[Character.toUpperCase( |
|
2078 |
key.charAt(i))] = |
|
2079 |
newNode; |
|
2080 |
node.m_nextChar[Character.toLowerCase( |
|
2081 |
key.charAt(i))] = |
|
2082 |
newNode; |
|
2083 |
} |
|
2084 |
node = newNode; |
|
2085 |
} |
|
2086 |
break; |
|
2087 |
} |
|
2088 |
} |
|
2089 |
||
2090 |
Object ret = node.m_Value; |
|
2091 |
||
2092 |
node.m_Value = value; |
|
2093 |
||
2094 |
return ret; |
|
2095 |
} |
|
2096 |
||
2097 |
/** |
|
2098 |
* Get an object that matches the key. |
|
2099 |
* |
|
2100 |
* @param key must be a 7-bit ASCII string |
|
2101 |
* |
|
2102 |
* @return The object that matches the key, or null. |
|
2103 |
*/ |
|
2104 |
public Object get(final String key) |
|
2105 |
{ |
|
2106 |
||
2107 |
final int len = key.length(); |
|
2108 |
||
2109 |
/* If the name is too long, we won't find it, this also keeps us |
|
2110 |
* from overflowing m_charBuffer |
|
2111 |
*/ |
|
2112 |
if (m_charBuffer.length < len) |
|
2113 |
return null; |
|
2114 |
||
2115 |
Node node = m_Root; |
|
2116 |
switch (len) // optimize the look up based on the number of chars |
|
2117 |
{ |
|
2118 |
// case 0 looks silly, but the generated bytecode runs |
|
2119 |
// faster for lookup of elements of length 2 with this in |
|
2120 |
// and a fair bit faster. Don't know why. |
|
2121 |
case 0 : |
|
2122 |
{ |
|
2123 |
return null; |
|
2124 |
} |
|
2125 |
||
2126 |
case 1 : |
|
2127 |
{ |
|
2128 |
final char ch = key.charAt(0); |
|
2129 |
if (ch < ALPHA_SIZE) |
|
2130 |
{ |
|
2131 |
node = node.m_nextChar[ch]; |
|
2132 |
if (node != null) |
|
2133 |
return node.m_Value; |
|
2134 |
} |
|
2135 |
return null; |
|
2136 |
} |
|
2137 |
// comment out case 2 because the default is faster |
|
2138 |
// case 2 : |
|
2139 |
// { |
|
2140 |
// final char ch0 = key.charAt(0); |
|
2141 |
// final char ch1 = key.charAt(1); |
|
2142 |
// if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE) |
|
2143 |
// { |
|
2144 |
// node = node.m_nextChar[ch0]; |
|
2145 |
// if (node != null) |
|
2146 |
// { |
|
2147 |
// |
|
2148 |
// if (ch1 < ALPHA_SIZE) |
|
2149 |
// { |
|
2150 |
// node = node.m_nextChar[ch1]; |
|
2151 |
// if (node != null) |
|
2152 |
// return node.m_Value; |
|
2153 |
// } |
|
2154 |
// } |
|
2155 |
// } |
|
2156 |
// return null; |
|
2157 |
// } |
|
2158 |
default : |
|
2159 |
{ |
|
2160 |
for (int i = 0; i < len; i++) |
|
2161 |
{ |
|
2162 |
// A thread-safe way to loop over the characters |
|
2163 |
final char ch = key.charAt(i); |
|
2164 |
if (ALPHA_SIZE <= ch) |
|
2165 |
{ |
|
2166 |
// the key is not 7-bit ASCII so we won't find it here |
|
2167 |
return null; |
|
2168 |
} |
|
2169 |
||
2170 |
node = node.m_nextChar[ch]; |
|
2171 |
if (node == null) |
|
2172 |
return null; |
|
2173 |
} |
|
2174 |
||
2175 |
return node.m_Value; |
|
2176 |
} |
|
2177 |
} |
|
2178 |
} |
|
2179 |
||
2180 |
/** |
|
2181 |
* The node representation for the trie. |
|
2182 |
* @xsl.usage internal |
|
2183 |
*/ |
|
2184 |
private class Node |
|
2185 |
{ |
|
2186 |
||
2187 |
/** |
|
2188 |
* Constructor, creates a Node[ALPHA_SIZE]. |
|
2189 |
*/ |
|
2190 |
Node() |
|
2191 |
{ |
|
2192 |
m_nextChar = new Node[ALPHA_SIZE]; |
|
2193 |
m_Value = null; |
|
2194 |
} |
|
2195 |
||
2196 |
/** The next nodes. */ |
|
2197 |
final Node m_nextChar[]; |
|
2198 |
||
2199 |
/** The value. */ |
|
2200 |
Object m_Value; |
|
2201 |
} |
|
2202 |
/**
 * Construct the trie from another Trie.
 * Both the existing Trie and this new one share the same table for
 * lookup, and it is assumed that the table is fully populated and
 * not changing anymore.
 *
 * @param existingTrie the Trie that this one is a copy of.
 */
public Trie(Trie existingTrie) {
    // Share the lookup table and the case setting of the existing Trie.
    m_Root = existingTrie.m_Root;
    m_lowerCaseOnly = existingTrie.m_lowerCaseOnly;

    // This Trie gets its own buffer, just big enough to hold the
    // longest key in the table.
    m_charBuffer = new char[existingTrie.getLongestKeyLength()];
}
|
2220 |
||
2221 |
/** |
|
2222 |
* Get an object that matches the key. |
|
2223 |
* This method is faster than get(), but is not thread-safe. |
|
2224 |
* |
|
2225 |
* @param key must be a 7-bit ASCII string |
|
2226 |
* |
|
2227 |
* @return The object that matches the key, or null. |
|
2228 |
*/ |
|
2229 |
public Object get2(final String key) |
|
2230 |
{ |
|
2231 |
||
2232 |
final int len = key.length(); |
|
2233 |
||
2234 |
/* If the name is too long, we won't find it, this also keeps us |
|
2235 |
* from overflowing m_charBuffer |
|
2236 |
*/ |
|
2237 |
if (m_charBuffer.length < len) |
|
2238 |
return null; |
|
2239 |
||
2240 |
Node node = m_Root; |
|
2241 |
switch (len) // optimize the look up based on the number of chars |
|
2242 |
{ |
|
2243 |
// case 0 looks silly, but the generated bytecode runs |
|
2244 |
// faster for lookup of elements of length 2 with this in |
|
2245 |
// and a fair bit faster. Don't know why. |
|
2246 |
case 0 : |
|
2247 |
{ |
|
2248 |
return null; |
|
2249 |
} |
|
2250 |
||
2251 |
case 1 : |
|
2252 |
{ |
|
2253 |
final char ch = key.charAt(0); |
|
2254 |
if (ch < ALPHA_SIZE) |
|
2255 |
{ |
|
2256 |
node = node.m_nextChar[ch]; |
|
2257 |
if (node != null) |
|
2258 |
return node.m_Value; |
|
2259 |
} |
|
2260 |
return null; |
|
2261 |
} |
|
2262 |
default : |
|
2263 |
{ |
|
2264 |
/* Copy string into array. This is not thread-safe because |
|
2265 |
* it modifies the contents of m_charBuffer. If multiple |
|
2266 |
* threads were to use this Trie they all would be |
|
2267 |
* using this same array (not good). So this |
|
2268 |
* method is not thread-safe, but it is faster because |
|
2269 |
* converting to a char[] and looping over elements of |
|
2270 |
* the array is faster than a String's charAt(i). |
|
2271 |
*/ |
|
2272 |
key.getChars(0, len, m_charBuffer, 0); |
|
2273 |
||
2274 |
for (int i = 0; i < len; i++) |
|
2275 |
{ |
|
2276 |
final char ch = m_charBuffer[i]; |
|
2277 |
if (ALPHA_SIZE <= ch) |
|
2278 |
{ |
|
2279 |
// the key is not 7-bit ASCII so we won't find it here |
|
2280 |
return null; |
|
2281 |
} |
|
2282 |
||
2283 |
node = node.m_nextChar[ch]; |
|
2284 |
if (node == null) |
|
2285 |
return null; |
|
2286 |
} |
|
2287 |
||
2288 |
return node.m_Value; |
|
2289 |
} |
|
2290 |
} |
|
2291 |
} |
|
2292 |
||
2293 |
/** |
|
2294 |
* Get the length of the longest key used in the table. |
|
2295 |
*/ |
|
2296 |
public int getLongestKeyLength() |
|
2297 |
{ |
|
2298 |
return m_charBuffer.length; |
|
2299 |
} |
|
2300 |
} |
|
2301 |
} |