6
|
1 |
/*
|
|
2 |
* reserved comment block
|
|
3 |
* DO NOT REMOVE OR ALTER!
|
|
4 |
*/
|
|
5 |
/*
|
|
6 |
* Copyright 1999-2002,2004 The Apache Software Foundation.
|
|
7 |
*
|
|
8 |
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
9 |
* you may not use this file except in compliance with the License.
|
|
10 |
* You may obtain a copy of the License at
|
|
11 |
*
|
|
12 |
* http://www.apache.org/licenses/LICENSE-2.0
|
|
13 |
*
|
|
14 |
* Unless required by applicable law or agreed to in writing, software
|
|
15 |
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
16 |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
17 |
* See the License for the specific language governing permissions and
|
|
18 |
* limitations under the License.
|
|
19 |
*/
|
|
20 |
|
|
21 |
|
|
22 |
// Sep 14, 2000:
|
|
23 |
// Fixed serializer to report IO exception directly, instead of at
|
|
24 |
// the end of document processing.
|
|
25 |
// Reported by Patrick Higgins <phiggins@transzap.com>
|
|
26 |
|
|
27 |
|
|
28 |
package com.sun.org.apache.xml.internal.serialize;
|
|
29 |
|
|
30 |
|
|
31 |
import java.io.IOException;
|
|
32 |
|
|
33 |
import org.w3c.dom.Element;
|
|
34 |
import org.w3c.dom.Node;
|
|
35 |
import org.xml.sax.AttributeList;
|
|
36 |
import org.xml.sax.Attributes;
|
|
37 |
import org.xml.sax.SAXException;
|
|
38 |
|
|
39 |
|
|
40 |
/**
|
|
41 |
* Implements a text serializer supporting both DOM and SAX
|
|
42 |
* serializing. For usage instructions see {@link Serializer}.
|
|
43 |
* <p>
|
|
44 |
* If an output stream is used, the encoding is taken from the
|
|
45 |
* output format (defaults to <tt>UTF-8</tt>). If a writer is
|
|
46 |
* used, make sure the writer uses the same encoding (if applies)
|
|
47 |
* as specified in the output format.
|
|
48 |
* <p>
|
|
49 |
* The serializer supports both DOM and SAX. DOM serializing is done
|
|
50 |
* by calling {@link #serialize} and SAX serializing is done by firing
|
|
51 |
* SAX events and using the serializer as a document handler.
|
|
52 |
* <p>
|
|
53 |
* If an I/O exception occurs while serializing, the SAX methods of
|
|
54 |
* this class rethrow it at once wrapped in a {@link SAXException};
|
|
55 |
* the DOM serializing methods throw the {@link IOException} itself.
|
|
57 |
*
|
|
58 |
*
|
|
59 |
* @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
|
|
60 |
* @see Serializer
|
|
61 |
*/
|
|
62 |
public class TextSerializer
|
|
63 |
extends BaseMarkupSerializer
|
|
64 |
{
|
|
65 |
|
|
66 |
|
|
67 |
/**
 * Creates a text serializer whose initial output format is
 * <tt>new OutputFormat( Method.TEXT, null, false )</tt>.
 * <p>
 * No output target is configured here: the serializer cannot be
 * used until {@link #setOutputCharStream} or
 * {@link #setOutputByteStream} has been called.
 */
public TextSerializer()
{
    // The superclass constructor call has to be the first statement,
    // so the default format is created inline.
    super(
        new OutputFormat( Method.TEXT, null, false )
    );
}
|
|
76 |
|
|
77 |
|
|
78 |
public void setOutputFormat( OutputFormat format )
|
|
79 |
{
|
|
80 |
super.setOutputFormat( format != null ? format : new OutputFormat( Method.TEXT, null, false ) );
|
|
81 |
}
|
|
82 |
|
|
83 |
|
|
84 |
//-----------------------------------------//
|
|
85 |
// SAX content handler serializing methods //
|
|
86 |
//-----------------------------------------//
|
|
87 |
|
|
88 |
|
|
89 |
public void startElement( String namespaceURI, String localName,
|
|
90 |
String rawName, Attributes attrs )
|
|
91 |
throws SAXException
|
|
92 |
{
|
|
93 |
startElement( rawName == null ? localName : rawName, null );
|
|
94 |
}
|
|
95 |
|
|
96 |
|
|
97 |
public void endElement( String namespaceURI, String localName,
|
|
98 |
String rawName )
|
|
99 |
throws SAXException
|
|
100 |
{
|
|
101 |
endElement( rawName == null ? localName : rawName );
|
|
102 |
}
|
|
103 |
|
|
104 |
|
|
105 |
//------------------------------------------//
|
|
106 |
// SAX document handler serializing methods //
|
|
107 |
//------------------------------------------//
|
|
108 |
|
|
109 |
|
|
110 |
public void startElement( String tagName, AttributeList attrs )
|
|
111 |
throws SAXException
|
|
112 |
{
|
|
113 |
boolean preserveSpace;
|
|
114 |
ElementState state;
|
|
115 |
|
|
116 |
try {
|
|
117 |
state = getElementState();
|
|
118 |
if ( isDocumentState() ) {
|
|
119 |
// If this is the root element handle it differently.
|
|
120 |
// If the first root element in the document, serialize
|
|
121 |
// the document's DOCTYPE. Space preserving defaults
|
|
122 |
// to that of the output format.
|
|
123 |
if ( ! _started )
|
|
124 |
startDocument( tagName );
|
|
125 |
}
|
|
126 |
// For any other element, if first in parent, then
|
|
127 |
// use the parnet's space preserving.
|
|
128 |
preserveSpace = state.preserveSpace;
|
|
129 |
|
|
130 |
// Do not change the current element state yet.
|
|
131 |
// This only happens in endElement().
|
|
132 |
|
|
133 |
// Ignore all other attributes of the element, only printing
|
|
134 |
// its contents.
|
|
135 |
|
|
136 |
// Now it's time to enter a new element state
|
|
137 |
// with the tag name and space preserving.
|
|
138 |
// We still do not change the curent element state.
|
|
139 |
state = enterElementState( null, null, tagName, preserveSpace );
|
|
140 |
} catch ( IOException except ) {
|
|
141 |
throw new SAXException( except );
|
|
142 |
}
|
|
143 |
}
|
|
144 |
|
|
145 |
|
|
146 |
public void endElement( String tagName )
|
|
147 |
throws SAXException
|
|
148 |
{
|
|
149 |
try {
|
|
150 |
endElementIO( tagName );
|
|
151 |
} catch ( IOException except ) {
|
|
152 |
throw new SAXException( except );
|
|
153 |
}
|
|
154 |
}
|
|
155 |
|
|
156 |
|
|
157 |
public void endElementIO( String tagName )
|
|
158 |
throws IOException
|
|
159 |
{
|
|
160 |
ElementState state;
|
|
161 |
|
|
162 |
// Works much like content() with additions for closing
|
|
163 |
// an element. Note the different checks for the closed
|
|
164 |
// element's state and the parent element's state.
|
|
165 |
state = getElementState();
|
|
166 |
// Leave the element state and update that of the parent
|
|
167 |
// (if we're not root) to not empty and after element.
|
|
168 |
state = leaveElementState();
|
|
169 |
state.afterElement = true;
|
|
170 |
state.empty = false;
|
|
171 |
if ( isDocumentState() )
|
|
172 |
_printer.flush();
|
|
173 |
}
|
|
174 |
|
|
175 |
|
|
176 |
public void processingInstructionIO( String target, String code ) throws IOException
|
|
177 |
{
|
|
178 |
}
|
|
179 |
|
|
180 |
|
|
181 |
public void comment( String text )
|
|
182 |
{
|
|
183 |
}
|
|
184 |
|
|
185 |
|
|
186 |
public void comment( char[] chars, int start, int length )
|
|
187 |
{
|
|
188 |
}
|
|
189 |
|
|
190 |
|
|
191 |
public void characters( char[] chars, int start, int length )
|
|
192 |
throws SAXException
|
|
193 |
{
|
|
194 |
ElementState state;
|
|
195 |
|
|
196 |
try {
|
|
197 |
state = content();
|
|
198 |
state.doCData = state.inCData = false;
|
|
199 |
printText( chars, start, length, true, true );
|
|
200 |
} catch ( IOException except ) {
|
|
201 |
throw new SAXException( except );
|
|
202 |
}
|
|
203 |
}
|
|
204 |
|
|
205 |
|
|
206 |
protected void characters( String text, boolean unescaped )
|
|
207 |
throws IOException
|
|
208 |
{
|
|
209 |
ElementState state;
|
|
210 |
|
|
211 |
state = content();
|
|
212 |
state.doCData = state.inCData = false;
|
|
213 |
printText( text, true, true );
|
|
214 |
}
|
|
215 |
|
|
216 |
|
|
217 |
//------------------------------------------//
|
|
218 |
//     Generic node serializing methods     //
|
|
219 |
//------------------------------------------//
|
|
220 |
|
|
221 |
|
|
222 |
/**
|
|
223 |
* Called to serialize the document's DOCTYPE by the root element.
|
|
224 |
* <p>
|
|
225 |
* This method will check if it has not been called before ({@link #_started}),
|
|
226 |
* will serialize the document type declaration, and will serialize all
|
|
227 |
* pre-root comments and PIs that were accumulated in the document
|
|
228 |
* (see {@link #serializePreRoot}). Pre-root will be serialized even if
|
|
229 |
* this is not the first root element of the document.
|
|
230 |
*/
|
|
231 |
protected void startDocument( String rootTagName )
|
|
232 |
throws IOException
|
|
233 |
{
|
|
234 |
// Required to stop processing the DTD, even though the DTD
|
|
235 |
// is not printed.
|
|
236 |
_printer.leaveDTD();
|
|
237 |
|
|
238 |
_started = true;
|
|
239 |
// Always serialize these, even if not te first root element.
|
|
240 |
serializePreRoot();
|
|
241 |
}
|
|
242 |
|
|
243 |
|
|
244 |
/**
|
|
245 |
* Called to serialize a DOM element. Equivalent to calling {@link
|
|
246 |
* #startElement}, {@link #endElement} and serializing everything
|
|
247 |
* inbetween, but better optimized.
|
|
248 |
*/
|
|
249 |
protected void serializeElement( Element elem )
|
|
250 |
throws IOException
|
|
251 |
{
|
|
252 |
Node child;
|
|
253 |
ElementState state;
|
|
254 |
boolean preserveSpace;
|
|
255 |
String tagName;
|
|
256 |
|
|
257 |
tagName = elem.getTagName();
|
|
258 |
state = getElementState();
|
|
259 |
if ( isDocumentState() ) {
|
|
260 |
// If this is the root element handle it differently.
|
|
261 |
// If the first root element in the document, serialize
|
|
262 |
// the document's DOCTYPE. Space preserving defaults
|
|
263 |
// to that of the output format.
|
|
264 |
if ( ! _started )
|
|
265 |
startDocument( tagName );
|
|
266 |
}
|
|
267 |
// For any other element, if first in parent, then
|
|
268 |
// use the parnet's space preserving.
|
|
269 |
preserveSpace = state.preserveSpace;
|
|
270 |
|
|
271 |
// Do not change the current element state yet.
|
|
272 |
// This only happens in endElement().
|
|
273 |
|
|
274 |
// Ignore all other attributes of the element, only printing
|
|
275 |
// its contents.
|
|
276 |
|
|
277 |
// If element has children, then serialize them, otherwise
|
|
278 |
// serialize en empty tag.
|
|
279 |
if ( elem.hasChildNodes() ) {
|
|
280 |
// Enter an element state, and serialize the children
|
|
281 |
// one by one. Finally, end the element.
|
|
282 |
state = enterElementState( null, null, tagName, preserveSpace );
|
|
283 |
child = elem.getFirstChild();
|
|
284 |
while ( child != null ) {
|
|
285 |
serializeNode( child );
|
|
286 |
child = child.getNextSibling();
|
|
287 |
}
|
|
288 |
endElementIO( tagName );
|
|
289 |
} else {
|
|
290 |
if ( ! isDocumentState() ) {
|
|
291 |
// After element but parent element is no longer empty.
|
|
292 |
state.afterElement = true;
|
|
293 |
state.empty = false;
|
|
294 |
}
|
|
295 |
}
|
|
296 |
}
|
|
297 |
|
|
298 |
|
|
299 |
/**
|
|
300 |
* Serialize the DOM node. This method is unique to the Text serializer.
|
|
301 |
*
|
|
302 |
* @param node The node to serialize
|
|
303 |
*/
|
|
304 |
protected void serializeNode( Node node )
|
|
305 |
throws IOException
|
|
306 |
{
|
|
307 |
// Based on the node type call the suitable SAX handler.
|
|
308 |
// Only comments entities and documents which are not
|
|
309 |
// handled by SAX are serialized directly.
|
|
310 |
switch ( node.getNodeType() ) {
|
|
311 |
case Node.TEXT_NODE : {
|
|
312 |
String text;
|
|
313 |
|
|
314 |
text = node.getNodeValue();
|
|
315 |
if ( text != null )
|
|
316 |
characters( node.getNodeValue(), true );
|
|
317 |
break;
|
|
318 |
}
|
|
319 |
|
|
320 |
case Node.CDATA_SECTION_NODE : {
|
|
321 |
String text;
|
|
322 |
|
|
323 |
text = node.getNodeValue();
|
|
324 |
if ( text != null )
|
|
325 |
characters( node.getNodeValue(), true );
|
|
326 |
break;
|
|
327 |
}
|
|
328 |
|
|
329 |
case Node.COMMENT_NODE :
|
|
330 |
break;
|
|
331 |
|
|
332 |
case Node.ENTITY_REFERENCE_NODE :
|
|
333 |
// Ignore.
|
|
334 |
break;
|
|
335 |
|
|
336 |
case Node.PROCESSING_INSTRUCTION_NODE :
|
|
337 |
break;
|
|
338 |
|
|
339 |
case Node.ELEMENT_NODE :
|
|
340 |
serializeElement( (Element) node );
|
|
341 |
break;
|
|
342 |
|
|
343 |
case Node.DOCUMENT_NODE :
|
|
344 |
// !!! Fall through
|
|
345 |
case Node.DOCUMENT_FRAGMENT_NODE : {
|
|
346 |
Node child;
|
|
347 |
|
|
348 |
// By definition this will happen if the node is a document,
|
|
349 |
// document fragment, etc. Just serialize its contents. It will
|
|
350 |
// work well for other nodes that we do not know how to serialize.
|
|
351 |
child = node.getFirstChild();
|
|
352 |
while ( child != null ) {
|
|
353 |
serializeNode( child );
|
|
354 |
child = child.getNextSibling();
|
|
355 |
}
|
|
356 |
break;
|
|
357 |
}
|
|
358 |
|
|
359 |
default:
|
|
360 |
break;
|
|
361 |
}
|
|
362 |
}
|
|
363 |
|
|
364 |
|
|
365 |
protected ElementState content()
|
|
366 |
{
|
|
367 |
ElementState state;
|
|
368 |
|
|
369 |
state = getElementState();
|
|
370 |
if ( ! isDocumentState() ) {
|
|
371 |
// If this is the first content in the element,
|
|
372 |
// change the state to not-empty.
|
|
373 |
if ( state.empty )
|
|
374 |
state.empty = false;
|
|
375 |
// Except for one content type, all of them
|
|
376 |
// are not last element. That one content
|
|
377 |
// type will take care of itself.
|
|
378 |
state.afterElement = false;
|
|
379 |
}
|
|
380 |
return state;
|
|
381 |
}
|
|
382 |
|
|
383 |
|
|
384 |
protected String getEntityRef( int ch )
|
|
385 |
{
|
|
386 |
return null;
|
|
387 |
}
|
|
388 |
|
|
389 |
|
|
390 |
}
|