diff -r 16ba58282d11 -r a754d69d5e60 jaxp/src/share/classes/org/w3c/dom/ls/LSSerializer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jaxp/src/share/classes/org/w3c/dom/ls/LSSerializer.java Sun Mar 04 11:55:34 2012 -0800
@@ -0,0 +1,465 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * This file is available under and governed by the GNU General Public
+ * License version 2 only, as published by the Free Software Foundation.
+ * However, the following notice accompanied the original version of this
+ * file and, per its terms, should not be removed:
+ *
+ * Copyright (c) 2004 World Wide Web Consortium,
+ *
+ * (Massachusetts Institute of Technology, European Research Consortium for
+ * Informatics and Mathematics, Keio University). All Rights Reserved. This
+ * work is distributed under the W3C(r) Software License [1] in the hope that
+ * it will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * [1] http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
+ */
+
+package org.w3c.dom.ls;
+
+import org.w3c.dom.DOMConfiguration;
+import org.w3c.dom.Node;
+import org.w3c.dom.DOMException;
+
+/**
+ * An <code>LSSerializer</code> provides an API for serializing (writing) a
+ * DOM document out into XML. The XML data is written to a string or an
+ * output stream. Any changes or fixups made during the serialization affect
+ * only the serialized data. The <code>Document</code> object and its
+ * children are never altered by the serialization operation.
+ * <p>During serialization of XML data, namespace fixup is done as defined in
+ * [DOM Level 3 Core], Appendix B. [DOM Level 2 Core] allows empty strings as
+ * a real namespace URI. If the <code>namespaceURI</code> of a
+ * <code>Node</code> is the empty string, the serialization will treat it as
+ * <code>null</code>, ignoring the prefix if any.
+ * <p><code>LSSerializer</code> accepts any node type for serialization. For
+ * nodes of type <code>Document</code> or <code>Entity</code>, well-formed
+ * XML will be created when possible (well-formedness is guaranteed if the
+ * document or entity comes from a parse operation and is unchanged since it
+ * was created). The serialized output for these node types is either an
+ * XML document or an External XML Entity, respectively, and is acceptable
+ * input for an XML parser. For all other types of nodes the serialized form
+ * is implementation dependent.
+ * <p>Within a <code>Document</code>, <code>DocumentFragment</code>, or
+ * <code>Entity</code> being serialized, <code>Nodes</code> are processed as
+ * follows:
+ * <ul>
+ * <li><code>Document</code> nodes are written, including the XML
+ * declaration (unless the parameter "xml-declaration" is set to
+ * <code>false</code>) and a DTD subset, if one exists in the DOM. Writing a
+ * <code>Document</code> node serializes the entire document.</li>
+ * <li><code>Entity</code> nodes, when written directly by
+ * <code>LSSerializer.write</code>, output the entity expansion but no
+ * namespace fixup is done. The resulting output will be valid as an
+ * external entity.</li>
+ * <li>If the parameter "entities" is set to <code>true</code>,
+ * <code>EntityReference</code> nodes are serialized as an entity reference
+ * of the form "<code>&amp;entityName;</code>" in the output. Child nodes
+ * (the expansion) of the entity reference are ignored. If the parameter
+ * "entities" is set to <code>false</code>, only the children of the entity
+ * reference are serialized. <code>EntityReference</code> nodes with no
+ * children (no corresponding <code>Entity</code> node or the corresponding
+ * <code>Entity</code> nodes have no children) are always serialized.</li>
+ * <li><code>CDATAsections</code> containing content characters that cannot
+ * be represented in the specified output encoding are handled according to
+ * the "split-cdata-sections" parameter. If the parameter is set to
+ * <code>true</code>, <code>CDATAsections</code> are split, and the
+ * unrepresentable characters are serialized as numeric character references
+ * in ordinary content. The exact position and number of splits is not
+ * specified. If the parameter is set to <code>false</code>, unrepresentable
+ * characters in a <code>CDATAsection</code> are reported as
+ * <code>"wf-invalid-character"</code> errors if the parameter "well-formed"
+ * is set to <code>true</code>. The error is not recoverable - there is no
+ * mechanism for supplying alternative characters and continuing with the
+ * serialization.</li>
+ * <li><code>DocumentFragment</code> nodes are serialized by
+ * serializing the children of the document fragment in the order they
+ * appear in the document fragment.</li>
+ * </ul>
+ * <p><b>Note:</b> The serialization of a <code>Node</code> does not always
+ * generate a well-formed XML document, i.e. an <code>LSParser</code> might
+ * throw fatal errors when parsing the resulting serialization.
+ * <p>Within the character data of a document (outside of markup), any
+ * characters that cannot be represented directly are replaced with
+ * character references. Occurrences of '&lt;' and '&amp;' are replaced by
+ * the predefined entities &amp;lt; and &amp;amp;. The other predefined
+ * entities (&amp;gt;, &amp;apos;, and &amp;quot;) might not be used, except
+ * where needed (e.g. using &amp;gt; in cases such as ']]&gt;'). Any
+ * characters that cannot be represented directly in the output character
+ * encoding are serialized as numeric character references (and since
+ * character encoding standards commonly use hexadecimal representations of
+ * characters, using the hexadecimal representation when serializing
+ * character references is encouraged).
+ * <p>To allow attribute values to contain both single and double quotes, the
+ * apostrophe or single-quote character (') may be represented as
+ * "&amp;apos;", and the double-quote character (") as "&amp;quot;". New
+ * line characters and other characters that cannot be represented directly
+ * in attribute values in the output character encoding are serialized as a
+ * numeric character reference.
+ *
+ * <p>Within markup, but outside of attributes, any occurrence of a character
+ * that cannot be represented in the output character encoding is reported
+ * as a <code>DOMError</code> fatal error. An example would be serializing
+ * the element &lt;LaCa\u00f1ada/&gt; with <code>encoding="us-ascii"</code>.
+ * This will result in the generation of a <code>DOMError</code>
+ * "wf-invalid-character-in-node-name" (as proposed in "well-formed").
+ * <p>When requested by setting the parameter "normalize-characters" on
+ * <code>LSSerializer</code> to true, character normalization is performed
+ * according to the definition of fully normalized characters included in
+ * appendix E of [XML 1.1] on all data to be serialized, both markup and
+ * character data. The character normalization process affects only the
+ * data as it is being written; it does not alter the DOM's view of the
+ * document after serialization has completed.
+ * <p>Implementations are required to support the encodings "UTF-8",
+ * "UTF-16", "UTF-16BE", and "UTF-16LE" to guarantee that data is
+ * serializable in all encodings that are required to be supported by all
+ * XML parsers. When the encoding is UTF-8, whether or not a byte order mark
+ * is serialized, or if the output is big-endian or little-endian, is
+ * implementation dependent. When the encoding is UTF-16, whether or not the
+ * output is big-endian or little-endian is implementation dependent, but a
+ * Byte Order Mark must be generated for non-character outputs, such as
+ * <code>LSOutput.byteStream</code> or <code>LSOutput.systemId</code>. If
+ * the Byte Order Mark is not generated, a "byte-order-mark-needed" warning
+ * is reported. When the encoding is UTF-16LE or UTF-16BE, the output is
+ * big-endian (UTF-16BE) or little-endian (UTF-16LE) and the Byte Order Mark
+ * is not generated. In all cases, the encoding declaration, if
+ * generated, will correspond to the encoding used during the serialization
+ * (e.g. <code>encoding="UTF-16"</code> will appear if UTF-16 was
+ * requested).
+ * <p>Namespaces are fixed up during serialization: the serialization process
+ * will verify that namespace declarations, namespace prefixes and the
+ * namespace URI associated with elements and attributes are consistent. If
+ * inconsistencies are found, the serialized form of the document will be
+ * altered to remove them. The method used for doing the namespace fixup
+ * while serializing a document is the algorithm defined in Appendix B.1,
+ * "Namespace normalization", of [DOM Level 3 Core].
+ * <p>While serializing a document, the parameter "discard-default-content"
+ * controls whether or not non-specified data is serialized.
+ *
+ * <p>While serializing, errors and warnings are reported to the application
+ * through the error handler (<code>LSSerializer.domConfig</code>'s
+ * "error-handler" parameter). This specification does in no way try to
+ * define all possible errors and warnings that can occur while serializing
+ * a DOM node, but some common error and warning cases are defined. The
+ * types (<code>DOMError.type</code>) of errors and warnings defined by this
+ * specification are:
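+ * <dl>
+ * <dt><code>"no-output-specified" [fatal]</code></dt>
+ * <dd>Raised when writing to an <code>LSOutput</code> if no output is
+ * specified in the <code>LSOutput</code>.</dd>
+ * <dt><code>"unbound-prefix-in-entity-reference" [fatal]</code></dt>
+ * <dd>Raised if the configuration parameter "namespaces" is set to
+ * <code>true</code> and an entity whose replacement text contains unbound
+ * namespace prefixes is referenced in a location where there are no
+ * bindings for the namespace prefixes.</dd>
+ * <dt><code>"unsupported-encoding" [fatal]</code></dt>
+ * <dd>Raised if an unsupported encoding is encountered.</dd>
+ * </dl>
+ * <p>In addition to raising the defined errors and warnings, implementations
+ * are expected to raise implementation specific errors and warnings for any
+ * other error and warning cases such as IO errors (file not found,
+ * permission denied,...) and so on.
+ *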
+ * <p>See also the Document Object Model (DOM) Level 3 Load and Save
+ * Specification.
+ */
+public interface LSSerializer {
+    /**
+     * When the application provides a filter, the serializer will call out
+     * to the filter before serializing each Node. The filter implementation
+     * can choose to remove the node from the stream or to terminate the
+     * serialization early.
+     * <br>The filter is invoked after the <code>DOMConfiguration</code>
+     * parameters have been applied. For example, CDATA sections won't be
+     * passed to the filter if "cdata-sections" is set to <code>false</code>.
+     *
+     * <p>NOTE(review): everything below this point is documentation that
+     * ended up fused into this comment in this copy of the file; it does not
+     * describe <code>getFilter()</code>. It appears to belong to a
+     * <code>DOMConfiguration</code> accessor (the file imports
+     * <code>DOMConfiguration</code> but declares no member using it) and to
+     * a second attribute, neither of which is declared here. Places where
+     * text is evidently missing are marked "[text lost]". Restore the
+     * declarations and their comments from upstream.
+     *
+     * <p>The <code>DOMConfiguration</code> object used by the
+     * <code>LSSerializer</code> when serializing a DOM node.
+     * <br>In addition to the parameters recognized by the DOMConfiguration
+     * interface defined in [DOM Level 3 Core], the
+     * <code>DOMConfiguration</code> objects for <code>LSSerializer</code>
+     * add, or modify, the following parameters:
+     * <dl>
+     * <dt><code>"canonical-form"</code></dt>
+     * <dd>
+     * <dl>
+     * <dt><code>true</code></dt>
+     * <dd>[text lost] Setting this parameter to <code>true</code> will set
+     * the parameters "format-pretty-print", "discard-default-content", and
+     * "xml-declaration" to <code>false</code>. Setting one of those
+     * parameters to <code>true</code> will set this parameter to
+     * <code>false</code>. Serializing an XML 1.1 document when
+     * "canonical-form" is <code>true</code> will generate a fatal error.</dd>
+     * <dt><code>false</code></dt>
+     * <dd>[text lost]</dd>
+     * </dl></dd>
+     * <dt><code>"discard-default-content"</code></dt>
+     * <dd>
+     * <dl>
+     * <dt><code>true</code></dt>
+     * <dd>[text lost] Use the <code>Attr.specified</code> attribute to decide
+     * what attributes should be discarded. Note that some implementations
+     * might use whatever information is available to the implementation
+     * (i.e. XML schema, DTD, the <code>Attr.specified</code> attribute, and
+     * so on) to determine what attributes and content to discard if this
+     * parameter is set to <code>true</code>.</dd>
+     * <dt><code>false</code></dt>
+     * <dd>[text lost]</dd>
+     * </dl></dd>
+     * <dt><code>"format-pretty-print"</code></dt>
+     * <dd>
+     * <dl>
+     * <dt><code>true</code></dt>
+     * <dd>[text lost]</dd>
+     * <dt><code>false</code></dt>
+     * <dd>[text lost]</dd>
+     * </dl></dd>
+     * <dt><code>"ignore-unknown-character-denormalizations"</code></dt>
+     * <dd>
+     * <dl>
+     * <dt><code>true</code></dt>
+     * <dd>[text lost] <code>"unknown-character-denormalization"</code>
+     * warning (instead of raising an error, if this parameter is not set)
+     * and ignore any possible denormalizations caused by these
+     * characters.</dd>
+     * <dt><code>false</code></dt>
+     * <dd>[text lost]</dd>
+     * </dl></dd>
+     * <dt><code>"normalize-characters"</code></dt>
+     * <dd>[text lost] <code>DOMConfiguration</code> in [DOM Level 3 Core].
+     * Unlike in the Core, the default value for this parameter is
+     * <code>true</code>. While DOM implementations are not required to
+     * support fully normalizing the characters in the document according to
+     * appendix E of [XML 1.1], this parameter must be activated by default
+     * if supported.</dd>
+     * <dt><code>"xml-declaration"</code></dt>
+     * <dd>
+     * <dl>
+     * <dt><code>true</code></dt>
+     * <dd>[text lost] If a <code>Document</code>, <code>Element</code>, or
+     * <code>Entity</code> node is serialized, the XML declaration, or text
+     * declaration, should be included. The version
+     * (<code>Document.xmlVersion</code> if the document is a Level 3
+     * document and the version is non-null, otherwise use the value "1.0"),
+     * and the output encoding (see <code>LSSerializer.write</code> for
+     * details on how to find the output encoding) are specified in the
+     * serialized XML declaration.</dd>
+     * <dt><code>false</code></dt>
+     * <dd>[text lost] <code>"xml-declaration-needed"</code> warning if this
+     * will cause problems (i.e. the serialized data is of an XML version
+     * other than [XML 1.0], or an encoding would be needed to be able to
+     * re-parse the serialized data).</dd>
+     * </dl></dd>
+     * </dl>
+     *
+     * <p>[text lost] <code>null</code> will reset its value to the default
+     * value. (This fragment occurs twice in this copy, with no surrounding
+     * description and no declaration.)
+     */
+    public LSSerializerFilter getFilter();
+    /**
+     * When the application provides a filter, the serializer will call out
+     * to the filter before serializing each Node. The filter implementation
+     * can choose to remove the node from the stream or to terminate the
+     * serialization early.
+     * <br>The filter is invoked after the <code>DOMConfiguration</code>
+     * parameters have been applied. For example, CDATA sections won't be
+     * passed to the filter if "cdata-sections" is set to <code>false</code>.
+     * @param filter The filter the serializer should call out to.
+     */
+    public void setFilter(LSSerializerFilter filter);
+
+    /**
+     * Serialize the specified node as described above in the general
+     * description of the <code>LSSerializer</code> interface. The output is
+     * written to the supplied <code>LSOutput</code>.
+     * <br>When writing to an <code>LSOutput</code>, the encoding is found by
+     * looking at the encoding information that is reachable through the
+     * <code>LSOutput</code> and the item to be written (or its owner
+     * document) in this order:
+     * <ol>
+     * <li><code>LSOutput.encoding</code>,</li>
+     * <li><code>Document.inputEncoding</code>,</li>
+     * <li><code>Document.xmlEncoding</code>.</li>
+     * </ol>
+     * <br>If no output is specified in the <code>LSOutput</code>, a
+     * "no-output-specified" fatal error is raised.
+     * <br>NOTE(review): the opening words of the two sentences above were
+     * missing in this copy and have been reconstructed; further sentences
+     * may have been lost - compare with upstream.
+     * @param nodeArg The node to serialize.
+     * @param destination The <code>LSOutput</code> the output is written to.
+     * @return Returns <code>true</code> if <code>nodeArg</code> was
+     *   successfully serialized. Return <code>false</code> in case the
+     *   normal processing stopped but the implementation kept serializing
+     *   the document; the result of the serialization being implementation
+     *   dependent then.
+     * @exception LSException
+     *   SERIALIZE_ERR: Raised if the <code>LSSerializer</code> was unable to
+     *   serialize the node. DOM applications should attach a
+     *   <code>DOMErrorHandler</code> using the parameter "error-handler"
+     *   if they wish to get details on the error.
+     */
+    public boolean write(Node nodeArg,
+                         LSOutput destination)
+                         throws LSException;
+
+    /**
+     * A convenience method that acts as if <code>LSSerializer.write</code>
+     * was called with an <code>LSOutput</code> with no encoding specified
+     * and <code>LSOutput.systemId</code> set to the <code>uri</code>
+     * argument.
+     * @param nodeArg The node to serialize.
+     * @param uri The URI to write to.
+     * @return Returns <code>true</code> if <code>nodeArg</code> was
+     *   successfully serialized. Return <code>false</code> in case the
+     *   normal processing stopped but the implementation kept serializing
+     *   the document; the result of the serialization being implementation
+     *   dependent then.
+     * @exception LSException
+     *   SERIALIZE_ERR: Raised if the <code>LSSerializer</code> was unable to
+     *   serialize the node. DOM applications should attach a
+     *   <code>DOMErrorHandler</code> using the parameter "error-handler"
+     *   if they wish to get details on the error.
+     */
+    public boolean writeToURI(Node nodeArg,
+                              String uri)
+                              throws LSException;
+
+    /**
+     * Serialize the specified node as described above in the general
+     * description of the <code>LSSerializer</code> interface. The output is
+     * written to a <code>DOMString</code> that is returned to the caller.
+     * The encoding used is the encoding of the <code>DOMString</code> type,
+     * i.e. UTF-16. Note that no Byte Order Mark is generated in a
+     * <code>DOMString</code> object.
+     * @param nodeArg The node to serialize.
+     * @return Returns the serialized data.
+     * @exception DOMException
+     *   DOMSTRING_SIZE_ERR: Raised if the resulting string is too long to
+     *   fit in a <code>DOMString</code>.
+     * @exception LSException
+     *   SERIALIZE_ERR: Raised if the <code>LSSerializer</code> was unable to
+     *   serialize the node. DOM applications should attach a
+     *   <code>DOMErrorHandler</code> using the parameter "error-handler"
+     *   if they wish to get details on the error.
+     */
+    public String writeToString(Node nodeArg)
+                                throws DOMException, LSException;
+
+}