jaxp/src/com/sun/org/apache/xml/internal/serialize/OutputFormat.java
changeset 12457 c348e06f0e82
parent 6 7f561c08de6b
child 25834 aba3efbf4ec5
equal deleted inserted replaced
12324:1d7e6da6adc8 12457:c348e06f0e82
       
     1 /*
       
     2  * reserved comment block
       
     3  * DO NOT REMOVE OR ALTER!
       
     4  */
       
     5 /*
       
     6  * Copyright 1999-2002,2004 The Apache Software Foundation.
       
     7  *
       
     8  * Licensed under the Apache License, Version 2.0 (the "License");
       
     9  * you may not use this file except in compliance with the License.
       
    10  * You may obtain a copy of the License at
       
    11  *
       
    12  *      http://www.apache.org/licenses/LICENSE-2.0
       
    13  *
       
    14  * Unless required by applicable law or agreed to in writing, software
       
    15  * distributed under the License is distributed on an "AS IS" BASIS,
       
    16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
       
    17  * See the License for the specific language governing permissions and
       
    18  * limitations under the License.
       
    19  */
       
    20 
       
    21 
       
    22 // Aug 21, 2000:
       
    23 //  Added ability to omit DOCTYPE declaration.
       
    24 //  Reported by Lars Martin <lars@smb-tec.com>
       
    25 // Aug 25, 2000:
       
    26 //  Added ability to omit comments.
       
    27 //  Contributed by Anupam Bagchi <abagchi@jtcsv.com>
       
    28 
       
    29 
       
    30 package com.sun.org.apache.xml.internal.serialize;
       
    31 
       
    32 
       
    33 import java.io.UnsupportedEncodingException;
       
    34 
       
    35 import org.w3c.dom.Document;
       
    36 import org.w3c.dom.DocumentType;
       
    37 import org.w3c.dom.Node;
       
    38 import org.w3c.dom.html.HTMLDocument;
       
    39 
       
    40 
       
    41 /**
       
    42  * Specifies an output format to control the serializer. Based on the
       
    43  * XSLT specification for output format, plus additional parameters.
       
    44  * Used to select the suitable serializer and determine how the
       
    45  * document should be formatted on output.
       
    46  * <p>
       
    47  * The two interesting constructors are:
       
    48  * <ul>
       
    49  * <li>{@link #OutputFormat(String,String,boolean)} creates a format
       
    50  *  for the specified method (XML, HTML, Text, etc), encoding and indentation
       
    51  * <li>{@link #OutputFormat(Document,String,boolean)} creates a format
       
    52  *  compatible with the document type (XML, HTML, Text, etc), encoding and
       
    53  *  indentation
       
    54  * </ul>
       
    55  *
       
    56  *
       
    57  * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
       
    58  *         <a href="mailto:visco@intalio.com">Keith Visco</a>
       
    59  * @see Serializer
       
    60  * @see Method
       
    61  * @see LineSeparator
       
    62  */
       
    63 public class OutputFormat
       
    64 {
       
    65 
       
    66 
       
    67     public static class DTD
       
    68     {
       
    69 
       
    70         /**
       
    71          * Public identifier for HTML 4.01 (Strict) document type.
       
    72          */
       
    73         public static final String HTMLPublicId = "-//W3C//DTD HTML 4.01//EN";
       
    74 
       
    75         /**
       
    76          * System identifier for HTML 4.01 (Strict) document type.
       
    77          */
       
    78         public static final String HTMLSystemId =
       
    79             "http://www.w3.org/TR/html4/strict.dtd";
       
    80 
       
    81         /**
       
    82          * Public identifier for XHTML 1.0 (Strict) document type.
       
    83          */
       
    84         public static final String XHTMLPublicId =
       
    85             "-//W3C//DTD XHTML 1.0 Strict//EN";
       
    86 
       
    87         /**
       
    88          * System identifier for XHTML 1.0 (Strict) document type.
       
    89          */
       
    90         public static final String XHTMLSystemId =
       
    91             "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
       
    92 
       
    93     }
       
    94 
       
    95 
       
    96     public static class Defaults
       
    97     {
       
    98 
       
    99         /**
       
   100          * If indentation is turned on, the default identation
       
   101          * level is 4.
       
   102          *
       
   103          * @see #setIndenting(boolean)
       
   104          */
       
   105         public static final int Indent = 4;
       
   106 
       
   107         /**
       
   108          * The default encoding for Web documents it UTF-8.
       
   109          *
       
   110          * @see #getEncoding()
       
   111          */
       
   112         public static final String Encoding = "UTF-8";
       
   113 
       
   114         /**
       
   115          * The default line width at which to break long lines
       
   116          * when identing. This is set to 72.
       
   117          */
       
   118         public static final int LineWidth = 72;
       
   119 
       
   120     }
       
   121 
       
   122 
       
   123     /**
       
   124      * Holds the output method specified for this document,
       
   125      * or null if no method was specified.
       
   126      */
       
   127     private String _method;
       
   128 
       
   129 
       
   130     /**
       
   131      * Specifies the version of the output method.
       
   132      */
       
   133     private String _version;
       
   134 
       
   135 
       
   136     /**
       
   137      * The indentation level, or zero if no indentation
       
   138      * was requested.
       
   139      */
       
   140     private int _indent = 0;
       
   141 
       
   142 
       
   143     /**
       
   144      * The encoding to use, if an input stream is used.
       
   145      * The default is always UTF-8.
       
   146      */
       
   147     private String _encoding = Defaults.Encoding;
       
   148 
       
   149     /**
       
   150      * The EncodingInfo instance for _encoding.
       
   151      */
       
   152     private EncodingInfo _encodingInfo = null;
       
   153 
       
   154     // whether java names for encodings are permitted
       
   155     private boolean _allowJavaNames = false;
       
   156 
       
   157     /**
       
   158      * The specified media type or null.
       
   159      */
       
   160     private String _mediaType;
       
   161 
       
   162 
       
   163     /**
       
   164      * The specified document type system identifier, or null.
       
   165      */
       
   166     private String _doctypeSystem;
       
   167 
       
   168 
       
   169     /**
       
   170      * The specified document type public identifier, or null.
       
   171      */
       
   172     private String _doctypePublic;
       
   173 
       
   174 
       
   175     /**
       
   176      * Ture if the XML declaration should be ommited;
       
   177      */
       
   178     private boolean _omitXmlDeclaration = false;
       
   179 
       
   180 
       
   181     /**
       
   182      * Ture if the DOCTYPE declaration should be ommited;
       
   183      */
       
   184     private boolean _omitDoctype = false;
       
   185 
       
   186 
       
   187     /**
       
   188      * Ture if comments should be ommited;
       
   189      */
       
   190     private boolean _omitComments = false;
       
   191 
       
   192 
       
   193     /**
       
   194      * Ture if the comments should be ommited;
       
   195      */
       
   196     private boolean _stripComments = false;
       
   197 
       
   198 
       
   199     /**
       
   200      * True if the document type should be marked as standalone.
       
   201      */
       
   202     private boolean _standalone = false;
       
   203 
       
   204 
       
   205     /**
       
   206      * List of element tag names whose text node children must
       
   207      * be output as CDATA.
       
   208      */
       
   209     private String[] _cdataElements;
       
   210 
       
   211 
       
   212     /**
       
   213      * List of element tag names whose text node children must
       
   214      * be output unescaped.
       
   215      */
       
   216     private String[] _nonEscapingElements;
       
   217 
       
   218 
       
   219     /**
       
   220      * The selected line separator.
       
   221      */
       
   222     private String _lineSeparator = LineSeparator.Web;
       
   223 
       
   224 
       
   225     /**
       
   226      * The line width at which to wrap long lines when indenting.
       
   227      */
       
   228     private int _lineWidth = Defaults.LineWidth;
       
   229 
       
   230 
       
   231     /**
       
   232      * True if spaces should be preserved in elements that do not
       
   233      * specify otherwise, or specify the default behavior.
       
   234      */
       
   235     private boolean _preserve = false;
       
   236         /** If true, an empty string valued attribute is output as "". If false and
       
   237          * and we are using the HTMLSerializer, then only the attribute name is
       
   238          * serialized. Defaults to false for backwards compatibility.
       
   239          */
       
   240         private boolean _preserveEmptyAttributes = false;
       
   241 
       
   242     /**
       
   243      * Constructs a new output format with the default values.
       
   244      */
       
   245     public OutputFormat()
       
   246     {
       
   247     }
       
   248 
       
   249 
       
   250     /**
       
   251      * Constructs a new output format with the default values for
       
   252      * the specified method and encoding. If <tt>indent</tt>
       
   253      * is true, the document will be pretty printed with the default
       
   254      * indentation level and default line wrapping.
       
   255      *
       
   256      * @param method The specified output method
       
   257      * @param encoding The specified encoding
       
   258      * @param indenting True for pretty printing
       
   259      * @see #setEncoding
       
   260      * @see #setIndenting
       
   261      * @see #setMethod
       
   262      */
       
   263     public OutputFormat( String method, String encoding, boolean indenting )
       
   264     {
       
   265         setMethod( method );
       
   266         setEncoding( encoding );
       
   267         setIndenting( indenting );
       
   268     }
       
   269 
       
   270 
       
   271     /**
       
   272      * Constructs a new output format with the proper method,
       
   273      * document type identifiers and media type for the specified
       
   274      * document.
       
   275      *
       
   276      * @param doc The document to output
       
   277      * @see #whichMethod
       
   278      */
       
   279     public OutputFormat( Document doc )
       
   280     {
       
   281         setMethod( whichMethod( doc ) );
       
   282         setDoctype( whichDoctypePublic( doc ), whichDoctypeSystem( doc ) );
       
   283         setMediaType( whichMediaType( getMethod() ) );
       
   284     }
       
   285 
       
   286 
       
   287     /**
       
   288      * Constructs a new output format with the proper method,
       
   289      * document type identifiers and media type for the specified
       
   290      * document, and with the specified encoding. If <tt>indent</tt>
       
   291      * is true, the document will be pretty printed with the default
       
   292      * indentation level and default line wrapping.
       
   293      *
       
   294      * @param doc The document to output
       
   295      * @param encoding The specified encoding
       
   296      * @param indenting True for pretty printing
       
   297      * @see #setEncoding
       
   298      * @see #setIndenting
       
   299      * @see #whichMethod
       
   300      */
       
   301     public OutputFormat( Document doc, String encoding, boolean indenting )
       
   302     {
       
   303         this( doc );
       
   304         setEncoding( encoding );
       
   305         setIndenting( indenting );
       
   306     }
       
   307 
       
   308 
       
   309     /**
       
   310      * Returns the method specified for this output format.
       
   311      * Typically the method will be <tt>xml</tt>, <tt>html</tt>
       
   312      * or <tt>text</tt>, but it might be other values.
       
   313      * If no method was specified, null will be returned
       
   314      * and the most suitable method will be determined for
       
   315      * the document by calling {@link #whichMethod}.
       
   316      *
       
   317      * @return The specified output method, or null
       
   318      */
       
   319     public String getMethod()
       
   320     {
       
   321         return _method;
       
   322     }
       
   323 
       
   324 
       
   325     /**
       
   326      * Sets the method for this output format.
       
   327      *
       
   328      * @see #getMethod
       
   329      * @param method The output method, or null
       
   330      */
       
   331     public void setMethod( String method )
       
   332     {
       
   333         _method = method;
       
   334     }
       
   335 
       
   336 
       
   337     /**
       
   338      * Returns the version for this output method.
       
   339      * If no version was specified, will return null
       
   340      * and the default version number will be used.
       
   341      * If the serializerr does not support that particular
       
   342      * version, it should default to a supported version.
       
   343      *
       
   344      * @return The specified method version, or null
       
   345      */
       
   346     public String getVersion()
       
   347     {
       
   348         return _version;
       
   349     }
       
   350 
       
   351 
       
   352     /**
       
   353      * Sets the version for this output method.
       
   354      * For XML the value would be "1.0", for HTML
       
   355      * it would be "4.0".
       
   356      *
       
   357      * @see #getVersion
       
   358      * @param version The output method version, or null
       
   359      */
       
   360     public void setVersion( String version )
       
   361     {
       
   362         _version = version;
       
   363     }
       
   364 
       
   365 
       
   366     /**
       
   367      * Returns the indentation specified. If no indentation
       
   368      * was specified, zero is returned and the document
       
   369      * should not be indented.
       
   370      *
       
   371      * @return The indentation or zero
       
   372      * @see #setIndenting
       
   373      */
       
   374     public int getIndent()
       
   375     {
       
   376         return _indent;
       
   377     }
       
   378 
       
   379 
       
   380     /**
       
   381      * Returns true if indentation was specified.
       
   382      */
       
   383     public boolean getIndenting()
       
   384     {
       
   385         return ( _indent > 0 );
       
   386     }
       
   387 
       
   388 
       
   389     /**
       
   390      * Sets the indentation. The document will not be
       
   391      * indented if the indentation is set to zero.
       
   392      * Calling {@link #setIndenting} will reset this
       
   393      * value to zero (off) or the default (on).
       
   394      *
       
   395      * @param indent The indentation, or zero
       
   396      */
       
   397     public void setIndent( int indent )
       
   398     {
       
   399         if ( indent < 0 )
       
   400             _indent = 0;
       
   401         else
       
   402             _indent = indent;
       
   403     }
       
   404 
       
   405 
       
   406     /**
       
   407      * Sets the indentation on and off. When set on, the default
       
   408      * indentation level and default line wrapping is used
       
   409      * (see {@link Defaults#Indent} and {@link Defaults#LineWidth}).
       
   410      * To specify a different indentation level or line wrapping,
       
   411      * use {@link #setIndent} and {@link #setLineWidth}.
       
   412      *
       
   413      * @param on True if indentation should be on
       
   414      */
       
   415     public void setIndenting( boolean on )
       
   416     {
       
   417         if ( on ) {
       
   418             _indent = Defaults.Indent;
       
   419             _lineWidth = Defaults.LineWidth;
       
   420         } else {
       
   421             _indent = 0;
       
   422             _lineWidth = 0;
       
   423         }
       
   424     }
       
   425 
       
   426 
       
   427     /**
       
   428      * Returns the specified encoding. If no encoding was
       
   429      * specified, the default is always "UTF-8".
       
   430      *
       
   431      * @return The encoding
       
   432      */
       
   433     public String getEncoding()
       
   434     {
       
   435         return _encoding;
       
   436     }
       
   437 
       
   438 
       
   439     /**
       
   440      * Sets the encoding for this output method. If no
       
   441      * encoding was specified, the default is always "UTF-8".
       
   442      * Make sure the encoding is compatible with the one
       
   443      * used by the {@link java.io.Writer}.
       
   444      *
       
   445      * @see #getEncoding
       
   446      * @param encoding The encoding, or null
       
   447      */
       
   448     public void setEncoding( String encoding )
       
   449     {
       
   450         _encoding = encoding;
       
   451         _encodingInfo = null;
       
   452     }
       
   453 
       
   454     /**
       
   455      * Sets the encoding for this output method with an <code>EncodingInfo</code>
       
   456      * instance.
       
   457      */
       
   458     public void setEncoding(EncodingInfo encInfo) {
       
   459         _encoding = encInfo.getIANAName();
       
   460         _encodingInfo = encInfo;
       
   461     }
       
   462 
       
   463     /**
       
   464      * Returns an <code>EncodingInfo<code> instance for the encoding.
       
   465      *
       
   466      * @see #setEncoding
       
   467      */
       
   468     public EncodingInfo getEncodingInfo() throws UnsupportedEncodingException {
       
   469         if (_encodingInfo == null)
       
   470             _encodingInfo = Encodings.getEncodingInfo(_encoding, _allowJavaNames);
       
   471         return _encodingInfo;
       
   472     }
       
   473 
       
   474     /**
       
   475      * Sets whether java encoding names are permitted
       
   476      */
       
   477     public void setAllowJavaNames (boolean allow) {
       
   478         _allowJavaNames = allow;
       
   479     }
       
   480 
       
   481     /**
       
   482      * Returns whether java encoding names are permitted
       
   483      */
       
   484     public boolean setAllowJavaNames () {
       
   485         return _allowJavaNames;
       
   486     }
       
   487 
       
   488     /**
       
   489      * Returns the specified media type, or null.
       
   490      * To determine the media type based on the
       
   491      * document type, use {@link #whichMediaType}.
       
   492      *
       
   493      * @return The specified media type, or null
       
   494      */
       
   495     public String getMediaType()
       
   496     {
       
   497         return _mediaType;
       
   498     }
       
   499 
       
   500 
       
   501     /**
       
   502      * Sets the media type.
       
   503      *
       
   504      * @see #getMediaType
       
   505      * @param mediaType The specified media type
       
   506      */
       
   507     public void setMediaType( String mediaType )
       
   508     {
       
   509         _mediaType = mediaType;
       
   510     }
       
   511 
       
   512 
       
   513     /**
       
   514      * Sets the document type public and system identifiers.
       
   515      * Required only if the DOM Document or SAX events do not
       
   516      * specify the document type, and one must be present in
       
   517      * the serialized document. Any document type specified
       
   518      * by the DOM Document or SAX events will override these
       
   519      * values.
       
   520      *
       
   521      * @param publicId The public identifier, or null
       
   522      * @param systemId The system identifier, or null
       
   523      */
       
   524     public void setDoctype( String publicId, String systemId )
       
   525     {
       
   526         _doctypePublic = publicId;
       
   527         _doctypeSystem = systemId;
       
   528     }
       
   529 
       
   530 
       
   531     /**
       
   532      * Returns the specified document type public identifier,
       
   533      * or null.
       
   534      */
       
   535     public String getDoctypePublic()
       
   536     {
       
   537         return _doctypePublic;
       
   538     }
       
   539 
       
   540 
       
   541     /**
       
   542      * Returns the specified document type system identifier,
       
   543      * or null.
       
   544      */
       
   545     public String getDoctypeSystem()
       
   546     {
       
   547         return _doctypeSystem;
       
   548     }
       
   549 
       
   550 
       
   551     /**
       
   552      * Returns true if comments should be ommited.
       
   553      * The default is false.
       
   554      */
       
   555     public boolean getOmitComments()
       
   556     {
       
   557         return _omitComments;
       
   558     }
       
   559 
       
   560 
       
   561     /**
       
   562      * Sets comment omitting on and off.
       
   563      *
       
   564      * @param omit True if comments should be ommited
       
   565      */
       
   566     public void setOmitComments( boolean omit )
       
   567     {
       
   568         _omitComments = omit;
       
   569     }
       
   570 
       
   571 
       
   572     /**
       
   573      * Returns true if the DOCTYPE declaration should
       
   574      * be ommited. The default is false.
       
   575      */
       
   576     public boolean getOmitDocumentType()
       
   577     {
       
   578         return _omitDoctype;
       
   579     }
       
   580 
       
   581 
       
   582     /**
       
   583      * Sets DOCTYPE declaration omitting on and off.
       
   584      *
       
   585      * @param omit True if DOCTYPE declaration should be ommited
       
   586      */
       
   587     public void setOmitDocumentType( boolean omit )
       
   588     {
       
   589         _omitDoctype = omit;
       
   590     }
       
   591 
       
   592 
       
   593     /**
       
   594      * Returns true if the XML document declaration should
       
   595      * be ommited. The default is false.
       
   596      */
       
   597     public boolean getOmitXMLDeclaration()
       
   598     {
       
   599         return _omitXmlDeclaration;
       
   600     }
       
   601 
       
   602 
       
   603     /**
       
   604      * Sets XML declaration omitting on and off.
       
   605      *
       
   606      * @param omit True if XML declaration should be ommited
       
   607      */
       
   608     public void setOmitXMLDeclaration( boolean omit )
       
   609     {
       
   610         _omitXmlDeclaration = omit;
       
   611     }
       
   612 
       
   613 
       
   614     /**
       
   615      * Returns true if the document type is standalone.
       
   616      * The default is false.
       
   617      */
       
   618     public boolean getStandalone()
       
   619     {
       
   620         return _standalone;
       
   621     }
       
   622 
       
   623 
       
   624     /**
       
   625      * Sets document DTD standalone. The public and system
       
   626      * identifiers must be null for the document to be
       
   627      * serialized as standalone.
       
   628      *
       
   629      * @param standalone True if document DTD is standalone
       
   630      */
       
   631     public void setStandalone( boolean standalone )
       
   632     {
       
   633         _standalone = standalone;
       
   634     }
       
   635 
       
   636 
       
   637     /**
       
   638      * Returns a list of all the elements whose text node children
       
   639      * should be output as CDATA, or null if no such elements were
       
   640      * specified.
       
   641      */
       
   642     public String[] getCDataElements()
       
   643     {
       
   644         return _cdataElements;
       
   645     }
       
   646 
       
   647 
       
   648     /**
       
   649      * Returns true if the text node children of the given elements
       
   650      * should be output as CDATA.
       
   651      *
       
   652      * @param tagName The element's tag name
       
   653      * @return True if should serialize as CDATA
       
   654      */
       
   655     public boolean isCDataElement( String tagName )
       
   656     {
       
   657         int i;
       
   658 
       
   659         if ( _cdataElements == null )
       
   660             return false;
       
   661         for ( i = 0 ; i < _cdataElements.length ; ++i )
       
   662             if ( _cdataElements[ i ].equals( tagName ) )
       
   663                 return true;
       
   664         return false;
       
   665     }
       
   666 
       
   667 
       
   668     /**
       
   669      * Sets the list of elements for which text node children
       
   670      * should be output as CDATA.
       
   671      *
       
   672      * @param cdataElements List of CDATA element tag names
       
   673      */
       
   674     public void setCDataElements( String[] cdataElements )
       
   675     {
       
   676         _cdataElements = cdataElements;
       
   677     }
       
   678 
       
   679 
       
   680     /**
       
   681      * Returns a list of all the elements whose text node children
       
   682      * should be output unescaped (no character references), or null
       
   683      * if no such elements were specified.
       
   684      */
       
   685     public String[] getNonEscapingElements()
       
   686     {
       
   687         return _nonEscapingElements;
       
   688     }
       
   689 
       
   690 
       
   691     /**
       
   692      * Returns true if the text node children of the given elements
       
   693      * should be output unescaped.
       
   694      *
       
   695      * @param tagName The element's tag name
       
   696      * @return True if should serialize unescaped
       
   697      */
       
   698     public boolean isNonEscapingElement( String tagName )
       
   699     {
       
   700         int i;
       
   701 
       
   702         if ( _nonEscapingElements == null ) {
       
   703             return false;
       
   704         }
       
   705         for ( i = 0 ; i < _nonEscapingElements.length ; ++i )
       
   706             if ( _nonEscapingElements[ i ].equals( tagName ) )
       
   707                 return true;
       
   708         return false;
       
   709     }
       
   710 
       
   711 
       
   712     /**
       
   713      * Sets the list of elements for which text node children
       
   714      * should be output unescaped (no character references).
       
   715      *
       
   716      * @param nonEscapingElements List of unescaped element tag names
       
   717      */
       
   718     public void setNonEscapingElements( String[] nonEscapingElements )
       
   719     {
       
   720         _nonEscapingElements = nonEscapingElements;
       
   721     }
       
   722 
       
   723 
       
   724 
       
   725     /**
       
   726      * Returns a specific line separator to use. The default is the
       
   727      * Web line separator (<tt>\n</tt>). A string is returned to
       
   728      * support double codes (CR + LF).
       
   729      *
       
   730      * @return The specified line separator
       
   731      */
       
   732     public String getLineSeparator()
       
   733     {
       
   734         return _lineSeparator;
       
   735     }
       
   736 
       
   737 
       
   738     /**
       
   739      * Sets the line separator. The default is the Web line separator
       
   740      * (<tt>\n</tt>). The machine's line separator can be obtained
       
   741      * from the system property <tt>line.separator</tt>, but is only
       
   742      * useful if the document is edited on machines of the same type.
       
   743      * For general documents, use the Web line separator.
       
   744      *
       
   745      * @param lineSeparator The specified line separator
       
   746      */
       
   747     public void setLineSeparator( String lineSeparator )
       
   748     {
       
   749         if ( lineSeparator == null )
       
   750             _lineSeparator =  LineSeparator.Web;
       
   751         else
       
   752             _lineSeparator = lineSeparator;
       
   753     }
       
   754 
       
   755 
       
   756     /**
       
   757      * Returns true if the default behavior for this format is to
       
   758      * preserve spaces. All elements that do not specify otherwise
       
   759      * or specify the default behavior will be formatted based on
       
   760      * this rule. All elements that specify space preserving will
       
   761      * always preserve space.
       
   762      */
       
   763     public boolean getPreserveSpace()
       
   764     {
       
   765         return _preserve;
       
   766     }
       
   767 
       
   768 
       
   769     /**
       
   770      * Sets space preserving as the default behavior. The default is
       
   771      * space stripping and all elements that do not specify otherwise
       
   772      * or use the default value will not preserve spaces.
       
   773      *
       
   774      * @param preserve True if spaces should be preserved
       
   775      */
       
   776     public void setPreserveSpace( boolean preserve )
       
   777     {
       
   778         _preserve = preserve;
       
   779     }
       
   780 
       
   781 
       
   782     /**
       
   783      * Return the selected line width for breaking up long lines.
       
   784      * When indenting, and only when indenting, long lines will be
       
   785      * broken at space boundaries based on this line width.
       
   786      * No line wrapping occurs if this value is zero.
       
   787      */
       
   788     public int getLineWidth()
       
   789     {
       
   790         return _lineWidth;
       
   791     }
       
   792 
       
   793 
       
   794     /**
       
   795      * Sets the line width. If zero then no line wrapping will
       
   796      * occur. Calling {@link #setIndenting} will reset this
       
   797      * value to zero (off) or the default (on).
       
   798      *
       
   799      * @param lineWidth The line width to use, zero for default
       
   800      * @see #getLineWidth
       
   801      * @see #setIndenting
       
   802      */
       
   803     public void setLineWidth( int lineWidth )
       
   804     {
       
   805         if ( lineWidth <= 0 )
       
   806             _lineWidth = 0;
       
   807         else
       
   808             _lineWidth = lineWidth;
       
   809     }
       
   810         /**
       
   811          * Returns the preserveEmptyAttribute flag. If flag is false, then'
       
   812          * attributes with empty string values are output as the attribute
       
   813          * name only (in HTML mode).
       
   814          * @return preserve the preserve flag
       
   815          */     public boolean getPreserveEmptyAttributes () {          return _preserveEmptyAttributes;        }       /**
       
   816          * Sets the preserveEmptyAttribute flag. If flag is false, then'
       
   817          * attributes with empty string values are output as the attribute
       
   818          * name only (in HTML mode).
       
   819          * @param preserve the preserve flag
       
   820          */     public void setPreserveEmptyAttributes (boolean preserve) {             _preserveEmptyAttributes = preserve;    }
       
   821 
       
   822     /**
       
   823      * Returns the last printable character based on the selected
       
   824      * encoding. Control characters and non-printable characters
       
   825      * are always printed as character references.
       
   826      */
       
   827     public char getLastPrintable()
       
   828     {
       
   829         if ( getEncoding() != null &&
       
   830              ( getEncoding().equalsIgnoreCase( "ASCII" ) ) )
       
   831             return 0xFF;
       
   832         else
       
   833             return 0xFFFF;
       
   834     }
       
   835 
       
   836 
       
   837     /**
       
   838      * Determine the output method for the specified document.
       
   839      * If the document is an instance of {@link org.w3c.dom.html.HTMLDocument}
       
   840      * then the method is said to be <tt>html</tt>. If the root
       
   841      * element is 'html' and all text nodes preceding the root
       
   842      * element are all whitespace, then the method is said to be
       
   843      * <tt>html</tt>. Otherwise the method is <tt>xml</tt>.
       
   844      *
       
   845      * @param doc The document to check
       
   846      * @return The suitable method
       
   847      */
       
   848     public static String whichMethod( Document doc )
       
   849     {
       
   850         Node    node;
       
   851         String  value;
       
   852         int     i;
       
   853 
       
   854         // If document is derived from HTMLDocument then the default
       
   855         // method is html.
       
   856         if ( doc instanceof HTMLDocument )
       
   857             return Method.HTML;
       
   858 
       
   859         // Lookup the root element and the text nodes preceding it.
       
   860         // If root element is html and all text nodes contain whitespace
       
   861         // only, the method is html.
       
   862 
       
   863         // FIXME (SM) should we care about namespaces here?
       
   864 
       
   865         node = doc.getFirstChild();
       
   866         while (node != null) {
       
   867             // If the root element is html, the method is html.
       
   868             if ( node.getNodeType() == Node.ELEMENT_NODE ) {
       
   869                 if ( node.getNodeName().equalsIgnoreCase( "html" ) ) {
       
   870                     return Method.HTML;
       
   871                 } else if ( node.getNodeName().equalsIgnoreCase( "root" ) ) {
       
   872                     return Method.FOP;
       
   873                 } else {
       
   874                     return Method.XML;
       
   875                 }
       
   876             } else if ( node.getNodeType() == Node.TEXT_NODE ) {
       
   877                 // If a text node preceding the root element contains
       
   878                 // only whitespace, this might be html, otherwise it's
       
   879                 // definitely xml.
       
   880                 value = node.getNodeValue();
       
   881                 for ( i = 0 ; i < value.length() ; ++i )
       
   882                     if ( value.charAt( i ) != 0x20 && value.charAt( i ) != 0x0A &&
       
   883                          value.charAt( i ) != 0x09 && value.charAt( i ) != 0x0D )
       
   884                         return Method.XML;
       
   885             }
       
   886             node = node.getNextSibling();
       
   887         }
       
   888         // Anything else, the method is xml.
       
   889         return Method.XML;
       
   890     }
       
   891 
       
   892 
       
   893     /**
       
   894      * Returns the document type public identifier
       
   895      * specified for this document, or null.
       
   896      */
       
   897     public static String whichDoctypePublic( Document doc )
       
   898     {
       
   899         DocumentType doctype;
       
   900 
       
   901            /*  DOM Level 2 was introduced into the code base*/
       
   902            doctype = doc.getDoctype();
       
   903            if ( doctype != null ) {
       
   904            // Note on catch: DOM Level 1 does not specify this method
       
   905            // and the code will throw a NoSuchMethodError
       
   906            try {
       
   907            return doctype.getPublicId();
       
   908            } catch ( Error except ) {  }
       
   909            }
       
   910 
       
   911         if ( doc instanceof HTMLDocument )
       
   912             return DTD.XHTMLPublicId;
       
   913         return null;
       
   914     }
       
   915 
       
   916 
       
   917     /**
       
   918      * Returns the document type system identifier
       
   919      * specified for this document, or null.
       
   920      */
       
   921     public static String whichDoctypeSystem( Document doc )
       
   922     {
       
   923         DocumentType doctype;
       
   924 
       
   925         /* DOM Level 2 was introduced into the code base*/
       
   926            doctype = doc.getDoctype();
       
   927            if ( doctype != null ) {
       
   928            // Note on catch: DOM Level 1 does not specify this method
       
   929            // and the code will throw a NoSuchMethodError
       
   930            try {
       
   931            return doctype.getSystemId();
       
   932            } catch ( Error except ) { }
       
   933            }
       
   934 
       
   935         if ( doc instanceof HTMLDocument )
       
   936             return DTD.XHTMLSystemId;
       
   937         return null;
       
   938     }
       
   939 
       
   940 
       
   941     /**
       
   942      * Returns the suitable media format for a document
       
   943      * output with the specified method.
       
   944      */
       
   945     public static String whichMediaType( String method )
       
   946     {
       
   947         if ( method.equalsIgnoreCase( Method.XML ) )
       
   948             return "text/xml";
       
   949         if ( method.equalsIgnoreCase( Method.HTML ) )
       
   950             return "text/html";
       
   951         if ( method.equalsIgnoreCase( Method.XHTML ) )
       
   952             return "text/html";
       
   953         if ( method.equalsIgnoreCase( Method.TEXT ) )
       
   954             return "text/plain";
       
   955         if ( method.equalsIgnoreCase( Method.FOP ) )
       
   956             return "application/pdf";
       
   957         return null;
       
   958     }
       
   959 
       
   960 
       
   961 }