--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jaxp/src/com/sun/org/apache/xml/internal/serialize/HTMLdtd.java Thu Apr 12 08:38:26 2012 -0700
@@ -0,0 +1,557 @@
+/*
+ * reserved comment block
+ * DO NOT REMOVE OR ALTER!
+ */
+/*
+ * Copyright 1999-2002,2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+// Aug 21, 2000:
+// Fixed bug in isElement and made HTMLdtd public.
+// Contributed by Eric SCHAEFFER" <eschaeffer@posterconseil.com>
+
+
+package com.sun.org.apache.xml.internal.serialize;
+
+import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter;
+
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.BufferedReader;
+import java.util.Hashtable;
+import java.util.Locale;
+
+
+/**
+ * Utility class for accessing information specific to HTML documents.
+ * The HTML DTD is expressed as three utility function groups. Two methods
+ * allow for checking whether an element requires an open tag on printing
+ * ({@link #isEmptyTag}) or on parsing ({@link #isOptionalClosing}).
+ * <P>
+ * Two other methods translate character references from name to value and
+ * from value to name. A small entities resource is loaded into memory by the
+ * static initializer of this class, so lookups never read the resource again.
+ *
+ *
+ * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
+ */
+public final class HTMLdtd
+{
+
+ /**
+ * Public identifier of the HTML 4.01 (Strict) document type; see also {@link #HTMLSystemId}.
+ */
+ public static final String HTMLPublicId = "-//W3C//DTD HTML 4.01//EN";
+
+ /**
+ * System identifier (DTD URL) of the HTML 4.01 (Strict) document type.
+ */
+ public static final String HTMLSystemId =
+ "http://www.w3.org/TR/html4/strict.dtd";
+
+ /**
+ * Public identifier of the XHTML 1.0 (Strict) document type; see also {@link #XHTMLSystemId}.
+ */
+ public static final String XHTMLPublicId =
+ "-//W3C//DTD XHTML 1.0 Strict//EN";
+
+ /**
+ * System identifier (DTD URL) of the XHTML 1.0 (Strict) document type.
+ */
+ public static final String XHTMLSystemId =
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
+
+ /**
+ * Table of reverse character reference mapping. Character codes are held
+ * as <TT>Integer</TT> keys, mapped to their reference name (a string).
+ */
+ private static Hashtable _byChar;
+
+
+ /**
+ * Table of entity name to value mapping. Names are held as strings,
+ * character codes as <TT>Integer</TT> objects.
+ */
+ private static Hashtable _byName;
+
+ /** Maps an upper-case tag name to the <TT>String[]</TT> of its boolean attribute names. */
+ private static Hashtable _boolAttrs;
+
+
+ /**
+ * Holds element definitions: upper-case tag name mapped to an <TT>Integer</TT> of flag bits.
+ */
+ private static Hashtable _elemDefs;
+
+
+ /**
+ * Name of the HTML entities resource that is loaded upon initialization.
+ * It is resolved relative to this class with <TT>Class.getResourceAsStream</TT>.
+ */
+ private static final String ENTITIES_RESOURCE = "HTMLEntities.res";
+
+
+ /**
+ * Only the opening tag should be printed; tested by {@link #isOnlyOpening}.
+ */
+ private static final int ONLY_OPENING = 0x0001;
+
+ /**
+ * Element contains element content only; tested by {@link #isElementContent}.
+ */
+ private static final int ELEM_CONTENT = 0x0002;
+
+
+ /**
+ * Element preserves spaces in its text content; tested by {@link #isPreserveSpace}.
+ */
+ private static final int PRESERVE = 0x0004;
+
+
+ /**
+ * Closing tag is optional; tested by {@link #isOptionalClosing}.
+ */
+ private static final int OPT_CLOSING = 0x0008;
+
+
+ /**
+ * Element is empty. The mask includes <TT>ONLY_OPENING</TT>, so an empty element is also only-opening.
+ */
+ private static final int EMPTY = 0x0010 | ONLY_OPENING;
+
+
+ /**
+ * Allowed to appear in HEAD; {@link #isClosing} treats an element without this flag as closing an open HEAD.
+ */
+ private static final int ALLOWED_HEAD = 0x0020;
+
+
+ /**
+ * When opened, closes an open P.
+ */
+ private static final int CLOSE_P = 0x0040;
+
+
+ /**
+ * When opened, closes an open DD or DT.
+ */
+ private static final int CLOSE_DD_DT = 0x0080;
+
+
+ /**
+ * When opened, closes itself; {@link #isClosing} consults it when the open element is LI or OPTION.
+ */
+ private static final int CLOSE_SELF = 0x0100;
+
+
+ /**
+ * When opened, closes another table section (THEAD, TFOOT, TBODY, TR or COLGROUP).
+ */
+ private static final int CLOSE_TABLE = 0x0200;
+
+
+ /**
+ * When opened, closes TH or TD. Note the value is 0x4000 (not 0x0400); the bit is still distinct from the others.
+ */
+ private static final int CLOSE_TH_TD = 0x04000;
+
+
+ /**
+ * Returns true if element is declared to be empty. HTML elements are
+ * defined as empty in the DTD, not by the document syntax.
+ *
+ * @param tagName The element tag name (upper case)
+ * @return True if element is empty
+ */
+    public static boolean isEmptyTag( String tagName ) {
+        final boolean declaredEmpty = isElement( tagName, EMPTY ); // EMPTY also carries ONLY_OPENING
+        return declaredEmpty;
+    }
+
+
+ /**
+ * Returns true if element is declared to have element content.
+ * Whitespaces appearing inside element content will be ignored,
+ * other text will simply report an error.
+ *
+ * @param tagName The element tag name (upper case)
+ * @return True if element content
+ */
+    public static boolean isElementContent( String tagName ) {
+        final boolean elementOnly = isElement( tagName, ELEM_CONTENT ); // false for undefined tags
+        return elementOnly;
+    }
+
+
+ /**
+ * Returns true if element's textual contents preserves spaces.
+ * This applies to PRE, TEXTAREA, SCRIPT, NOSCRIPT and STYLE; all other
+ * HTML elements do not preserve space.
+ *
+ * @param tagName The element tag name (upper case)
+ * @return True if element's text content preserves spaces
+ */
+    public static boolean isPreserveSpace( String tagName ) {
+        final boolean keepsSpaces = isElement( tagName, PRESERVE ); // false for undefined tags
+        return keepsSpaces;
+    }
+
+
+ /**
+ * Returns true if element's closing tag is optional and need not
+ * exist. An error will not be reported for such elements if they
+ * are not closed. For example, <tt>LI</tt> is most often not closed.
+ *
+ * @param tagName The element tag name (upper case)
+ * @return True if closing tag implied
+ */
+    public static boolean isOptionalClosing( String tagName ) {
+        final boolean mayOmitClose = isElement( tagName, OPT_CLOSING ); // false for undefined tags
+        return mayOmitClose;
+    }
+
+
+ /**
+ * Returns true if element's closing tag is generally not printed.
+ * For example, <tt>LI</tt> should not print the closing tag.
+ *
+ * @param tagName The element tag name (upper case)
+ * @return True if only opening tag should be printed
+ */
+    public static boolean isOnlyOpening( String tagName ) {
+        final boolean openingOnly = isElement( tagName, ONLY_OPENING ); // also true for EMPTY elements
+        return openingOnly;
+    }
+
+
+ /**
+ * Returns true if the opening of one element (<tt>tagName</tt>) implies
+ * the closing of another open element (<tt>openTag</tt>). For example,
+ * every opening <tt>LI</tt> will close the previously open <tt>LI</tt>,
+ * and every opening <tt>BODY</tt> will close the previously open <tt>HEAD</tt>.
+ *
+ * @param tagName The newly opened element
+ * @param openTag The already opened element
+ * @return True if closing tag closes opening tag
+ */
+    public static boolean isClosing( String tagName, String openTag )
+    {
+        final int mask;
+
+        // HEAD is the one inverted case: it is closed by anything NOT flagged as allowed in it.
+        if ( openTag.equalsIgnoreCase( "HEAD" ) ) {
+            return ! isElement( tagName, ALLOWED_HEAD );
+        } else if ( openTag.equalsIgnoreCase( "P" ) ) {
+            mask = CLOSE_P;        // elements flagged CLOSE_P (P itself included) close P
+        } else if ( openTag.equalsIgnoreCase( "DT" ) || openTag.equalsIgnoreCase( "DD" ) ) {
+            mask = CLOSE_DD_DT;    // DT closes DD, DD closes DT
+        } else if ( openTag.equalsIgnoreCase( "LI" ) || openTag.equalsIgnoreCase( "OPTION" ) ) {
+            mask = CLOSE_SELF;     // LI and OPTION close themselves
+        } else if ( openTag.equalsIgnoreCase( "THEAD" ) || openTag.equalsIgnoreCase( "TFOOT" ) ||
+                    openTag.equalsIgnoreCase( "TBODY" ) || openTag.equalsIgnoreCase( "TR" ) ||
+                    openTag.equalsIgnoreCase( "COLGROUP" ) ) {
+            mask = CLOSE_TABLE;    // each of these table sections closes all the others
+        } else if ( openTag.equalsIgnoreCase( "TH" ) || openTag.equalsIgnoreCase( "TD" ) ) {
+            mask = CLOSE_TH_TD;    // TD closes TH and TH closes TD
+        } else {
+            return false;          // no implied closing for any other open element
+        }
+        return isElement( tagName, mask );
+    }
+
+
+ /**
+ * Returns true if the specified attribute is a URI and should be
+ * escaped appropriately. In HTML URIs are escaped differently
+ * than normal attributes.
+ *
+ * @param tagName The element's tag name
+ * @param attrName The attribute's name
+ */
+    public static boolean isURI( String tagName, String attrName ) {
+        // Only the attribute name is examined; tagName is not consulted.
+        final boolean isHref = attrName.equalsIgnoreCase( "href" );
+        return isHref || attrName.equalsIgnoreCase( "src" );
+    }
+
+
+ /**
+ * Returns true if the specified attribute is a boolean and should be
+ * printed without the value. This applies to attributes that are true
+ * if they exist, such as selected (OPTION/INPUT).
+ *
+ * @param tagName The element's tag name
+ * @param attrName The attribute's name
+ */
+    public static boolean isBoolean( String tagName, String attrName ) {
+        // Table keys are upper-case tag names, so normalize before the lookup.
+        final String[] candidates = (String[]) _boolAttrs.get( tagName.toUpperCase(Locale.ENGLISH) );
+        if ( candidates != null ) {
+            for ( int pos = 0 ; pos < candidates.length ; ++pos ) {
+                if ( candidates[ pos ].equalsIgnoreCase( attrName ) ) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+
+ /**
+ * Returns the value of an HTML character reference by its name. If the
+ * reference is not found or was not defined as a character reference,
+ * returns EOF (-1).
+ *
+ * @param name Name of character reference
+ * @return Character code or EOF (-1)
+ */
+    public static int charFromName( String name )
+    {
+        // Returns immediately once the tables exist.
+        initialize();
+
+        final Object entry = _byName.get( name );
+
+        // instanceof is false for null, so an unknown name yields -1 as well.
+        final boolean isCharRef = entry instanceof Integer;
+        return isCharRef ? ( (Integer) entry ).intValue() : -1;
+    }
+
+
+ /**
+ * Returns the name of an HTML character reference based on its character
+ * value. Only valid for entities defined from character references. If no
+ * such character value was defined, return null.
+ *
+ * @param value Character value of entity
+ * @return Entity's name or null
+ */
+    public static String fromChar(int value )
+    {
+        // Entities are defined from char values only, so nothing above 0xffff can match.
+        if ( value <= 0xffff ) {
+            initialize();
+            final Object name = _byChar.get( new Integer( value ) );
+            return (String) name;
+        }
+
+        return null;
+    }
+
+
+ /**
+ * Initialize upon first access. Will load all the HTML character references
+ * into a list that is accessible by name or character value and is optimized
+ * for character substitution. This method may be called any number of times
+ * but will execute only once.
+ */
+    private static void initialize()
+    {
+        InputStream is = null;
+
+        // Make sure not to initialize twice.
+        if ( _byName != null )
+            return;
+        try {
+            _byName = new Hashtable();
+            _byChar = new Hashtable();
+            is = HTMLdtd.class.getResourceAsStream( ENTITIES_RESOURCE );
+            if ( is == null ) {
+                throw new RuntimeException(
+                    DOMMessageFormatter.formatMessage(
+                        DOMMessageFormatter.SERIALIZER_DOMAIN,
+                        "ResourceNotFound", new Object[] {ENTITIES_RESOURCE}));
+            }
+            BufferedReader reader = new BufferedReader( new InputStreamReader( is, "ASCII" ) );
+            String line;
+            while ( ( line = reader.readLine() ) != null ) {
+                // Blank lines and lines starting with '#' carry no entity.
+                if ( line.length() != 0 && line.charAt( 0 ) != '#' )
+                    defineEntityFromLine( line );
+            }
+            // No explicit close here: the finally block closes the stream exactly once.
+        } catch ( Exception except ) {
+            // Same message as before; the original exception is chained so its stack trace survives.
+            throw new RuntimeException(
+                DOMMessageFormatter.formatMessage(
+                    DOMMessageFormatter.SERIALIZER_DOMAIN,
+                    "ResourceNotLoaded", new Object[] {ENTITIES_RESOURCE, except.toString()}),
+                except );
+        } finally {
+            if ( is != null ) {
+                try {
+                    is.close();
+                } catch ( Exception ignored ) {
+                    // Best effort: a failure to close must not mask the outcome of the load.
+                }
+            }
+        }
+    }
+
+    /**
+     * Parses one "name code ..." line of the entities resource and defines the entity. Lines whose
+     * name is shorter than two characters, or with nothing after the first space, are skipped.
+     */
+    private static void defineEntityFromLine( String line )
+    {
+        int index = line.indexOf( ' ' );
+        if ( index <= 1 || index + 1 >= line.length() )
+            return;
+        String name = line.substring( 0, index );
+        String value = line.substring( index + 1 );
+        index = value.indexOf( ' ' );
+        if ( index > 0 )
+            value = value.substring( 0, index );
+        defineEntity( name, (char) Integer.parseInt( value ) );
+    }
+
+
+ /**
+ * Defines a new character reference. The reference's name and value are
+ * supplied. Nothing happens if the character reference is already defined.
+ * <P>
+ * Unlike internal entities, character references are a string to single
+ * character mapping. They are used to map non-ASCII characters both on
+ * parsing and printing, primarily for HTML documents. '&amp;amp;' is an
+ * example of a character reference.
+ *
+ * @param name The entity's name
+ * @param value The entity's value
+ */
+    private static void defineEntity( String name, char value ) {
+        if ( _byName.containsKey( name ) )
+            return;  // the first definition of a name wins
+        final Integer code = new Integer( value );
+        _byName.put( name, code );
+        _byChar.put( code, name );
+    }
+
+
+    private static void defineElement( String name, int flags ) {
+        final Integer mask = new Integer( flags );  // stored boxed; isElement unboxes it again
+        _elemDefs.put( name, mask );
+    }
+
+
+    private static void defineBoolean( String tagName, String attrName ) {
+        final String[] single = new String[] { attrName };  // lookups always expect a String[]
+        defineBoolean( tagName, single );
+    }
+
+
+    private static void defineBoolean( String tagName, String[] attrNames ) {
+        // The array is stored as given (no copy); a later call for the same tag replaces it.
+        _boolAttrs.put( tagName, attrNames );
+    }
+
+
+    private static boolean isElement( String name, int flag ) {
+        // Definitions are keyed by upper-case tag name; an undefined tag has no flags at all.
+        final Object defined = _elemDefs.get( name.toUpperCase(Locale.ENGLISH) );
+        if ( defined == null ) {
+            return false;
+        }
+        // Every bit of the mask must be present (matters for composite masks such as EMPTY).
+        final int bits = ( (Integer) defined ).intValue();
+        return ( bits & flag ) == flag;
+    }
+
+
+    static {
+        _elemDefs = new Hashtable();  // upper-case tag name -> Integer flag mask (see isElement)
+        defineElement( "ADDRESS", CLOSE_P );
+        defineElement( "AREA", EMPTY );
+        defineElement( "BASE", EMPTY | ALLOWED_HEAD );
+        defineElement( "BASEFONT", EMPTY );
+        defineElement( "BLOCKQUOTE", CLOSE_P );
+        defineElement( "BODY", OPT_CLOSING );
+        defineElement( "BR", EMPTY );
+        defineElement( "COL", EMPTY );
+        defineElement( "COLGROUP", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
+        defineElement( "DD", OPT_CLOSING | ONLY_OPENING | CLOSE_DD_DT );
+        defineElement( "DIV", CLOSE_P );
+        defineElement( "DL", ELEM_CONTENT | CLOSE_P );
+        defineElement( "DT", OPT_CLOSING | ONLY_OPENING | CLOSE_DD_DT );
+        defineElement( "FIELDSET", CLOSE_P );
+        defineElement( "FORM", CLOSE_P );
+        defineElement( "FRAME", EMPTY | OPT_CLOSING );
+        defineElement( "H1", CLOSE_P );
+        defineElement( "H2", CLOSE_P );
+        defineElement( "H3", CLOSE_P );
+        defineElement( "H4", CLOSE_P );
+        defineElement( "H5", CLOSE_P );
+        defineElement( "H6", CLOSE_P );
+        defineElement( "HEAD", ELEM_CONTENT | OPT_CLOSING );
+        defineElement( "HR", EMPTY | CLOSE_P );
+        defineElement( "HTML", ELEM_CONTENT | OPT_CLOSING );
+        defineElement( "IMG", EMPTY );
+        defineElement( "INPUT", EMPTY );
+        defineElement( "ISINDEX", EMPTY | ALLOWED_HEAD );
+        defineElement( "LI", OPT_CLOSING | ONLY_OPENING | CLOSE_SELF );
+        defineElement( "LINK", EMPTY | ALLOWED_HEAD );
+        defineElement( "MAP", ALLOWED_HEAD );
+        defineElement( "META", EMPTY | ALLOWED_HEAD );
+        defineElement( "OL", ELEM_CONTENT | CLOSE_P );
+        defineElement( "OPTGROUP", ELEM_CONTENT );
+        defineElement( "OPTION", OPT_CLOSING | ONLY_OPENING | CLOSE_SELF );
+        defineElement( "P", OPT_CLOSING | CLOSE_P | CLOSE_SELF );
+        defineElement( "PARAM", EMPTY );
+        defineElement( "PRE", PRESERVE | CLOSE_P );
+        defineElement( "SCRIPT", ALLOWED_HEAD | PRESERVE );
+        defineElement( "NOSCRIPT", ALLOWED_HEAD | PRESERVE );
+        defineElement( "SELECT", ELEM_CONTENT );
+        defineElement( "STYLE", ALLOWED_HEAD | PRESERVE );
+        defineElement( "TABLE", ELEM_CONTENT | CLOSE_P );
+        defineElement( "TBODY", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
+        defineElement( "TD", OPT_CLOSING | CLOSE_TH_TD );
+        defineElement( "TEXTAREA", PRESERVE );
+        defineElement( "TFOOT", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
+        defineElement( "TH", OPT_CLOSING | CLOSE_TH_TD );
+        defineElement( "THEAD", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
+        defineElement( "TITLE", ALLOWED_HEAD );
+        defineElement( "TR", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE );
+        defineElement( "UL", ELEM_CONTENT | CLOSE_P );
+        // Boolean attributes: upper-case tag name -> attribute names printed without a value.
+        _boolAttrs = new Hashtable();
+        defineBoolean( "AREA", "nohref" );  // not "href": isURI treats href as a URI attribute
+        defineBoolean( "BUTTON", "disabled" );
+        defineBoolean( "DIR", "compact" );
+        defineBoolean( "DL", "compact" );
+        defineBoolean( "FRAME", "noresize" );
+        defineBoolean( "HR", "noshade" );
+        defineBoolean( "IMAGE", "ismap" );  // kept for compatibility; the element defined above is IMG
+        defineBoolean( "IMG", "ismap" );
+        defineBoolean( "INPUT", new String[] { "defaultchecked", "checked", "readonly", "disabled" } );
+        defineBoolean( "LINK", "link" );
+        defineBoolean( "MENU", "compact" );
+        defineBoolean( "OBJECT", "declare" );
+        defineBoolean( "OL", "compact" );
+        defineBoolean( "OPTGROUP", "disabled" );
+        defineBoolean( "OPTION", new String[] { "default-selected", "selected", "disabled" } );
+        defineBoolean( "SCRIPT", "defer" );
+        defineBoolean( "SELECT", new String[] { "multiple", "disabled" } );
+        defineBoolean( "STYLE", "disabled" );
+        defineBoolean( "TD", "nowrap" );
+        defineBoolean( "TH", "nowrap" );
+        defineBoolean( "TEXTAREA", new String[] { "disabled", "readonly" } );
+        defineBoolean( "UL", "compact" );
+        // Load the character entity tables eagerly, as part of class initialization.
+        initialize();
+    }
+
+
+
+}