diff -r 1d7e6da6adc8 -r c348e06f0e82 jaxp/src/com/sun/org/apache/xml/internal/serialize/HTMLdtd.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/jaxp/src/com/sun/org/apache/xml/internal/serialize/HTMLdtd.java Thu Apr 12 08:38:26 2012 -0700 @@ -0,0 +1,557 @@ +/* + * reserved comment block + * DO NOT REMOVE OR ALTER! + */ +/* + * Copyright 1999-2002,2004 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +// Aug 21, 2000: +// Fixed bug in isElement and made HTMLdtd public. +// Contributed by Eric SCHAEFFER" + + +package com.sun.org.apache.xml.internal.serialize; + +import com.sun.org.apache.xerces.internal.dom.DOMMessageFormatter; + +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.BufferedReader; +import java.util.Hashtable; +import java.util.Locale; + + +/** + * Utility class for accessing information specific to HTML documents. + * The HTML DTD is expressed as three utility function groups. Two methods + * allow for checking whether an element requires an open tag on printing + * ({@link #isEmptyTag}) or on parsing ({@link #isOptionalClosing}). + *

+ * Two other methods translate character references from name to value and + * from value to name. A small entities resource is loaded into memory the + * first time any of these methods is called for fast and efficient access. + * + * + * @author Assaf Arkin + */ +public final class HTMLdtd +{ + + /** + * Public identifier for HTML 4.01 (Strict) document type. + */ + public static final String HTMLPublicId = "-//W3C//DTD HTML 4.01//EN"; + + /** + * System identifier for HTML 4.01 (Strict) document type. + */ + public static final String HTMLSystemId = + "http://www.w3.org/TR/html4/strict.dtd"; + + /** + * Public identifier for XHTML 1.0 (Strict) document type. + */ + public static final String XHTMLPublicId = + "-//W3C//DTD XHTML 1.0 Strict//EN"; + + /** + * System identifier for XHTML 1.0 (Strict) document type. + */ + public static final String XHTMLSystemId = + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; + + /** + * Table of reverse character reference mapping. Character codes are held + * as single-character strings, mapped to their reference name. + */ + private static Hashtable _byChar; + + + /** + * Table of entity name to value mapping. Entities are held as strings, + * character references as Character objects. + */ + private static Hashtable _byName; + + + private static Hashtable _boolAttrs; + + + /** + * Holds element definitions. + */ + private static Hashtable _elemDefs; + + + /** + * Locates the HTML entities file that is loaded upon initialization. + * This file is a resource loaded with the default class loader. + */ + private static final String ENTITIES_RESOURCE = "HTMLEntities.res"; + + + /** + * Only opening tag should be printed. + */ + private static final int ONLY_OPENING = 0x0001; + + /** + * Element contains element content only. + */ + private static final int ELEM_CONTENT = 0x0002; + + + /** + * Element preserve spaces. + */ + private static final int PRESERVE = 0x0004; + + + /** + * Optional closing tag. + */ + private static final int OPT_CLOSING = 0x0008; + + + /** + * Element is empty (also means only opening tag) + */ + private static final int EMPTY = 0x0010 | ONLY_OPENING; + + + /** + * Allowed to appear in head. + */ + private static final int ALLOWED_HEAD = 0x0020; + + + /** + * When opened, closes P. + */ + private static final int CLOSE_P = 0x0040; + + + /** + * When opened, closes DD or DT. + */ + private static final int CLOSE_DD_DT = 0x0080; + + + /** + * When opened, closes itself. + */ + private static final int CLOSE_SELF = 0x0100; + + + /** + * When opened, closes another table section. + */ + private static final int CLOSE_TABLE = 0x0200; + + + /** + * When opened, closes TH or TD. + */ + private static final int CLOSE_TH_TD = 0x04000; + + + /** + * Returns true if element is declared to be empty. HTML elements are + * defines as empty in the DTD, not by the document syntax. + * + * @param tagName The element tag name (upper case) + * @return True if element is empty + */ + public static boolean isEmptyTag( String tagName ) + { + return isElement( tagName, EMPTY ); + } + + + /** + * Returns true if element is declared to have element content. + * Whitespaces appearing inside element content will be ignored, + * other text will simply report an error. + * + * @param tagName The element tag name (upper case) + * @return True if element content + */ + public static boolean isElementContent( String tagName ) + { + return isElement( tagName, ELEM_CONTENT ); + } + + + /** + * Returns true if element's textual contents preserves spaces. + * This only applies to PRE and TEXTAREA, all other HTML elements + * do not preserve space. + * + * @param tagName The element tag name (upper case) + * @return True if element's text content preserves spaces + */ + public static boolean isPreserveSpace( String tagName ) + { + return isElement( tagName, PRESERVE ); + } + + + /** + * Returns true if element's closing tag is optional and need not + * exist. An error will not be reported for such elements if they + * are not closed. For example, LI is most often not closed. + * + * @param tagName The element tag name (upper case) + * @return True if closing tag implied + */ + public static boolean isOptionalClosing( String tagName ) + { + return isElement( tagName, OPT_CLOSING ); + } + + + /** + * Returns true if element's closing tag is generally not printed. + * For example, LI should not print the closing tag. + * + * @param tagName The element tag name (upper case) + * @return True if only opening tag should be printed + */ + public static boolean isOnlyOpening( String tagName ) + { + return isElement( tagName, ONLY_OPENING ); + } + + + /** + * Returns true if the opening of one element (tagName) implies + * the closing of another open element (openTag). For example, + * every opening LI will close the previously open LI, + * and every opening BODY will close the previously open HEAD. + * + * @param tagName The newly opened element + * @param openTag The already opened element + * @return True if closing tag closes opening tag + */ + public static boolean isClosing( String tagName, String openTag ) + { + // Several elements are defined as closing the HEAD + if ( openTag.equalsIgnoreCase( "HEAD" ) ) + return ! isElement( tagName, ALLOWED_HEAD ); + // P closes iteself + if ( openTag.equalsIgnoreCase( "P" ) ) + return isElement( tagName, CLOSE_P ); + // DT closes DD, DD closes DT + if ( openTag.equalsIgnoreCase( "DT" ) || openTag.equalsIgnoreCase( "DD" ) ) + return isElement( tagName, CLOSE_DD_DT ); + // LI and OPTION close themselves + if ( openTag.equalsIgnoreCase( "LI" ) || openTag.equalsIgnoreCase( "OPTION" ) ) + return isElement( tagName, CLOSE_SELF ); + // Each of these table sections closes all the others + if ( openTag.equalsIgnoreCase( "THEAD" ) || openTag.equalsIgnoreCase( "TFOOT" ) || + openTag.equalsIgnoreCase( "TBODY" ) || openTag.equalsIgnoreCase( "TR" ) || + openTag.equalsIgnoreCase( "COLGROUP" ) ) + return isElement( tagName, CLOSE_TABLE ); + // TD closes TH and TH closes TD + if ( openTag.equalsIgnoreCase( "TH" ) || openTag.equalsIgnoreCase( "TD" ) ) + return isElement( tagName, CLOSE_TH_TD ); + return false; + } + + + /** + * Returns true if the specified attribute it a URI and should be + * escaped appropriately. In HTML URIs are escaped differently + * than normal attributes. + * + * @param tagName The element's tag name + * @param attrName The attribute's name + */ + public static boolean isURI( String tagName, String attrName ) + { + // Stupid checks. + return ( attrName.equalsIgnoreCase( "href" ) || attrName.equalsIgnoreCase( "src" ) ); + } + + + /** + * Returns true if the specified attribute is a boolean and should be + * printed without the value. This applies to attributes that are true + * if they exist, such as selected (OPTION/INPUT). + * + * @param tagName The element's tag name + * @param attrName The attribute's name + */ + public static boolean isBoolean( String tagName, String attrName ) + { + String[] attrNames; + + attrNames = (String[]) _boolAttrs.get( tagName.toUpperCase(Locale.ENGLISH) ); + if ( attrNames == null ) + return false; + for ( int i = 0 ; i < attrNames.length ; ++i ) + if ( attrNames[ i ].equalsIgnoreCase( attrName ) ) + return true; + return false; + } + + + /** + * Returns the value of an HTML character reference by its name. If the + * reference is not found or was not defined as a character reference, + * returns EOF (-1). + * + * @param name Name of character reference + * @return Character code or EOF (-1) + */ + public static int charFromName( String name ) + { + Object value; + + initialize(); + value = _byName.get( name ); + if ( value != null && value instanceof Integer ) + return ( (Integer) value ).intValue(); + else + return -1; + } + + + /** + * Returns the name of an HTML character reference based on its character + * value. Only valid for entities defined from character references. If no + * such character value was defined, return null. + * + * @param value Character value of entity + * @return Entity's name or null + */ + public static String fromChar(int value ) + { + if (value > 0xffff) + return null; + + String name; + + initialize(); + name = (String) _byChar.get( new Integer( value ) ); + return name; + } + + + /** + * Initialize upon first access. Will load all the HTML character references + * into a list that is accessible by name or character value and is optimized + * for character substitution. This method may be called any number of times + * but will execute only once. + */ + private static void initialize() + { + InputStream is = null; + BufferedReader reader = null; + int index; + String name; + String value; + int code; + String line; + + // Make sure not to initialize twice. + if ( _byName != null ) + return; + try { + _byName = new Hashtable(); + _byChar = new Hashtable(); + is = HTMLdtd.class.getResourceAsStream( ENTITIES_RESOURCE ); + if ( is == null ) { + throw new RuntimeException( + DOMMessageFormatter.formatMessage( + DOMMessageFormatter.SERIALIZER_DOMAIN, + "ResourceNotFound", new Object[] {ENTITIES_RESOURCE})); + } + reader = new BufferedReader( new InputStreamReader( is, "ASCII" ) ); + line = reader.readLine(); + while ( line != null ) { + if ( line.length() == 0 || line.charAt( 0 ) == '#' ) { + line = reader.readLine(); + continue; + } + index = line.indexOf( ' ' ); + if ( index > 1 ) { + name = line.substring( 0, index ); + ++index; + if ( index < line.length() ) { + value = line.substring( index ); + index = value.indexOf( ' ' ); + if ( index > 0 ) + value = value.substring( 0, index ); + code = Integer.parseInt( value ); + defineEntity( name, (char) code ); + } + } + line = reader.readLine(); + } + is.close(); + } catch ( Exception except ) { + throw new RuntimeException( + DOMMessageFormatter.formatMessage( + DOMMessageFormatter.SERIALIZER_DOMAIN, + "ResourceNotLoaded", new Object[] {ENTITIES_RESOURCE, except.toString()})); + } finally { + if ( is != null ) { + try { + is.close(); + } catch ( Exception except ) { } + } + } + } + + + /** + * Defines a new character reference. The reference's name and value are + * supplied. Nothing happens if the character reference is already defined. + *

+ * Unlike internal entities, character references are a string to single + * character mapping. They are used to map non-ASCII characters both on + * parsing and printing, primarily for HTML documents. '<amp;' is an + * example of a character reference. + * + * @param name The entity's name + * @param value The entity's value + */ + private static void defineEntity( String name, char value ) + { + if ( _byName.get( name ) == null ) { + _byName.put( name, new Integer( value ) ); + _byChar.put( new Integer( value ), name ); + } + } + + + private static void defineElement( String name, int flags ) + { + _elemDefs.put( name, new Integer( flags ) ); + } + + + private static void defineBoolean( String tagName, String attrName ) + { + defineBoolean( tagName, new String[] { attrName } ); + } + + + private static void defineBoolean( String tagName, String[] attrNames ) + { + _boolAttrs.put( tagName, attrNames ); + } + + + private static boolean isElement( String name, int flag ) + { + Integer flags; + + flags = (Integer) _elemDefs.get( name.toUpperCase(Locale.ENGLISH) ); + if ( flags == null ) + return false; + else + return ( ( flags.intValue() & flag ) == flag ); + } + + + static + { + _elemDefs = new Hashtable(); + defineElement( "ADDRESS", CLOSE_P ); + defineElement( "AREA", EMPTY ); + defineElement( "BASE", EMPTY | ALLOWED_HEAD ); + defineElement( "BASEFONT", EMPTY ); + defineElement( "BLOCKQUOTE", CLOSE_P ); + defineElement( "BODY", OPT_CLOSING ); + defineElement( "BR", EMPTY ); + defineElement( "COL", EMPTY ); + defineElement( "COLGROUP", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE ); + defineElement( "DD", OPT_CLOSING | ONLY_OPENING | CLOSE_DD_DT ); + defineElement( "DIV", CLOSE_P ); + defineElement( "DL", ELEM_CONTENT | CLOSE_P ); + defineElement( "DT", OPT_CLOSING | ONLY_OPENING | CLOSE_DD_DT ); + defineElement( "FIELDSET", CLOSE_P ); + defineElement( "FORM", CLOSE_P ); + defineElement( "FRAME", EMPTY | OPT_CLOSING ); + defineElement( "H1", CLOSE_P ); + defineElement( "H2", CLOSE_P ); + defineElement( "H3", CLOSE_P ); + defineElement( "H4", CLOSE_P ); + defineElement( "H5", CLOSE_P ); + defineElement( "H6", CLOSE_P ); + defineElement( "HEAD", ELEM_CONTENT | OPT_CLOSING ); + defineElement( "HR", EMPTY | CLOSE_P ); + defineElement( "HTML", ELEM_CONTENT | OPT_CLOSING ); + defineElement( "IMG", EMPTY ); + defineElement( "INPUT", EMPTY ); + defineElement( "ISINDEX", EMPTY | ALLOWED_HEAD ); + defineElement( "LI", OPT_CLOSING | ONLY_OPENING | CLOSE_SELF ); + defineElement( "LINK", EMPTY | ALLOWED_HEAD ); + defineElement( "MAP", ALLOWED_HEAD ); + defineElement( "META", EMPTY | ALLOWED_HEAD ); + defineElement( "OL", ELEM_CONTENT | CLOSE_P ); + defineElement( "OPTGROUP", ELEM_CONTENT ); + defineElement( "OPTION", OPT_CLOSING | ONLY_OPENING | CLOSE_SELF ); + defineElement( "P", OPT_CLOSING | CLOSE_P | CLOSE_SELF ); + defineElement( "PARAM", EMPTY ); + defineElement( "PRE", PRESERVE | CLOSE_P ); + defineElement( "SCRIPT", ALLOWED_HEAD | PRESERVE ); + defineElement( "NOSCRIPT", ALLOWED_HEAD | PRESERVE ); + defineElement( "SELECT", ELEM_CONTENT ); + defineElement( "STYLE", ALLOWED_HEAD | PRESERVE ); + defineElement( "TABLE", ELEM_CONTENT | CLOSE_P ); + defineElement( "TBODY", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE ); + defineElement( "TD", OPT_CLOSING | CLOSE_TH_TD ); + defineElement( "TEXTAREA", PRESERVE ); + defineElement( "TFOOT", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE ); + defineElement( "TH", OPT_CLOSING | CLOSE_TH_TD ); + defineElement( "THEAD", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE ); + defineElement( "TITLE", ALLOWED_HEAD ); + defineElement( "TR", ELEM_CONTENT | OPT_CLOSING | CLOSE_TABLE ); + defineElement( "UL", ELEM_CONTENT | CLOSE_P ); + + _boolAttrs = new Hashtable(); + defineBoolean( "AREA", "href" ); + defineBoolean( "BUTTON", "disabled" ); + defineBoolean( "DIR", "compact" ); + defineBoolean( "DL", "compact" ); + defineBoolean( "FRAME", "noresize" ); + defineBoolean( "HR", "noshade" ); + defineBoolean( "IMAGE", "ismap" ); + defineBoolean( "INPUT", new String[] { "defaultchecked", "checked", "readonly", "disabled" } ); + defineBoolean( "LINK", "link" ); + defineBoolean( "MENU", "compact" ); + defineBoolean( "OBJECT", "declare" ); + defineBoolean( "OL", "compact" ); + defineBoolean( "OPTGROUP", "disabled" ); + defineBoolean( "OPTION", new String[] { "default-selected", "selected", "disabled" } ); + defineBoolean( "SCRIPT", "defer" ); + defineBoolean( "SELECT", new String[] { "multiple", "disabled" } ); + defineBoolean( "STYLE", "disabled" ); + defineBoolean( "TD", "nowrap" ); + defineBoolean( "TH", "nowrap" ); + defineBoolean( "TEXTAREA", new String[] { "disabled", "readonly" } ); + defineBoolean( "UL", "compact" ); + + initialize(); + } + + + +}