jdk/src/share/classes/javax/swing/text/html/parser/Parser.java
author rupashka
Wed, 06 Apr 2011 12:05:38 +0400
changeset 9213 856f16c729a5
parent 7668 d4a77089c587
child 12999 d0cec5582bd7
permissions -rw-r--r--
7003777: Nonexistent html entities not parsed properly. Reviewed-by: peterz
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
7668
d4a77089c587 6962318: Update copyright year
ohair
parents: 7014
diff changeset
     2
 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 1299
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 1299
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 1299
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 1299
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 1299
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
package javax.swing.text.html.parser;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
import javax.swing.text.SimpleAttributeSet;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
import javax.swing.text.html.HTML;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
import javax.swing.text.ChangedCharSetException;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
import java.io.*;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
import java.util.Hashtable;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
import java.util.Properties;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
import java.util.Vector;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
import java.util.Enumeration;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
import java.net.URL;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
import sun.misc.MessageUtils;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
 * A simple DTD-driven HTML parser. The parser reads an
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
 * HTML file from an InputStream and calls various methods
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
 * (which should be overridden in a subclass) when tags and
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
 * data are encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
 * Unfortunately there are many badly implemented HTML parsers
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
 * out there, and as a result there are many badly formatted
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
 * HTML files. This parser attempts to parse most HTML files.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
 * This means that the implementation sometimes deviates from
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
 * the SGML specification in favor of HTML.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
 * The parser treats \r and \r\n as \n. Newlines after starttags
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
 * and before end tags are ignored just as specified in the SGML/HTML
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
 * specification.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
 * The html spec does not specify how spaces are to be coalesced very well.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
 * Specifically, the following scenarios are not discussed (note that a
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 * space should be used here, but I am using &amp;nbsp; to force the space to
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 * be displayed):
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    61
 * '&lt;b>blah&nbsp;&lt;i>&nbsp;&lt;strike>&nbsp;foo' which can be treated as:
90ce3da70b43 Initial load
duke
parents:
diff changeset
    62
 * '&lt;b>blah&nbsp;&lt;i>&lt;strike>foo'
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
 * <p>as well as:
90ce3da70b43 Initial load
duke
parents:
diff changeset
    64
 * '&lt;p>&lt;a href="xx">&nbsp;&lt;em>Using&lt;/em>&lt;/a>&lt;/p>'
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
 * which appears to be treated as:
90ce3da70b43 Initial load
duke
parents:
diff changeset
    66
 * '&lt;p>&lt;a href="xx">&lt;em>Using&lt;/em>&lt;/a>&lt;/p>'
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
 * If <code>strict</code> is false, when a tag that breaks flow,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
 * (<code>TagElement.breaksFlow</code>) or trailing whitespace is
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
 * encountered, all whitespace will be ignored until a non whitespace
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
 * character is encountered. This appears to give behavior closer to
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
 * the popular browsers.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
 * @see DTD
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
 * @see TagElement
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
 * @see SimpleAttributeSet
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
 * @author Arthur van Hoff
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
 * @author Sunita Mani
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
public
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
class Parser implements DTDConstants {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
    private char text[] = new char[1024];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
    private int textpos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
    private TagElement last;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
    private boolean space;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
    private char str[] = new char[128];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
    private int strpos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
    protected DTD dtd = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
    private int ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
    private int ln;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
    private Reader in;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
    private Element recent;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
    private TagStack stack;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
    private boolean skipTag = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
    private TagElement lastFormSent = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
    private SimpleAttributeSet attributes = new SimpleAttributeSet();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
    // State for <html>, <head> and <body>.  Since people like to slap
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
    // together HTML documents without thinking, occasionally they
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
    // have multiple instances of these tags.  These booleans track
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
    // the first sightings of these tags so they can be safely ignored
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
    // by the parser if repeated.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
    private boolean seenHtml = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
    private boolean seenHead = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
    private boolean seenBody = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
     * The html spec does not specify how spaces are coalesced very well.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
     * If strict == false, ignoreSpace is used to try and mimic the behavior
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
     * of the popular browsers.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
     * The problematic scenarios are:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
     * '&lt;b>blah &lt;i> &lt;strike> foo' which can be treated as:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
     * '&lt;b>blah &lt;i>&lt;strike>foo'
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
     * as well as:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
     * '&lt;p>&lt;a href="xx"> &lt;em>Using&lt;/em>&lt;/a>&lt;/p>'
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
     * which appears to be treated as:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
     * '&lt;p>&lt;a href="xx">&lt;em>Using&lt;/em>&lt;/a>&lt;/p>'
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
     * When a tag that breaks flow, or trailing whitespace is encountered
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
     * ignoreSpace is set to true. From then on, all whitespace will be
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
     * ignored.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
     * ignoreSpace will be set back to false the first time a
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
     * non whitespace character is encountered. This appears to give
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
     * behavior closer to the popular browsers.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
    private boolean ignoreSpace;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
     * This flag determines whether or not the Parser will be strict
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
     * in enforcing SGML compatibility.  If false, it will be lenient
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
     * with certain common classes of erroneous HTML constructs.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
     * Strict or not, in either case an error will be recorded.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
    protected boolean strict = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
    /** Number of \r\n's encountered. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
    private int crlfCount;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
    /** Number of \r's encountered. A \r\n will not increment this. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
    private int crCount;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
    /** Number of \n's encountered. A \r\n will not increment this. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
    private int lfCount;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
    // To correctly identify the start of a tag/comment/text we need two
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
    // ivars. Two are needed as handleText isn't invoked until the tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
    // after the text has been parsed, that is the parser parses the text,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
    // then a tag, then invokes handleText followed by handleStart.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
    /** The start position of the current block. Block is overloaded here,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
     * it really means the current start position for the current comment,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
     * tag, text. Use getBlockStartPosition to access this. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
    private int currentBlockStartPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
    /** Start position of the last block. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
    private int lastBlockStartPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
     * array for mapping numeric references in range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
     * 130-159 to displayable Unicode characters.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
    private static final char[] cp1252Map = {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
        8218,  // &#130;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
        402,   // &#131;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
        8222,  // &#132;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
        8230,  // &#133;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
        8224,  // &#134;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
        8225,  // &#135;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
        710,   // &#136;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
        8240,  // &#137;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
        352,   // &#138;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
        8249,  // &#139;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
        338,   // &#140;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
        141,   // &#141;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
        142,   // &#142;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
        143,   // &#143;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
        144,   // &#144;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
        8216,  // &#145;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
        8217,  // &#146;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
        8220,  // &#147;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
        8221,  // &#148;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
        8226,  // &#149;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
        8211,  // &#150;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
        8212,  // &#151;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
        732,   // &#152;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
        8482,  // &#153;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
        353,   // &#154;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
        8250,  // &#155;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
        339,   // &#156;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
        157,   // &#157;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
        158,   // &#158;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
        376    // &#159;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
    };
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
    /**
     * Creates a parser that is driven by the given DTD.
     *
     * @param dtd the DTD to store in this parser's <code>dtd</code> field
     */
    public Parser(DTD dtd) {
        this.dtd = dtd;
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
     * @return the line number of the line currently being parsed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
    protected int getCurrentLine() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
        return ln;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
     * Returns the start position of the current block. Block is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
     * overloaded here, it really means the current start position for
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
     * the current comment tag, text, block.... This is provided for
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
     * subclassers that wish to know the start of the current block when
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
     * called with one of the handleXXX methods.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
    int getBlockStartPosition() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
        return Math.max(0, lastBlockStartPos - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
     * Makes a TagElement.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
    protected TagElement makeTag(Element elem, boolean fictional) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
        return new TagElement(elem, fictional);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
    protected TagElement makeTag(Element elem) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
        return makeTag(elem, false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
    protected SimpleAttributeSet getAttributes() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
        return attributes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
    protected void flushAttributes() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
        attributes.removeAttributes(attributes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
     * Called when PCDATA is encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
    protected void handleText(char text[]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
     * Called when an HTML title tag is encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
    protected void handleTitle(char text[]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
        // default behavior is to call handleText. Subclasses
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
        // can override if necessary.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
        handleText(text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
     * Called when an HTML comment is encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
    protected void handleComment(char text[]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
    protected void handleEOFInComment() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
        // We've reached EOF.  Our recovery strategy is to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
        // see if we have more than one line in the comment;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
        // if so, we pretend that the comment was an unterminated
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
        // single line comment, and reparse the lines after the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
        // first line as normal HTML content.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
        int commentEndPos = strIndexOf('\n');
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
        if (commentEndPos >= 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
            handleComment(getChars(0, commentEndPos));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
            try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
                in.close();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
                in = new CharArrayReader(getChars(commentEndPos + 1));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
                ch = '>';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
            } catch (IOException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
                error("ioexception");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
            resetStrBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
            // no newline, so signal an error
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
            error("eof.comment");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
     * Called when an empty tag is encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
    protected void handleEmptyTag(TagElement tag) throws ChangedCharSetException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
     * Called when a start tag is encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
    protected void handleStartTag(TagElement tag) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
     * Called when an end tag is encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
    protected void handleEndTag(TagElement tag) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
     * An error has occurred.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
    protected void handleError(int ln, String msg) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
        /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
        Thread.dumpStack();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
        System.out.println("**** " + stack);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
        System.out.println("line " + ln + ": error: " + msg);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
        System.out.println();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
        */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   318
90ce3da70b43 Initial load
duke
parents:
diff changeset
   319
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
     * Output text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
    void handleText(TagElement tag) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
        if (tag.breaksFlow()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
            space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
            if (!strict) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
                ignoreSpace = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
        if (textpos == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
            if ((!space) || (stack == null) || last.breaksFlow() ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
                !stack.advance(dtd.pcdata)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
                last = tag;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
                space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
                lastBlockStartPos = currentBlockStartPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
        if (space) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
            if (!ignoreSpace) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
                // enlarge buffer if needed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
                if (textpos + 1 > text.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
                    char newtext[] = new char[text.length + 200];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
                    System.arraycopy(text, 0, newtext, 0, text.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
                    text = newtext;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
                // output pending space
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
                text[textpos++] = ' ';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
                if (!strict && !tag.getElement().isEmpty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
                    ignoreSpace = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
            space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
        char newtext[] = new char[textpos];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
        System.arraycopy(text, 0, newtext, 0, textpos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
        // Handles cases of bad html where the title tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
        // was getting lost when we did error recovery.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
        if (tag.getElement().getName().equals("title")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
            handleTitle(newtext);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
            handleText(newtext);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
        lastBlockStartPos = currentBlockStartPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
        textpos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
        last = tag;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
        space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   368
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   369
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
     * Invoke the error handler.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
    protected void error(String err, String arg1, String arg2,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
        String arg3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
        handleError(ln, err + " " + arg1 + " " + arg2 + " " + arg3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
    protected void error(String err, String arg1, String arg2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
        error(err, arg1, arg2, "?");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   380
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   381
    protected void error(String err, String arg1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   382
        error(err, arg1, "?", "?");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
    protected void error(String err) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   385
        error(err, "?", "?", "?");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   387
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
     * Handle a start tag. The new tag is pushed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
     * onto the tag stack. The attribute list is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
     * checked for required attributes.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
    protected void startTag(TagElement tag) throws ChangedCharSetException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
        Element elem = tag.getElement();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   396
90ce3da70b43 Initial load
duke
parents:
diff changeset
   397
        // If the tag is an empty tag and texpos != 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   398
        // this implies that there is text before the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   399
        // start tag that needs to be processed before
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
        // handling the tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
        if (!elem.isEmpty() ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
                    ((last != null) && !last.breaksFlow()) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
                    (textpos != 0)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
            handleText(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   406
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   407
            // this variable gets updated in handleText().
90ce3da70b43 Initial load
duke
parents:
diff changeset
   408
            // Since in this case we do not call handleText()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   409
            // we need to update it here.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   410
            //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   411
            last = tag;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   412
            // Note that we should really check last.breakFlows before
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
            // assuming this should be false.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
            space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   415
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   416
        lastBlockStartPos = currentBlockStartPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   417
90ce3da70b43 Initial load
duke
parents:
diff changeset
   418
        // check required attributes
90ce3da70b43 Initial load
duke
parents:
diff changeset
   419
        for (AttributeList a = elem.atts ; a != null ; a = a.next) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   420
            if ((a.modifier == REQUIRED) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   421
                ((attributes.isEmpty()) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   422
                 ((!attributes.isDefined(a.name)) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   423
                  (!attributes.isDefined(HTML.getAttributeKey(a.name)))))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   424
                error("req.att ", a.getName(), elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   425
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   427
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
        if (elem.isEmpty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
            handleEmptyTag(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
            /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   431
        } else if (elem.getName().equals("form")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
            handleStartTag(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
            */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
            recent = elem;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
            stack = new TagStack(tag, stack);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
            handleStartTag(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   440
90ce3da70b43 Initial load
duke
parents:
diff changeset
   441
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   442
     * Handle an end tag. The end tag is popped
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
     * from the tag stack.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   444
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   445
    protected void endTag(boolean omitted) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   446
        handleText(stack.tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   447
90ce3da70b43 Initial load
duke
parents:
diff changeset
   448
        if (omitted && !stack.elem.omitEnd()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   449
            error("end.missing", stack.elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   450
        } else if (!stack.terminate()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   451
            error("end.unexpected", stack.elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
90ce3da70b43 Initial load
duke
parents:
diff changeset
   454
        // handle the tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
        handleEndTag(stack.tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
        stack = stack.next;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
        recent = (stack != null) ? stack.elem : null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
    boolean ignoreElement(Element elem) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   462
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
        String stackElement = stack.elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   464
        String elemName = elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
        /* We ignore all elements that are not valid in the context of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
           a table except <td>, <th> (these we handle in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
           legalElementContext()) and #pcdata.  We also ignore the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
           <font> tag in the context of <ul> and <ol> We additonally
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
           ignore the <meta> and the <style> tag if the body tag has
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
           been seen. **/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
        if ((elemName.equals("html") && seenHtml) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
            (elemName.equals("head") && seenHead) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
            (elemName.equals("body") && seenBody)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
            return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
        if (elemName.equals("dt") || elemName.equals("dd")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
            TagStack s = stack;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   478
            while (s != null && !s.elem.getName().equals("dl")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
                s = s.next;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
            if (s == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   482
                return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   485
90ce3da70b43 Initial load
duke
parents:
diff changeset
   486
        if (((stackElement.equals("table")) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
             (!elemName.equals("#pcdata")) && (!elemName.equals("input"))) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
            ((elemName.equals("font")) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
             (stackElement.equals("ul") || stackElement.equals("ol"))) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
            (elemName.equals("meta") && stack != null) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
            (elemName.equals("style") && seenBody) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
            (stackElement.equals("table") && elemName.equals("a"))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
            return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   494
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
        return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
90ce3da70b43 Initial load
duke
parents:
diff changeset
   498
90ce3da70b43 Initial load
duke
parents:
diff changeset
   499
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   500
     * Marks the first time a tag has been seen in a document
90ce3da70b43 Initial load
duke
parents:
diff changeset
   501
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   502
90ce3da70b43 Initial load
duke
parents:
diff changeset
   503
    protected void markFirstTime(Element elem) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   504
        String elemName = elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   505
        if (elemName.equals("html")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   506
            seenHtml = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   507
        } else if (elemName.equals("head")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   508
            seenHead = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   509
        } else if (elemName.equals("body")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   510
            if (buf.length == 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   511
                // Refer to note in definition of buf for details on this.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   512
                char[] newBuf = new char[256];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   513
90ce3da70b43 Initial load
duke
parents:
diff changeset
   514
                newBuf[0] = buf[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   515
                buf = newBuf;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   516
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   517
            seenBody = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   518
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   519
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   520
90ce3da70b43 Initial load
duke
parents:
diff changeset
   521
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   522
     * Create a legal content for an element.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   523
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   524
    boolean legalElementContext(Element elem) throws ChangedCharSetException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   525
90ce3da70b43 Initial load
duke
parents:
diff changeset
   526
        // System.out.println("-- legalContext -- " + elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   527
90ce3da70b43 Initial load
duke
parents:
diff changeset
   528
        // Deal with the empty stack
90ce3da70b43 Initial load
duke
parents:
diff changeset
   529
        if (stack == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   530
            // System.out.println("-- stack is empty");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   531
            if (elem != dtd.html) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   532
                // System.out.println("-- pushing html");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   533
                startTag(makeTag(dtd.html, true));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   534
                return legalElementContext(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   535
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   536
            return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   537
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   538
90ce3da70b43 Initial load
duke
parents:
diff changeset
   539
        // Is it allowed in the current context
90ce3da70b43 Initial load
duke
parents:
diff changeset
   540
        if (stack.advance(elem)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   541
            // System.out.println("-- legal context");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   542
            markFirstTime(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   543
            return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   544
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   545
        boolean insertTag = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   546
90ce3da70b43 Initial load
duke
parents:
diff changeset
   547
        // The use of all error recovery strategies are contingent
90ce3da70b43 Initial load
duke
parents:
diff changeset
   548
        // on the value of the strict property.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   549
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   550
        // These are commonly occurring errors.  if insertTag is true,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   551
        // then we want to adopt an error recovery strategy that
90ce3da70b43 Initial load
duke
parents:
diff changeset
   552
        // involves attempting to insert an additional tag to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   553
        // legalize the context.  The two errors addressed here
90ce3da70b43 Initial load
duke
parents:
diff changeset
   554
        // are:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   555
        // 1) when a <td> or <th> is seen soon after a <table> tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   556
        //    In this case we insert a <tr>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   557
        // 2) when any other tag apart from a <tr> is seen
90ce3da70b43 Initial load
duke
parents:
diff changeset
   558
        //    in the context of a <tr>.  In this case we would
90ce3da70b43 Initial load
duke
parents:
diff changeset
   559
        //    like to add a <td>.  If a <tr> is seen within a
90ce3da70b43 Initial load
duke
parents:
diff changeset
   560
        //    <tr> context, then we will close out the current
90ce3da70b43 Initial load
duke
parents:
diff changeset
   561
        //    <tr>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   562
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   563
        // This insertion strategy is handled later in the method.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   564
        // The reason for checking this now, is that in other cases
90ce3da70b43 Initial load
duke
parents:
diff changeset
   565
        // we would like to apply other error recovery strategies for example
90ce3da70b43 Initial load
duke
parents:
diff changeset
   566
        // ignoring tags.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   567
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   568
        // In certain cases it is better to ignore a tag than try to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   569
        // fix the situation.  So the first test is to see if this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   570
        // is what we need to do.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   571
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   572
        String stackElemName = stack.elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   573
        String elemName = elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   574
90ce3da70b43 Initial load
duke
parents:
diff changeset
   575
90ce3da70b43 Initial load
duke
parents:
diff changeset
   576
        if (!strict &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   577
            ((stackElemName.equals("table") && elemName.equals("td")) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   578
             (stackElemName.equals("table") && elemName.equals("th")) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   579
             (stackElemName.equals("tr") && !elemName.equals("tr")))){
90ce3da70b43 Initial load
duke
parents:
diff changeset
   580
             insertTag = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   581
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   582
90ce3da70b43 Initial load
duke
parents:
diff changeset
   583
90ce3da70b43 Initial load
duke
parents:
diff changeset
   584
        if (!strict && !insertTag && (stack.elem.getName() != elem.getName() ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   585
                                      elem.getName().equals("body"))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   586
            if (skipTag = ignoreElement(elem)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   587
                error("tag.ignore", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   588
                return skipTag;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   589
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   590
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   591
90ce3da70b43 Initial load
duke
parents:
diff changeset
   592
        // Check for anything after the start of the table besides tr, td, th
90ce3da70b43 Initial load
duke
parents:
diff changeset
   593
        // or caption, and if those aren't there, insert the <tr> and call
90ce3da70b43 Initial load
duke
parents:
diff changeset
   594
        // legalElementContext again.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   595
        if (!strict && stackElemName.equals("table") &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   596
            !elemName.equals("tr") && !elemName.equals("td") &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   597
            !elemName.equals("th") && !elemName.equals("caption")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   598
            Element e = dtd.getElement("tr");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   599
            TagElement t = makeTag(e, true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   600
            legalTagContext(t);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   601
            startTag(t);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   602
            error("start.missing", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   603
            return legalElementContext(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   604
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   605
90ce3da70b43 Initial load
duke
parents:
diff changeset
   606
        // They try to find a legal context by checking if the current
90ce3da70b43 Initial load
duke
parents:
diff changeset
   607
        // tag is valid in an enclosing context.  If so
90ce3da70b43 Initial load
duke
parents:
diff changeset
   608
        // close out the tags by outputting end tags and then
90ce3da70b43 Initial load
duke
parents:
diff changeset
   609
        // insert the current tag.  If the tags that are
90ce3da70b43 Initial load
duke
parents:
diff changeset
   610
        // being closed out do not have an optional end tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
   611
        // specification in the DTD then an html error is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   612
        // reported.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   613
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   614
        if (!insertTag && stack.terminate() && (!strict || stack.elem.omitEnd())) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   615
            for (TagStack s = stack.next ; s != null ; s = s.next) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   616
                if (s.advance(elem)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   617
                    while (stack != s) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   618
                        endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   619
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   620
                    return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   621
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   622
                if (!s.terminate() || (strict && !s.elem.omitEnd())) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   623
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   624
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   625
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   626
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   627
90ce3da70b43 Initial load
duke
parents:
diff changeset
   628
        // Check if we know what tag is expected next.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   629
        // If so insert the tag.  Report an error if the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   630
        // tag does not have its start tag spec in the DTD as optional.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   631
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   632
        Element next = stack.first();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   633
        if (next != null && (!strict || next.omitStart()) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   634
           !(next==dtd.head && elem==dtd.pcdata) ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   635
            // System.out.println("-- omitting start tag: " + next);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   636
            TagElement t = makeTag(next, true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   637
            legalTagContext(t);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   638
            startTag(t);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   639
            if (!next.omitStart()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   640
                error("start.missing", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   641
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   642
            return legalElementContext(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   643
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   644
90ce3da70b43 Initial load
duke
parents:
diff changeset
   645
90ce3da70b43 Initial load
duke
parents:
diff changeset
   646
        // Traverse the list of expected elements and determine if adding
90ce3da70b43 Initial load
duke
parents:
diff changeset
   647
        // any of these elements would make for a legal context.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   648
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   649
90ce3da70b43 Initial load
duke
parents:
diff changeset
   650
        if (!strict) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   651
            ContentModel content = stack.contentModel();
1287
a04aca99c77a 6722802: Code improvement and warnings removing from the javax.swing.text package
rupashka
parents: 438
diff changeset
   652
            Vector<Element> elemVec = new Vector<Element>();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   653
            if (content != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   654
                content.getElements(elemVec);
1287
a04aca99c77a 6722802: Code improvement and warnings removing from the javax.swing.text package
rupashka
parents: 438
diff changeset
   655
                for (Element e : elemVec) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   656
                    // Ensure that this element has not been included as
90ce3da70b43 Initial load
duke
parents:
diff changeset
   657
                    // part of the exclusions in the DTD.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   658
                    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   659
                    if (stack.excluded(e.getIndex())) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   660
                        continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   661
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   662
90ce3da70b43 Initial load
duke
parents:
diff changeset
   663
                    boolean reqAtts = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   664
90ce3da70b43 Initial load
duke
parents:
diff changeset
   665
                    for (AttributeList a = e.getAttributes(); a != null ; a = a.next) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   666
                        if (a.modifier == REQUIRED) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   667
                            reqAtts = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   668
                            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   669
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   670
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   671
                    // Ensure that no tag that has required attributes
90ce3da70b43 Initial load
duke
parents:
diff changeset
   672
                    // gets inserted.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   673
                    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   674
                    if (reqAtts) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   675
                        continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   676
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   677
90ce3da70b43 Initial load
duke
parents:
diff changeset
   678
                    ContentModel m = e.getContent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   679
                    if (m != null && m.first(elem)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   680
                        // System.out.println("-- adding a legal tag: " + e);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   681
                        TagElement t = makeTag(e, true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   682
                        legalTagContext(t);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   683
                        startTag(t);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   684
                        error("start.missing", e.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   685
                        return legalElementContext(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   686
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   687
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   688
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   689
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   690
90ce3da70b43 Initial load
duke
parents:
diff changeset
   691
        // Check if the stack can be terminated.  If so add the appropriate
90ce3da70b43 Initial load
duke
parents:
diff changeset
   692
        // end tag.  Report an error if the tag being ended does not have its
90ce3da70b43 Initial load
duke
parents:
diff changeset
   693
        // end tag spec in the DTD as optional.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   694
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   695
        if (stack.terminate() && (stack.elem != dtd.body) && (!strict || stack.elem.omitEnd())) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   696
            // System.out.println("-- omitting end tag: " + stack.elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   697
            if (!stack.elem.omitEnd()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   698
                error("end.missing", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   699
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   700
90ce3da70b43 Initial load
duke
parents:
diff changeset
   701
            endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   702
            return legalElementContext(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   703
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   704
90ce3da70b43 Initial load
duke
parents:
diff changeset
   705
        // At this point we know that something is screwed up.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   706
        return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   707
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   708
90ce3da70b43 Initial load
duke
parents:
diff changeset
   709
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   710
     * Create a legal context for a tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   711
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   712
    void legalTagContext(TagElement tag) throws ChangedCharSetException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   713
        if (legalElementContext(tag.getElement())) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   714
            markFirstTime(tag.getElement());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   715
            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   716
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   717
90ce3da70b43 Initial load
duke
parents:
diff changeset
   718
        // Avoid putting a block tag in a flow tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   719
        if (tag.breaksFlow() && (stack != null) && !stack.tag.breaksFlow()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   720
            endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   721
            legalTagContext(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   722
            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   723
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   724
90ce3da70b43 Initial load
duke
parents:
diff changeset
   725
        // Avoid putting something wierd in the head of the document.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   726
        for (TagStack s = stack ; s != null ; s = s.next) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   727
            if (s.tag.getElement() == dtd.head) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   728
                while (stack != s) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   729
                    endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   730
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   731
                endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   732
                legalTagContext(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   733
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   734
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   735
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   736
90ce3da70b43 Initial load
duke
parents:
diff changeset
   737
        // Everything failed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   738
        error("tag.unexpected", tag.getElement().getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   739
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   740
90ce3da70b43 Initial load
duke
parents:
diff changeset
   741
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   742
     * Error context. Something went wrong, make sure we are in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   743
     * the document's body context
90ce3da70b43 Initial load
duke
parents:
diff changeset
   744
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   745
    void errorContext() throws ChangedCharSetException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   746
        for (; (stack != null) && (stack.tag.getElement() != dtd.body) ; stack = stack.next) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   747
            handleEndTag(stack.tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   748
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   749
        if (stack == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   750
            legalElementContext(dtd.body);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   751
            startTag(makeTag(dtd.body, true));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   752
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   753
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   754
90ce3da70b43 Initial load
duke
parents:
diff changeset
   755
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   756
     * Add a char to the string buffer.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   757
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   758
    void addString(int c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   759
        if (strpos  == str.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   760
            char newstr[] = new char[str.length + 128];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   761
            System.arraycopy(str, 0, newstr, 0, str.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   762
            str = newstr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   763
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   764
        str[strpos++] = (char)c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   765
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   766
90ce3da70b43 Initial load
duke
parents:
diff changeset
   767
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   768
     * Get the string that's been accumulated.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   769
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   770
    String getString(int pos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   771
        char newStr[] = new char[strpos - pos];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   772
        System.arraycopy(str, pos, newStr, 0, strpos - pos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   773
        strpos = pos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   774
        return new String(newStr);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   775
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   776
90ce3da70b43 Initial load
duke
parents:
diff changeset
   777
    char[] getChars(int pos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   778
        char newStr[] = new char[strpos - pos];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   779
        System.arraycopy(str, pos, newStr, 0, strpos - pos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   780
        strpos = pos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   781
        return newStr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   782
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   783
90ce3da70b43 Initial load
duke
parents:
diff changeset
   784
    char[] getChars(int pos, int endPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   785
        char newStr[] = new char[endPos - pos];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   786
        System.arraycopy(str, pos, newStr, 0, endPos - pos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   787
        // REMIND: it's not clear whether this version should set strpos or not
90ce3da70b43 Initial load
duke
parents:
diff changeset
   788
        // strpos = pos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   789
        return newStr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   790
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   791
90ce3da70b43 Initial load
duke
parents:
diff changeset
   792
    void resetStrBuffer() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   793
        strpos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   794
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   795
90ce3da70b43 Initial load
duke
parents:
diff changeset
   796
    int strIndexOf(char target) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   797
        for (int i = 0; i < strpos; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   798
            if (str[i] == target) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   799
                return i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   800
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   801
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   802
90ce3da70b43 Initial load
duke
parents:
diff changeset
   803
        return -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   804
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   805
90ce3da70b43 Initial load
duke
parents:
diff changeset
   806
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   807
     * Skip space.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   808
     * [5] 297:5
90ce3da70b43 Initial load
duke
parents:
diff changeset
   809
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   810
    void skipSpace() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   811
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   812
            switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   813
              case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   814
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   815
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   816
                lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   817
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   818
90ce3da70b43 Initial load
duke
parents:
diff changeset
   819
              case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   820
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   821
                if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   822
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   823
                    crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   824
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   825
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   826
                    crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   827
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   828
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   829
              case ' ':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   830
              case '\t':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   831
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   832
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   833
90ce3da70b43 Initial load
duke
parents:
diff changeset
   834
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   835
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   836
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   837
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   838
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   839
90ce3da70b43 Initial load
duke
parents:
diff changeset
   840
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   841
     * Parse identifier. Uppercase characters are folded
90ce3da70b43 Initial load
duke
parents:
diff changeset
   842
     * to lowercase when lower is true. Returns falsed if
90ce3da70b43 Initial load
duke
parents:
diff changeset
   843
     * no identifier is found. [55] 346:17
90ce3da70b43 Initial load
duke
parents:
diff changeset
   844
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   845
    boolean parseIdentifier(boolean lower) throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   846
        switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   847
          case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   848
          case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   849
          case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   850
          case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   851
          case 'Y': case 'Z':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   852
            if (lower) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   853
                ch = 'a' + (ch - 'A');
90ce3da70b43 Initial load
duke
parents:
diff changeset
   854
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   855
90ce3da70b43 Initial load
duke
parents:
diff changeset
   856
          case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   857
          case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   858
          case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   859
          case 's': case 't': case 'u': case 'v': case 'w': case 'x':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   860
          case 'y': case 'z':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   861
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   862
90ce3da70b43 Initial load
duke
parents:
diff changeset
   863
          default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   864
            return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   865
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   866
90ce3da70b43 Initial load
duke
parents:
diff changeset
   867
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   868
            addString(ch);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   869
90ce3da70b43 Initial load
duke
parents:
diff changeset
   870
            switch (ch = readCh()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   871
              case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   872
              case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   873
              case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   874
              case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   875
              case 'Y': case 'Z':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   876
                if (lower) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   877
                    ch = 'a' + (ch - 'A');
90ce3da70b43 Initial load
duke
parents:
diff changeset
   878
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   879
90ce3da70b43 Initial load
duke
parents:
diff changeset
   880
              case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   881
              case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   882
              case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   883
              case 's': case 't': case 'u': case 'v': case 'w': case 'x':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   884
              case 'y': case 'z':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   885
90ce3da70b43 Initial load
duke
parents:
diff changeset
   886
              case '0': case '1': case '2': case '3': case '4':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   887
              case '5': case '6': case '7': case '8': case '9':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   888
90ce3da70b43 Initial load
duke
parents:
diff changeset
   889
              case '.': case '-':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   890
90ce3da70b43 Initial load
duke
parents:
diff changeset
   891
              case '_': // not officially allowed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   892
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   893
90ce3da70b43 Initial load
duke
parents:
diff changeset
   894
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   895
                return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   896
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   897
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   898
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   899
90ce3da70b43 Initial load
duke
parents:
diff changeset
   900
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   901
     * Parse an entity reference. [59] 350:17
90ce3da70b43 Initial load
duke
parents:
diff changeset
   902
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   903
    private char[] parseEntityReference() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   904
        int pos = strpos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   905
90ce3da70b43 Initial load
duke
parents:
diff changeset
   906
        if ((ch = readCh()) == '#') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   907
            int n = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   908
            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   909
            if ((ch >= '0') && (ch <= '9') ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   910
                    ch == 'x' || ch == 'X') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   911
90ce3da70b43 Initial load
duke
parents:
diff changeset
   912
                if ((ch >= '0') && (ch <= '9')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   913
                    // parse decimal reference
90ce3da70b43 Initial load
duke
parents:
diff changeset
   914
                    while ((ch >= '0') && (ch <= '9')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   915
                        n = (n * 10) + ch - '0';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   916
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   917
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   918
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   919
                    // parse hexadecimal reference
90ce3da70b43 Initial load
duke
parents:
diff changeset
   920
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   921
                    char lch = (char) Character.toLowerCase(ch);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   922
                    while ((lch >= '0') && (lch <= '9') ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   923
                            (lch >= 'a') && (lch <= 'f')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   924
                        if (lch >= '0' && lch <= '9') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   925
                            n = (n * 16) + lch - '0';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   926
                        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   927
                            n = (n * 16) + lch - 'a' + 10;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   928
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   929
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   930
                        lch = (char) Character.toLowerCase(ch);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   931
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   932
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   933
                switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   934
                    case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   935
                        ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   936
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   937
                        lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   938
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   939
90ce3da70b43 Initial load
duke
parents:
diff changeset
   940
                    case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   941
                        ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   942
                        if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   943
                            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   944
                            crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   945
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   946
                        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   947
                            crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   948
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   949
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   950
90ce3da70b43 Initial load
duke
parents:
diff changeset
   951
                    case ';':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   952
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   953
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   954
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   955
                char data[] = {mapNumericReference((char) n)};
90ce3da70b43 Initial load
duke
parents:
diff changeset
   956
                return data;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   957
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   958
            addString('#');
90ce3da70b43 Initial load
duke
parents:
diff changeset
   959
            if (!parseIdentifier(false)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   960
                error("ident.expected");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   961
                strpos = pos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   962
                char data[] = {'&', '#'};
90ce3da70b43 Initial load
duke
parents:
diff changeset
   963
                return data;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   964
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   965
        } else if (!parseIdentifier(false)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   966
            char data[] = {'&'};
90ce3da70b43 Initial load
duke
parents:
diff changeset
   967
            return data;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   968
        }
9213
856f16c729a5 7003777: Nonexistent html entities not parsed properly.
rupashka
parents: 7668
diff changeset
   969
856f16c729a5 7003777: Nonexistent html entities not parsed properly.
rupashka
parents: 7668
diff changeset
   970
        boolean semicolon = false;
856f16c729a5 7003777: Nonexistent html entities not parsed properly.
rupashka
parents: 7668
diff changeset
   971
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   972
        switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   973
          case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   974
            ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   975
            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   976
            lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   977
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   978
90ce3da70b43 Initial load
duke
parents:
diff changeset
   979
          case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   980
            ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   981
            if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   982
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   983
                crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   984
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   985
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   986
                crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   987
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   988
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   989
90ce3da70b43 Initial load
duke
parents:
diff changeset
   990
          case ';':
9213
856f16c729a5 7003777: Nonexistent html entities not parsed properly.
rupashka
parents: 7668
diff changeset
   991
            semicolon = true;
856f16c729a5 7003777: Nonexistent html entities not parsed properly.
rupashka
parents: 7668
diff changeset
   992
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   993
            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   994
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   995
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   996
90ce3da70b43 Initial load
duke
parents:
diff changeset
   997
        String nm = getString(pos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   998
        Entity ent = dtd.getEntity(nm);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   999
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1000
        // entities are case sensitive - however if strict
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1001
        // is false then we will try to make a match by
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1002
        // converting the string to all lowercase.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1003
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1004
        if (!strict && (ent == null)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1005
            ent = dtd.getEntity(nm.toLowerCase());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1006
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1007
        if ((ent == null) || !ent.isGeneral()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1008
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1009
            if (nm.length() == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1010
                error("invalid.entref", nm);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1011
                return new char[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1012
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1013
            /* given that there is not a match restore the entity reference */
9213
856f16c729a5 7003777: Nonexistent html entities not parsed properly.
rupashka
parents: 7668
diff changeset
  1014
            String str = "&" + nm + (semicolon ? ";" : "");
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1015
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1016
            char b[] = new char[str.length()];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1017
            str.getChars(0, b.length, b, 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1018
            return b;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1019
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1020
        return ent.getData();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1021
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1022
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1023
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1024
     * Converts numeric character reference to Unicode character.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1025
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1026
     * Normally the code in a reference should be always converted
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1027
     * to the Unicode character with the same code, but due to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1028
     * wide usage of Cp1252 charset most browsers map numeric references
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1029
     * in the range 130-159 (which are control chars in Unicode set)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1030
     * to displayable characters with other codes.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1031
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1032
     * @param c the code of numeric character reference.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1033
     * @return the character corresponding to the reference code.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1034
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1035
    private char mapNumericReference(char c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1036
        if (c < 130 || c > 159) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1037
            return c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1038
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1039
        return cp1252Map[c - 130];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1040
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1041
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1042
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1043
     * Parse a comment. [92] 391:7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1044
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1045
    void parseComment() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1046
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1047
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1048
            int c = ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1049
            switch (c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1050
              case '-':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1051
                  /** Presuming that the start string of a comment "<!--" has
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1052
                      already been parsed, the '-' character is valid only as
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1053
                      part of a comment termination and further more it must
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1054
                      be present in even numbers. Hence if strict is true, we
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1055
                      presume the comment has been terminated and return.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1056
                      However if strict is false, then there is no even number
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1057
                      requirement and this character can appear anywhere in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1058
                      comment.  The parser reads on until it sees the following
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1059
                      pattern: "-->" or "--!>".
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1060
                   **/
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1061
                if (!strict && (strpos != 0) && (str[strpos - 1] == '-')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1062
                    if ((ch = readCh()) == '>') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1063
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1064
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1065
                    if (ch == '!') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1066
                        if ((ch = readCh()) == '>') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1067
                            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1068
                        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1069
                            /* to account for extra read()'s that happened */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1070
                            addString('-');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1071
                            addString('!');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1072
                            continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1073
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1074
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1075
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1076
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1077
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1078
                if ((ch = readCh()) == '-') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1079
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1080
                    if (strict || ch == '>') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1081
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1082
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1083
                    if (ch == '!') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1084
                        if ((ch = readCh()) == '>') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1085
                            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1086
                        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1087
                            /* to account for extra read()'s that happened */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1088
                            addString('-');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1089
                            addString('!');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1090
                            continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1091
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1092
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1093
                    /* to account for the extra read() */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1094
                    addString('-');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1095
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1096
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1097
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1098
              case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1099
                  handleEOFInComment();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1100
                  return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1101
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1102
              case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1103
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1104
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1105
                lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1106
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1107
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1108
              case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1109
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1110
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1111
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1112
              case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1113
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1114
                if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1115
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1116
                    crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1117
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1118
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1119
                    crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1120
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1121
                c = '\n';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1122
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1123
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1124
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1125
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1126
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1127
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1128
            addString(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1129
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1130
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1131
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1132
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1133
     * Parse literal content. [46] 343:1 and [47] 344:1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1134
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1135
    void parseLiteral(boolean replace) throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1136
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1137
            int c = ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1138
            switch (c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1139
              case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1140
                error("eof.literal", stack.elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1141
                endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1142
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1143
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1144
              case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1145
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1146
                int i = textpos - (stack.elem.name.length() + 2), j = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1147
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1148
                // match end tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1149
                if ((i >= 0) && (text[i++] == '<') && (text[i] == '/')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1150
                    while ((++i < textpos) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1151
                           (Character.toLowerCase(text[i]) == stack.elem.name.charAt(j++)));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1152
                    if (i == textpos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1153
                        textpos -= (stack.elem.name.length() + 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1154
                        if ((textpos > 0) && (text[textpos-1] == '\n')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1155
                            textpos--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1156
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1157
                        endTag(false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1158
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1159
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1160
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1161
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1162
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1163
              case '&':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1164
                char data[] = parseEntityReference();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1165
                if (textpos + data.length > text.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1166
                    char newtext[] = new char[Math.max(textpos + data.length + 128, text.length * 2)];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1167
                    System.arraycopy(text, 0, newtext, 0, text.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1168
                    text = newtext;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1169
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1170
                System.arraycopy(data, 0, text, textpos, data.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1171
                textpos += data.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1172
                continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1173
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1174
              case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1175
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1176
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1177
                lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1178
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1179
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1180
              case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1181
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1182
                if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1183
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1184
                    crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1185
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1186
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1187
                    crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1188
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1189
                c = '\n';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1190
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1191
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1192
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1193
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1194
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1195
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1196
            // output character
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1197
            if (textpos == text.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1198
                char newtext[] = new char[text.length + 128];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1199
                System.arraycopy(text, 0, newtext, 0, text.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1200
                text = newtext;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1201
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1202
            text[textpos++] = (char)c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1203
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1204
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1205
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1206
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1207
     * Parse attribute value. [33] 331:1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1208
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1209
    String parseAttributeValue(boolean lower) throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1210
        int delim = -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1211
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1212
        // Check for a delimiter
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1213
        switch(ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1214
          case '\'':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1215
          case '"':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1216
            delim = ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1217
            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1218
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1219
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1220
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1221
        // Parse the rest of the value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1222
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1223
            int c = ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1224
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1225
            switch (c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1226
              case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1227
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1228
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1229
                lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1230
                if (delim < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1231
                    return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1232
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1233
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1234
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1235
              case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1236
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1237
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1238
                if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1239
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1240
                    crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1241
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1242
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1243
                    crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1244
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1245
                if (delim < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1246
                    return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1247
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1248
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1249
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1250
              case '\t':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1251
                  if (delim < 0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1252
                      c = ' ';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1253
              case ' ':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1254
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1255
                if (delim < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1256
                    return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1257
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1258
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1259
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1260
              case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1261
              case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1262
                if (delim < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1263
                    return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1264
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1265
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1266
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1267
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1268
              case '\'':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1269
              case '"':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1270
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1271
                if (c == delim) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1272
                    return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1273
                } else if (delim == -1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1274
                    error("attvalerr");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1275
                    if (strict || ch == ' ') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1276
                        return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1277
                    } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1278
                        continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1279
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1280
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1281
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1282
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1283
            case '=':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1284
                if (delim < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1285
                    /* In SGML a construct like <img src=/cgi-bin/foo?x=1>
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1286
                       is considered invalid since an = sign can only be contained
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1287
                       in an attributes value if the string is quoted.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1288
                       */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1289
                    error("attvalerr");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1290
                    /* If strict is true then we return with the string we have thus far.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1291
                       Otherwise we accept the = sign as part of the attribute's value and
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1292
                       process the rest of the img tag. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1293
                    if (strict) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1294
                        return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1295
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1296
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1297
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1298
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1299
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1300
              case '&':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1301
                if (strict && delim < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1302
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1303
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1304
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1305
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1306
                char data[] = parseEntityReference();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1307
                for (int i = 0 ; i < data.length ; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1308
                    c = data[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1309
                    addString((lower && (c >= 'A') && (c <= 'Z')) ? 'a' + c - 'A' : c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1310
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1311
                continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1312
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1313
              case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1314
                return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1315
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1316
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1317
                if (lower && (c >= 'A') && (c <= 'Z')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1318
                    c = 'a' + c - 'A';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1319
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1320
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1321
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1322
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1323
            addString(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1324
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1325
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1326
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1327
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1328
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1329
     * Parse attribute specification List. [31] 327:17
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1330
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1331
    void parseAttributeSpecificationList(Element elem) throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1332
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1333
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1334
            skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1335
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1336
            switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1337
              case '/':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1338
              case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1339
              case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1340
              case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1341
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1342
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1343
              case '-':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1344
                if ((ch = readCh()) == '-') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1345
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1346
                    parseComment();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1347
                    strpos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1348
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1349
                    error("invalid.tagchar", "-", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1350
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1351
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1352
                continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1353
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1354
1287
a04aca99c77a 6722802: Code improvement and warnings removing from the javax.swing.text package
rupashka
parents: 438
diff changeset
  1355
            AttributeList att;
a04aca99c77a 6722802: Code improvement and warnings removing from the javax.swing.text package
rupashka
parents: 438
diff changeset
  1356
            String attname;
a04aca99c77a 6722802: Code improvement and warnings removing from the javax.swing.text package
rupashka
parents: 438
diff changeset
  1357
            String attvalue;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1358
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1359
            if (parseIdentifier(true)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1360
                attname = getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1361
                skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1362
                if (ch == '=') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1363
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1364
                    skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1365
                    att = elem.getAttribute(attname);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1366
//  Bug ID 4102750
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1367
//  Load the NAME of an Attribute Case Sensitive
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1368
//  The case of the NAME  must be intact
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1369
//  MG 021898
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1370
                    attvalue = parseAttributeValue((att != null) && (att.type != CDATA) && (att.type != NOTATION) && (att.type != NAME));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1371
//                  attvalue = parseAttributeValue((att != null) && (att.type != CDATA) && (att.type != NOTATION));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1372
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1373
                    attvalue = attname;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1374
                    att = elem.getAttributeByValue(attvalue);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1375
                    if (att == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1376
                        att = elem.getAttribute(attname);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1377
                        if (att != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1378
                            attvalue = att.getValue();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1379
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1380
                        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1381
                            // Make it null so that NULL_ATTRIBUTE_VALUE is
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1382
                            // used
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1383
                            attvalue = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1384
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1385
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1386
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1387
            } else if (!strict && ch == ',') { // allows for comma separated attribute-value pairs
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1388
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1389
                continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1390
            } else if (!strict && ch == '"') { // allows for quoted attributes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1391
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1392
                skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1393
                if (parseIdentifier(true)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1394
                    attname = getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1395
                    if (ch == '"') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1396
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1397
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1398
                    skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1399
                    if (ch == '=') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1400
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1401
                        skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1402
                        att = elem.getAttribute(attname);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1403
                        attvalue = parseAttributeValue((att != null) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1404
                                                (att.type != CDATA) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1405
                                                (att.type != NOTATION));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1406
                    } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1407
                        attvalue = attname;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1408
                        att = elem.getAttributeByValue(attvalue);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1409
                        if (att == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1410
                            att = elem.getAttribute(attname);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1411
                            if (att != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1412
                                attvalue = att.getValue();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1413
                            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1414
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1415
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1416
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1417
                    char str[] = {(char)ch};
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1418
                    error("invalid.tagchar", new String(str), elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1419
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1420
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1421
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1422
            } else if (!strict && (attributes.isEmpty()) && (ch == '=')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1423
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1424
                skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1425
                attname = elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1426
                att = elem.getAttribute(attname);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1427
                attvalue = parseAttributeValue((att != null) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1428
                                               (att.type != CDATA) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1429
                                               (att.type != NOTATION));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1430
            } else if (!strict && (ch == '=')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1431
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1432
                skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1433
                attvalue = parseAttributeValue(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1434
                error("attvalerr");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1435
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1436
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1437
                char str[] = {(char)ch};
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1438
                error("invalid.tagchar", new String(str), elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1439
                if (!strict) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1440
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1441
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1442
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1443
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1444
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1445
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1446
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1447
            if (att != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1448
                attname = att.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1449
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1450
                error("invalid.tagatt", attname, elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1451
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1452
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1453
            // Check out the value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1454
            if (attributes.isDefined(attname)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1455
                error("multi.tagatt", attname, elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1456
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1457
            if (attvalue == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1458
                attvalue = ((att != null) && (att.value != null)) ? att.value :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1459
                    HTML.NULL_ATTRIBUTE_VALUE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1460
            } else if ((att != null) && (att.values != null) && !att.values.contains(attvalue)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1461
                error("invalid.tagattval", attname, elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1462
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1463
            HTML.Attribute attkey = HTML.getAttributeKey(attname);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1464
            if (attkey == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1465
                attributes.addAttribute(attname, attvalue);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1466
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1467
                attributes.addAttribute(attkey, attvalue);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1468
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1469
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1470
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1471
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1472
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1473
     * Parses th Document Declaration Type markup declaration.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1474
     * Currently ignores it.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1475
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1476
    public String parseDTDMarkup() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1477
7014
eb4fcf73ee99 6432566: Replace usage of StringBuffer with StringBuilder in Swing
rupashka
parents: 5506
diff changeset
  1478
        StringBuilder strBuff = new StringBuilder();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1479
        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1480
        while(true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1481
            switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1482
            case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1483
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1484
                return strBuff.toString();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1485
            case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1486
                error("invalid.markup");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1487
                return strBuff.toString();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1488
            case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1489
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1490
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1491
                lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1492
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1493
            case '"':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1494
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1495
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1496
            case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1497
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1498
                if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1499
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1500
                    crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1501
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1502
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1503
                    crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1504
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1505
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1506
            default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1507
                strBuff.append((char)(ch & 0xFF));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1508
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1509
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1510
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1511
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1512
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1513
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1514
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1515
     * Parse markup declarations.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1516
     * Currently only handles the Document Type Declaration markup.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1517
     * Returns true if it is a markup declaration false otherwise.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1518
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1519
    protected boolean parseMarkupDeclarations(StringBuffer strBuff) throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1520
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1521
        /* Currently handles only the DOCTYPE */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1522
        if ((strBuff.length() == "DOCTYPE".length()) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1523
            (strBuff.toString().toUpperCase().equals("DOCTYPE"))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1524
            parseDTDMarkup();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1525
            return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1526
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1527
        return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1528
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1529
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1530
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1531
     * Parse an invalid tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1532
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1533
    void parseInvalidTag() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1534
        // ignore all data upto the close bracket '>'
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1535
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1536
            skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1537
            switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1538
              case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1539
              case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1540
                  ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1541
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1542
              case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1543
                  return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1544
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1545
                  ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1546
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1547
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1548
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1549
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1550
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1551
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1552
     * Parse a start or end tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1553
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1554
    void parseTag() throws IOException {
1287
a04aca99c77a 6722802: Code improvement and warnings removing from the javax.swing.text package
rupashka
parents: 438
diff changeset
  1555
        Element elem;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1556
        boolean net = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1557
        boolean warned = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1558
        boolean unknown = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1559
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1560
        switch (ch = readCh()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1561
          case '!':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1562
            switch (ch = readCh()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1563
              case '-':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1564
                // Parse comment. [92] 391:7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1565
                while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1566
                    if (ch == '-') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1567
                        if (!strict || ((ch = readCh()) == '-')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1568
                            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1569
                            if (!strict && ch == '-') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1570
                                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1571
                            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1572
                            // send over any text you might see
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1573
                            // before parsing and sending the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1574
                            // comment
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1575
                            if (textpos != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1576
                                char newtext[] = new char[textpos];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1577
                                System.arraycopy(text, 0, newtext, 0, textpos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1578
                                handleText(newtext);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1579
                                lastBlockStartPos = currentBlockStartPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1580
                                textpos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1581
                            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1582
                            parseComment();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1583
                            last = makeTag(dtd.getElement("comment"), true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1584
                            handleComment(getChars(0));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1585
                            continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1586
                        } else if (!warned) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1587
                            warned = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1588
                            error("invalid.commentchar", "-");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1589
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1590
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1591
                    skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1592
                    switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1593
                      case '-':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1594
                        continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1595
                      case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1596
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1597
                      case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1598
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1599
                      default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1600
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1601
                        if (!warned) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1602
                            warned = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1603
                            error("invalid.commentchar",
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1604
                                  String.valueOf((char)ch));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1605
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1606
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1607
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1608
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1609
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1610
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1611
                // deal with marked sections
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1612
                StringBuffer strBuff = new StringBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1613
                while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1614
                    strBuff.append((char)ch);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1615
                    if (parseMarkupDeclarations(strBuff)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1616
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1617
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1618
                    switch(ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1619
                      case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1620
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1621
                      case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1622
                        error("invalid.markup");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1623
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1624
                      case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1625
                        ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1626
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1627
                        lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1628
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1629
                      case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1630
                        ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1631
                        if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1632
                            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1633
                            crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1634
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1635
                        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1636
                            crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1637
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1638
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1639
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1640
                      default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1641
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1642
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1643
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1644
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1645
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1646
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1647
          case '/':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1648
            // parse end tag [19] 317:4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1649
            switch (ch = readCh()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1650
              case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1651
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1652
              case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1653
                // empty end tag. either </> or </<
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1654
                if (recent == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1655
                    error("invalid.shortend");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1656
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1657
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1658
                elem = recent;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1659
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1660
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1661
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1662
                if (!parseIdentifier(true)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1663
                    error("expected.endtagname");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1664
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1665
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1666
                skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1667
                switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1668
                  case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1669
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1670
                  case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1671
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1672
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1673
                  default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1674
                    error("expected", "'>'");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1675
                    while ((ch != -1) && (ch != '\n') && (ch != '>')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1676
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1677
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1678
                    if (ch == '>') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1679
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1680
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1681
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1682
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1683
                String elemStr = getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1684
                if (!dtd.elementExists(elemStr)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1685
                    error("end.unrecognized", elemStr);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1686
                    // Ignore RE before end tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1687
                    if ((textpos > 0) && (text[textpos-1] == '\n')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1688
                        textpos--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1689
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1690
                    elem = dtd.getElement("unknown");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1691
                    elem.name = elemStr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1692
                    unknown = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1693
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1694
                    elem = dtd.getElement(elemStr);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1695
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1696
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1697
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1698
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1699
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1700
            // If the stack is null, we're seeing end tags without any begin
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1701
            // tags.  Ignore them.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1702
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1703
            if (stack == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1704
                error("end.extra.tag", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1705
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1706
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1707
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1708
            // Ignore RE before end tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1709
            if ((textpos > 0) && (text[textpos-1] == '\n')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1710
                // In a pre tag, if there are blank lines
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1711
                // we do not want to remove the newline
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1712
                // before the end tag.  Hence this code.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1713
                //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1714
                if (stack.pre) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1715
                    if ((textpos > 1) && (text[textpos-2] != '\n')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1716
                        textpos--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1717
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1718
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1719
                    textpos--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1720
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1721
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1722
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1723
            // If the end tag is a form, since we did not put it
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1724
            // on the tag stack, there is no corresponding start
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1725
            // tag to find. Hence do not touch the tag stack.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1726
            //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1727
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1728
            /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1729
            if (!strict && elem.getName().equals("form")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1730
                if (lastFormSent != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1731
                    handleEndTag(lastFormSent);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1732
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1733
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1734
                    // do nothing.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1735
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1736
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1737
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1738
            */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1739
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1740
            if (unknown) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1741
                // we will not see a corresponding start tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1742
                // on the stack.  If we are seeing an
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1743
                // end tag, lets send this on as an empty
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1744
                // tag with the end tag attribute set to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1745
                // true.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1746
                TagElement t = makeTag(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1747
                handleText(t);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1748
                attributes.addAttribute(HTML.Attribute.ENDTAG, "true");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1749
                handleEmptyTag(makeTag(elem));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1750
                unknown = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1751
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1752
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1753
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1754
            // find the corresponding start tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1755
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1756
            // A commonly occurring error appears to be the insertion
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1757
            // of extra end tags in a table.  The intent here is to ignore
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1758
            // such extra end tags.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1759
            //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1760
            if (!strict) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1761
                String stackElem = stack.elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1762
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1763
                if (stackElem.equals("table")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1764
                    // If it isnt a valid end tag ignore it and return
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1765
                    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1766
                    if (!elem.getName().equals(stackElem)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1767
                        error("tag.ignore", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1768
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1769
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1770
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1771
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1772
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1773
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1774
                if (stackElem.equals("tr") ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1775
                    stackElem.equals("td")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1776
                    if ((!elem.getName().equals("table")) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1777
                        (!elem.getName().equals(stackElem))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1778
                        error("tag.ignore", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1779
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1780
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1781
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1782
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1783
            TagStack sp = stack;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1784
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1785
            while ((sp != null) && (elem != sp.elem)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1786
                sp = sp.next;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1787
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1788
            if (sp == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1789
                error("unmatched.endtag", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1790
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1791
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1792
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1793
            // People put font ending tags in the darndest places.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1794
            // Don't close other contexts based on them being between
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1795
            // a font tag and the corresponding end tag.  Instead,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1796
            // ignore the end tag like it doesn't exist and allow the end
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1797
            // of the document to close us out.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1798
            String elemName = elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1799
            if (stack != sp &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1800
                (elemName.equals("font") ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1801
                 elemName.equals("center"))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1802
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1803
                // Since closing out a center tag can have real weird
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1804
                // effects on the formatting,  make sure that tags
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1805
                // for which omitting an end tag is legitimate
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1806
                // get closed out.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1807
                //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1808
                if (elemName.equals("center")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1809
                    while(stack.elem.omitEnd() && stack != sp) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1810
                        endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1811
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1812
                    if (stack.elem == elem) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1813
                        endTag(false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1814
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1815
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1816
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1817
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1818
            // People do the same thing with center tags.  In this
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1819
            // case we would like to close off the center tag but
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1820
            // not necessarily all enclosing tags.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1821
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1822
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1823
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1824
            // end tags
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1825
            while (stack != sp) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1826
                endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1827
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1828
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1829
            endTag(false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1830
            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1831
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1832
          case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1833
            error("eof");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1834
            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1835
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1836
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1837
        // start tag [14] 314:1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1838
        if (!parseIdentifier(true)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1839
            elem = recent;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1840
            if ((ch != '>') || (elem == null)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1841
                error("expected.tagname");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1842
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1843
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1844
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1845
            String elemStr = getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1846
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1847
            if (elemStr.equals("image")) {
438
2ae294e4518c 6613529: Avoid duplicate object creation within JDK packages
dav
parents: 2
diff changeset
  1848
                elemStr = "img";
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1849
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1850
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1851
            /* determine if this element is part of the dtd. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1852
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1853
            if (!dtd.elementExists(elemStr)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1854
                //              parseInvalidTag();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1855
                error("tag.unrecognized ", elemStr);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1856
                elem = dtd.getElement("unknown");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1857
                elem.name = elemStr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1858
                unknown = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1859
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1860
                elem = dtd.getElement(elemStr);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1861
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1862
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1863
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1864
        // Parse attributes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1865
        parseAttributeSpecificationList(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1866
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1867
        switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1868
          case '/':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1869
            net = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1870
          case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1871
            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1872
            if (ch == '>' && net) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1873
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1874
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1875
          case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1876
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1877
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1878
          default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1879
            error("expected", "'>'");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1880
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1881
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1882
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1883
        if (!strict) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1884
          if (elem.getName().equals("script")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1885
            error("javascript.unsupported");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1886
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1887
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1888
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1889
        // ignore RE after start tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1890
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1891
        if (!elem.isEmpty())  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1892
            if (ch == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1893
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1894
                lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1895
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1896
            } else if (ch == '\r') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1897
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1898
                if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1899
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1900
                    crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1901
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1902
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1903
                    crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1904
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1905
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1906
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1907
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1908
        // ensure a legal context for the tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1909
        TagElement tag = makeTag(elem, false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1910
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1911
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1912
        /** In dealing with forms, we have decided to treat
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1913
            them as legal in any context.  Also, even though
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1914
            they do have a start and an end tag, we will
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1915
            not put this tag on the stack.  This is to deal
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1916
            several pages in the web oasis that choose to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1917
            start and end forms in any possible location. **/
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1918
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1919
        /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1920
        if (!strict && elem.getName().equals("form")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1921
            if (lastFormSent == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1922
                lastFormSent = tag;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1923
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1924
                handleEndTag(lastFormSent);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1925
                lastFormSent = tag;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1926
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1927
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1928
        */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1929
            // Similarly, if a tag is unknown, we will apply
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1930
            // no legalTagContext logic to it.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1931
            //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1932
            if (!unknown) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1933
                legalTagContext(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1934
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1935
                // If skip tag is true,  this implies that
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1936
                // the tag was illegal and that the error
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1937
                // recovery strategy adopted is to ignore
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1938
                // the tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1939
                if (!strict && skipTag) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1940
                    skipTag = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1941
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1942
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1943
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1944
            /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1945
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1946
            */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1947
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1948
        startTag(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1949
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1950
        if (!elem.isEmpty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1951
            switch (elem.getType()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1952
              case CDATA:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1953
                parseLiteral(false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1954
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1955
              case RCDATA:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1956
                parseLiteral(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1957
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1958
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1959
                if (stack != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1960
                    stack.net = net;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1961
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1962
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1963
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1964
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1965
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1966
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1967
    // Delimiters and end-tag patterns used while handling <script> content.
    // START_COMMENT / END_COMMENT are the HTML comment delimiters that
    // parseContent() strips from the beginning and end of the script text.
    // SCRIPT_END_TAG and SCRIPT_END_TAG_UPPER_CASE hold the closing tag in
    // lower and upper case; parseScript() compares each input character
    // against both arrays at the same index, so mixed-case spellings of
    // the end tag are recognized as well.
    private static final String START_COMMENT = "<!--";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1968
    private static final String END_COMMENT = "-->";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1969
    private static final char[] SCRIPT_END_TAG = "</script>".toCharArray();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1970
    private static final char[] SCRIPT_END_TAG_UPPER_CASE =
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1971
                                        "</SCRIPT>".toCharArray();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1972
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1973
    void parseScript() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1974
        char[] charsToAdd = new char[SCRIPT_END_TAG.length];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1975
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1976
        /* Here, ch should be the first character after <script> */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1977
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1978
            int i = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1979
            while (i < SCRIPT_END_TAG.length
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1980
                       && (SCRIPT_END_TAG[i] == ch
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1981
                           || SCRIPT_END_TAG_UPPER_CASE[i] == ch)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1982
                charsToAdd[i] = (char) ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1983
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1984
                i++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1985
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1986
            if (i == SCRIPT_END_TAG.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1987
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1988
                /*  '</script>' tag detected */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1989
                /* Here, ch == '>' */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1990
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1991
                /* Here, ch == the first character after </script> */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1992
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1993
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1994
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1995
                /* To account for extra read()'s that happened */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1996
                for (int j = 0; j < i; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1997
                    addString(charsToAdd[j]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1998
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1999
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2000
                switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2001
                case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2002
                    error("eof.script");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2003
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2004
                case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2005
                    ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2006
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2007
                    lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2008
                    addString('\n');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2009
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2010
                case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2011
                    ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2012
                    if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2013
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2014
                        crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2015
                    } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2016
                        crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2017
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2018
                    addString('\n');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2019
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2020
                default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2021
                    addString(ch);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2022
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2023
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2024
                } // switch
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2025
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2026
        } // while
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2027
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2028
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2029
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2030
     * Parse Content. [24] 320:1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2031
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2032
    void parseContent() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2033
        Thread curThread = Thread.currentThread();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2034
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2035
        for (;;) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2036
            if (curThread.isInterrupted()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2037
                curThread.interrupt(); // resignal the interrupt
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2038
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2039
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2040
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2041
            int c = ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2042
            currentBlockStartPos = currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2043
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2044
            if (recent == dtd.script) { // means: if after starting <script> tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2045
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2046
                /* Here, ch has to be the first character after <script> */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2047
                parseScript();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2048
                last = makeTag(dtd.getElement("comment"), true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2049
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2050
                /* Remove leading and trailing HTML comment declarations */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2051
                String str = new String(getChars(0)).trim();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2052
                int minLength = START_COMMENT.length() + END_COMMENT.length();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2053
                if (str.startsWith(START_COMMENT) && str.endsWith(END_COMMENT)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2054
                       && str.length() >= (minLength)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2055
                    str = str.substring(START_COMMENT.length(),
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2056
                                      str.length() - END_COMMENT.length());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2057
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2058
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2059
                /* Handle resulting chars as comment */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2060
                handleComment(str.toCharArray());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2061
                endTag(false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2062
                lastBlockStartPos = currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2063
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2064
                switch (c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2065
                  case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2066
                    parseTag();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2067
                    lastBlockStartPos = currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2068
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2069
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2070
                  case '/':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2071
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2072
                    if ((stack != null) && stack.net) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2073
                        // null end tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2074
                        endTag(false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2075
                        continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2076
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2077
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2078
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2079
                  case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2080
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2081
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2082
                  case '&':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2083
                    if (textpos == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2084
                        if (!legalElementContext(dtd.pcdata)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2085
                            error("unexpected.pcdata");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2086
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2087
                        if (last.breaksFlow()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2088
                            space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2089
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2090
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2091
                    char data[] = parseEntityReference();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2092
                    if (textpos + data.length + 1 > text.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2093
                        char newtext[] = new char[Math.max(textpos + data.length + 128, text.length * 2)];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2094
                        System.arraycopy(text, 0, newtext, 0, text.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2095
                        text = newtext;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2096
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2097
                    if (space) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2098
                        space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2099
                        text[textpos++] = ' ';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2100
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2101
                    System.arraycopy(data, 0, text, textpos, data.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2102
                    textpos += data.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2103
                    ignoreSpace = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2104
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2105
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2106
                  case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2107
                    ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2108
                    lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2109
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2110
                    if ((stack != null) && stack.pre) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2111
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2112
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2113
                    if (textpos == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2114
                        lastBlockStartPos = currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2115
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2116
                    if (!ignoreSpace) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2117
                        space = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2118
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2119
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2120
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2121
                  case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2122
                    ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2123
                    c = '\n';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2124
                    if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2125
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2126
                        crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2127
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2128
                    else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2129
                        crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2130
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2131
                    if ((stack != null) && stack.pre) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2132
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2133
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2134
                    if (textpos == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2135
                        lastBlockStartPos = currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2136
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2137
                    if (!ignoreSpace) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2138
                        space = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2139
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2140
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2141
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2142
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2143
                  case '\t':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2144
                  case ' ':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2145
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2146
                    if ((stack != null) && stack.pre) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2147
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2148
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2149
                    if (textpos == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2150
                        lastBlockStartPos = currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2151
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2152
                    if (!ignoreSpace) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2153
                        space = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2154
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2155
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2156
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2157
                  default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2158
                    if (textpos == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2159
                        if (!legalElementContext(dtd.pcdata)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2160
                            error("unexpected.pcdata");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2161
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2162
                        if (last.breaksFlow()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2163
                            space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2164
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2165
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2166
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2167
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2168
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2169
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2170
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2171
            // enlarge buffer if needed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2172
            if (textpos + 2 > text.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2173
                char newtext[] = new char[text.length + 128];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2174
                System.arraycopy(text, 0, newtext, 0, text.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2175
                text = newtext;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2176
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2177
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2178
            // output pending space
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2179
            if (space) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2180
                if (textpos == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2181
                    lastBlockStartPos--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2182
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2183
                text[textpos++] = ' ';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2184
                space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2185
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2186
            text[textpos++] = (char)c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2187
            ignoreSpace = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2188
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2189
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2190
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2191
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2192
     * Returns the end of line string. This will return the end of line
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2193
     * string that has been encountered the most, one of \r, \n or \r\n.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2194
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2195
    String getEndOfLineString() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2196
        if (crlfCount >= crCount) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2197
            if (lfCount >= crlfCount) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2198
                return "\n";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2199
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2200
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2201
                return "\r\n";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2202
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2203
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2204
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2205
            if (crCount > lfCount) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2206
                return "\r";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2207
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2208
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2209
                return "\n";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2210
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2211
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2212
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2213
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2214
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2215
     * Parse an HTML stream, given a DTD.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2216
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2217
    public synchronized void parse(Reader in) throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2218
        this.in = in;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2219
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2220
        this.ln = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2221
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2222
        seenHtml = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2223
        seenHead = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2224
        seenBody = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2225
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2226
        crCount = lfCount = crlfCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2227
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2228
        try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2229
            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2230
            text = new char[1024];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2231
            str = new char[128];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2232
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2233
            parseContent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2234
            // NOTE: interruption may have occurred.  Control flows out
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2235
            // of here normally.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2236
            while (stack != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2237
                endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2238
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2239
            in.close();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2240
        } catch (IOException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2241
            errorContext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2242
            error("ioexception");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2243
            throw e;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2244
        } catch (Exception e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2245
            errorContext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2246
            error("exception", e.getClass().getName(), e.getMessage());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2247
            e.printStackTrace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2248
        } catch (ThreadDeath e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2249
            errorContext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2250
            error("terminated");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2251
            e.printStackTrace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2252
            throw e;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2253
        } finally {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2254
            for (; stack != null ; stack = stack.next) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2255
                handleEndTag(stack.tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2256
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2257
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2258
            text = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2259
            str = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2260
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2261
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2262
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2263
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2264
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2265
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2266
     * Input cache.  This is much faster than calling down to a synchronized
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2267
     * method of BufferedReader for each byte.  Measurements done 5/30/97
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2268
     * show that there's no point in having a bigger buffer:  Increasing
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2269
     * the buffer to 8192 had no measurable impact for a program discarding
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2270
     * one character at a time (reading from an http URL to a local machine).
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2271
     * NOTE: If the current encoding is bogus, and we read too much
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2272
     * (past the content-type) we may suffer a MalformedInputException. For
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2273
     * this reason the initial size is 1 and when the body is encountered the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2274
     * size is adjusted to 256.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2275
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2276
    private char buf[] = new char[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2277
    private int pos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2278
    private int len;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2279
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2280
        tracks position relative to the beginning of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2281
        document.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2282
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2283
    private int currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2284
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2285
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2286
    private final int readCh() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2287
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2288
        if (pos >= len) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2289
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2290
            // This loop allows us to ignore interrupts if the flag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2291
            // says so
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2292
            for (;;) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2293
                try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2294
                    len = in.read(buf);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2295
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2296
                } catch (InterruptedIOException ex) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2297
                    throw ex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2298
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2299
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2300
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2301
            if (len <= 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2302
                return -1;      // eof
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2303
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2304
            pos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2305
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2306
        ++currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2307
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2308
        return buf[pos++];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2309
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2310
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2311
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2312
    protected int getCurrentPos() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2313
        return currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2314
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2315
}