jdk/src/share/classes/javax/swing/text/html/parser/Parser.java
author malenkov
Tue, 29 Oct 2013 17:01:06 +0400
changeset 21278 ef8a3a2a72f2
parent 20169 d7fa6d7586c9
child 23262 41f2413bba45
permissions -rw-r--r--
8022746: List of spelling errors in API doc Reviewed-by: alexsch, smarks
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     1
/*
17678
ec24ad8455ec 7011777: JDK 6 parses html text with script tags within comments differently from previous releases
mcherkas
parents: 14309
diff changeset
     2
 * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load
duke
parents:
diff changeset
     4
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load
duke
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 1299
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 1299
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    10
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load
duke
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load
duke
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
90ce3da70b43 Initial load
duke
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load
duke
parents:
diff changeset
    15
 * accompanied this code).
90ce3da70b43 Initial load
duke
parents:
diff changeset
    16
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load
duke
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    20
 *
5506
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 1299
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 1299
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
202f599c92aa 6943119: Rebrand source copyright notices
ohair
parents: 1299
diff changeset
    23
 * questions.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    24
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    25
90ce3da70b43 Initial load
duke
parents:
diff changeset
    26
package javax.swing.text.html.parser;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    27
90ce3da70b43 Initial load
duke
parents:
diff changeset
    28
import javax.swing.text.SimpleAttributeSet;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    29
import javax.swing.text.html.HTML;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    30
import javax.swing.text.ChangedCharSetException;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    31
import java.io.*;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    32
import java.util.Hashtable;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    33
import java.util.Properties;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    34
import java.util.Vector;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    35
import java.util.Enumeration;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    36
import java.net.URL;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    37
90ce3da70b43 Initial load
duke
parents:
diff changeset
    38
import sun.misc.MessageUtils;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    39
90ce3da70b43 Initial load
duke
parents:
diff changeset
    40
/**
90ce3da70b43 Initial load
duke
parents:
diff changeset
    41
 * A simple DTD-driven HTML parser. The parser reads an
90ce3da70b43 Initial load
duke
parents:
diff changeset
    42
 * HTML file from an InputStream and calls various methods
90ce3da70b43 Initial load
duke
parents:
diff changeset
    43
 * (which should be overridden in a subclass) when tags and
90ce3da70b43 Initial load
duke
parents:
diff changeset
    44
 * data are encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    45
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    46
 * Unfortunately there are many badly implemented HTML parsers
90ce3da70b43 Initial load
duke
parents:
diff changeset
    47
 * out there, and as a result there are many badly formatted
90ce3da70b43 Initial load
duke
parents:
diff changeset
    48
 * HTML files. This parser attempts to parse most HTML files.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    49
 * This means that the implementation sometimes deviates from
90ce3da70b43 Initial load
duke
parents:
diff changeset
    50
 * the SGML specification in favor of HTML.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    51
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    52
 * The parser treats \r and \r\n as \n. Newlines after start tags
90ce3da70b43 Initial load
duke
parents:
diff changeset
    53
 * and before end tags are ignored just as specified in the SGML/HTML
90ce3da70b43 Initial load
duke
parents:
diff changeset
    54
 * specification.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    55
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    56
 * The html spec does not specify how spaces are to be coalesced very well.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    57
 * Specifically, the following scenarios are not discussed (note that a
90ce3da70b43 Initial load
duke
parents:
diff changeset
    58
 * space should be used here, but I am using &amp;nbsp to force the space to
90ce3da70b43 Initial load
duke
parents:
diff changeset
    59
 * be displayed):
90ce3da70b43 Initial load
duke
parents:
diff changeset
    60
 * <p>
20169
d7fa6d7586c9 8025085: [javadoc] some errors in javax/swing
yan
parents: 17678
diff changeset
    61
 * '&lt;b&gt;blah&nbsp;&lt;i&gt;&nbsp;&lt;strike&gt;&nbsp;foo' which can be treated as:
d7fa6d7586c9 8025085: [javadoc] some errors in javax/swing
yan
parents: 17678
diff changeset
    62
 * '&lt;b&gt;blah&nbsp;&lt;i&gt;&lt;strike&gt;foo'
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    63
 * <p>as well as:
20169
d7fa6d7586c9 8025085: [javadoc] some errors in javax/swing
yan
parents: 17678
diff changeset
    64
 * '&lt;p&gt;&lt;a href="xx"&gt;&nbsp;&lt;em&gt;Using&lt;/em&gt;&lt;/a&gt;&lt;/p&gt;'
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    65
 * which appears to be treated as:
20169
d7fa6d7586c9 8025085: [javadoc] some errors in javax/swing
yan
parents: 17678
diff changeset
    66
 * '&lt;p&gt;&lt;a href="xx"&gt;&lt;em&gt;Using&lt;/em&gt;&lt;/a&gt;&lt;/p&gt;'
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
    67
 * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
    68
 * If <code>strict</code> is false, when a tag that breaks flow,
90ce3da70b43 Initial load
duke
parents:
diff changeset
    69
 * (<code>TagElement.breaksFlow</code>) or trailing whitespace is
90ce3da70b43 Initial load
duke
parents:
diff changeset
    70
 * encountered, all whitespace will be ignored until a non whitespace
90ce3da70b43 Initial load
duke
parents:
diff changeset
    71
 * character is encountered. This appears to give behavior closer to
90ce3da70b43 Initial load
duke
parents:
diff changeset
    72
 * the popular browsers.
90ce3da70b43 Initial load
duke
parents:
diff changeset
    73
 *
90ce3da70b43 Initial load
duke
parents:
diff changeset
    74
 * @see DTD
90ce3da70b43 Initial load
duke
parents:
diff changeset
    75
 * @see TagElement
90ce3da70b43 Initial load
duke
parents:
diff changeset
    76
 * @see SimpleAttributeSet
90ce3da70b43 Initial load
duke
parents:
diff changeset
    77
 * @author Arthur van Hoff
90ce3da70b43 Initial load
duke
parents:
diff changeset
    78
 * @author Sunita Mani
90ce3da70b43 Initial load
duke
parents:
diff changeset
    79
 */
90ce3da70b43 Initial load
duke
parents:
diff changeset
    80
public
90ce3da70b43 Initial load
duke
parents:
diff changeset
    81
class Parser implements DTDConstants {
90ce3da70b43 Initial load
duke
parents:
diff changeset
    82
90ce3da70b43 Initial load
duke
parents:
diff changeset
    83
    private char text[] = new char[1024];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    84
    private int textpos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    85
    private TagElement last;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    86
    private boolean space;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    87
90ce3da70b43 Initial load
duke
parents:
diff changeset
    88
    private char str[] = new char[128];
90ce3da70b43 Initial load
duke
parents:
diff changeset
    89
    private int strpos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    90
90ce3da70b43 Initial load
duke
parents:
diff changeset
    91
    protected DTD dtd = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    92
90ce3da70b43 Initial load
duke
parents:
diff changeset
    93
    private int ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    94
    private int ln;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    95
    private Reader in;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    96
90ce3da70b43 Initial load
duke
parents:
diff changeset
    97
    private Element recent;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    98
    private TagStack stack;
90ce3da70b43 Initial load
duke
parents:
diff changeset
    99
    private boolean skipTag = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   100
    private TagElement lastFormSent = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   101
    private SimpleAttributeSet attributes = new SimpleAttributeSet();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   102
90ce3da70b43 Initial load
duke
parents:
diff changeset
   103
    // State for <html>, <head> and <body>.  Since people like to slap
90ce3da70b43 Initial load
duke
parents:
diff changeset
   104
    // together HTML documents without thinking, occasionally they
90ce3da70b43 Initial load
duke
parents:
diff changeset
   105
    // have multiple instances of these tags.  These booleans track
90ce3da70b43 Initial load
duke
parents:
diff changeset
   106
    // the first sightings of these tags so they can be safely ignored
90ce3da70b43 Initial load
duke
parents:
diff changeset
   107
    // by the parser if repeated.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   108
    private boolean seenHtml = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   109
    private boolean seenHead = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   110
    private boolean seenBody = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   111
90ce3da70b43 Initial load
duke
parents:
diff changeset
   112
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   113
     * The html spec does not specify how spaces are coalesced very well.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   114
     * If strict == false, ignoreSpace is used to try and mimic the behavior
90ce3da70b43 Initial load
duke
parents:
diff changeset
   115
     * of the popular browsers.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   116
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   117
     * The problematic scenarios are:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   118
     * '&lt;b>blah &lt;i> &lt;strike> foo' which can be treated as:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   119
     * '&lt;b>blah &lt;i>&lt;strike>foo'
90ce3da70b43 Initial load
duke
parents:
diff changeset
   120
     * as well as:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   121
     * '&lt;p>&lt;a href="xx"> &lt;em>Using&lt;/em>&lt;/a>&lt;/p>'
90ce3da70b43 Initial load
duke
parents:
diff changeset
   122
     * which appears to be treated as:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   123
     * '&lt;p>&lt;a href="xx">&lt;em>Using&lt;/em>&lt;/a>&lt;/p>'
90ce3da70b43 Initial load
duke
parents:
diff changeset
   124
     * <p>
90ce3da70b43 Initial load
duke
parents:
diff changeset
   125
     * When a tag that breaks flow, or trailing whitespace is encountered
90ce3da70b43 Initial load
duke
parents:
diff changeset
   126
     * ignoreSpace is set to true. From then on, all whitespace will be
90ce3da70b43 Initial load
duke
parents:
diff changeset
   127
     * ignored.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   128
     * ignoreSpace will be set back to false the first time a
90ce3da70b43 Initial load
duke
parents:
diff changeset
   129
     * non whitespace character is encountered. This appears to give
90ce3da70b43 Initial load
duke
parents:
diff changeset
   130
     * behavior closer to the popular browsers.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   131
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   132
    private boolean ignoreSpace;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   133
90ce3da70b43 Initial load
duke
parents:
diff changeset
   134
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   135
     * This flag determines whether or not the Parser will be strict
90ce3da70b43 Initial load
duke
parents:
diff changeset
   136
     * in enforcing SGML compatibility.  If false, it will be lenient
90ce3da70b43 Initial load
duke
parents:
diff changeset
   137
     * with certain common classes of erroneous HTML constructs.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   138
     * Strict or not, in either case an error will be recorded.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   139
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
   140
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   141
    protected boolean strict = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   142
90ce3da70b43 Initial load
duke
parents:
diff changeset
   143
90ce3da70b43 Initial load
duke
parents:
diff changeset
   144
    /** Number of \r\n's encountered. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   145
    private int crlfCount;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   146
    /** Number of \r's encountered. A \r\n will not increment this. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   147
    private int crCount;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   148
    /** Number of \n's encountered. A \r\n will not increment this. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   149
    private int lfCount;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   150
90ce3da70b43 Initial load
duke
parents:
diff changeset
   151
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   152
    // To correctly identify the start of a tag/comment/text we need two
90ce3da70b43 Initial load
duke
parents:
diff changeset
   153
    // ivars. Two are needed as handleText isn't invoked until the tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
   154
    // after the text has been parsed, that is the parser parses the text,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   155
    // then a tag, then invokes handleText followed by handleStart.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   156
    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   157
    /** The start position of the current block. Block is overloaded here,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   158
     * it really means the current start position for the current comment,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   159
     * tag, text. Use getBlockStartPosition to access this. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   160
    private int currentBlockStartPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   161
    /** Start position of the last block. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   162
    private int lastBlockStartPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   163
90ce3da70b43 Initial load
duke
parents:
diff changeset
   164
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   165
     * array for mapping numeric references in range
90ce3da70b43 Initial load
duke
parents:
diff changeset
   166
     * 130-159 to displayable Unicode characters.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   167
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   168
    private static final char[] cp1252Map = {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   169
        8218,  // &#130;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   170
        402,   // &#131;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   171
        8222,  // &#132;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   172
        8230,  // &#133;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   173
        8224,  // &#134;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   174
        8225,  // &#135;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   175
        710,   // &#136;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   176
        8240,  // &#137;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   177
        352,   // &#138;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   178
        8249,  // &#139;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   179
        338,   // &#140;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   180
        141,   // &#141;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   181
        142,   // &#142;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   182
        143,   // &#143;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   183
        144,   // &#144;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   184
        8216,  // &#145;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   185
        8217,  // &#146;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   186
        8220,  // &#147;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   187
        8221,  // &#148;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   188
        8226,  // &#149;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   189
        8211,  // &#150;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   190
        8212,  // &#151;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   191
        732,   // &#152;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   192
        8482,  // &#153;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   193
        353,   // &#154;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   194
        8250,  // &#155;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   195
        339,   // &#156;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   196
        157,   // &#157;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   197
        158,   // &#158;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   198
        376    // &#159;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   199
    };
90ce3da70b43 Initial load
duke
parents:
diff changeset
   200
90ce3da70b43 Initial load
duke
parents:
diff changeset
   201
    /**
     * Creates a parser driven by the given DTD.
     *
     * @param dtd the DTD stored in this parser's {@code dtd} field
     */
    public Parser(DTD dtd) {
        this.dtd = dtd;
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   204
90ce3da70b43 Initial load
duke
parents:
diff changeset
   205
90ce3da70b43 Initial load
duke
parents:
diff changeset
   206
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   207
     * @return the line number of the line currently being parsed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   208
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   209
    protected int getCurrentLine() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   210
        return ln;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   211
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   212
90ce3da70b43 Initial load
duke
parents:
diff changeset
   213
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   214
     * Returns the start position of the current block. Block is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   215
     * overloaded here, it really means the current start position for
90ce3da70b43 Initial load
duke
parents:
diff changeset
   216
     * the current comment tag, text, block.... This is provided for
90ce3da70b43 Initial load
duke
parents:
diff changeset
   217
     * subclassers that wish to know the start of the current block when
90ce3da70b43 Initial load
duke
parents:
diff changeset
   218
     * called with one of the handleXXX methods.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   219
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   220
    int getBlockStartPosition() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   221
        return Math.max(0, lastBlockStartPos - 1);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   222
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   223
90ce3da70b43 Initial load
duke
parents:
diff changeset
   224
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   225
     * Makes a TagElement.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   226
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   227
    protected TagElement makeTag(Element elem, boolean fictional) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   228
        return new TagElement(elem, fictional);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   229
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   230
90ce3da70b43 Initial load
duke
parents:
diff changeset
   231
    protected TagElement makeTag(Element elem) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   232
        return makeTag(elem, false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   233
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   234
90ce3da70b43 Initial load
duke
parents:
diff changeset
   235
    protected SimpleAttributeSet getAttributes() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   236
        return attributes;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   237
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   238
90ce3da70b43 Initial load
duke
parents:
diff changeset
   239
    protected void flushAttributes() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   240
        attributes.removeAttributes(attributes);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   241
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   242
90ce3da70b43 Initial load
duke
parents:
diff changeset
   243
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   244
     * Called when PCDATA is encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   245
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   246
    protected void handleText(char text[]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   247
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   248
90ce3da70b43 Initial load
duke
parents:
diff changeset
   249
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   250
     * Called when an HTML title tag is encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   251
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   252
    protected void handleTitle(char text[]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   253
        // default behavior is to call handleText. Subclasses
90ce3da70b43 Initial load
duke
parents:
diff changeset
   254
        // can override if necessary.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   255
        handleText(text);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   256
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   257
90ce3da70b43 Initial load
duke
parents:
diff changeset
   258
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   259
     * Called when an HTML comment is encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   260
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   261
    protected void handleComment(char text[]) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   262
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   263
90ce3da70b43 Initial load
duke
parents:
diff changeset
   264
    protected void handleEOFInComment() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   265
        // We've reached EOF.  Our recovery strategy is to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   266
        // see if we have more than one line in the comment;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   267
        // if so, we pretend that the comment was an unterminated
90ce3da70b43 Initial load
duke
parents:
diff changeset
   268
        // single line comment, and reparse the lines after the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   269
        // first line as normal HTML content.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   270
90ce3da70b43 Initial load
duke
parents:
diff changeset
   271
        int commentEndPos = strIndexOf('\n');
90ce3da70b43 Initial load
duke
parents:
diff changeset
   272
        if (commentEndPos >= 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   273
            handleComment(getChars(0, commentEndPos));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   274
            try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   275
                in.close();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   276
                in = new CharArrayReader(getChars(commentEndPos + 1));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   277
                ch = '>';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   278
            } catch (IOException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   279
                error("ioexception");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   280
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   281
90ce3da70b43 Initial load
duke
parents:
diff changeset
   282
            resetStrBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   283
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   284
            // no newline, so signal an error
90ce3da70b43 Initial load
duke
parents:
diff changeset
   285
            error("eof.comment");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   286
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   287
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   288
90ce3da70b43 Initial load
duke
parents:
diff changeset
   289
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   290
     * Called when an empty tag is encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   291
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   292
    protected void handleEmptyTag(TagElement tag) throws ChangedCharSetException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   293
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   294
90ce3da70b43 Initial load
duke
parents:
diff changeset
   295
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   296
     * Called when a start tag is encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   297
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   298
    protected void handleStartTag(TagElement tag) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   299
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   300
90ce3da70b43 Initial load
duke
parents:
diff changeset
   301
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   302
     * Called when an end tag is encountered.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   303
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   304
    protected void handleEndTag(TagElement tag) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   305
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   306
90ce3da70b43 Initial load
duke
parents:
diff changeset
   307
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   308
     * An error has occurred.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   309
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   310
    protected void handleError(int ln, String msg) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   311
        /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   312
        Thread.dumpStack();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   313
        System.out.println("**** " + stack);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   314
        System.out.println("line " + ln + ": error: " + msg);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   315
        System.out.println();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   316
        */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   317
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   318
90ce3da70b43 Initial load
duke
parents:
diff changeset
   319
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   320
     * Output text.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   321
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   322
    void handleText(TagElement tag) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   323
        if (tag.breaksFlow()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   324
            space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   325
            if (!strict) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   326
                ignoreSpace = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   327
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   328
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   329
        if (textpos == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   330
            if ((!space) || (stack == null) || last.breaksFlow() ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   331
                !stack.advance(dtd.pcdata)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   332
                last = tag;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   333
                space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   334
                lastBlockStartPos = currentBlockStartPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   335
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   336
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   337
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   338
        if (space) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   339
            if (!ignoreSpace) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   340
                // enlarge buffer if needed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   341
                if (textpos + 1 > text.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   342
                    char newtext[] = new char[text.length + 200];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   343
                    System.arraycopy(text, 0, newtext, 0, text.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   344
                    text = newtext;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   345
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   346
90ce3da70b43 Initial load
duke
parents:
diff changeset
   347
                // output pending space
90ce3da70b43 Initial load
duke
parents:
diff changeset
   348
                text[textpos++] = ' ';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   349
                if (!strict && !tag.getElement().isEmpty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   350
                    ignoreSpace = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   351
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   352
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   353
            space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   354
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   355
        char newtext[] = new char[textpos];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   356
        System.arraycopy(text, 0, newtext, 0, textpos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   357
        // Handles cases of bad html where the title tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
   358
        // was getting lost when we did error recovery.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   359
        if (tag.getElement().getName().equals("title")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   360
            handleTitle(newtext);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   361
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   362
            handleText(newtext);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   363
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   364
        lastBlockStartPos = currentBlockStartPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   365
        textpos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   366
        last = tag;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   367
        space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   368
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   369
90ce3da70b43 Initial load
duke
parents:
diff changeset
   370
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   371
     * Invoke the error handler.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   372
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   373
    protected void error(String err, String arg1, String arg2,
90ce3da70b43 Initial load
duke
parents:
diff changeset
   374
        String arg3) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   375
        handleError(ln, err + " " + arg1 + " " + arg2 + " " + arg3);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   376
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   377
90ce3da70b43 Initial load
duke
parents:
diff changeset
   378
    protected void error(String err, String arg1, String arg2) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   379
        error(err, arg1, arg2, "?");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   380
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   381
    protected void error(String err, String arg1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   382
        error(err, arg1, "?", "?");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   383
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   384
    protected void error(String err) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   385
        error(err, "?", "?", "?");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   386
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   387
90ce3da70b43 Initial load
duke
parents:
diff changeset
   388
90ce3da70b43 Initial load
duke
parents:
diff changeset
   389
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   390
     * Handle a start tag. The new tag is pushed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   391
     * onto the tag stack. The attribute list is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   392
     * checked for required attributes.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   393
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   394
    protected void startTag(TagElement tag) throws ChangedCharSetException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   395
        Element elem = tag.getElement();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   396
90ce3da70b43 Initial load
duke
parents:
diff changeset
   397
        // If the tag is an empty tag and texpos != 0
90ce3da70b43 Initial load
duke
parents:
diff changeset
   398
        // this implies that there is text before the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   399
        // start tag that needs to be processed before
90ce3da70b43 Initial load
duke
parents:
diff changeset
   400
        // handling the tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   401
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   402
        if (!elem.isEmpty() ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   403
                    ((last != null) && !last.breaksFlow()) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   404
                    (textpos != 0)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   405
            handleText(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   406
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   407
            // this variable gets updated in handleText().
90ce3da70b43 Initial load
duke
parents:
diff changeset
   408
            // Since in this case we do not call handleText()
90ce3da70b43 Initial load
duke
parents:
diff changeset
   409
            // we need to update it here.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   410
            //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   411
            last = tag;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   412
            // Note that we should really check last.breakFlows before
90ce3da70b43 Initial load
duke
parents:
diff changeset
   413
            // assuming this should be false.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   414
            space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   415
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   416
        lastBlockStartPos = currentBlockStartPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   417
90ce3da70b43 Initial load
duke
parents:
diff changeset
   418
        // check required attributes
90ce3da70b43 Initial load
duke
parents:
diff changeset
   419
        for (AttributeList a = elem.atts ; a != null ; a = a.next) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   420
            if ((a.modifier == REQUIRED) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   421
                ((attributes.isEmpty()) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   422
                 ((!attributes.isDefined(a.name)) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   423
                  (!attributes.isDefined(HTML.getAttributeKey(a.name)))))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   424
                error("req.att ", a.getName(), elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   425
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   426
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   427
90ce3da70b43 Initial load
duke
parents:
diff changeset
   428
        if (elem.isEmpty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   429
            handleEmptyTag(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   430
            /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
   431
        } else if (elem.getName().equals("form")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   432
            handleStartTag(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   433
            */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   434
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   435
            recent = elem;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   436
            stack = new TagStack(tag, stack);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   437
            handleStartTag(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   438
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   439
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   440
90ce3da70b43 Initial load
duke
parents:
diff changeset
   441
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   442
     * Handle an end tag. The end tag is popped
90ce3da70b43 Initial load
duke
parents:
diff changeset
   443
     * from the tag stack.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   444
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   445
    protected void endTag(boolean omitted) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   446
        handleText(stack.tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   447
90ce3da70b43 Initial load
duke
parents:
diff changeset
   448
        if (omitted && !stack.elem.omitEnd()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   449
            error("end.missing", stack.elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   450
        } else if (!stack.terminate()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   451
            error("end.unexpected", stack.elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   452
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   453
90ce3da70b43 Initial load
duke
parents:
diff changeset
   454
        // handle the tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
   455
        handleEndTag(stack.tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   456
        stack = stack.next;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   457
        recent = (stack != null) ? stack.elem : null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   458
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   459
90ce3da70b43 Initial load
duke
parents:
diff changeset
   460
90ce3da70b43 Initial load
duke
parents:
diff changeset
   461
    boolean ignoreElement(Element elem) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   462
90ce3da70b43 Initial load
duke
parents:
diff changeset
   463
        String stackElement = stack.elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   464
        String elemName = elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   465
        /* We ignore all elements that are not valid in the context of
90ce3da70b43 Initial load
duke
parents:
diff changeset
   466
           a table except <td>, <th> (these we handle in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   467
           legalElementContext()) and #pcdata.  We also ignore the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   468
           <font> tag in the context of <ul> and <ol> We additonally
90ce3da70b43 Initial load
duke
parents:
diff changeset
   469
           ignore the <meta> and the <style> tag if the body tag has
90ce3da70b43 Initial load
duke
parents:
diff changeset
   470
           been seen. **/
90ce3da70b43 Initial load
duke
parents:
diff changeset
   471
        if ((elemName.equals("html") && seenHtml) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   472
            (elemName.equals("head") && seenHead) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   473
            (elemName.equals("body") && seenBody)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   474
            return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   475
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   476
        if (elemName.equals("dt") || elemName.equals("dd")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   477
            TagStack s = stack;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   478
            while (s != null && !s.elem.getName().equals("dl")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   479
                s = s.next;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   480
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   481
            if (s == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   482
                return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   483
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   484
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   485
90ce3da70b43 Initial load
duke
parents:
diff changeset
   486
        if (((stackElement.equals("table")) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   487
             (!elemName.equals("#pcdata")) && (!elemName.equals("input"))) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   488
            ((elemName.equals("font")) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   489
             (stackElement.equals("ul") || stackElement.equals("ol"))) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   490
            (elemName.equals("meta") && stack != null) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   491
            (elemName.equals("style") && seenBody) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   492
            (stackElement.equals("table") && elemName.equals("a"))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   493
            return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   494
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   495
        return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   496
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   497
90ce3da70b43 Initial load
duke
parents:
diff changeset
   498
90ce3da70b43 Initial load
duke
parents:
diff changeset
   499
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   500
     * Marks the first time a tag has been seen in a document
90ce3da70b43 Initial load
duke
parents:
diff changeset
   501
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   502
90ce3da70b43 Initial load
duke
parents:
diff changeset
   503
    protected void markFirstTime(Element elem) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   504
        String elemName = elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   505
        if (elemName.equals("html")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   506
            seenHtml = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   507
        } else if (elemName.equals("head")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   508
            seenHead = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   509
        } else if (elemName.equals("body")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   510
            if (buf.length == 1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   511
                // Refer to note in definition of buf for details on this.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   512
                char[] newBuf = new char[256];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   513
90ce3da70b43 Initial load
duke
parents:
diff changeset
   514
                newBuf[0] = buf[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   515
                buf = newBuf;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   516
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   517
            seenBody = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   518
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   519
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   520
90ce3da70b43 Initial load
duke
parents:
diff changeset
   521
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   522
     * Create a legal content for an element.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   523
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   524
    boolean legalElementContext(Element elem) throws ChangedCharSetException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   525
90ce3da70b43 Initial load
duke
parents:
diff changeset
   526
        // System.out.println("-- legalContext -- " + elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   527
90ce3da70b43 Initial load
duke
parents:
diff changeset
   528
        // Deal with the empty stack
90ce3da70b43 Initial load
duke
parents:
diff changeset
   529
        if (stack == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   530
            // System.out.println("-- stack is empty");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   531
            if (elem != dtd.html) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   532
                // System.out.println("-- pushing html");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   533
                startTag(makeTag(dtd.html, true));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   534
                return legalElementContext(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   535
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   536
            return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   537
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   538
90ce3da70b43 Initial load
duke
parents:
diff changeset
   539
        // Is it allowed in the current context
90ce3da70b43 Initial load
duke
parents:
diff changeset
   540
        if (stack.advance(elem)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   541
            // System.out.println("-- legal context");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   542
            markFirstTime(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   543
            return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   544
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   545
        boolean insertTag = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   546
90ce3da70b43 Initial load
duke
parents:
diff changeset
   547
        // The use of all error recovery strategies are contingent
90ce3da70b43 Initial load
duke
parents:
diff changeset
   548
        // on the value of the strict property.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   549
        //
21278
ef8a3a2a72f2 8022746: List of spelling errors in API doc
malenkov
parents: 20169
diff changeset
   550
        // These are commonly occurring errors.  if insertTag is true,
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   551
        // then we want to adopt an error recovery strategy that
90ce3da70b43 Initial load
duke
parents:
diff changeset
   552
        // involves attempting to insert an additional tag to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   553
        // legalize the context.  The two errors addressed here
90ce3da70b43 Initial load
duke
parents:
diff changeset
   554
        // are:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   555
        // 1) when a <td> or <th> is seen soon after a <table> tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   556
        //    In this case we insert a <tr>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   557
        // 2) when any other tag apart from a <tr> is seen
90ce3da70b43 Initial load
duke
parents:
diff changeset
   558
        //    in the context of a <tr>.  In this case we would
90ce3da70b43 Initial load
duke
parents:
diff changeset
   559
        //    like to add a <td>.  If a <tr> is seen within a
90ce3da70b43 Initial load
duke
parents:
diff changeset
   560
        //    <tr> context, then we will close out the current
90ce3da70b43 Initial load
duke
parents:
diff changeset
   561
        //    <tr>.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   562
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   563
        // This insertion strategy is handled later in the method.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   564
        // The reason for checking this now, is that in other cases
90ce3da70b43 Initial load
duke
parents:
diff changeset
   565
        // we would like to apply other error recovery strategies for example
90ce3da70b43 Initial load
duke
parents:
diff changeset
   566
        // ignoring tags.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   567
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   568
        // In certain cases it is better to ignore a tag than try to
90ce3da70b43 Initial load
duke
parents:
diff changeset
   569
        // fix the situation.  So the first test is to see if this
90ce3da70b43 Initial load
duke
parents:
diff changeset
   570
        // is what we need to do.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   571
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   572
        String stackElemName = stack.elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   573
        String elemName = elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   574
90ce3da70b43 Initial load
duke
parents:
diff changeset
   575
90ce3da70b43 Initial load
duke
parents:
diff changeset
   576
        if (!strict &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   577
            ((stackElemName.equals("table") && elemName.equals("td")) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   578
             (stackElemName.equals("table") && elemName.equals("th")) ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   579
             (stackElemName.equals("tr") && !elemName.equals("tr")))){
90ce3da70b43 Initial load
duke
parents:
diff changeset
   580
             insertTag = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   581
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   582
90ce3da70b43 Initial load
duke
parents:
diff changeset
   583
90ce3da70b43 Initial load
duke
parents:
diff changeset
   584
        if (!strict && !insertTag && (stack.elem.getName() != elem.getName() ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   585
                                      elem.getName().equals("body"))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   586
            if (skipTag = ignoreElement(elem)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   587
                error("tag.ignore", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   588
                return skipTag;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   589
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   590
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   591
90ce3da70b43 Initial load
duke
parents:
diff changeset
   592
        // Check for anything after the start of the table besides tr, td, th
90ce3da70b43 Initial load
duke
parents:
diff changeset
   593
        // or caption, and if those aren't there, insert the <tr> and call
90ce3da70b43 Initial load
duke
parents:
diff changeset
   594
        // legalElementContext again.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   595
        if (!strict && stackElemName.equals("table") &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   596
            !elemName.equals("tr") && !elemName.equals("td") &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   597
            !elemName.equals("th") && !elemName.equals("caption")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   598
            Element e = dtd.getElement("tr");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   599
            TagElement t = makeTag(e, true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   600
            legalTagContext(t);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   601
            startTag(t);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   602
            error("start.missing", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   603
            return legalElementContext(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   604
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   605
90ce3da70b43 Initial load
duke
parents:
diff changeset
   606
        // They try to find a legal context by checking if the current
90ce3da70b43 Initial load
duke
parents:
diff changeset
   607
        // tag is valid in an enclosing context.  If so
90ce3da70b43 Initial load
duke
parents:
diff changeset
   608
        // close out the tags by outputting end tags and then
21278
ef8a3a2a72f2 8022746: List of spelling errors in API doc
malenkov
parents: 20169
diff changeset
   609
        // insert the current tag.  If the tags that are
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   610
        // being closed out do not have an optional end tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
   611
        // specification in the DTD then an html error is
90ce3da70b43 Initial load
duke
parents:
diff changeset
   612
        // reported.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   613
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   614
        if (!insertTag && stack.terminate() && (!strict || stack.elem.omitEnd())) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   615
            for (TagStack s = stack.next ; s != null ; s = s.next) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   616
                if (s.advance(elem)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   617
                    while (stack != s) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   618
                        endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   619
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   620
                    return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   621
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   622
                if (!s.terminate() || (strict && !s.elem.omitEnd())) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   623
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   624
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   625
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   626
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   627
90ce3da70b43 Initial load
duke
parents:
diff changeset
   628
        // Check if we know what tag is expected next.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   629
        // If so insert the tag.  Report an error if the
90ce3da70b43 Initial load
duke
parents:
diff changeset
   630
        // tag does not have its start tag spec in the DTD as optional.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   631
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   632
        Element next = stack.first();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   633
        if (next != null && (!strict || next.omitStart()) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
   634
           !(next==dtd.head && elem==dtd.pcdata) ) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   635
            // System.out.println("-- omitting start tag: " + next);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   636
            TagElement t = makeTag(next, true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   637
            legalTagContext(t);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   638
            startTag(t);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   639
            if (!next.omitStart()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   640
                error("start.missing", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   641
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   642
            return legalElementContext(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   643
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   644
90ce3da70b43 Initial load
duke
parents:
diff changeset
   645
90ce3da70b43 Initial load
duke
parents:
diff changeset
   646
        // Traverse the list of expected elements and determine if adding
90ce3da70b43 Initial load
duke
parents:
diff changeset
   647
        // any of these elements would make for a legal context.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   648
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   649
90ce3da70b43 Initial load
duke
parents:
diff changeset
   650
        if (!strict) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   651
            ContentModel content = stack.contentModel();
1287
a04aca99c77a 6722802: Code improvement and warnings removing from the javax.swing.text package
rupashka
parents: 438
diff changeset
   652
            Vector<Element> elemVec = new Vector<Element>();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   653
            if (content != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   654
                content.getElements(elemVec);
1287
a04aca99c77a 6722802: Code improvement and warnings removing from the javax.swing.text package
rupashka
parents: 438
diff changeset
   655
                for (Element e : elemVec) {
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   656
                    // Ensure that this element has not been included as
90ce3da70b43 Initial load
duke
parents:
diff changeset
   657
                    // part of the exclusions in the DTD.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   658
                    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   659
                    if (stack.excluded(e.getIndex())) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   660
                        continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   661
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   662
90ce3da70b43 Initial load
duke
parents:
diff changeset
   663
                    boolean reqAtts = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   664
90ce3da70b43 Initial load
duke
parents:
diff changeset
   665
                    for (AttributeList a = e.getAttributes(); a != null ; a = a.next) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   666
                        if (a.modifier == REQUIRED) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   667
                            reqAtts = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   668
                            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   669
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   670
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   671
                    // Ensure that no tag that has required attributes
90ce3da70b43 Initial load
duke
parents:
diff changeset
   672
                    // gets inserted.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   673
                    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   674
                    if (reqAtts) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   675
                        continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   676
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   677
90ce3da70b43 Initial load
duke
parents:
diff changeset
   678
                    ContentModel m = e.getContent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   679
                    if (m != null && m.first(elem)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   680
                        // System.out.println("-- adding a legal tag: " + e);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   681
                        TagElement t = makeTag(e, true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   682
                        legalTagContext(t);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   683
                        startTag(t);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   684
                        error("start.missing", e.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   685
                        return legalElementContext(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   686
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   687
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   688
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   689
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   690
90ce3da70b43 Initial load
duke
parents:
diff changeset
   691
        // Check if the stack can be terminated.  If so add the appropriate
90ce3da70b43 Initial load
duke
parents:
diff changeset
   692
        // end tag.  Report an error if the tag being ended does not have its
90ce3da70b43 Initial load
duke
parents:
diff changeset
   693
        // end tag spec in the DTD as optional.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   694
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
   695
        if (stack.terminate() && (stack.elem != dtd.body) && (!strict || stack.elem.omitEnd())) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   696
            // System.out.println("-- omitting end tag: " + stack.elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   697
            if (!stack.elem.omitEnd()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   698
                error("end.missing", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   699
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   700
90ce3da70b43 Initial load
duke
parents:
diff changeset
   701
            endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   702
            return legalElementContext(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   703
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   704
90ce3da70b43 Initial load
duke
parents:
diff changeset
   705
        // At this point we know that something is screwed up.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   706
        return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   707
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   708
90ce3da70b43 Initial load
duke
parents:
diff changeset
   709
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   710
     * Create a legal context for a tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   711
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   712
    void legalTagContext(TagElement tag) throws ChangedCharSetException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   713
        if (legalElementContext(tag.getElement())) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   714
            markFirstTime(tag.getElement());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   715
            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   716
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   717
90ce3da70b43 Initial load
duke
parents:
diff changeset
   718
        // Avoid putting a block tag in a flow tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   719
        if (tag.breaksFlow() && (stack != null) && !stack.tag.breaksFlow()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   720
            endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   721
            legalTagContext(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   722
            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   723
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   724
90ce3da70b43 Initial load
duke
parents:
diff changeset
   725
        // Avoid putting something wierd in the head of the document.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   726
        for (TagStack s = stack ; s != null ; s = s.next) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   727
            if (s.tag.getElement() == dtd.head) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   728
                while (stack != s) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   729
                    endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   730
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   731
                endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   732
                legalTagContext(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   733
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   734
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   735
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   736
90ce3da70b43 Initial load
duke
parents:
diff changeset
   737
        // Everything failed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   738
        error("tag.unexpected", tag.getElement().getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
   739
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   740
90ce3da70b43 Initial load
duke
parents:
diff changeset
   741
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   742
     * Error context. Something went wrong, make sure we are in
90ce3da70b43 Initial load
duke
parents:
diff changeset
   743
     * the document's body context
90ce3da70b43 Initial load
duke
parents:
diff changeset
   744
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   745
    void errorContext() throws ChangedCharSetException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   746
        for (; (stack != null) && (stack.tag.getElement() != dtd.body) ; stack = stack.next) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   747
            handleEndTag(stack.tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   748
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   749
        if (stack == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   750
            legalElementContext(dtd.body);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   751
            startTag(makeTag(dtd.body, true));
90ce3da70b43 Initial load
duke
parents:
diff changeset
   752
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   753
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   754
90ce3da70b43 Initial load
duke
parents:
diff changeset
   755
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   756
     * Add a char to the string buffer.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   757
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   758
    void addString(int c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   759
        if (strpos  == str.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   760
            char newstr[] = new char[str.length + 128];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   761
            System.arraycopy(str, 0, newstr, 0, str.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   762
            str = newstr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   763
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   764
        str[strpos++] = (char)c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   765
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   766
90ce3da70b43 Initial load
duke
parents:
diff changeset
   767
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   768
     * Get the string that's been accumulated.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   769
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   770
    String getString(int pos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   771
        char newStr[] = new char[strpos - pos];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   772
        System.arraycopy(str, pos, newStr, 0, strpos - pos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   773
        strpos = pos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   774
        return new String(newStr);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   775
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   776
90ce3da70b43 Initial load
duke
parents:
diff changeset
   777
    char[] getChars(int pos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   778
        char newStr[] = new char[strpos - pos];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   779
        System.arraycopy(str, pos, newStr, 0, strpos - pos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   780
        strpos = pos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   781
        return newStr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   782
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   783
90ce3da70b43 Initial load
duke
parents:
diff changeset
   784
    char[] getChars(int pos, int endPos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   785
        char newStr[] = new char[endPos - pos];
90ce3da70b43 Initial load
duke
parents:
diff changeset
   786
        System.arraycopy(str, pos, newStr, 0, endPos - pos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   787
        // REMIND: it's not clear whether this version should set strpos or not
90ce3da70b43 Initial load
duke
parents:
diff changeset
   788
        // strpos = pos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   789
        return newStr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   790
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   791
90ce3da70b43 Initial load
duke
parents:
diff changeset
   792
    void resetStrBuffer() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   793
        strpos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   794
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   795
90ce3da70b43 Initial load
duke
parents:
diff changeset
   796
    int strIndexOf(char target) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   797
        for (int i = 0; i < strpos; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   798
            if (str[i] == target) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   799
                return i;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   800
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   801
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   802
90ce3da70b43 Initial load
duke
parents:
diff changeset
   803
        return -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   804
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   805
90ce3da70b43 Initial load
duke
parents:
diff changeset
   806
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   807
     * Skip space.
90ce3da70b43 Initial load
duke
parents:
diff changeset
   808
     * [5] 297:5
90ce3da70b43 Initial load
duke
parents:
diff changeset
   809
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   810
    void skipSpace() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   811
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   812
            switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   813
              case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   814
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   815
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   816
                lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   817
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   818
90ce3da70b43 Initial load
duke
parents:
diff changeset
   819
              case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   820
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   821
                if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   822
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   823
                    crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   824
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   825
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   826
                    crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   827
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   828
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   829
              case ' ':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   830
              case '\t':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   831
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   832
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   833
90ce3da70b43 Initial load
duke
parents:
diff changeset
   834
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   835
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   836
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   837
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   838
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   839
90ce3da70b43 Initial load
duke
parents:
diff changeset
   840
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   841
     * Parse identifier. Uppercase characters are folded
90ce3da70b43 Initial load
duke
parents:
diff changeset
   842
     * to lowercase when lower is true. Returns falsed if
90ce3da70b43 Initial load
duke
parents:
diff changeset
   843
     * no identifier is found. [55] 346:17
90ce3da70b43 Initial load
duke
parents:
diff changeset
   844
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   845
    boolean parseIdentifier(boolean lower) throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   846
        switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   847
          case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   848
          case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   849
          case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   850
          case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   851
          case 'Y': case 'Z':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   852
            if (lower) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   853
                ch = 'a' + (ch - 'A');
90ce3da70b43 Initial load
duke
parents:
diff changeset
   854
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   855
90ce3da70b43 Initial load
duke
parents:
diff changeset
   856
          case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   857
          case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   858
          case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   859
          case 's': case 't': case 'u': case 'v': case 'w': case 'x':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   860
          case 'y': case 'z':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   861
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   862
90ce3da70b43 Initial load
duke
parents:
diff changeset
   863
          default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   864
            return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   865
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   866
90ce3da70b43 Initial load
duke
parents:
diff changeset
   867
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   868
            addString(ch);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   869
90ce3da70b43 Initial load
duke
parents:
diff changeset
   870
            switch (ch = readCh()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   871
              case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   872
              case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   873
              case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   874
              case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   875
              case 'Y': case 'Z':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   876
                if (lower) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   877
                    ch = 'a' + (ch - 'A');
90ce3da70b43 Initial load
duke
parents:
diff changeset
   878
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   879
90ce3da70b43 Initial load
duke
parents:
diff changeset
   880
              case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   881
              case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   882
              case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   883
              case 's': case 't': case 'u': case 'v': case 'w': case 'x':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   884
              case 'y': case 'z':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   885
90ce3da70b43 Initial load
duke
parents:
diff changeset
   886
              case '0': case '1': case '2': case '3': case '4':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   887
              case '5': case '6': case '7': case '8': case '9':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   888
90ce3da70b43 Initial load
duke
parents:
diff changeset
   889
              case '.': case '-':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   890
90ce3da70b43 Initial load
duke
parents:
diff changeset
   891
              case '_': // not officially allowed
90ce3da70b43 Initial load
duke
parents:
diff changeset
   892
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   893
90ce3da70b43 Initial load
duke
parents:
diff changeset
   894
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
   895
                return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   896
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   897
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   898
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   899
90ce3da70b43 Initial load
duke
parents:
diff changeset
   900
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
   901
     * Parse an entity reference. [59] 350:17
90ce3da70b43 Initial load
duke
parents:
diff changeset
   902
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
   903
    private char[] parseEntityReference() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   904
        int pos = strpos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   905
90ce3da70b43 Initial load
duke
parents:
diff changeset
   906
        if ((ch = readCh()) == '#') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   907
            int n = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   908
            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   909
            if ((ch >= '0') && (ch <= '9') ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   910
                    ch == 'x' || ch == 'X') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   911
90ce3da70b43 Initial load
duke
parents:
diff changeset
   912
                if ((ch >= '0') && (ch <= '9')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   913
                    // parse decimal reference
90ce3da70b43 Initial load
duke
parents:
diff changeset
   914
                    while ((ch >= '0') && (ch <= '9')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   915
                        n = (n * 10) + ch - '0';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   916
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   917
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   918
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   919
                    // parse hexadecimal reference
90ce3da70b43 Initial load
duke
parents:
diff changeset
   920
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   921
                    char lch = (char) Character.toLowerCase(ch);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   922
                    while ((lch >= '0') && (lch <= '9') ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
   923
                            (lch >= 'a') && (lch <= 'f')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   924
                        if (lch >= '0' && lch <= '9') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   925
                            n = (n * 16) + lch - '0';
90ce3da70b43 Initial load
duke
parents:
diff changeset
   926
                        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   927
                            n = (n * 16) + lch - 'a' + 10;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   928
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   929
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   930
                        lch = (char) Character.toLowerCase(ch);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   931
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   932
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   933
                switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   934
                    case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   935
                        ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   936
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   937
                        lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   938
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   939
90ce3da70b43 Initial load
duke
parents:
diff changeset
   940
                    case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   941
                        ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   942
                        if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   943
                            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   944
                            crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   945
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   946
                        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   947
                            crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   948
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   949
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   950
90ce3da70b43 Initial load
duke
parents:
diff changeset
   951
                    case ';':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   952
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   953
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   954
                }
14309
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
   955
                char data[] = mapNumericReference(n);
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   956
                return data;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   957
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   958
            addString('#');
90ce3da70b43 Initial load
duke
parents:
diff changeset
   959
            if (!parseIdentifier(false)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   960
                error("ident.expected");
90ce3da70b43 Initial load
duke
parents:
diff changeset
   961
                strpos = pos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   962
                char data[] = {'&', '#'};
90ce3da70b43 Initial load
duke
parents:
diff changeset
   963
                return data;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   964
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   965
        } else if (!parseIdentifier(false)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   966
            char data[] = {'&'};
90ce3da70b43 Initial load
duke
parents:
diff changeset
   967
            return data;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   968
        }
9213
856f16c729a5 7003777: Nonexistent html entities not parsed properly.
rupashka
parents: 7668
diff changeset
   969
856f16c729a5 7003777: Nonexistent html entities not parsed properly.
rupashka
parents: 7668
diff changeset
   970
        boolean semicolon = false;
856f16c729a5 7003777: Nonexistent html entities not parsed properly.
rupashka
parents: 7668
diff changeset
   971
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   972
        switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   973
          case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   974
            ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   975
            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   976
            lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   977
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   978
90ce3da70b43 Initial load
duke
parents:
diff changeset
   979
          case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
   980
            ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   981
            if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   982
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   983
                crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   984
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   985
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
   986
                crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   987
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   988
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   989
90ce3da70b43 Initial load
duke
parents:
diff changeset
   990
          case ';':
9213
856f16c729a5 7003777: Nonexistent html entities not parsed properly.
rupashka
parents: 7668
diff changeset
   991
            semicolon = true;
856f16c729a5 7003777: Nonexistent html entities not parsed properly.
rupashka
parents: 7668
diff changeset
   992
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
   993
            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
   994
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
   995
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
   996
90ce3da70b43 Initial load
duke
parents:
diff changeset
   997
        String nm = getString(pos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   998
        Entity ent = dtd.getEntity(nm);
90ce3da70b43 Initial load
duke
parents:
diff changeset
   999
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1000
        // entities are case sensitive - however if strict
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1001
        // is false then we will try to make a match by
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1002
        // converting the string to all lowercase.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1003
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1004
        if (!strict && (ent == null)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1005
            ent = dtd.getEntity(nm.toLowerCase());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1006
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1007
        if ((ent == null) || !ent.isGeneral()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1008
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1009
            if (nm.length() == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1010
                error("invalid.entref", nm);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1011
                return new char[0];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1012
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1013
            /* given that there is not a match restore the entity reference */
9213
856f16c729a5 7003777: Nonexistent html entities not parsed properly.
rupashka
parents: 7668
diff changeset
  1014
            String str = "&" + nm + (semicolon ? ";" : "");
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1015
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1016
            char b[] = new char[str.length()];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1017
            str.getChars(0, b.length, b, 0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1018
            return b;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1019
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1020
        return ent.getData();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1021
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1022
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1023
    /**
14309
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
  1024
     * Converts numeric character reference to char array.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1025
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1026
     * Normally the code in a reference should be always converted
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1027
     * to the Unicode character with the same code, but due to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1028
     * wide usage of Cp1252 charset most browsers map numeric references
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1029
     * in the range 130-159 (which are control chars in Unicode set)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1030
     * to displayable characters with other codes.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1031
     *
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1032
     * @param c the code of numeric character reference.
14309
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
  1033
     * @return a char array corresponding to the reference code.
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1034
     */
14309
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
  1035
    private char[] mapNumericReference(int c) {
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
  1036
        char[] data;
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
  1037
        if (c >= 0xffff) { // outside unicode BMP.
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
  1038
            try {
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
  1039
                data = Character.toChars(c);
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
  1040
            } catch (IllegalArgumentException e) {
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
  1041
                data = new char[0];
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
  1042
            }
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
  1043
        } else {
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
  1044
            data = new char[1];
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
  1045
            data[0] = (c < 130 || c > 159) ? (char) c : cp1252Map[c - 130];
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1046
        }
14309
425e2c6b5941 2229575: Swing HTML parser can't properly decode codepoints outside the Unicode Plane 0 into a surrogate pair
VKARNAUK
parents: 12999
diff changeset
  1047
        return data;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1048
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1049
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1050
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1051
     * Parse a comment. [92] 391:7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1052
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1053
    void parseComment() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1054
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1055
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1056
            int c = ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1057
            switch (c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1058
              case '-':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1059
                  /** Presuming that the start string of a comment "<!--" has
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1060
                      already been parsed, the '-' character is valid only as
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1061
                      part of a comment termination and further more it must
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1062
                      be present in even numbers. Hence if strict is true, we
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1063
                      presume the comment has been terminated and return.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1064
                      However if strict is false, then there is no even number
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1065
                      requirement and this character can appear anywhere in the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1066
                      comment.  The parser reads on until it sees the following
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1067
                      pattern: "-->" or "--!>".
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1068
                   **/
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1069
                if (!strict && (strpos != 0) && (str[strpos - 1] == '-')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1070
                    if ((ch = readCh()) == '>') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1071
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1072
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1073
                    if (ch == '!') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1074
                        if ((ch = readCh()) == '>') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1075
                            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1076
                        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1077
                            /* to account for extra read()'s that happened */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1078
                            addString('-');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1079
                            addString('!');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1080
                            continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1081
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1082
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1083
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1084
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1085
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1086
                if ((ch = readCh()) == '-') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1087
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1088
                    if (strict || ch == '>') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1089
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1090
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1091
                    if (ch == '!') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1092
                        if ((ch = readCh()) == '>') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1093
                            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1094
                        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1095
                            /* to account for extra read()'s that happened */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1096
                            addString('-');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1097
                            addString('!');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1098
                            continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1099
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1100
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1101
                    /* to account for the extra read() */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1102
                    addString('-');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1103
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1104
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1105
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1106
              case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1107
                  handleEOFInComment();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1108
                  return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1109
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1110
              case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1111
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1112
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1113
                lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1114
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1115
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1116
              case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1117
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1118
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1119
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1120
              case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1121
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1122
                if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1123
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1124
                    crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1125
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1126
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1127
                    crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1128
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1129
                c = '\n';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1130
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1131
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1132
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1133
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1134
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1135
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1136
            addString(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1137
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1138
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1139
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1140
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1141
     * Parse literal content. [46] 343:1 and [47] 344:1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1142
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1143
    void parseLiteral(boolean replace) throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1144
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1145
            int c = ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1146
            switch (c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1147
              case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1148
                error("eof.literal", stack.elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1149
                endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1150
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1151
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1152
              case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1153
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1154
                int i = textpos - (stack.elem.name.length() + 2), j = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1155
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1156
                // match end tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1157
                if ((i >= 0) && (text[i++] == '<') && (text[i] == '/')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1158
                    while ((++i < textpos) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1159
                           (Character.toLowerCase(text[i]) == stack.elem.name.charAt(j++)));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1160
                    if (i == textpos) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1161
                        textpos -= (stack.elem.name.length() + 2);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1162
                        if ((textpos > 0) && (text[textpos-1] == '\n')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1163
                            textpos--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1164
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1165
                        endTag(false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1166
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1167
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1168
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1169
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1170
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1171
              case '&':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1172
                char data[] = parseEntityReference();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1173
                if (textpos + data.length > text.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1174
                    char newtext[] = new char[Math.max(textpos + data.length + 128, text.length * 2)];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1175
                    System.arraycopy(text, 0, newtext, 0, text.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1176
                    text = newtext;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1177
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1178
                System.arraycopy(data, 0, text, textpos, data.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1179
                textpos += data.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1180
                continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1181
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1182
              case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1183
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1184
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1185
                lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1186
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1187
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1188
              case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1189
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1190
                if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1191
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1192
                    crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1193
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1194
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1195
                    crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1196
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1197
                c = '\n';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1198
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1199
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1200
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1201
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1202
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1203
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1204
            // output character
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1205
            if (textpos == text.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1206
                char newtext[] = new char[text.length + 128];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1207
                System.arraycopy(text, 0, newtext, 0, text.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1208
                text = newtext;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1209
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1210
            text[textpos++] = (char)c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1211
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1212
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1213
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1214
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1215
     * Parse attribute value. [33] 331:1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1216
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1217
    String parseAttributeValue(boolean lower) throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1218
        int delim = -1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1219
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1220
        // Check for a delimiter
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1221
        switch(ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1222
          case '\'':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1223
          case '"':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1224
            delim = ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1225
            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1226
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1227
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1228
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1229
        // Parse the rest of the value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1230
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1231
            int c = ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1232
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1233
            switch (c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1234
              case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1235
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1236
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1237
                lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1238
                if (delim < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1239
                    return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1240
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1241
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1242
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1243
              case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1244
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1245
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1246
                if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1247
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1248
                    crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1249
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1250
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1251
                    crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1252
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1253
                if (delim < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1254
                    return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1255
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1256
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1257
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1258
              case '\t':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1259
                  if (delim < 0)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1260
                      c = ' ';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1261
              case ' ':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1262
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1263
                if (delim < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1264
                    return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1265
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1266
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1267
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1268
              case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1269
              case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1270
                if (delim < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1271
                    return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1272
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1273
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1274
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1275
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1276
              case '\'':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1277
              case '"':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1278
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1279
                if (c == delim) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1280
                    return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1281
                } else if (delim == -1) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1282
                    error("attvalerr");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1283
                    if (strict || ch == ' ') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1284
                        return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1285
                    } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1286
                        continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1287
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1288
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1289
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1290
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1291
            case '=':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1292
                if (delim < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1293
                    /* In SGML a construct like <img src=/cgi-bin/foo?x=1>
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1294
                       is considered invalid since an = sign can only be contained
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1295
                       in an attributes value if the string is quoted.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1296
                       */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1297
                    error("attvalerr");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1298
                    /* If strict is true then we return with the string we have thus far.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1299
                       Otherwise we accept the = sign as part of the attribute's value and
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1300
                       process the rest of the img tag. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1301
                    if (strict) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1302
                        return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1303
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1304
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1305
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1306
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1307
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1308
              case '&':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1309
                if (strict && delim < 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1310
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1311
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1312
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1313
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1314
                char data[] = parseEntityReference();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1315
                for (int i = 0 ; i < data.length ; i++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1316
                    c = data[i];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1317
                    addString((lower && (c >= 'A') && (c <= 'Z')) ? 'a' + c - 'A' : c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1318
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1319
                continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1320
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1321
              case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1322
                return getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1323
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1324
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1325
                if (lower && (c >= 'A') && (c <= 'Z')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1326
                    c = 'a' + c - 'A';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1327
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1328
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1329
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1330
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1331
            addString(c);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1332
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1333
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1334
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1335
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1336
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1337
     * Parse attribute specification List. [31] 327:17
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1338
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1339
    void parseAttributeSpecificationList(Element elem) throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1340
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1341
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1342
            skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1343
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1344
            switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1345
              case '/':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1346
              case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1347
              case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1348
              case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1349
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1350
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1351
              case '-':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1352
                if ((ch = readCh()) == '-') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1353
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1354
                    parseComment();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1355
                    strpos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1356
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1357
                    error("invalid.tagchar", "-", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1358
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1359
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1360
                continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1361
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1362
1287
a04aca99c77a 6722802: Code improvement and warnings removing from the javax.swing.text package
rupashka
parents: 438
diff changeset
  1363
            AttributeList att;
a04aca99c77a 6722802: Code improvement and warnings removing from the javax.swing.text package
rupashka
parents: 438
diff changeset
  1364
            String attname;
a04aca99c77a 6722802: Code improvement and warnings removing from the javax.swing.text package
rupashka
parents: 438
diff changeset
  1365
            String attvalue;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1366
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1367
            if (parseIdentifier(true)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1368
                attname = getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1369
                skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1370
                if (ch == '=') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1371
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1372
                    skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1373
                    att = elem.getAttribute(attname);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1374
//  Bug ID 4102750
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1375
//  Load the NAME of an Attribute Case Sensitive
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1376
//  The case of the NAME  must be intact
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1377
//  MG 021898
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1378
                    attvalue = parseAttributeValue((att != null) && (att.type != CDATA) && (att.type != NOTATION) && (att.type != NAME));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1379
//                  attvalue = parseAttributeValue((att != null) && (att.type != CDATA) && (att.type != NOTATION));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1380
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1381
                    attvalue = attname;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1382
                    att = elem.getAttributeByValue(attvalue);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1383
                    if (att == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1384
                        att = elem.getAttribute(attname);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1385
                        if (att != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1386
                            attvalue = att.getValue();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1387
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1388
                        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1389
                            // Make it null so that NULL_ATTRIBUTE_VALUE is
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1390
                            // used
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1391
                            attvalue = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1392
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1393
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1394
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1395
            } else if (!strict && ch == ',') { // allows for comma separated attribute-value pairs
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1396
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1397
                continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1398
            } else if (!strict && ch == '"') { // allows for quoted attributes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1399
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1400
                skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1401
                if (parseIdentifier(true)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1402
                    attname = getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1403
                    if (ch == '"') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1404
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1405
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1406
                    skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1407
                    if (ch == '=') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1408
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1409
                        skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1410
                        att = elem.getAttribute(attname);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1411
                        attvalue = parseAttributeValue((att != null) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1412
                                                (att.type != CDATA) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1413
                                                (att.type != NOTATION));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1414
                    } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1415
                        attvalue = attname;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1416
                        att = elem.getAttributeByValue(attvalue);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1417
                        if (att == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1418
                            att = elem.getAttribute(attname);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1419
                            if (att != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1420
                                attvalue = att.getValue();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1421
                            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1422
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1423
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1424
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1425
                    char str[] = {(char)ch};
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1426
                    error("invalid.tagchar", new String(str), elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1427
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1428
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1429
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1430
            } else if (!strict && (attributes.isEmpty()) && (ch == '=')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1431
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1432
                skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1433
                attname = elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1434
                att = elem.getAttribute(attname);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1435
                attvalue = parseAttributeValue((att != null) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1436
                                               (att.type != CDATA) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1437
                                               (att.type != NOTATION));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1438
            } else if (!strict && (ch == '=')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1439
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1440
                skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1441
                attvalue = parseAttributeValue(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1442
                error("attvalerr");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1443
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1444
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1445
                char str[] = {(char)ch};
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1446
                error("invalid.tagchar", new String(str), elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1447
                if (!strict) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1448
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1449
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1450
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1451
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1452
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1453
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1454
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1455
            if (att != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1456
                attname = att.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1457
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1458
                error("invalid.tagatt", attname, elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1459
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1460
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1461
            // Check out the value
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1462
            if (attributes.isDefined(attname)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1463
                error("multi.tagatt", attname, elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1464
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1465
            if (attvalue == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1466
                attvalue = ((att != null) && (att.value != null)) ? att.value :
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1467
                    HTML.NULL_ATTRIBUTE_VALUE;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1468
            } else if ((att != null) && (att.values != null) && !att.values.contains(attvalue)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1469
                error("invalid.tagattval", attname, elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1470
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1471
            HTML.Attribute attkey = HTML.getAttributeKey(attname);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1472
            if (attkey == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1473
                attributes.addAttribute(attname, attvalue);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1474
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1475
                attributes.addAttribute(attkey, attvalue);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1476
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1477
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1478
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1479
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1480
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1481
     * Parses th Document Declaration Type markup declaration.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1482
     * Currently ignores it.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1483
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1484
    public String parseDTDMarkup() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1485
7014
eb4fcf73ee99 6432566: Replace usage of StringBuffer with StringBuilder in Swing
rupashka
parents: 5506
diff changeset
  1486
        StringBuilder strBuff = new StringBuilder();
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1487
        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1488
        while(true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1489
            switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1490
            case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1491
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1492
                return strBuff.toString();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1493
            case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1494
                error("invalid.markup");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1495
                return strBuff.toString();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1496
            case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1497
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1498
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1499
                lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1500
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1501
            case '"':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1502
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1503
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1504
            case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1505
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1506
                if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1507
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1508
                    crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1509
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1510
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1511
                    crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1512
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1513
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1514
            default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1515
                strBuff.append((char)(ch & 0xFF));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1516
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1517
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1518
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1519
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1520
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1521
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1522
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1523
     * Parse markup declarations.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1524
     * Currently only handles the Document Type Declaration markup.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1525
     * Returns true if it is a markup declaration false otherwise.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1526
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1527
    protected boolean parseMarkupDeclarations(StringBuffer strBuff) throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1528
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1529
        /* Currently handles only the DOCTYPE */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1530
        if ((strBuff.length() == "DOCTYPE".length()) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1531
            (strBuff.toString().toUpperCase().equals("DOCTYPE"))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1532
            parseDTDMarkup();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1533
            return true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1534
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1535
        return false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1536
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1537
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1538
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1539
     * Parse an invalid tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1540
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1541
    void parseInvalidTag() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1542
        // ignore all data upto the close bracket '>'
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1543
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1544
            skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1545
            switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1546
              case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1547
              case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1548
                  ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1549
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1550
              case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1551
                  return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1552
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1553
                  ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1554
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1555
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1556
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1557
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1558
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1559
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1560
     * Parse a start or end tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1561
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1562
    void parseTag() throws IOException {
1287
a04aca99c77a 6722802: Code improvement and warnings removing from the javax.swing.text package
rupashka
parents: 438
diff changeset
  1563
        Element elem;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1564
        boolean net = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1565
        boolean warned = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1566
        boolean unknown = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1567
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1568
        switch (ch = readCh()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1569
          case '!':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1570
            switch (ch = readCh()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1571
              case '-':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1572
                // Parse comment. [92] 391:7
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1573
                while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1574
                    if (ch == '-') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1575
                        if (!strict || ((ch = readCh()) == '-')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1576
                            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1577
                            if (!strict && ch == '-') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1578
                                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1579
                            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1580
                            // send over any text you might see
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1581
                            // before parsing and sending the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1582
                            // comment
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1583
                            if (textpos != 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1584
                                char newtext[] = new char[textpos];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1585
                                System.arraycopy(text, 0, newtext, 0, textpos);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1586
                                handleText(newtext);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1587
                                lastBlockStartPos = currentBlockStartPos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1588
                                textpos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1589
                            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1590
                            parseComment();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1591
                            last = makeTag(dtd.getElement("comment"), true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1592
                            handleComment(getChars(0));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1593
                            continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1594
                        } else if (!warned) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1595
                            warned = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1596
                            error("invalid.commentchar", "-");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1597
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1598
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1599
                    skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1600
                    switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1601
                      case '-':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1602
                        continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1603
                      case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1604
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1605
                      case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1606
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1607
                      default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1608
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1609
                        if (!warned) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1610
                            warned = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1611
                            error("invalid.commentchar",
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1612
                                  String.valueOf((char)ch));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1613
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1614
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1615
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1616
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1617
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1618
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1619
                // deal with marked sections
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1620
                StringBuffer strBuff = new StringBuffer();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1621
                while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1622
                    strBuff.append((char)ch);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1623
                    if (parseMarkupDeclarations(strBuff)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1624
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1625
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1626
                    switch(ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1627
                      case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1628
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1629
                      case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1630
                        error("invalid.markup");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1631
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1632
                      case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1633
                        ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1634
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1635
                        lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1636
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1637
                      case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1638
                        ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1639
                        if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1640
                            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1641
                            crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1642
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1643
                        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1644
                            crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1645
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1646
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1647
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1648
                      default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1649
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1650
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1651
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1652
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1653
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1654
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1655
          case '/':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1656
            // parse end tag [19] 317:4
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1657
            switch (ch = readCh()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1658
              case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1659
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1660
              case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1661
                // empty end tag. either </> or </<
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1662
                if (recent == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1663
                    error("invalid.shortend");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1664
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1665
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1666
                elem = recent;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1667
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1668
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1669
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1670
                if (!parseIdentifier(true)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1671
                    error("expected.endtagname");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1672
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1673
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1674
                skipSpace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1675
                switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1676
                  case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1677
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1678
                  case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1679
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1680
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1681
                  default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1682
                    error("expected", "'>'");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1683
                    while ((ch != -1) && (ch != '\n') && (ch != '>')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1684
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1685
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1686
                    if (ch == '>') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1687
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1688
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1689
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1690
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1691
                String elemStr = getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1692
                if (!dtd.elementExists(elemStr)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1693
                    error("end.unrecognized", elemStr);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1694
                    // Ignore RE before end tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1695
                    if ((textpos > 0) && (text[textpos-1] == '\n')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1696
                        textpos--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1697
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1698
                    elem = dtd.getElement("unknown");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1699
                    elem.name = elemStr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1700
                    unknown = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1701
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1702
                    elem = dtd.getElement(elemStr);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1703
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1704
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1705
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1706
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1707
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1708
            // If the stack is null, we're seeing end tags without any begin
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1709
            // tags.  Ignore them.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1710
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1711
            if (stack == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1712
                error("end.extra.tag", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1713
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1714
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1715
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1716
            // Ignore RE before end tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1717
            if ((textpos > 0) && (text[textpos-1] == '\n')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1718
                // In a pre tag, if there are blank lines
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1719
                // we do not want to remove the newline
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1720
                // before the end tag.  Hence this code.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1721
                //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1722
                if (stack.pre) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1723
                    if ((textpos > 1) && (text[textpos-2] != '\n')) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1724
                        textpos--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1725
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1726
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1727
                    textpos--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1728
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1729
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1730
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1731
            // If the end tag is a form, since we did not put it
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1732
            // on the tag stack, there is no corresponding start
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1733
            // tag to find. Hence do not touch the tag stack.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1734
            //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1735
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1736
            /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1737
            if (!strict && elem.getName().equals("form")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1738
                if (lastFormSent != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1739
                    handleEndTag(lastFormSent);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1740
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1741
                } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1742
                    // do nothing.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1743
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1744
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1745
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1746
            */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1747
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1748
            if (unknown) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1749
                // we will not see a corresponding start tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1750
                // on the stack.  If we are seeing an
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1751
                // end tag, let's send this on as an empty
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1752
                // tag with the end tag attribute set to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1753
                // true.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1754
                TagElement t = makeTag(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1755
                handleText(t);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1756
                attributes.addAttribute(HTML.Attribute.ENDTAG, "true");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1757
                handleEmptyTag(makeTag(elem));
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1758
                unknown = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1759
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1760
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1761
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1762
            // find the corresponding start tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1763
21278
ef8a3a2a72f2 8022746: List of spelling errors in API doc
malenkov
parents: 20169
diff changeset
  1764
            // A commonly occurring error appears to be the insertion
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1765
            // of extra end tags in a table.  The intent here is to ignore
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1766
            // such extra end tags.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1767
            //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1768
            if (!strict) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1769
                String stackElem = stack.elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1770
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1771
                if (stackElem.equals("table")) {
21278
ef8a3a2a72f2 8022746: List of spelling errors in API doc
malenkov
parents: 20169
diff changeset
  1772
                    // If it is not a valid end tag ignore it and return
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1773
                    //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1774
                    if (!elem.getName().equals(stackElem)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1775
                        error("tag.ignore", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1776
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1777
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1778
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1779
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1780
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1781
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1782
                if (stackElem.equals("tr") ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1783
                    stackElem.equals("td")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1784
                    if ((!elem.getName().equals("table")) &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1785
                        (!elem.getName().equals(stackElem))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1786
                        error("tag.ignore", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1787
                        return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1788
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1789
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1790
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1791
            TagStack sp = stack;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1792
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1793
            while ((sp != null) && (elem != sp.elem)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1794
                sp = sp.next;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1795
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1796
            if (sp == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1797
                error("unmatched.endtag", elem.getName());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1798
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1799
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1800
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1801
            // People put font ending tags in the darndest places.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1802
            // Don't close other contexts based on them being between
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1803
            // a font tag and the corresponding end tag.  Instead,
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1804
            // ignore the end tag like it doesn't exist and allow the end
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1805
            // of the document to close us out.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1806
            String elemName = elem.getName();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1807
            if (stack != sp &&
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1808
                (elemName.equals("font") ||
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1809
                 elemName.equals("center"))) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1810
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1811
                // Since closing out a center tag can have really weird
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1812
                // effects on the formatting,  make sure that tags
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1813
                // for which omitting an end tag is legitimate
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1814
                // get closed out.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1815
                //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1816
                if (elemName.equals("center")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1817
                    while(stack.elem.omitEnd() && stack != sp) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1818
                        endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1819
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1820
                    if (stack.elem == elem) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1821
                        endTag(false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1822
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1823
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1824
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1825
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1826
            // People do the same thing with center tags.  In this
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1827
            // case we would like to close off the center tag but
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1828
            // not necessarily all enclosing tags.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1829
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1830
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1831
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1832
            // end tags
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1833
            while (stack != sp) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1834
                endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1835
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1836
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1837
            endTag(false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1838
            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1839
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1840
          case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1841
            error("eof");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1842
            return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1843
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1844
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1845
        // start tag [14] 314:1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1846
        if (!parseIdentifier(true)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1847
            elem = recent;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1848
            if ((ch != '>') || (elem == null)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1849
                error("expected.tagname");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1850
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1851
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1852
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1853
            String elemStr = getString(0);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1854
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1855
            if (elemStr.equals("image")) {
438
2ae294e4518c 6613529: Avoid duplicate object creation within JDK packages
dav
parents: 2
diff changeset
  1856
                elemStr = "img";
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1857
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1858
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1859
            /* determine if this element is part of the dtd. */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1860
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1861
            if (!dtd.elementExists(elemStr)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1862
                //              parseInvalidTag();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1863
                error("tag.unrecognized ", elemStr);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1864
                elem = dtd.getElement("unknown");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1865
                elem.name = elemStr;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1866
                unknown = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1867
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1868
                elem = dtd.getElement(elemStr);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1869
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1870
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1871
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1872
        // Parse attributes
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1873
        parseAttributeSpecificationList(elem);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1874
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1875
        switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1876
          case '/':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1877
            net = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1878
          case '>':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1879
            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1880
            if (ch == '>' && net) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1881
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1882
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1883
          case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1884
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1885
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1886
          default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1887
            error("expected", "'>'");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1888
            break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1889
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1890
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1891
        if (!strict) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1892
          if (elem.getName().equals("script")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1893
            error("javascript.unsupported");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1894
          }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1895
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1896
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1897
        // ignore RE after start tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1898
        //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1899
        if (!elem.isEmpty())  {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1900
            if (ch == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1901
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1902
                lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1903
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1904
            } else if (ch == '\r') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1905
                ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1906
                if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1907
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1908
                    crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1909
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1910
                else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1911
                    crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1912
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1913
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1914
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1915
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1916
        // ensure a legal context for the tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1917
        TagElement tag = makeTag(elem, false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1918
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1919
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1920
        /** In dealing with forms, we have decided to treat
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1921
            them as legal in any context.  Also, even though
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1922
            they do have a start and an end tag, we will
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1923
            not put this tag on the stack.  This is to deal with
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1924
            several pages in the web oasis that choose to
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1925
            start and end forms in any possible location. **/
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1926
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1927
        /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1928
        if (!strict && elem.getName().equals("form")) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1929
            if (lastFormSent == null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1930
                lastFormSent = tag;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1931
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1932
                handleEndTag(lastFormSent);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1933
                lastFormSent = tag;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1934
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1935
        } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1936
        */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1937
            // Similarly, if a tag is unknown, we will apply
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1938
            // no legalTagContext logic to it.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1939
            //
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1940
            if (!unknown) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1941
                legalTagContext(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1942
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1943
                // If skip tag is true,  this implies that
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1944
                // the tag was illegal and that the error
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1945
                // recovery strategy adopted is to ignore
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1946
                // the tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1947
                if (!strict && skipTag) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1948
                    skipTag = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1949
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1950
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1951
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1952
            /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1953
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1954
            */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1955
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1956
        startTag(tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1957
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1958
        if (!elem.isEmpty()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1959
            switch (elem.getType()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1960
              case CDATA:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1961
                parseLiteral(false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1962
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1963
              case RCDATA:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1964
                parseLiteral(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1965
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1966
              default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1967
                if (stack != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1968
                    stack.net = net;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1969
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1970
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1971
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1972
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1973
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1974
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1975
    /**
     * Opening delimiter of an HTML comment.
     * NOTE(review): the code that reads this constant is not visible in
     * this part of the file; presumably parseScript uses it to track
     * whether it is inside a comment -- confirm against the full method.
     */
    private static final String START_COMMENT = "<!--";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1976
    /**
     * Closing delimiter of an HTML comment.
     * NOTE(review): as with START_COMMENT, the reader of this constant is
     * not visible here -- confirm its use in parseScript.
     */
    private static final String END_COMMENT = "-->";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1977
    /**
     * Lower-case spelling of the script end tag, as a character array.
     * parseScript compares the incoming characters against this array
     * position by position, and sizes its look-ahead buffer from its length.
     */
    private static final char[] SCRIPT_END_TAG = "</script>".toCharArray();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1978
    /**
     * Upper-case spelling of the script end tag. parseScript accepts, at
     * each position, either the character from SCRIPT_END_TAG or the one
     * from this array, so mixed-case end tags match as well. It is indexed
     * with the same index as SCRIPT_END_TAG and must therefore have the
     * same length.
     */
    private static final char[] SCRIPT_END_TAG_UPPER_CASE =
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1979
                                        "</SCRIPT>".toCharArray();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1980
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1981
    void parseScript() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1982
        char[] charsToAdd = new char[SCRIPT_END_TAG.length];
17678
ec24ad8455ec 7011777: JDK 6 parses html text with script tags within comments differently from previous releases
mcherkas
parents: 14309
diff changeset
  1983
        boolean insideComment = false;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1984
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1985
        /* Here, ch should be the first character after <script> */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1986
        while (true) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1987
            int i = 0;
17678
ec24ad8455ec 7011777: JDK 6 parses html text with script tags within comments differently from previous releases
mcherkas
parents: 14309
diff changeset
  1988
            while (!insideComment && i < SCRIPT_END_TAG.length
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1989
                       && (SCRIPT_END_TAG[i] == ch
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1990
                           || SCRIPT_END_TAG_UPPER_CASE[i] == ch)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1991
                charsToAdd[i] = (char) ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1992
                ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1993
                i++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1994
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1995
            if (i == SCRIPT_END_TAG.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1996
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1997
                /*  '</script>' tag detected */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1998
                /* Here, ch == the first character after </script> */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  1999
                return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2000
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2001
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2002
                /* To account for extra read()'s that happened */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2003
                for (int j = 0; j < i; j++) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2004
                    addString(charsToAdd[j]);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2005
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2006
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2007
                switch (ch) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2008
                case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2009
                    error("eof.script");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2010
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2011
                case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2012
                    ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2013
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2014
                    lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2015
                    addString('\n');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2016
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2017
                case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2018
                    ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2019
                    if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2020
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2021
                        crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2022
                    } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2023
                        crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2024
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2025
                    addString('\n');
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2026
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2027
                default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2028
                    addString(ch);
17678
ec24ad8455ec 7011777: JDK 6 parses html text with script tags within comments differently from previous releases
mcherkas
parents: 14309
diff changeset
  2029
                    String str = new String(getChars(0, strpos));
ec24ad8455ec 7011777: JDK 6 parses html text with script tags within comments differently from previous releases
mcherkas
parents: 14309
diff changeset
  2030
                    if (!insideComment && str.endsWith(START_COMMENT)) {
ec24ad8455ec 7011777: JDK 6 parses html text with script tags within comments differently from previous releases
mcherkas
parents: 14309
diff changeset
  2031
                        insideComment = true;
ec24ad8455ec 7011777: JDK 6 parses html text with script tags within comments differently from previous releases
mcherkas
parents: 14309
diff changeset
  2032
                    }
ec24ad8455ec 7011777: JDK 6 parses html text with script tags within comments differently from previous releases
mcherkas
parents: 14309
diff changeset
  2033
                    if (insideComment && str.endsWith(END_COMMENT)) {
ec24ad8455ec 7011777: JDK 6 parses html text with script tags within comments differently from previous releases
mcherkas
parents: 14309
diff changeset
  2034
                        insideComment = false;
ec24ad8455ec 7011777: JDK 6 parses html text with script tags within comments differently from previous releases
mcherkas
parents: 14309
diff changeset
  2035
                    }
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2036
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2037
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2038
                } // switch
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2039
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2040
        } // while
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2041
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2042
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2043
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2044
     * Parse Content. [24] 320:1
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2045
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2046
    void parseContent() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2047
        Thread curThread = Thread.currentThread();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2048
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2049
        for (;;) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2050
            if (curThread.isInterrupted()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2051
                curThread.interrupt(); // resignal the interrupt
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2052
                break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2053
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2054
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2055
            int c = ch;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2056
            currentBlockStartPos = currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2057
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2058
            if (recent == dtd.script) { // means: if after starting <script> tag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2059
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2060
                /* Here, ch has to be the first character after <script> */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2061
                parseScript();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2062
                last = makeTag(dtd.getElement("comment"), true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2063
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2064
                /* Remove leading and trailing HTML comment declarations */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2065
                String str = new String(getChars(0)).trim();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2066
                int minLength = START_COMMENT.length() + END_COMMENT.length();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2067
                if (str.startsWith(START_COMMENT) && str.endsWith(END_COMMENT)
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2068
                       && str.length() >= (minLength)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2069
                    str = str.substring(START_COMMENT.length(),
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2070
                                      str.length() - END_COMMENT.length());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2071
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2072
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2073
                /* Handle resulting chars as comment */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2074
                handleComment(str.toCharArray());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2075
                endTag(false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2076
                lastBlockStartPos = currentPosition;
12999
d0cec5582bd7 7165725: JAVA6 HTML PARSER CANNOT PARSE MULTIPLE SCRIPT TAGS IN A LINE CORRECTLY
rupashka
parents: 9213
diff changeset
  2077
d0cec5582bd7 7165725: JAVA6 HTML PARSER CANNOT PARSE MULTIPLE SCRIPT TAGS IN A LINE CORRECTLY
rupashka
parents: 9213
diff changeset
  2078
                continue;
2
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2079
            } else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2080
                switch (c) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2081
                  case '<':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2082
                    parseTag();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2083
                    lastBlockStartPos = currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2084
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2085
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2086
                  case '/':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2087
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2088
                    if ((stack != null) && stack.net) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2089
                        // null end tag.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2090
                        endTag(false);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2091
                        continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2092
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2093
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2094
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2095
                  case -1:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2096
                    return;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2097
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2098
                  case '&':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2099
                    if (textpos == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2100
                        if (!legalElementContext(dtd.pcdata)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2101
                            error("unexpected.pcdata");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2102
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2103
                        if (last.breaksFlow()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2104
                            space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2105
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2106
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2107
                    char data[] = parseEntityReference();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2108
                    if (textpos + data.length + 1 > text.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2109
                        char newtext[] = new char[Math.max(textpos + data.length + 128, text.length * 2)];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2110
                        System.arraycopy(text, 0, newtext, 0, text.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2111
                        text = newtext;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2112
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2113
                    if (space) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2114
                        space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2115
                        text[textpos++] = ' ';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2116
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2117
                    System.arraycopy(data, 0, text, textpos, data.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2118
                    textpos += data.length;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2119
                    ignoreSpace = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2120
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2121
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2122
                  case '\n':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2123
                    ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2124
                    lfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2125
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2126
                    if ((stack != null) && stack.pre) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2127
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2128
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2129
                    if (textpos == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2130
                        lastBlockStartPos = currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2131
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2132
                    if (!ignoreSpace) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2133
                        space = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2134
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2135
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2136
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2137
                  case '\r':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2138
                    ln++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2139
                    c = '\n';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2140
                    if ((ch = readCh()) == '\n') {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2141
                        ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2142
                        crlfCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2143
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2144
                    else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2145
                        crCount++;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2146
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2147
                    if ((stack != null) && stack.pre) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2148
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2149
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2150
                    if (textpos == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2151
                        lastBlockStartPos = currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2152
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2153
                    if (!ignoreSpace) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2154
                        space = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2155
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2156
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2157
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2158
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2159
                  case '\t':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2160
                  case ' ':
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2161
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2162
                    if ((stack != null) && stack.pre) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2163
                        break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2164
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2165
                    if (textpos == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2166
                        lastBlockStartPos = currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2167
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2168
                    if (!ignoreSpace) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2169
                        space = true;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2170
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2171
                    continue;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2172
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2173
                  default:
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2174
                    if (textpos == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2175
                        if (!legalElementContext(dtd.pcdata)) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2176
                            error("unexpected.pcdata");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2177
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2178
                        if (last.breaksFlow()) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2179
                            space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2180
                        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2181
                    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2182
                    ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2183
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2184
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2185
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2186
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2187
            // enlarge buffer if needed
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2188
            if (textpos + 2 > text.length) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2189
                char newtext[] = new char[text.length + 128];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2190
                System.arraycopy(text, 0, newtext, 0, text.length);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2191
                text = newtext;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2192
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2193
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2194
            // output pending space
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2195
            if (space) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2196
                if (textpos == 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2197
                    lastBlockStartPos--;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2198
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2199
                text[textpos++] = ' ';
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2200
                space = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2201
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2202
            text[textpos++] = (char)c;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2203
            ignoreSpace = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2204
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2205
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2206
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2207
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2208
     * Returns the end of line string. This will return the end of line
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2209
     * string that has been encountered the most, one of \r, \n or \r\n.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2210
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2211
    String getEndOfLineString() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2212
        if (crlfCount >= crCount) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2213
            if (lfCount >= crlfCount) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2214
                return "\n";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2215
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2216
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2217
                return "\r\n";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2218
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2219
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2220
        else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2221
            if (crCount > lfCount) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2222
                return "\r";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2223
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2224
            else {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2225
                return "\n";
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2226
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2227
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2228
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2229
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2230
    /**
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2231
     * Parse an HTML stream, given a DTD.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2232
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2233
    public synchronized void parse(Reader in) throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2234
        this.in = in;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2235
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2236
        this.ln = 1;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2237
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2238
        seenHtml = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2239
        seenHead = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2240
        seenBody = false;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2241
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2242
        crCount = lfCount = crlfCount = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2243
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2244
        try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2245
            ch = readCh();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2246
            text = new char[1024];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2247
            str = new char[128];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2248
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2249
            parseContent();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2250
            // NOTE: interruption may have occurred.  Control flows out
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2251
            // of here normally.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2252
            while (stack != null) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2253
                endTag(true);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2254
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2255
            in.close();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2256
        } catch (IOException e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2257
            errorContext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2258
            error("ioexception");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2259
            throw e;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2260
        } catch (Exception e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2261
            errorContext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2262
            error("exception", e.getClass().getName(), e.getMessage());
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2263
            e.printStackTrace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2264
        } catch (ThreadDeath e) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2265
            errorContext();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2266
            error("terminated");
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2267
            e.printStackTrace();
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2268
            throw e;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2269
        } finally {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2270
            for (; stack != null ; stack = stack.next) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2271
                handleEndTag(stack.tag);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2272
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2273
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2274
            text = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2275
            str = null;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2276
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2277
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2278
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2279
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2280
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2281
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2282
     * Input cache.  This is much faster than calling down to a synchronized
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2283
     * method of BufferedReader for each byte.  Measurements done 5/30/97
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2284
     * show that there's no point in having a bigger buffer:  Increasing
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2285
     * the buffer to 8192 had no measurable impact for a program discarding
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2286
     * one character at a time (reading from an http URL to a local machine).
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2287
     * NOTE: If the current encoding is bogus, and we read too much
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2288
     * (past the content-type) we may suffer a MalformedInputException. For
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2289
     * this reason the initial size is 1 and when the body is encountered the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2290
     * size is adjusted to 256.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2291
     */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2292
    private char buf[] = new char[1];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2293
    private int pos;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2294
    private int len;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2295
    /*
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2296
        tracks position relative to the beginning of the
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2297
        document.
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2298
    */
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2299
    private int currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2300
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2301
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2302
    private final int readCh() throws IOException {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2303
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2304
        if (pos >= len) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2305
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2306
            // This loop allows us to ignore interrupts if the flag
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2307
            // says so
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2308
            for (;;) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2309
                try {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2310
                    len = in.read(buf);
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2311
                    break;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2312
                } catch (InterruptedIOException ex) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2313
                    throw ex;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2314
                }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2315
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2316
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2317
            if (len <= 0) {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2318
                return -1;      // eof
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2319
            }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2320
            pos = 0;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2321
        }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2322
        ++currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2323
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2324
        return buf[pos++];
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2325
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2326
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2327
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2328
    protected int getCurrentPos() {
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2329
        return currentPosition;
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2330
    }
90ce3da70b43 Initial load
duke
parents:
diff changeset
  2331
}