author | yan |
Fri, 16 May 2014 17:41:47 +0400 | |
changeset 24494 | 67129b9360d2 |
parent 23010 | 6dadb192ad81 |
child 25147 | fd9451d440ff |
permissions | -rw-r--r-- |
2 | 1 |
/* |
23010
6dadb192ad81
8029235: Update copyright year to match last edit in jdk8 jdk repository for 2013
lana
parents:
21278
diff
changeset
|
2 |
* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. |
2 | 3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
5506 | 7 |
* published by the Free Software Foundation. Oracle designates this |
2 | 8 |
* particular file as subject to the "Classpath" exception as provided |
5506 | 9 |
* by Oracle in the LICENSE file that accompanied this code. |
2 | 10 |
* |
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
5506 | 21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
2 | 24 |
*/ |
25 |
||
26 |
package javax.swing.text.html.parser; |
|
27 |
||
28 |
import javax.swing.text.SimpleAttributeSet; |
|
29 |
import javax.swing.text.html.HTMLEditorKit; |
|
30 |
import javax.swing.text.html.HTML; |
|
31 |
import javax.swing.text.ChangedCharSetException; |
|
32 |
||
33 |
import java.util.*; |
|
34 |
import java.io.*; |
|
35 |
import java.net.*; |
|
36 |
||
37 |
/** |
|
38 |
* A Parser for HTML Documents (actually, you can specify a DTD, but |
|
39 |
* you should really only use this class with the html dtd in swing). |
|
40 |
* Reads an InputStream of HTML and |
|
41 |
* invokes the appropriate methods in the ParserCallback class. This |
|
42 |
* is the default parser used by HTMLEditorKit to parse HTML url's. |
|
43 |
* <p>This will message the callback for all valid tags, as well as |
|
44 |
* tags that are implied but not explicitly specified. For example, the |
|
45 |
* html string (<p>blah) only has a p tag defined. The callback |
|
46 |
* will see the following methods: |
|
47 |
* <ol><li><i>handleStartTag(html, ...)</i></li> |
|
48 |
* <li><i>handleStartTag(head, ...)</i></li> |
|
49 |
* <li><i>handleEndTag(head)</i></li> |
|
50 |
* <li><i>handleStartTag(body, ...)</i></li> |
|
20169 | 51 |
* <li><i>handleStartTag(p, ...)</i></li> |
52 |
* <li><i>handleText(...)</i></li> |
|
2 | 53 |
* <li><i>handleEndTag(p)</i></li> |
54 |
* <li><i>handleEndTag(body)</i></li> |
|
55 |
* <li><i>handleEndTag(html)</i></li> |
|
56 |
* </ol> |
|
57 |
* The items in <i>italic</i> are implied, that is, although they were not |
|
58 |
* explicitly specified, to be correct html they should have been present |
|
59 |
* (head isn't necessary, but it is still generated). For tags that |
|
60 |
* are implied, the AttributeSet argument will have a value of |
|
61 |
* <code>Boolean.TRUE</code> for the key |
|
62 |
* <code>HTMLEditorKit.ParserCallback.IMPLIED</code>. |
|
63 |
* <p>HTML.Attributes defines a type safe enumeration of html attributes. |
|
64 |
* If an attribute key of a tag is defined in HTML.Attribute, the |
|
65 |
* HTML.Attribute will be used as the key, otherwise a String will be used. |
|
66 |
* For example <p foo=bar class=neat> has two attributes. foo is |
|
67 |
* not defined in HTML.Attribute, where as class is, therefore the |
|
68 |
* AttributeSet will have two values in it, HTML.Attribute.CLASS with |
|
69 |
* a String value of 'neat' and the String key 'foo' with a String value of |
|
70 |
* 'bar'. |
|
71 |
* <p>The position argument will indicate the start of the tag, comment |
|
21278 | 72 |
* or text. Similar to arrays, the first character in the stream has a |
2 | 73 |
* position of 0. For tags that are |
74 |
* implied the position will indicate |
|
75 |
* the location of the next encountered tag. In the first example, |
|
76 |
* the implied start body and html tags will have the same position as the |
|
77 |
* p tag, and the implied end p, html and body tags will all have the same |
|
78 |
* position. |
|
79 |
* <p>As html skips whitespace the position for text will be the position |
|
80 |
* of the first valid character, eg in the string '\n\n\nblah' |
|
81 |
* the text 'blah' will have a position of 3, the newlines are skipped. |
|
82 |
* <p> |
|
83 |
* For attributes that do not have a value, eg in the html |
|
84 |
* string <code><foo blah></code> the attribute <code>blah</code> |
|
85 |
* does not have a value, there are two possible values that will be |
|
86 |
* placed in the AttributeSet's value: |
|
87 |
* <ul> |
|
88 |
* <li>If the DTD does not contain an definition for the element, or the |
|
89 |
* definition does not have an explicit value then the value in the |
|
90 |
* AttributeSet will be <code>HTML.NULL_ATTRIBUTE_VALUE</code>. |
|
91 |
* <li>If the DTD contains an explicit value, as in: |
|
92 |
* <code><!ATTLIST OPTION selected (selected) #IMPLIED></code> |
|
93 |
* this value from the dtd (in this case selected) will be used. |
|
94 |
* </ul> |
|
95 |
* <p> |
|
96 |
* Once the stream has been parsed, the callback is notified of the most |
|
97 |
* likely end of line string. The end of line string will be one of |
|
98 |
* \n, \r or \r\n, which ever is encountered the most in parsing the |
|
99 |
* stream. |
|
100 |
* |
|
101 |
* @author Sunita Mani |
|
102 |
*/ |
|
103 |
public class DocumentParser extends javax.swing.text.html.parser.Parser { |
|
104 |
||
105 |
private int inbody; |
|
106 |
private int intitle; |
|
107 |
private int inhead; |
|
108 |
private int instyle; |
|
109 |
private int inscript; |
|
110 |
private boolean seentitle; |
|
111 |
private HTMLEditorKit.ParserCallback callback = null; |
|
112 |
private boolean ignoreCharSet = false; |
|
113 |
private static final boolean debugFlag = false; |
|
114 |
||
115 |
public DocumentParser(DTD dtd) { |
|
116 |
super(dtd); |
|
117 |
} |
|
118 |
||
119 |
public void parse(Reader in, HTMLEditorKit.ParserCallback callback, boolean ignoreCharSet) throws IOException { |
|
120 |
this.ignoreCharSet = ignoreCharSet; |
|
121 |
this.callback = callback; |
|
122 |
parse(in); |
|
123 |
// end of line |
|
124 |
callback.handleEndOfLineString(getEndOfLineString()); |
|
125 |
} |
|
126 |
||
127 |
/** |
|
128 |
* Handle Start Tag. |
|
129 |
*/ |
|
130 |
protected void handleStartTag(TagElement tag) { |
|
131 |
||
132 |
Element elem = tag.getElement(); |
|
133 |
if (elem == dtd.body) { |
|
134 |
inbody++; |
|
135 |
} else if (elem == dtd.html) { |
|
136 |
} else if (elem == dtd.head) { |
|
137 |
inhead++; |
|
138 |
} else if (elem == dtd.title) { |
|
139 |
intitle++; |
|
140 |
} else if (elem == dtd.style) { |
|
141 |
instyle++; |
|
142 |
} else if (elem == dtd.script) { |
|
143 |
inscript++; |
|
144 |
} |
|
145 |
if (debugFlag) { |
|
146 |
if (tag.fictional()) { |
|
147 |
debug("Start Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos()); |
|
148 |
} else { |
|
149 |
debug("Start Tag: " + tag.getHTMLTag() + " attributes: " + |
|
150 |
getAttributes() + " pos: " + getCurrentPos()); |
|
151 |
} |
|
152 |
} |
|
153 |
if (tag.fictional()) { |
|
154 |
SimpleAttributeSet attrs = new SimpleAttributeSet(); |
|
155 |
attrs.addAttribute(HTMLEditorKit.ParserCallback.IMPLIED, |
|
156 |
Boolean.TRUE); |
|
157 |
callback.handleStartTag(tag.getHTMLTag(), attrs, |
|
158 |
getBlockStartPosition()); |
|
159 |
} else { |
|
160 |
callback.handleStartTag(tag.getHTMLTag(), getAttributes(), |
|
161 |
getBlockStartPosition()); |
|
162 |
flushAttributes(); |
|
163 |
} |
|
164 |
} |
|
165 |
||
166 |
||
167 |
protected void handleComment(char text[]) { |
|
168 |
if (debugFlag) { |
|
169 |
debug("comment: ->" + new String(text) + "<-" |
|
170 |
+ " pos: " + getCurrentPos()); |
|
171 |
} |
|
172 |
callback.handleComment(text, getBlockStartPosition()); |
|
173 |
} |
|
174 |
||
175 |
/** |
|
176 |
* Handle Empty Tag. |
|
177 |
*/ |
|
178 |
protected void handleEmptyTag(TagElement tag) throws ChangedCharSetException { |
|
179 |
||
180 |
Element elem = tag.getElement(); |
|
181 |
if (elem == dtd.meta && !ignoreCharSet) { |
|
182 |
SimpleAttributeSet atts = getAttributes(); |
|
183 |
if (atts != null) { |
|
184 |
String content = (String)atts.getAttribute(HTML.Attribute.CONTENT); |
|
185 |
if (content != null) { |
|
186 |
if ("content-type".equalsIgnoreCase((String)atts.getAttribute(HTML.Attribute.HTTPEQUIV))) { |
|
187 |
if (!content.equalsIgnoreCase("text/html") && |
|
188 |
!content.equalsIgnoreCase("text/plain")) { |
|
189 |
throw new ChangedCharSetException(content, false); |
|
190 |
} |
|
191 |
} else if ("charset" .equalsIgnoreCase((String)atts.getAttribute(HTML.Attribute.HTTPEQUIV))) { |
|
192 |
throw new ChangedCharSetException(content, true); |
|
193 |
} |
|
194 |
} |
|
195 |
} |
|
196 |
} |
|
197 |
if (inbody != 0 || elem == dtd.meta || elem == dtd.base || elem == dtd.isindex || elem == dtd.style || elem == dtd.link) { |
|
198 |
if (debugFlag) { |
|
199 |
if (tag.fictional()) { |
|
200 |
debug("Empty Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos()); |
|
201 |
} else { |
|
202 |
debug("Empty Tag: " + tag.getHTMLTag() + " attributes: " |
|
203 |
+ getAttributes() + " pos: " + getCurrentPos()); |
|
204 |
} |
|
205 |
} |
|
206 |
if (tag.fictional()) { |
|
207 |
SimpleAttributeSet attrs = new SimpleAttributeSet(); |
|
208 |
attrs.addAttribute(HTMLEditorKit.ParserCallback.IMPLIED, |
|
209 |
Boolean.TRUE); |
|
210 |
callback.handleSimpleTag(tag.getHTMLTag(), attrs, |
|
211 |
getBlockStartPosition()); |
|
212 |
} else { |
|
213 |
callback.handleSimpleTag(tag.getHTMLTag(), getAttributes(), |
|
214 |
getBlockStartPosition()); |
|
215 |
flushAttributes(); |
|
216 |
} |
|
217 |
} |
|
218 |
} |
|
219 |
||
220 |
/** |
|
221 |
* Handle End Tag. |
|
222 |
*/ |
|
223 |
protected void handleEndTag(TagElement tag) { |
|
224 |
Element elem = tag.getElement(); |
|
225 |
if (elem == dtd.body) { |
|
226 |
inbody--; |
|
227 |
} else if (elem == dtd.title) { |
|
228 |
intitle--; |
|
229 |
seentitle = true; |
|
230 |
} else if (elem == dtd.head) { |
|
231 |
inhead--; |
|
232 |
} else if (elem == dtd.style) { |
|
233 |
instyle--; |
|
234 |
} else if (elem == dtd.script) { |
|
235 |
inscript--; |
|
236 |
} |
|
237 |
if (debugFlag) { |
|
238 |
debug("End Tag: " + tag.getHTMLTag() + " pos: " + getCurrentPos()); |
|
239 |
} |
|
240 |
callback.handleEndTag(tag.getHTMLTag(), getBlockStartPosition()); |
|
241 |
||
242 |
} |
|
243 |
||
244 |
/** |
|
245 |
* Handle Text. |
|
246 |
*/ |
|
247 |
protected void handleText(char data[]) { |
|
248 |
if (data != null) { |
|
249 |
if (inscript != 0) { |
|
250 |
callback.handleComment(data, getBlockStartPosition()); |
|
251 |
return; |
|
252 |
} |
|
253 |
if (inbody != 0 || ((instyle != 0) || |
|
254 |
((intitle != 0) && !seentitle))) { |
|
255 |
if (debugFlag) { |
|
256 |
debug("text: ->" + new String(data) + "<-" + " pos: " + getCurrentPos()); |
|
257 |
} |
|
258 |
callback.handleText(data, getBlockStartPosition()); |
|
259 |
} |
|
260 |
} |
|
261 |
} |
|
262 |
||
263 |
/* |
|
264 |
* Error handling. |
|
265 |
*/ |
|
266 |
protected void handleError(int ln, String errorMsg) { |
|
267 |
if (debugFlag) { |
|
268 |
debug("Error: ->" + errorMsg + "<-" + " pos: " + getCurrentPos()); |
|
269 |
} |
|
270 |
/* PENDING: need to improve the error string. */ |
|
271 |
callback.handleError(errorMsg, getCurrentPos()); |
|
272 |
} |
|
273 |
||
274 |
||
275 |
/* |
|
276 |
* debug messages |
|
277 |
*/ |
|
278 |
private void debug(String msg) { |
|
279 |
System.out.println(msg); |
|
280 |
} |
|
281 |
} |