author | ksrini |
Fri, 09 Feb 2018 13:58:17 -0800 | |
changeset 48840 | 5e2d2067da48 |
parent 47216 | 71c04702a3d5 |
permissions | -rw-r--r-- |
43261 | 1 |
/* |
2 |
* Copyright (c) 2012,2016, Oracle and/or its affiliates. All rights reserved. |
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
7 |
* published by the Free Software Foundation. Oracle designates this |
|
8 |
* particular file as subject to the "Classpath" exception as provided |
|
9 |
* by Oracle in the LICENSE file that accompanied this code. |
|
10 |
* |
|
11 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
15 |
* accompanied this code). |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU General Public License version |
|
18 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
19 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
* |
|
21 |
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 |
* or visit www.oracle.com if you need additional information or have any |
|
23 |
* questions. |
|
24 |
*/ |
|
25 |
||
26 |
package com.sun.tools.javadoc.main; |
|
27 |
||
28 |
import java.util.Arrays; |
|
29 |
import java.util.HashMap; |
|
30 |
import java.util.HashSet; |
|
31 |
import java.util.Locale; |
|
32 |
import java.util.Map; |
|
33 |
import java.util.Set; |
|
34 |
||
35 |
import com.sun.tools.javadoc.main.JavaScriptScanner.TagParser.Kind; |
|
36 |
||
37 |
import static com.sun.tools.javac.util.LayoutCharacters.EOI; |
|
38 |
||
39 |
/** |
|
40 |
* Parser to detect use of JavaScript in documentation comments. |
|
41 |
*/ |
|
48840
5e2d2067da48
8194651: javadoc: mark the com.sun.javadoc API for removal
ksrini
parents:
47216
diff
changeset
|
42 |
@Deprecated(since="9", forRemoval=true) |
5e2d2067da48
8194651: javadoc: mark the com.sun.javadoc API for removal
ksrini
parents:
47216
diff
changeset
|
43 |
@SuppressWarnings("removal") |
43261 | 44 |
public class JavaScriptScanner { |
45 |
public static interface Reporter { |
|
46 |
void report(); |
|
47 |
} |
|
48 |
||
49 |
static class ParseException extends Exception { |
|
50 |
private static final long serialVersionUID = 0; |
|
51 |
ParseException(String key) { |
|
52 |
super(key); |
|
53 |
} |
|
54 |
} |
|
55 |
||
56 |
private Reporter reporter; |
|
57 |
||
58 |
/** The input buffer, index of most recent character read, |
|
59 |
* index of one past last character in buffer. |
|
60 |
*/ |
|
61 |
protected char[] buf; |
|
62 |
protected int bp; |
|
63 |
protected int buflen; |
|
64 |
||
65 |
/** The current character. |
|
66 |
*/ |
|
67 |
protected char ch; |
|
68 |
||
69 |
private boolean newline = true; |
|
70 |
||
71 |
Map<String, TagParser> tagParsers; |
|
72 |
Set<String> eventAttrs; |
|
73 |
Set<String> uriAttrs; |
|
74 |
||
75 |
public JavaScriptScanner() { |
|
76 |
initTagParsers(); |
|
77 |
initEventAttrs(); |
|
78 |
initURIAttrs(); |
|
79 |
} |
|
80 |
||
81 |
public void parse(String comment, Reporter r) { |
|
82 |
reporter = r; |
|
83 |
String c = comment; |
|
84 |
buf = new char[c.length() + 1]; |
|
85 |
c.getChars(0, c.length(), buf, 0); |
|
86 |
buf[buf.length - 1] = EOI; |
|
87 |
buflen = buf.length - 1; |
|
88 |
bp = -1; |
|
89 |
newline = true; |
|
90 |
nextChar(); |
|
91 |
||
92 |
blockContent(); |
|
93 |
blockTags(); |
|
94 |
} |
|
95 |
||
96 |
private void checkHtmlTag(String tag) { |
|
97 |
if (tag.equalsIgnoreCase("script")) { |
|
98 |
reporter.report(); |
|
99 |
} |
|
100 |
} |
|
101 |
||
102 |
private void checkHtmlAttr(String name, String value) { |
|
103 |
String n = name.toLowerCase(Locale.ENGLISH); |
|
104 |
if (eventAttrs.contains(n) |
|
105 |
|| uriAttrs.contains(n) |
|
106 |
&& value != null && value.toLowerCase(Locale.ENGLISH).trim().startsWith("javascript:")) { |
|
107 |
reporter.report(); |
|
108 |
} |
|
109 |
} |
|
110 |
||
111 |
void nextChar() { |
|
112 |
ch = buf[bp < buflen ? ++bp : buflen]; |
|
113 |
switch (ch) { |
|
114 |
case '\f': case '\n': case '\r': |
|
115 |
newline = true; |
|
116 |
} |
|
117 |
} |
|
118 |
||
119 |
/** |
|
120 |
* Read block content, consisting of text, html and inline tags. |
|
121 |
* Terminated by the end of input, or the beginning of the next block tag: |
|
122 |
* i.e. @ as the first non-whitespace character on a line. |
|
123 |
*/ |
|
124 |
@SuppressWarnings("fallthrough") |
|
125 |
protected void blockContent() { |
|
126 |
||
127 |
loop: |
|
128 |
while (bp < buflen) { |
|
129 |
switch (ch) { |
|
130 |
case '\n': case '\r': case '\f': |
|
131 |
newline = true; |
|
132 |
// fallthrough |
|
133 |
||
134 |
case ' ': case '\t': |
|
135 |
nextChar(); |
|
136 |
break; |
|
137 |
||
138 |
case '&': |
|
139 |
entity(null); |
|
140 |
break; |
|
141 |
||
142 |
case '<': |
|
143 |
html(); |
|
144 |
break; |
|
145 |
||
146 |
case '>': |
|
147 |
newline = false; |
|
148 |
nextChar(); |
|
149 |
break; |
|
150 |
||
151 |
case '{': |
|
152 |
inlineTag(null); |
|
153 |
break; |
|
154 |
||
155 |
case '@': |
|
156 |
if (newline) { |
|
157 |
break loop; |
|
158 |
} |
|
159 |
// fallthrough |
|
160 |
||
161 |
default: |
|
162 |
newline = false; |
|
163 |
nextChar(); |
|
164 |
} |
|
165 |
} |
|
166 |
} |
|
167 |
||
168 |
/** |
|
169 |
* Read a series of block tags, including their content. |
|
170 |
* Standard tags parse their content appropriately. |
|
171 |
* Non-standard tags are represented by {@link UnknownBlockTag}. |
|
172 |
*/ |
|
173 |
protected void blockTags() { |
|
174 |
while (ch == '@') |
|
175 |
blockTag(); |
|
176 |
} |
|
177 |
||
178 |
/** |
|
179 |
* Read a single block tag, including its content. |
|
180 |
* Standard tags parse their content appropriately. |
|
181 |
* Non-standard tags are represented by {@link UnknownBlockTag}. |
|
182 |
*/ |
|
183 |
protected void blockTag() { |
|
184 |
int p = bp; |
|
185 |
try { |
|
186 |
nextChar(); |
|
187 |
if (isIdentifierStart(ch)) { |
|
188 |
String name = readTagName(); |
|
189 |
TagParser tp = tagParsers.get(name); |
|
190 |
if (tp == null) { |
|
191 |
blockContent(); |
|
192 |
} else { |
|
193 |
switch (tp.getKind()) { |
|
194 |
case BLOCK: |
|
195 |
tp.parse(p); |
|
196 |
return; |
|
197 |
case INLINE: |
|
198 |
return; |
|
199 |
} |
|
200 |
} |
|
201 |
} |
|
202 |
blockContent(); |
|
203 |
} catch (ParseException e) { |
|
204 |
blockContent(); |
|
205 |
} |
|
206 |
} |
|
207 |
||
208 |
protected void inlineTag(Void list) { |
|
209 |
newline = false; |
|
210 |
nextChar(); |
|
211 |
if (ch == '@') { |
|
212 |
inlineTag(); |
|
213 |
} |
|
214 |
} |
|
215 |
||
216 |
/** |
|
217 |
* Read a single inline tag, including its content. |
|
218 |
* Standard tags parse their content appropriately. |
|
219 |
* Non-standard tags are represented by {@link UnknownBlockTag}. |
|
220 |
* Malformed tags may be returned as {@link Erroneous}. |
|
221 |
*/ |
|
222 |
protected void inlineTag() { |
|
223 |
int p = bp - 1; |
|
224 |
try { |
|
225 |
nextChar(); |
|
226 |
if (isIdentifierStart(ch)) { |
|
227 |
String name = readTagName(); |
|
228 |
TagParser tp = tagParsers.get(name); |
|
229 |
||
230 |
if (tp == null) { |
|
231 |
skipWhitespace(); |
|
232 |
inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); |
|
233 |
nextChar(); |
|
234 |
} else { |
|
235 |
skipWhitespace(); |
|
236 |
if (tp.getKind() == TagParser.Kind.INLINE) { |
|
237 |
tp.parse(p); |
|
238 |
} else { // handle block tags (ex: @see) in inline content |
|
239 |
inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip content |
|
240 |
nextChar(); |
|
241 |
} |
|
242 |
} |
|
243 |
} |
|
244 |
} catch (ParseException e) { |
|
245 |
} |
|
246 |
} |
|
247 |
||
248 |
private static enum WhitespaceRetentionPolicy { |
|
249 |
RETAIN_ALL, |
|
250 |
REMOVE_FIRST_SPACE, |
|
251 |
REMOVE_ALL |
|
252 |
} |
|
253 |
||
254 |
/** |
|
255 |
* Read plain text content of an inline tag. |
|
256 |
* Matching pairs of { } are skipped; the text is terminated by the first |
|
257 |
* unmatched }. It is an error if the beginning of the next tag is detected. |
|
258 |
*/ |
|
259 |
private void inlineText(WhitespaceRetentionPolicy whitespacePolicy) throws ParseException { |
|
260 |
switch (whitespacePolicy) { |
|
261 |
case REMOVE_ALL: |
|
262 |
skipWhitespace(); |
|
263 |
break; |
|
264 |
case REMOVE_FIRST_SPACE: |
|
265 |
if (ch == ' ') |
|
266 |
nextChar(); |
|
267 |
break; |
|
268 |
case RETAIN_ALL: |
|
269 |
default: |
|
270 |
// do nothing |
|
271 |
break; |
|
272 |
||
273 |
} |
|
274 |
int pos = bp; |
|
275 |
int depth = 1; |
|
276 |
||
277 |
loop: |
|
278 |
while (bp < buflen) { |
|
279 |
switch (ch) { |
|
280 |
case '\n': case '\r': case '\f': |
|
281 |
newline = true; |
|
282 |
break; |
|
283 |
||
284 |
case ' ': case '\t': |
|
285 |
break; |
|
286 |
||
287 |
case '{': |
|
288 |
newline = false; |
|
289 |
depth++; |
|
290 |
break; |
|
291 |
||
292 |
case '}': |
|
293 |
if (--depth == 0) { |
|
294 |
return; |
|
295 |
} |
|
296 |
newline = false; |
|
297 |
break; |
|
298 |
||
299 |
case '@': |
|
300 |
if (newline) |
|
301 |
break loop; |
|
302 |
newline = false; |
|
303 |
break; |
|
304 |
||
305 |
default: |
|
306 |
newline = false; |
|
307 |
break; |
|
308 |
} |
|
309 |
nextChar(); |
|
310 |
} |
|
311 |
throw new ParseException("dc.unterminated.inline.tag"); |
|
312 |
} |
|
313 |
||
314 |
/** |
|
315 |
* Read Java class name, possibly followed by member |
|
316 |
* Matching pairs of {@literal < >} are skipped. The text is terminated by the first |
|
317 |
* unmatched }. It is an error if the beginning of the next tag is detected. |
|
318 |
*/ |
|
319 |
// TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE |
|
320 |
// TODO: improve quality of parse to forbid bad constructions. |
|
321 |
// TODO: update to use ReferenceParser |
|
322 |
@SuppressWarnings("fallthrough") |
|
323 |
protected void reference(boolean allowMember) throws ParseException { |
|
324 |
int pos = bp; |
|
325 |
int depth = 0; |
|
326 |
||
327 |
// scan to find the end of the signature, by looking for the first |
|
328 |
// whitespace not enclosed in () or <>, or the end of the tag |
|
329 |
loop: |
|
330 |
while (bp < buflen) { |
|
331 |
switch (ch) { |
|
332 |
case '\n': case '\r': case '\f': |
|
333 |
newline = true; |
|
334 |
// fallthrough |
|
335 |
||
336 |
case ' ': case '\t': |
|
337 |
if (depth == 0) |
|
338 |
break loop; |
|
339 |
break; |
|
340 |
||
341 |
case '(': |
|
342 |
case '<': |
|
343 |
newline = false; |
|
344 |
depth++; |
|
345 |
break; |
|
346 |
||
347 |
case ')': |
|
348 |
case '>': |
|
349 |
newline = false; |
|
350 |
--depth; |
|
351 |
break; |
|
352 |
||
353 |
case '}': |
|
354 |
if (bp == pos) |
|
355 |
return; |
|
356 |
newline = false; |
|
357 |
break loop; |
|
358 |
||
359 |
case '@': |
|
360 |
if (newline) |
|
361 |
break loop; |
|
362 |
// fallthrough |
|
363 |
||
364 |
default: |
|
365 |
newline = false; |
|
366 |
||
367 |
} |
|
368 |
nextChar(); |
|
369 |
} |
|
370 |
||
371 |
if (depth != 0) |
|
372 |
throw new ParseException("dc.unterminated.signature"); |
|
373 |
} |
|
374 |
||
375 |
/** |
|
376 |
* Read Java identifier |
|
377 |
* Matching pairs of { } are skipped; the text is terminated by the first |
|
378 |
* unmatched }. It is an error if the beginning of the next tag is detected. |
|
379 |
*/ |
|
380 |
@SuppressWarnings("fallthrough") |
|
381 |
protected void identifier() throws ParseException { |
|
382 |
skipWhitespace(); |
|
383 |
int pos = bp; |
|
384 |
||
385 |
if (isJavaIdentifierStart(ch)) { |
|
386 |
readJavaIdentifier(); |
|
387 |
return; |
|
388 |
} |
|
389 |
||
390 |
throw new ParseException("dc.identifier.expected"); |
|
391 |
} |
|
392 |
||
393 |
/** |
|
394 |
* Read a quoted string. |
|
395 |
* It is an error if the beginning of the next tag is detected. |
|
396 |
*/ |
|
397 |
@SuppressWarnings("fallthrough") |
|
398 |
protected void quotedString() { |
|
399 |
int pos = bp; |
|
400 |
nextChar(); |
|
401 |
||
402 |
loop: |
|
403 |
while (bp < buflen) { |
|
404 |
switch (ch) { |
|
405 |
case '\n': case '\r': case '\f': |
|
406 |
newline = true; |
|
407 |
break; |
|
408 |
||
409 |
case ' ': case '\t': |
|
410 |
break; |
|
411 |
||
412 |
case '"': |
|
413 |
nextChar(); |
|
414 |
// trim trailing white-space? |
|
415 |
return; |
|
416 |
||
417 |
case '@': |
|
418 |
if (newline) |
|
419 |
break loop; |
|
420 |
||
421 |
} |
|
422 |
nextChar(); |
|
423 |
} |
|
424 |
} |
|
425 |
||
426 |
/** |
|
427 |
* Read a term ie. one word. |
|
428 |
* It is an error if the beginning of the next tag is detected. |
|
429 |
*/ |
|
430 |
@SuppressWarnings("fallthrough") |
|
431 |
protected void inlineWord() { |
|
432 |
int pos = bp; |
|
433 |
int depth = 0; |
|
434 |
loop: |
|
435 |
while (bp < buflen) { |
|
436 |
switch (ch) { |
|
437 |
case '\n': |
|
438 |
newline = true; |
|
439 |
// fallthrough |
|
440 |
||
441 |
case '\r': case '\f': case ' ': case '\t': |
|
442 |
return; |
|
443 |
||
444 |
case '@': |
|
445 |
if (newline) |
|
446 |
break loop; |
|
447 |
||
448 |
case '{': |
|
449 |
depth++; |
|
450 |
break; |
|
451 |
||
452 |
case '}': |
|
453 |
if (depth == 0 || --depth == 0) |
|
454 |
return; |
|
455 |
break; |
|
456 |
} |
|
457 |
newline = false; |
|
458 |
nextChar(); |
|
459 |
} |
|
460 |
} |
|
461 |
||
462 |
/** |
|
463 |
* Read general text content of an inline tag, including HTML entities and elements. |
|
464 |
* Matching pairs of { } are skipped; the text is terminated by the first |
|
465 |
* unmatched }. It is an error if the beginning of the next tag is detected. |
|
466 |
*/ |
|
467 |
@SuppressWarnings("fallthrough") |
|
468 |
private void inlineContent() { |
|
469 |
||
470 |
skipWhitespace(); |
|
471 |
int pos = bp; |
|
472 |
int depth = 1; |
|
473 |
||
474 |
loop: |
|
475 |
while (bp < buflen) { |
|
476 |
||
477 |
switch (ch) { |
|
478 |
case '\n': case '\r': case '\f': |
|
479 |
newline = true; |
|
480 |
// fall through |
|
481 |
||
482 |
case ' ': case '\t': |
|
483 |
nextChar(); |
|
484 |
break; |
|
485 |
||
486 |
case '&': |
|
487 |
entity(null); |
|
488 |
break; |
|
489 |
||
490 |
case '<': |
|
491 |
newline = false; |
|
492 |
html(); |
|
493 |
break; |
|
494 |
||
495 |
case '{': |
|
496 |
newline = false; |
|
497 |
depth++; |
|
498 |
nextChar(); |
|
499 |
break; |
|
500 |
||
501 |
case '}': |
|
502 |
newline = false; |
|
503 |
if (--depth == 0) { |
|
504 |
nextChar(); |
|
505 |
return; |
|
506 |
} |
|
507 |
nextChar(); |
|
508 |
break; |
|
509 |
||
510 |
case '@': |
|
511 |
if (newline) |
|
512 |
break loop; |
|
513 |
// fallthrough |
|
514 |
||
515 |
default: |
|
516 |
nextChar(); |
|
517 |
break; |
|
518 |
} |
|
519 |
} |
|
520 |
||
521 |
} |
|
522 |
||
523 |
protected void entity(Void list) { |
|
524 |
newline = false; |
|
525 |
entity(); |
|
526 |
} |
|
527 |
||
528 |
/** |
|
529 |
* Read an HTML entity. |
|
530 |
* {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; } |
|
531 |
*/ |
|
532 |
protected void entity() { |
|
533 |
nextChar(); |
|
534 |
String name = null; |
|
535 |
if (ch == '#') { |
|
536 |
int namep = bp; |
|
537 |
nextChar(); |
|
538 |
if (isDecimalDigit(ch)) { |
|
539 |
nextChar(); |
|
540 |
while (isDecimalDigit(ch)) |
|
541 |
nextChar(); |
|
542 |
name = new String(buf, namep, bp - namep); |
|
543 |
} else if (ch == 'x' || ch == 'X') { |
|
544 |
nextChar(); |
|
545 |
if (isHexDigit(ch)) { |
|
546 |
nextChar(); |
|
547 |
while (isHexDigit(ch)) |
|
548 |
nextChar(); |
|
549 |
name = new String(buf, namep, bp - namep); |
|
550 |
} |
|
551 |
} |
|
552 |
} else if (isIdentifierStart(ch)) { |
|
553 |
name = readIdentifier(); |
|
554 |
} |
|
555 |
||
556 |
if (name != null) { |
|
557 |
if (ch != ';') |
|
558 |
return; |
|
559 |
nextChar(); |
|
560 |
} |
|
561 |
} |
|
562 |
||
563 |
/** |
|
564 |
* Read the start or end of an HTML tag, or an HTML comment |
|
565 |
* {@literal <identifier attrs> } or {@literal </identifier> } |
|
566 |
*/ |
|
567 |
protected void html() { |
|
568 |
int p = bp; |
|
569 |
nextChar(); |
|
570 |
if (isIdentifierStart(ch)) { |
|
571 |
String name = readIdentifier(); |
|
572 |
checkHtmlTag(name); |
|
573 |
htmlAttrs(); |
|
574 |
if (ch == '/') { |
|
575 |
nextChar(); |
|
576 |
} |
|
577 |
if (ch == '>') { |
|
578 |
nextChar(); |
|
579 |
return; |
|
580 |
} |
|
581 |
} else if (ch == '/') { |
|
582 |
nextChar(); |
|
583 |
if (isIdentifierStart(ch)) { |
|
584 |
readIdentifier(); |
|
585 |
skipWhitespace(); |
|
586 |
if (ch == '>') { |
|
587 |
nextChar(); |
|
588 |
return; |
|
589 |
} |
|
590 |
} |
|
591 |
} else if (ch == '!') { |
|
592 |
nextChar(); |
|
593 |
if (ch == '-') { |
|
594 |
nextChar(); |
|
595 |
if (ch == '-') { |
|
596 |
nextChar(); |
|
597 |
while (bp < buflen) { |
|
598 |
int dash = 0; |
|
599 |
while (ch == '-') { |
|
600 |
dash++; |
|
601 |
nextChar(); |
|
602 |
} |
|
603 |
// Strictly speaking, a comment should not contain "--" |
|
604 |
// so dash > 2 is an error, dash == 2 implies ch == '>' |
|
605 |
// See http://www.w3.org/TR/html-markup/syntax.html#syntax-comments |
|
606 |
// for more details. |
|
607 |
if (dash >= 2 && ch == '>') { |
|
608 |
nextChar(); |
|
609 |
return; |
|
610 |
} |
|
611 |
||
612 |
nextChar(); |
|
613 |
} |
|
614 |
} |
|
615 |
} |
|
616 |
} |
|
617 |
||
618 |
bp = p + 1; |
|
619 |
ch = buf[bp]; |
|
620 |
} |
|
621 |
||
622 |
/** |
|
623 |
* Read a series of HTML attributes, terminated by {@literal > }. |
|
624 |
* Each attribute is of the form {@literal identifier[=value] }. |
|
625 |
* "value" may be unquoted, single-quoted, or double-quoted. |
|
626 |
*/ |
|
627 |
protected void htmlAttrs() { |
|
628 |
skipWhitespace(); |
|
629 |
||
630 |
loop: |
|
631 |
while (isIdentifierStart(ch)) { |
|
632 |
int namePos = bp; |
|
633 |
String name = readAttributeName(); |
|
634 |
skipWhitespace(); |
|
635 |
StringBuilder value = new StringBuilder(); |
|
636 |
if (ch == '=') { |
|
637 |
nextChar(); |
|
638 |
skipWhitespace(); |
|
639 |
if (ch == '\'' || ch == '"') { |
|
640 |
char quote = ch; |
|
641 |
nextChar(); |
|
642 |
while (bp < buflen && ch != quote) { |
|
643 |
if (newline && ch == '@') { |
|
644 |
// No point trying to read more. |
|
645 |
// In fact, all attrs get discarded by the caller |
|
646 |
// and superseded by a malformed.html node because |
|
647 |
// the html tag itself is not terminated correctly. |
|
648 |
break loop; |
|
649 |
} |
|
650 |
value.append(ch); |
|
651 |
nextChar(); |
|
652 |
} |
|
653 |
nextChar(); |
|
654 |
} else { |
|
655 |
while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) { |
|
656 |
value.append(ch); |
|
657 |
nextChar(); |
|
658 |
} |
|
659 |
} |
|
660 |
skipWhitespace(); |
|
661 |
} |
|
662 |
checkHtmlAttr(name, value.toString()); |
|
663 |
} |
|
664 |
} |
|
665 |
||
666 |
protected void attrValueChar(Void list) { |
|
667 |
switch (ch) { |
|
668 |
case '&': |
|
669 |
entity(list); |
|
670 |
break; |
|
671 |
||
672 |
case '{': |
|
673 |
inlineTag(list); |
|
674 |
break; |
|
675 |
||
676 |
default: |
|
677 |
nextChar(); |
|
678 |
} |
|
679 |
} |
|
680 |
||
681 |
protected boolean isIdentifierStart(char ch) { |
|
682 |
return Character.isUnicodeIdentifierStart(ch); |
|
683 |
} |
|
684 |
||
685 |
protected String readIdentifier() { |
|
686 |
int start = bp; |
|
687 |
nextChar(); |
|
688 |
while (bp < buflen && Character.isUnicodeIdentifierPart(ch)) |
|
689 |
nextChar(); |
|
690 |
return new String(buf, start, bp - start); |
|
691 |
} |
|
692 |
||
693 |
protected String readAttributeName() { |
|
694 |
int start = bp; |
|
695 |
nextChar(); |
|
696 |
while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '-')) |
|
697 |
nextChar(); |
|
698 |
return new String(buf, start, bp - start); |
|
699 |
} |
|
700 |
||
701 |
protected String readTagName() { |
|
702 |
int start = bp; |
|
703 |
nextChar(); |
|
704 |
while (bp < buflen |
|
705 |
&& (Character.isUnicodeIdentifierPart(ch) || ch == '.' |
|
706 |
|| ch == '-' || ch == ':')) { |
|
707 |
nextChar(); |
|
708 |
} |
|
709 |
return new String(buf, start, bp - start); |
|
710 |
} |
|
711 |
||
712 |
protected boolean isJavaIdentifierStart(char ch) { |
|
713 |
return Character.isJavaIdentifierStart(ch); |
|
714 |
} |
|
715 |
||
716 |
protected String readJavaIdentifier() { |
|
717 |
int start = bp; |
|
718 |
nextChar(); |
|
719 |
while (bp < buflen && Character.isJavaIdentifierPart(ch)) |
|
720 |
nextChar(); |
|
721 |
return new String(buf, start, bp - start); |
|
722 |
} |
|
723 |
||
724 |
protected boolean isDecimalDigit(char ch) { |
|
725 |
return ('0' <= ch && ch <= '9'); |
|
726 |
} |
|
727 |
||
728 |
protected boolean isHexDigit(char ch) { |
|
729 |
return ('0' <= ch && ch <= '9') |
|
730 |
|| ('a' <= ch && ch <= 'f') |
|
731 |
|| ('A' <= ch && ch <= 'F'); |
|
732 |
} |
|
733 |
||
734 |
protected boolean isUnquotedAttrValueTerminator(char ch) { |
|
735 |
switch (ch) { |
|
736 |
case '\f': case '\n': case '\r': case '\t': |
|
737 |
case ' ': |
|
738 |
case '"': case '\'': case '`': |
|
739 |
case '=': case '<': case '>': |
|
740 |
return true; |
|
741 |
default: |
|
742 |
return false; |
|
743 |
} |
|
744 |
} |
|
745 |
||
746 |
protected boolean isWhitespace(char ch) { |
|
747 |
return Character.isWhitespace(ch); |
|
748 |
} |
|
749 |
||
750 |
protected void skipWhitespace() { |
|
751 |
while (isWhitespace(ch)) { |
|
752 |
nextChar(); |
|
753 |
} |
|
754 |
} |
|
755 |
||
756 |
/** |
|
757 |
* @param start position of first character of string |
|
758 |
* @param end position of character beyond last character to be included |
|
759 |
*/ |
|
760 |
String newString(int start, int end) { |
|
761 |
return new String(buf, start, end - start); |
|
762 |
} |
|
763 |
||
764 |
static abstract class TagParser { |
|
765 |
enum Kind { INLINE, BLOCK } |
|
766 |
||
767 |
final Kind kind; |
|
768 |
final String name; |
|
769 |
||
770 |
||
771 |
TagParser(Kind k, String tk) { |
|
772 |
kind = k; |
|
773 |
name = tk; |
|
774 |
} |
|
775 |
||
776 |
TagParser(Kind k, String tk, boolean retainWhiteSpace) { |
|
777 |
this(k, tk); |
|
778 |
} |
|
779 |
||
780 |
Kind getKind() { |
|
781 |
return kind; |
|
782 |
} |
|
783 |
||
784 |
String getName() { |
|
785 |
return name; |
|
786 |
} |
|
787 |
||
788 |
abstract void parse(int pos) throws ParseException; |
|
789 |
} |
|
790 |
||
791 |
/** |
|
44810 | 792 |
* @see <a href="http://docs.oracle.com/javase/8/docs/technotes/tools/unix/javadoc.html#CHDJGIJB">Javadoc Tags</a> |
43261 | 793 |
*/ |
794 |
@SuppressWarnings("deprecation") |
|
795 |
private void initTagParsers() { |
|
796 |
TagParser[] parsers = { |
|
797 |
// @author name-text |
|
798 |
new TagParser(Kind.BLOCK, "author") { |
|
799 |
@Override |
|
800 |
public void parse(int pos) { |
|
801 |
blockContent(); |
|
802 |
} |
|
803 |
}, |
|
804 |
||
805 |
// {@code text} |
|
806 |
new TagParser(Kind.INLINE, "code", true) { |
|
807 |
@Override |
|
808 |
public void parse(int pos) throws ParseException { |
|
809 |
inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE); |
|
810 |
nextChar(); |
|
811 |
} |
|
812 |
}, |
|
813 |
||
814 |
// @deprecated deprecated-text |
|
815 |
new TagParser(Kind.BLOCK, "deprecated") { |
|
816 |
@Override |
|
817 |
public void parse(int pos) { |
|
818 |
blockContent(); |
|
819 |
} |
|
820 |
}, |
|
821 |
||
822 |
// {@docRoot} |
|
823 |
new TagParser(Kind.INLINE, "docRoot") { |
|
824 |
@Override |
|
825 |
public void parse(int pos) throws ParseException { |
|
826 |
if (ch == '}') { |
|
827 |
nextChar(); |
|
828 |
return; |
|
829 |
} |
|
830 |
inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content |
|
831 |
nextChar(); |
|
832 |
throw new ParseException("dc.unexpected.content"); |
|
833 |
} |
|
834 |
}, |
|
835 |
||
836 |
// @exception class-name description |
|
837 |
new TagParser(Kind.BLOCK, "exception") { |
|
838 |
@Override |
|
839 |
public void parse(int pos) throws ParseException { |
|
840 |
skipWhitespace(); |
|
841 |
reference(false); |
|
842 |
blockContent(); |
|
843 |
} |
|
844 |
}, |
|
845 |
||
846 |
// @hidden hidden-text |
|
847 |
new TagParser(Kind.BLOCK, "hidden") { |
|
848 |
@Override |
|
849 |
public void parse(int pos) { |
|
850 |
blockContent(); |
|
851 |
} |
|
852 |
}, |
|
853 |
||
854 |
// @index search-term options-description |
|
855 |
new TagParser(Kind.INLINE, "index") { |
|
856 |
@Override |
|
857 |
public void parse(int pos) throws ParseException { |
|
858 |
skipWhitespace(); |
|
859 |
if (ch == '}') { |
|
860 |
throw new ParseException("dc.no.content"); |
|
861 |
} |
|
862 |
if (ch == '"') quotedString(); else inlineWord(); |
|
863 |
skipWhitespace(); |
|
864 |
if (ch != '}') { |
|
865 |
inlineContent(); |
|
866 |
} else { |
|
867 |
nextChar(); |
|
868 |
} |
|
869 |
} |
|
870 |
}, |
|
871 |
||
872 |
// {@inheritDoc} |
|
873 |
new TagParser(Kind.INLINE, "inheritDoc") { |
|
874 |
@Override |
|
875 |
public void parse(int pos) throws ParseException { |
|
876 |
if (ch == '}') { |
|
877 |
nextChar(); |
|
878 |
return; |
|
879 |
} |
|
880 |
inlineText(WhitespaceRetentionPolicy.REMOVE_ALL); // skip unexpected content |
|
881 |
nextChar(); |
|
882 |
throw new ParseException("dc.unexpected.content"); |
|
883 |
} |
|
884 |
}, |
|
885 |
||
886 |
// {@link package.class#member label} |
|
887 |
new TagParser(Kind.INLINE, "link") { |
|
888 |
@Override |
|
889 |
public void parse(int pos) throws ParseException { |
|
890 |
reference(true); |
|
891 |
inlineContent(); |
|
892 |
} |
|
893 |
}, |
|
894 |
||
895 |
// {@linkplain package.class#member label} |
|
896 |
new TagParser(Kind.INLINE, "linkplain") { |
|
897 |
@Override |
|
898 |
public void parse(int pos) throws ParseException { |
|
899 |
reference(true); |
|
900 |
inlineContent(); |
|
901 |
} |
|
902 |
}, |
|
903 |
||
904 |
// {@literal text} |
|
905 |
new TagParser(Kind.INLINE, "literal", true) { |
|
906 |
@Override |
|
907 |
public void parse(int pos) throws ParseException { |
|
908 |
inlineText(WhitespaceRetentionPolicy.REMOVE_FIRST_SPACE); |
|
909 |
nextChar(); |
|
910 |
} |
|
911 |
}, |
|
912 |
||
913 |
// @param parameter-name description |
|
914 |
new TagParser(Kind.BLOCK, "param") { |
|
915 |
@Override |
|
916 |
public void parse(int pos) throws ParseException { |
|
917 |
skipWhitespace(); |
|
918 |
||
919 |
boolean typaram = false; |
|
920 |
if (ch == '<') { |
|
921 |
typaram = true; |
|
922 |
nextChar(); |
|
923 |
} |
|
924 |
||
925 |
identifier(); |
|
926 |
||
927 |
if (typaram) { |
|
928 |
if (ch != '>') |
|
929 |
throw new ParseException("dc.gt.expected"); |
|
930 |
nextChar(); |
|
931 |
} |
|
932 |
||
933 |
skipWhitespace(); |
|
934 |
blockContent(); |
|
935 |
} |
|
936 |
}, |
|
937 |
||
938 |
// @return description |
|
939 |
new TagParser(Kind.BLOCK, "return") { |
|
940 |
@Override |
|
941 |
public void parse(int pos) { |
|
942 |
blockContent(); |
|
943 |
} |
|
944 |
}, |
|
945 |
||
946 |
// @see reference | quoted-string | HTML |
|
947 |
new TagParser(Kind.BLOCK, "see") { |
|
948 |
@Override |
|
949 |
public void parse(int pos) throws ParseException { |
|
950 |
skipWhitespace(); |
|
951 |
switch (ch) { |
|
952 |
case '"': |
|
953 |
quotedString(); |
|
954 |
skipWhitespace(); |
|
955 |
if (ch == '@' |
|
956 |
|| ch == EOI && bp == buf.length - 1) { |
|
957 |
return; |
|
958 |
} |
|
959 |
break; |
|
960 |
||
961 |
case '<': |
|
962 |
blockContent(); |
|
963 |
return; |
|
964 |
||
965 |
case '@': |
|
966 |
if (newline) |
|
967 |
throw new ParseException("dc.no.content"); |
|
968 |
break; |
|
969 |
||
970 |
case EOI: |
|
971 |
if (bp == buf.length - 1) |
|
972 |
throw new ParseException("dc.no.content"); |
|
973 |
break; |
|
974 |
||
975 |
default: |
|
976 |
if (isJavaIdentifierStart(ch) || ch == '#') { |
|
977 |
reference(true); |
|
978 |
blockContent(); |
|
979 |
} |
|
980 |
} |
|
981 |
throw new ParseException("dc.unexpected.content"); |
|
982 |
} |
|
983 |
}, |
|
984 |
||
985 |
// @serialData data-description |
|
986 |
new TagParser(Kind.BLOCK, "serialData") {
    // Registered under the bare tag name, without a leading '@': parsers are
    // looked up by the name read after the '@' has already been consumed.
    @Override
    public void parse(int pos) {
        blockContent();
    }
},
|
992 |
||
993 |
// @serialField field-name field-type description |
|
994 |
new TagParser(Kind.BLOCK, "serialField") {
    @Override
    public void parse(int pos) throws ParseException {
        skipWhitespace();
        identifier();           // field-name
        skipWhitespace();
        reference(false);       // field-type
        // Always scan what follows the type. reference() can also stop on a
        // '}' rather than on whitespace; returning at that point would leave
        // the rest of the comment unscanned.
        skipWhitespace();
        blockContent();
    }
},
|
1007 |
||
1008 |
// @serial field-description | include | exclude |
|
1009 |
new TagParser(Kind.BLOCK, "serial") { |
|
1010 |
@Override |
|
1011 |
public void parse(int pos) { |
|
1012 |
blockContent(); |
|
1013 |
} |
|
1014 |
}, |
|
1015 |
||
1016 |
// @since since-text |
|
1017 |
new TagParser(Kind.BLOCK, "since") { |
|
1018 |
@Override |
|
1019 |
public void parse(int pos) { |
|
1020 |
blockContent(); |
|
1021 |
} |
|
1022 |
}, |
|
1023 |
||
1024 |
// @throws class-name description |
|
1025 |
new TagParser(Kind.BLOCK, "throws") { |
|
1026 |
@Override |
|
1027 |
public void parse(int pos) throws ParseException { |
|
1028 |
skipWhitespace(); |
|
1029 |
reference(false); |
|
1030 |
blockContent(); |
|
1031 |
} |
|
1032 |
}, |
|
1033 |
||
1034 |
// {@value package.class#field} |
|
1035 |
new TagParser(Kind.INLINE, "value") { |
|
1036 |
@Override |
|
1037 |
public void parse(int pos) throws ParseException { |
|
1038 |
reference(true); |
|
1039 |
skipWhitespace(); |
|
1040 |
if (ch == '}') { |
|
1041 |
nextChar(); |
|
1042 |
return; |
|
1043 |
} |
|
1044 |
nextChar(); |
|
1045 |
throw new ParseException("dc.unexpected.content"); |
|
1046 |
} |
|
1047 |
}, |
|
1048 |
||
1049 |
// @version version-text |
|
1050 |
new TagParser(Kind.BLOCK, "version") { |
|
1051 |
@Override |
|
1052 |
public void parse(int pos) { |
|
1053 |
blockContent(); |
|
1054 |
} |
|
1055 |
}, |
|
1056 |
}; |
|
1057 |
||
1058 |
tagParsers = new HashMap<>(); |
|
1059 |
for (TagParser p: parsers) |
|
1060 |
tagParsers.put(p.getName(), p); |
|
1061 |
||
1062 |
} |
|
1063 |
||
1064 |
private void initEventAttrs() { |
|
1065 |
eventAttrs = new HashSet<>(Arrays.asList( |
|
1066 |
// See https://www.w3.org/TR/html-markup/global-attributes.html#common.attrs.event-handler |
|
1067 |
"onabort", "onblur", "oncanplay", "oncanplaythrough", |
|
1068 |
"onchange", "onclick", "oncontextmenu", "ondblclick", |
|
1069 |
"ondrag", "ondragend", "ondragenter", "ondragleave", |
|
1070 |
"ondragover", "ondragstart", "ondrop", "ondurationchange", |
|
1071 |
"onemptied", "onended", "onerror", "onfocus", "oninput", |
|
1072 |
"oninvalid", "onkeydown", "onkeypress", "onkeyup", |
|
1073 |
"onload", "onloadeddata", "onloadedmetadata", "onloadstart", |
|
1074 |
"onmousedown", "onmousemove", "onmouseout", "onmouseover", |
|
1075 |
"onmouseup", "onmousewheel", "onpause", "onplay", |
|
1076 |
"onplaying", "onprogress", "onratechange", "onreadystatechange", |
|
1077 |
"onreset", "onscroll", "onseeked", "onseeking", |
|
1078 |
"onselect", "onshow", "onstalled", "onsubmit", "onsuspend", |
|
1079 |
"ontimeupdate", "onvolumechange", "onwaiting", |
|
1080 |
||
1081 |
// See https://www.w3.org/TR/html4/sgml/dtd.html |
|
1082 |
// Most of the attributes that take a %Script are also defined as event handlers |
|
1083 |
// in HTML 5. The one exception is onunload. |
|
1084 |
// "onchange", "onclick", "ondblclick", "onfocus", |
|
1085 |
// "onkeydown", "onkeypress", "onkeyup", "onload", |
|
1086 |
// "onmousedown", "onmousemove", "onmouseout", "onmouseover", |
|
1087 |
// "onmouseup", "onreset", "onselect", "onsubmit", |
|
1088 |
"onunload" |
|
1089 |
)); |
|
1090 |
} |
|
1091 |
||
1092 |
private void initURIAttrs() { |
|
1093 |
uriAttrs = new HashSet<>(Arrays.asList( |
|
1094 |
// See https://www.w3.org/TR/html4/sgml/dtd.html |
|
1095 |
// https://www.w3.org/TR/html5/ |
|
1096 |
// These are all the attributes that take a %URI or a valid URL potentially surrounded |
|
1097 |
// by spaces |
|
1098 |
"action", "cite", "classid", "codebase", "data", |
|
1099 |
"datasrc", "for", "href", "longdesc", "profile", |
|
1100 |
"src", "usemap" |
|
1101 |
)); |
|
1102 |
} |
|
1103 |
||
1104 |
} |