8216319: Refactor JavadocTester to allow more on-by-default checkers; add A11YChecker
Reviewed-by: hannesw
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/langtools/jdk/javadoc/lib/javadoc/tester/A11yChecker.java Tue Jan 15 14:18:44 2019 -0800
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package javadoc.tester;
+
+import java.io.PrintStream;
+import java.nio.file.Path;
+import java.util.Map;
+import java.util.Stack;
+import java.util.function.Function;
+
+/**
+ * A class to check various aspects of accessibility in a set of HTML files.
+ *
+ * <p>Two checks are performed on each file:
+ * <ul>
+ * <li>heading levels must not skip a level, such as {@code h1} followed by {@code h3};
+ * <li>in HTML5 files, non-blank content in the body must appear within a
+ *     {@code header}, {@code footer}, {@code main}, {@code nav} or {@code aside} region,
+ *     unless it is within a {@code script} or {@code noscript} element.
+ * </ul>
+ */
+public class A11yChecker extends HtmlChecker {
+
+    /** Whether the file being read declared the HTML5 doctype. */
+    private boolean html5;
+
+    /** The level of the most recent heading in the file being read, or 0 if none yet. */
+    private int currLevel;
+    /** The number of headings, across all files, that skipped one or more levels. */
+    private int headingErrors;
+
+    private boolean inBody;
+    private boolean inScript;
+    private boolean inNoScript;
+    /** The names of the region elements that are currently open, innermost last. */
+    private final Stack<String> regions = new Stack<>();
+    /** The number of region-related errors found across all files. */
+    private int regionErrors;
+
+    A11yChecker(PrintStream out, Function<Path,String> fileReader) {
+        super(out, fileReader);
+    }
+
+    @Override
+    int getErrorCount() {
+        return errors;
+    }
+
+    /**
+     * Prints a one-line summary for the heading check and for the region check.
+     */
+    @Override
+    public void report() {
+        if (headingErrors == 0) {
+            out.println("All headings OK");
+        } else {
+            out.println(headingErrors + " bad headings");
+        }
+
+        if (regionErrors == 0) {
+            out.println("All regions OK");
+        } else {
+            out.println(regionErrors + " errors in regions");
+        }
+    }
+
+    /**
+     * Resets all per-file state, so that headings, regions and open elements
+     * seen in one file cannot affect the checks made on the next one.
+     */
+    @Override
+    public void startFile(Path path) {
+        html5 = false;
+        currLevel = 0;
+        inBody = false;
+        inScript = false;
+        inNoScript = false;
+        regions.clear();
+    }
+
+    @Override
+    public void endFile() {
+    }
+
+    /**
+     * Records whether the file uses the HTML5 doctype.
+     *
+     * @param doctype the text of the declaration; the enclosing {@code <!} and {@code >}
+     *                are accepted but not required, because the parser in this package
+     *                supplies just the text between them
+     */
+    @Override
+    public void docType(String doctype) {
+        html5 = doctype.matches("(?is)\\s*(<!)?doctype\\s+html\\s*>?\\s*");
+    }
+
+    @Override
+    public void startElement(String name, Map<String,String> attrs, boolean selfClosing) {
+        switch (name) {
+            case "body":
+                inBody = true;
+                break;
+
+            case "h1": case "h2": case "h3": case "h4": case "h5": case "h6":
+                checkHeading(name);
+                break;
+
+            case "header": case "footer": case "main": case "nav": case "aside":
+                regions.push(name);
+                break;
+
+            case "script":
+                inScript = true;
+                break;
+
+            case "noscript":
+                inNoScript = true;
+                break;
+        }
+    }
+
+    @Override
+    public void endElement(String name) {
+        switch (name) {
+            case "body":
+                inBody = false;
+                break;
+
+            case "header": case "footer": case "main": case "nav": case "aside":
+                // A closing tag must match the innermost open region.
+                if (!regions.isEmpty() && regions.peek().equals(name)) {
+                    regions.pop();
+                } else {
+                    error(currFile, getLineNumber(), "unmatched tag: " + name);
+                    regionErrors++;
+                }
+                break;
+
+            case "script":
+                inScript = false;
+                break;
+
+            case "noscript":
+                inNoScript = false;
+                break;
+        }
+    }
+
+    /**
+     * Checks that a heading is at most one level deeper than the preceding heading
+     * in the same file, and reports the levels that were skipped if it is not.
+     *
+     * @param h the name of the heading element, {@code h1} to {@code h6}
+     */
+    private void checkHeading(String h) {
+        int level = Character.digit(h.charAt(1), 10);
+        if (level > currLevel + 1) {
+            headingErrors++;
+            StringBuilder sb = new StringBuilder();
+            String sep = "";
+            for (int i = currLevel + 1; i < level; i++) {
+                sb.append(sep).append("h").append(i);
+                sep = ", ";
+            }
+            error(currFile, getLineNumber(), "missing headings: " + sb);
+        }
+        currLevel = level;
+    }
+
+    /**
+     * Reports non-blank body content of an HTML5 file that is not within any region.
+     * Content of {@code script} and {@code noscript} elements is ignored.
+     *
+     * @param s the character content
+     */
+    @Override
+    public void content(String s) {
+        if (html5 && inBody && !inScript && !inNoScript && !s.isBlank() && regions.isEmpty()) {
+            error(currFile, getLineNumber(), "content outside of any region");
+            // Count it, so that report() does not claim that all regions are OK.
+            regionErrors++;
+        }
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/langtools/jdk/javadoc/lib/javadoc/tester/HtmlChecker.java Tue Jan 15 14:18:44 2019 -0800
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package javadoc.tester;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.function.Function;
+
+/**
+ * A base class for checkers that read a set of HTML files and count
+ * the files read and the errors reported while reading them.
+ */
+public abstract class HtmlChecker extends HtmlParser {
+    /** The absolute, normalized form of the current working directory. */
+    static final Path currDir = Paths.get(".").toAbsolutePath().normalize();
+
+    /** The absolute, normalized path of the file most recently passed to {@code checkFile}. */
+    protected Path currFile;
+    /** The number of files that have been read. */
+    protected int files;
+    /** The number of errors that have been reported. */
+    protected int errors;
+
+    HtmlChecker(PrintStream out, Function<Path,String> fileReader) {
+        super(out, fileReader);
+    }
+
+    /**
+     * Checks all the HTML files in a directory and its subdirectories.
+     *
+     * @param dir the directory
+     * @throws IOException if an error occurs while walking the directory
+     */
+    void checkDirectory(Path dir) throws IOException {
+        checkFiles(List.of(dir), false, Collections.emptySet());
+    }
+
+    /**
+     * Checks the HTML files found by walking each of a series of files or directories.
+     *
+     * @param files        the files and directories to be walked
+     * @param skipSubdirs  whether to skip directories nested inside a walked directory
+     * @param excludeFiles files and directories that are not to be checked
+     * @throws IOException if an error occurs while walking the files
+     */
+    void checkFiles(List<Path> files, boolean skipSubdirs, Set<Path> excludeFiles) throws IOException {
+        for (Path root : files) {
+            // A new visitor for each root, so that the depth always starts at zero.
+            Files.walkFileTree(root, newVisitor(skipSubdirs, excludeFiles));
+        }
+    }
+
+    /**
+     * Creates a visitor that calls {@code checkFile} for each regular file whose
+     * name ends in {@code .html}, other than files and directories that are excluded.
+     */
+    private SimpleFileVisitor<Path> newVisitor(boolean skipSubdirs, Set<Path> excludeFiles) {
+        return new SimpleFileVisitor<>() {
+            // the number of directories that have been entered but not yet left
+            private int depth;
+
+            @Override
+            public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) {
+                boolean nested = skipSubdirs && depth > 0;
+                if (nested || excludeFiles.contains(dir)) {
+                    return FileVisitResult.SKIP_SUBTREE;
+                }
+                depth++;
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult visitFile(Path p, BasicFileAttributes attrs) {
+                boolean wanted = !excludeFiles.contains(p)
+                        && Files.isRegularFile(p)
+                        && p.getFileName().toString().endsWith(".html");
+                if (wanted) {
+                    checkFile(p);
+                }
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult postVisitDirectory(Path dir, IOException e) throws IOException {
+                depth--;
+                return super.postVisitDirectory(dir, e);
+            }
+        };
+    }
+
+    /**
+     * Reads a single file, reporting any {@code IOException} as an error against the file.
+     *
+     * @param file the file
+     */
+    void checkFile(Path file) {
+        try {
+            currFile = file.toAbsolutePath().normalize();
+            read(file);
+            files += 1;
+        } catch (IOException e) {
+            error(file, 0, e);
+        }
+    }
+
+    /**
+     * Prints a summary of the results of the checks.
+     */
+    abstract void report();
+
+    /**
+     * Returns the number of errors reported so far.
+     *
+     * @return the number of errors
+     */
+    int getErrorCount() {
+        return errors;
+    }
+
+    /**
+     * Reports an error, using a path relative to the current directory
+     * where possible, and counts it.
+     */
+    @Override
+    protected void error(Path file, int lineNumber, String message) {
+        Path shown = relativePath(file);
+        super.error(shown, lineNumber, message);
+        errors++;
+    }
+
+    /**
+     * Reports an exception, using a path relative to the current directory
+     * where possible, and counts it as an error.
+     */
+    @Override
+    protected void error(Path file, int lineNumber, Throwable t) {
+        Path shown = relativePath(file);
+        super.error(shown, lineNumber, t);
+        errors++;
+    }
+
+    /**
+     * Returns a path relative to the current directory if the given path is within
+     * the current directory, and the path itself otherwise.
+     *
+     * @param path the path
+     * @return the relative path, or the original path
+     */
+    protected Path relativePath(Path path) {
+        if (path.startsWith(currDir)) {
+            return currDir.relativize(path);
+        }
+        return path;
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/langtools/jdk/javadoc/lib/javadoc/tester/HtmlParser.java Tue Jan 15 14:18:44 2019 -0800
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package javadoc.tester;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.StringReader;
+import java.nio.file.Path;
+import java.util.LinkedHashMap;
+import java.util.Locale;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.regex.Pattern;
+
+/**
+ * A basic HTML parser. Override the protected methods as needed to get notified
+ * of significant items in any file that is read.
+ */
+public abstract class HtmlParser {
+
+    /**
+     * Matches the text of a doctype declaration for HTML,
+     * without the enclosing {@code <!} and {@code >}.
+     */
+    private static final Pattern DOCTYPE_HTML = Pattern.compile("(?is)doctype\\s+html\\s?.*");
+
+    protected final PrintStream out;
+    protected final Function<Path,String> fileReader;
+
+    private Path file;
+    private StringReader in;
+    /** The current character, or -1 at the end of the input. */
+    private int ch;
+    private int lineNumber;
+    private boolean inScript;
+    private boolean xml;
+
+    HtmlParser(PrintStream out, Function<Path,String> fileReader) {
+        this.out = out;
+        this.fileReader = fileReader;
+    }
+
+    /**
+     * Read a file.
+     * Any exception thrown while parsing is reported by calling {@code error}.
+     * @param file the file to be read
+     * @throws IOException if an error occurs while reading the file
+     */
+    void read(Path file) throws IOException {
+        try (StringReader r = new StringReader(fileReader.apply(file))) {
+            this.file = file;
+            this.in = r;
+            StringBuilder content = new StringBuilder();
+
+            startFile(file);
+            try {
+                lineNumber = 1;
+                xml = false;
+                // an unterminated script in an earlier file must not hide errors in this one
+                inScript = false;
+                nextChar();
+
+                while (ch != -1) {
+                    if (ch == '<') {
+                        content(content.toString());
+                        content.setLength(0);
+                        html();
+                    } else {
+                        content.append((char) ch);
+                        if (ch == '\n') {
+                            content(content.toString());
+                            content.setLength(0);
+                        }
+                        nextChar();
+                    }
+                }
+
+                // deliver any text after the last tag or newline
+                if (content.length() > 0) {
+                    content(content.toString());
+                }
+            } finally {
+                endFile();
+            }
+        } catch (IOException e) {
+            error(file, lineNumber, e);
+        } catch (Throwable t) {
+            error(file, lineNumber, t);
+            t.printStackTrace(out);
+        }
+    }
+
+
+    int getLineNumber() {
+        return lineNumber;
+    }
+
+    /**
+     * Called when a file has been opened, before parsing begins.
+     * This is always the first notification when reading a file.
+     * This implementation does nothing.
+     *
+     * @param file the file
+     */
+    protected void startFile(Path file) { }
+
+    /**
+     * Called when the parser has finished reading a file.
+     * This is always the last notification when reading a file,
+     * unless any errors occur while closing the file.
+     * This implementation does nothing.
+     */
+    protected void endFile() { }
+
+    /**
+     * Called when a doctype declaration is found, at the beginning of the file.
+     * This implementation does nothing.
+     * @param s the text of the doctype declaration, without the enclosing
+     *          {@code <!} and {@code >}
+     */
+    protected void docType(String s) { }
+
+    /**
+     * Called when the opening tag of an HTML element is encountered.
+     * This implementation does nothing.
+     * @param name the name of the tag, in lower case
+     * @param attrs the attributes, keyed by name in lower case;
+     *              the value is {@code null} for an attribute given without a value
+     * @param selfClosing whether or not this is a self-closing tag
+     */
+    protected void startElement(String name, Map<String,String> attrs, boolean selfClosing) { }
+
+    /**
+     * Called when the closing tag of an HTML tag is encountered.
+     * This implementation does nothing.
+     * @param name the name of the tag, in lower case
+     */
+    protected void endElement(String name) { }
+
+    /**
+     * Called for sequences of character content.
+     * The content is delivered in pieces that end at a newline, at the start
+     * of a tag, or at the end of the file; a piece may be empty.
+     * This implementation does nothing.
+     * @param content the character content
+     */
+    protected void content(String content) { }
+
+    /**
+     * Called when an error has been encountered.
+     * @param file the file being read
+     * @param lineNumber the line number of line containing the error
+     * @param message a description of the error
+     */
+    protected void error(Path file, int lineNumber, String message) {
+        out.println(file + ":" + lineNumber + ": " + message);
+    }
+
+    /**
+     * Called when an exception has been encountered.
+     * @param file the file being read
+     * @param lineNumber the line number of the line being read when the exception was found
+     * @param t the exception
+     */
+    protected void error(Path file, int lineNumber, Throwable t) {
+        out.println(file + ":" + lineNumber + ": " + t);
+    }
+
+    private void nextChar() throws IOException {
+        ch = in.read();
+        if (ch == '\n')
+            lineNumber++;
+    }
+
+    /**
+     * Consumes the given characters, as long as they match the input.
+     * @param expected the expected characters
+     * @return true if all the characters were found and consumed; otherwise false,
+     *         with the current character being the first one that did not match
+     */
+    private boolean skip(String expected) throws IOException {
+        for (int i = 0; i < expected.length(); i++) {
+            if (ch != expected.charAt(i)) {
+                return false;
+            }
+            nextChar();
+        }
+        return true;
+    }
+
+    /**
+     * Read the start or end of an HTML tag, or an HTML comment
+     * {@literal <identifier attrs> } or {@literal </identifier> }
+     * CDATA sections, doctype declarations and XML declarations are also recognized.
+     * Anything else is reported as "bad html", unless it appears within a script.
+     * @throws java.io.IOException if there is a problem reading the file
+     */
+    private void html() throws IOException {
+        nextChar();
+        if (isIdentifierStart((char) ch)) {
+            String name = readIdentifier().toLowerCase(Locale.US);
+            Map<String,String> attrs = htmlAttrs();
+            boolean selfClosing = false;
+            if (ch == '/') {
+                nextChar();
+                selfClosing = true;
+            }
+            if (ch == '>') {
+                nextChar();
+                startElement(name, attrs, selfClosing);
+                if (name.equals("script")) {
+                    inScript = true;
+                }
+                return;
+            }
+        } else if (ch == '/') {
+            nextChar();
+            if (isIdentifierStart((char) ch)) {
+                String name = readIdentifier().toLowerCase(Locale.US);
+                skipWhitespace();
+                if (ch == '>') {
+                    nextChar();
+                    endElement(name);
+                    if (name.equals("script")) {
+                        inScript = false;
+                    }
+                    return;
+                }
+            }
+        } else if (ch == '!') {
+            nextChar();
+            if (ch == '-') {
+                nextChar();
+                if (ch == '-') {
+                    nextChar();
+                    while (ch != -1) {
+                        int dash = 0;
+                        while (ch == '-') {
+                            dash++;
+                            nextChar();
+                        }
+                        // Strictly speaking, a comment should not contain "--"
+                        // so dash > 2 is an error, dash == 2 implies ch == '>'
+                        // See http://www.w3.org/TR/html-markup/syntax.html#syntax-comments
+                        // for more details.
+                        if (dash >= 2 && ch == '>') {
+                            nextChar();
+                            return;
+                        }
+
+                        nextChar();
+                    }
+                }
+            } else if (ch == '[') {
+                nextChar();
+                if (skip("CDATA") && ch == '[') {
+                    // Skip to the closing "]]>", stopping at the end of the input
+                    // if the section is not terminated.
+                    int brackets = 0;
+                    nextChar();
+                    while (ch != -1) {
+                        if (ch == ']') {
+                            brackets++;
+                        } else if (ch == '>' && brackets >= 2) {
+                            nextChar();
+                            return;
+                        } else {
+                            brackets = 0;
+                        }
+                        nextChar();
+                    }
+                }
+            } else {
+                StringBuilder sb = new StringBuilder();
+                while (ch != -1 && ch != '>') {
+                    sb.append((char) ch);
+                    nextChar();
+                }
+                String s = sb.toString();
+                if (DOCTYPE_HTML.matcher(s).matches()) {
+                    docType(s);
+                    // consume the closing '>', so that it is not treated as content
+                    if (ch == '>') {
+                        nextChar();
+                    }
+                    return;
+                }
+            }
+        } else if (ch == '?') {
+            nextChar();
+            if (skip("xm") && ch == 'l') {
+                // The 'l' is deliberately left as the current character, to be read
+                // as the start of a first attribute name: that also accepts
+                // declarations such as <?xml-stylesheet ...?>.
+                htmlAttrs();
+                if (ch == '?') {
+                    nextChar();
+                    if (ch == '>') {
+                        nextChar();
+                        xml = true;
+                        return;
+                    }
+                }
+            }
+        }
+
+        if (!inScript) {
+            error(file, lineNumber, "bad html");
+        }
+    }
+
+    /**
+     * Read a series of HTML attributes, terminated by {@literal > }.
+     * Each attribute is of the form {@literal identifier[=value] }.
+     * "value" may be unquoted, single-quoted, or double-quoted.
+     * @return the attributes, in the order in which they were found
+     */
+    private Map<String,String> htmlAttrs() throws IOException {
+        Map<String, String> map = new LinkedHashMap<>();
+        skipWhitespace();
+
+        while (isIdentifierStart((char) ch)) {
+            String name = readAttributeName().toLowerCase(Locale.US);
+            skipWhitespace();
+            String value = null;
+            if (ch == '=') {
+                nextChar();
+                skipWhitespace();
+                if (ch == '\'' || ch == '"') {
+                    char quote = (char) ch;
+                    nextChar();
+                    StringBuilder sb = new StringBuilder();
+                    while (ch != -1 && ch != quote) {
+                        sb.append((char) ch);
+                        nextChar();
+                    }
+                    // hack to replace common entities;
+                    // "&amp;" must be last, so that "&amp;lt;" becomes "&lt;" and not "<"
+                    value = sb.toString()
+                            .replace("&lt;", "<")
+                            .replace("&gt;", ">")
+                            .replace("&amp;", "&");
+                    nextChar();
+                } else {
+                    StringBuilder sb = new StringBuilder();
+                    while (ch != -1 && !isUnquotedAttrValueTerminator((char) ch)) {
+                        sb.append((char) ch);
+                        nextChar();
+                    }
+                    value = sb.toString();
+                }
+                skipWhitespace();
+            }
+            map.put(name, value);
+        }
+
+        return map;
+    }
+
+    private boolean isIdentifierStart(char ch) {
+        return Character.isUnicodeIdentifierStart(ch);
+    }
+
+    private String readIdentifier() throws IOException {
+        StringBuilder sb = new StringBuilder();
+        sb.append((char) ch);
+        nextChar();
+        while (ch != -1 && Character.isUnicodeIdentifierPart(ch)) {
+            sb.append((char) ch);
+            nextChar();
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Read an attribute name, which may contain '-' and, after an XML declaration
+     * has been found in the file, ':'.
+     */
+    private String readAttributeName() throws IOException {
+        StringBuilder sb = new StringBuilder();
+        sb.append((char) ch);
+        nextChar();
+        while ((ch != -1 && Character.isUnicodeIdentifierPart(ch))
+                || ch == '-'
+                || (xml && ch == ':')) {
+            sb.append((char) ch);
+            nextChar();
+        }
+        return sb.toString();
+    }
+
+    private boolean isWhitespace(char ch) {
+        return Character.isWhitespace(ch);
+    }
+
+    private void skipWhitespace() throws IOException {
+        while (isWhitespace((char) ch)) {
+            nextChar();
+        }
+    }
+
+    private boolean isUnquotedAttrValueTerminator(char ch) {
+        switch (ch) {
+            case '\f': case '\n': case '\r': case '\t':
+            case ' ':
+            case '"': case '\'': case '`':
+            case '=': case '<': case '>':
+                return true;
+            default:
+                return false;
+        }
+    }
+}
--- a/test/langtools/jdk/javadoc/lib/javadoc/tester/JavadocTester.java Tue Jan 15 22:54:09 2019 +0100
+++ b/test/langtools/jdk/javadoc/lib/javadoc/tester/JavadocTester.java Tue Jan 15 14:18:44 2019 -0800
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -23,18 +23,15 @@
package javadoc.tester;
-import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.FilenameFilter;
-import java.io.InputStreamReader;
import java.io.IOException;
import java.io.PrintStream;
import java.io.PrintWriter;
-import java.io.StringReader;
import java.io.StringWriter;
import java.lang.annotation.Annotation;
import java.lang.annotation.Retention;
@@ -42,36 +39,21 @@
import java.lang.ref.SoftReference;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
-import java.net.URI;
-import java.net.URISyntaxException;
import java.nio.charset.Charset;
-import java.nio.charset.CharsetDecoder;
-import java.nio.charset.CodingErrorAction;
import java.nio.charset.UnsupportedCharsetException;
-import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
-import java.nio.file.SimpleFileVisitor;
-import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
-import java.util.Comparator;
import java.util.EnumMap;
import java.util.HashMap;
-import java.util.LinkedHashMap;
import java.util.List;
-import java.util.Locale;
import java.util.Map;
import java.util.Objects;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.TreeSet;
import java.util.function.Function;
-import java.util.regex.Pattern;
-import java.util.stream.Collectors;
/**
@@ -250,6 +232,7 @@
private DirectoryCheck outputDirectoryCheck = DirectoryCheck.EMPTY;
+ private boolean automaticCheckAccessibility = false;
private boolean automaticCheckLinks = true;
/** The current subtest number. Incremented when checking(...) is called. */
@@ -397,8 +380,13 @@
}
});
- if (automaticCheckLinks && exitCode == Exit.OK.code && outputDir.exists()) {
- checkLinks();
+ if (exitCode == Exit.OK.code && outputDir.exists()) {
+ if (automaticCheckLinks) {
+ checkLinks();
+ }
+ if (automaticCheckAccessibility) {
+ checkAccessibility();
+ }
}
}
@@ -531,6 +519,23 @@
}
}
+    /**
+     * Checks the HTML files in the output directory with an {@link A11yChecker},
+     * prints the checker's report, and records the check as passed if no errors
+     * were found, or as failed otherwise.
+     */
+    public void checkAccessibility() {
+        checking("Check accessibility");
+        A11yChecker checker = new A11yChecker(out, this::readFile);
+        try {
+            checker.checkDirectory(outputDir.toPath());
+            checker.report();
+            int errorCount = checker.getErrorCount();
+            if (errorCount != 0) {
+                failed(errorCount + " errors found when checking accessibility");
+            } else {
+                passed("No accessibility errors found");
+            }
+        } catch (IOException e) {
+            failed("exception thrown when reading files: " + e);
+        }
+    }
+
public void checkLinks() {
checking("Check links");
LinkChecker c = new LinkChecker(out, this::readFile);
@@ -861,7 +866,6 @@
* @param baseDir1 the directory in which to locate the first file
* @param baseDir2 the directory in which to locate the second file
* @param file the file to compare in the two base directories
- * @param throwErrorIFNoMatch flag to indicate whether or not to throw
* an error if the files do not match.
* @return true if the files are the same and false otherwise.
*/
@@ -981,820 +985,4 @@
// Support classes for checkLinks
- /**
- * A basic HTML parser. Override the protected methods as needed to get notified
- * of significant items in any file that is read.
- */
- static abstract class HtmlParser {
-
- protected final PrintStream out;
- protected final Function<Path,String> fileReader;
-
- private Path file;
- private StringReader in;
- private int ch;
- private int lineNumber;
- private boolean inScript;
- private boolean xml;
-
- HtmlParser(PrintStream out, Function<Path,String> fileReader) {
- this.out = out;
- this.fileReader = fileReader;
- }
-
- /**
- * Read a file.
- * @param file the file to be read
- * @throws IOException if an error occurs while reading the file
- */
- void read(Path file) throws IOException {
- try (StringReader r = new StringReader(fileReader.apply(file))) {
- this.file = file;
- this.in = r;
-
- startFile(file);
- try {
- lineNumber = 1;
- xml = false;
- nextChar();
-
- while (ch != -1) {
- switch (ch) {
-
- case '<':
- html();
- break;
-
- default:
- nextChar();
- }
- }
- } finally {
- endFile();
- }
- } catch (IOException e) {
- error(file, lineNumber, e);
- } catch (Throwable t) {
- error(file, lineNumber, t);
- t.printStackTrace(out);
- }
- }
-
-
- int getLineNumber() {
- return lineNumber;
- }
-
- /**
- * Called when a file has been opened, before parsing begins.
- * This is always the first notification when reading a file.
- * This implementation does nothing.
- *
- * @param file the file
- */
- protected void startFile(Path file) { }
-
- /**
- * Called when the parser has finished reading a file.
- * This is always the last notification when reading a file,
- * unless any errors occur while closing the file.
- * This implementation does nothing.
- */
- protected void endFile() { }
-
- /**
- * Called when a doctype declaration is found, at the beginning of the file.
- * This implementation does nothing.
- * @param s the doctype declaration
- */
- protected void docType(String s) { }
-
- /**
- * Called when the opening tag of an HTML element is encountered.
- * This implementation does nothing.
- * @param name the name of the tag
- * @param attrs the attribute
- * @param selfClosing whether or not this is a self-closing tag
- */
- protected void startElement(String name, Map<String,String> attrs, boolean selfClosing) { }
-
- /**
- * Called when the closing tag of an HTML tag is encountered.
- * This implementation does nothing.
- * @param name the name of the tag
- */
- protected void endElement(String name) { }
-
- /**
- * Called when an error has been encountered.
- * @param file the file being read
- * @param lineNumber the line number of line containing the error
- * @param message a description of the error
- */
- protected void error(Path file, int lineNumber, String message) {
- out.println(file + ":" + lineNumber + ": " + message);
- }
-
- /**
- * Called when an exception has been encountered.
- * @param file the file being read
- * @param lineNumber the line number of the line being read when the exception was found
- * @param t the exception
- */
- protected void error(Path file, int lineNumber, Throwable t) {
- out.println(file + ":" + lineNumber + ": " + t);
- }
-
- private void nextChar() throws IOException {
- ch = in.read();
- if (ch == '\n')
- lineNumber++;
- }
-
- /**
- * Read the start or end of an HTML tag, or an HTML comment
- * {@literal <identifier attrs> } or {@literal </identifier> }
- * @throws java.io.IOException if there is a problem reading the file
- */
- private void html() throws IOException {
- nextChar();
- if (isIdentifierStart((char) ch)) {
- String name = readIdentifier().toLowerCase(Locale.US);
- Map<String,String> attrs = htmlAttrs();
- if (attrs != null) {
- boolean selfClosing = false;
- if (ch == '/') {
- nextChar();
- selfClosing = true;
- }
- if (ch == '>') {
- nextChar();
- startElement(name, attrs, selfClosing);
- if (name.equals("script")) {
- inScript = true;
- }
- return;
- }
- }
- } else if (ch == '/') {
- nextChar();
- if (isIdentifierStart((char) ch)) {
- String name = readIdentifier().toLowerCase(Locale.US);
- skipWhitespace();
- if (ch == '>') {
- nextChar();
- endElement(name);
- if (name.equals("script")) {
- inScript = false;
- }
- return;
- }
- }
- } else if (ch == '!') {
- nextChar();
- if (ch == '-') {
- nextChar();
- if (ch == '-') {
- nextChar();
- while (ch != -1) {
- int dash = 0;
- while (ch == '-') {
- dash++;
- nextChar();
- }
- // Strictly speaking, a comment should not contain "--"
- // so dash > 2 is an error, dash == 2 implies ch == '>'
- // See http://www.w3.org/TR/html-markup/syntax.html#syntax-comments
- // for more details.
- if (dash >= 2 && ch == '>') {
- nextChar();
- return;
- }
-
- nextChar();
- }
- }
- } else if (ch == '[') {
- nextChar();
- if (ch == 'C') {
- nextChar();
- if (ch == 'D') {
- nextChar();
- if (ch == 'A') {
- nextChar();
- if (ch == 'T') {
- nextChar();
- if (ch == 'A') {
- nextChar();
- if (ch == '[') {
- while (true) {
- nextChar();
- if (ch == ']') {
- nextChar();
- if (ch == ']') {
- nextChar();
- if (ch == '>') {
- nextChar();
- return;
- }
- }
- }
- }
-
- }
- }
- }
- }
- }
- }
- } else {
- StringBuilder sb = new StringBuilder();
- while (ch != -1 && ch != '>') {
- sb.append((char) ch);
- nextChar();
- }
- Pattern p = Pattern.compile("(?is)doctype\\s+html\\s?.*");
- String s = sb.toString();
- if (p.matcher(s).matches()) {
- docType(s);
- return;
- }
- }
- } else if (ch == '?') {
- nextChar();
- if (ch == 'x') {
- nextChar();
- if (ch == 'm') {
- nextChar();
- if (ch == 'l') {
- Map<String,String> attrs = htmlAttrs();
- if (ch == '?') {
- nextChar();
- if (ch == '>') {
- nextChar();
- xml = true;
- return;
- }
- }
- }
- }
-
- }
- }
-
- if (!inScript) {
- error(file, lineNumber, "bad html");
- }
- }
-
- /**
- * Read a series of HTML attributes, terminated by {@literal > }.
- * Each attribute is of the form {@literal identifier[=value] }.
- * "value" may be unquoted, single-quoted, or double-quoted.
- */
- private Map<String,String> htmlAttrs() throws IOException {
- Map<String, String> map = new LinkedHashMap<>();
- skipWhitespace();
-
- loop:
- while (isIdentifierStart((char) ch)) {
- String name = readAttributeName().toLowerCase(Locale.US);
- skipWhitespace();
- String value = null;
- if (ch == '=') {
- nextChar();
- skipWhitespace();
- if (ch == '\'' || ch == '"') {
- char quote = (char) ch;
- nextChar();
- StringBuilder sb = new StringBuilder();
- while (ch != -1 && ch != quote) {
- sb.append((char) ch);
- nextChar();
- }
- value = sb.toString() // hack to replace common entities
- .replace("<", "<")
- .replace(">", ">")
- .replace("&", "&");
- nextChar();
- } else {
- StringBuilder sb = new StringBuilder();
- while (ch != -1 && !isUnquotedAttrValueTerminator((char) ch)) {
- sb.append((char) ch);
- nextChar();
- }
- value = sb.toString();
- }
- skipWhitespace();
- }
- map.put(name, value);
- }
-
- return map;
- }
-
- private boolean isIdentifierStart(char ch) {
- return Character.isUnicodeIdentifierStart(ch);
- }
-
- private String readIdentifier() throws IOException {
- StringBuilder sb = new StringBuilder();
- sb.append((char) ch);
- nextChar();
- while (ch != -1 && Character.isUnicodeIdentifierPart(ch)) {
- sb.append((char) ch);
- nextChar();
- }
- return sb.toString();
- }
-
- private String readAttributeName() throws IOException {
- StringBuilder sb = new StringBuilder();
- sb.append((char) ch);
- nextChar();
- while (ch != -1 && Character.isUnicodeIdentifierPart(ch)
- || ch == '-'
- || xml && ch == ':') {
- sb.append((char) ch);
- nextChar();
- }
- return sb.toString();
- }
-
- private boolean isWhitespace(char ch) {
- return Character.isWhitespace(ch);
- }
-
- private void skipWhitespace() throws IOException {
- while (isWhitespace((char) ch)) {
- nextChar();
- }
- }
-
- private boolean isUnquotedAttrValueTerminator(char ch) {
- switch (ch) {
- case '\f': case '\n': case '\r': case '\t':
- case ' ':
- case '"': case '\'': case '`':
- case '=': case '<': case '>':
- return true;
- default:
- return false;
- }
- }
- }
-
- /**
- * A class to check the links in a set of HTML files.
- */
- static class LinkChecker extends HtmlParser {
- private final Map<Path, IDTable> allFiles;
- private final Map<URI, IDTable> allURIs;
-
- private int files;
- private int links;
- private int badSchemes;
- private int duplicateIds;
- private int missingIds;
-
- private Path currFile;
- private IDTable currTable;
- private boolean html5;
- private boolean xml;
-
- private int errors;
-
- LinkChecker(PrintStream out, Function<Path,String> fileReader) {
- super(out, fileReader);
- allFiles = new HashMap<>();
- allURIs = new HashMap<>();
- }
-
- void checkDirectory(Path dir) throws IOException {
- checkFiles(List.of(dir), false, Collections.emptySet());
- }
-
- void checkFiles(List<Path> files, boolean skipSubdirs, Set<Path> excludeFiles) throws IOException {
- for (Path file : files) {
- Files.walkFileTree(file, new SimpleFileVisitor<Path>() {
- int depth = 0;
-
- @Override
- public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) {
- if ((skipSubdirs && depth > 0) || excludeFiles.contains(dir)) {
- return FileVisitResult.SKIP_SUBTREE;
- }
- depth++;
- return FileVisitResult.CONTINUE;
- }
-
- @Override
- public FileVisitResult visitFile(Path p, BasicFileAttributes attrs) {
- if (excludeFiles.contains(p)) {
- return FileVisitResult.CONTINUE;
- }
-
- if (Files.isRegularFile(p) && p.getFileName().toString().endsWith(".html")) {
- checkFile(p);
- }
- return FileVisitResult.CONTINUE;
- }
-
- @Override
- public FileVisitResult postVisitDirectory(Path dir, IOException e) throws IOException {
- depth--;
- return super.postVisitDirectory(dir, e);
- }
- });
- }
- }
-
- void checkFile(Path file) {
- try {
- read(file);
- } catch (IOException e) {
- error(file, 0, e);
- }
- }
-
- int getErrorCount() {
- return errors;
- }
-
- public void report() {
- List<Path> missingFiles = getMissingFiles();
- if (!missingFiles.isEmpty()) {
- report("Missing files: (" + missingFiles.size() + ")");
- missingFiles.stream()
- .sorted()
- .forEach(this::reportMissingFile);
-
- }
-
- if (!allURIs.isEmpty()) {
- report(false, "External URLs:");
- allURIs.keySet().stream()
- .sorted(new URIComparator())
- .forEach(uri -> report(false, " %s", uri.toString()));
- }
-
- int anchors = 0;
- for (IDTable t : allFiles.values()) {
- anchors += t.map.values().stream()
- .filter(e -> !e.getReferences().isEmpty())
- .count();
- }
- for (IDTable t : allURIs.values()) {
- anchors += t.map.values().stream()
- .filter(e -> !e.references.isEmpty())
- .count();
- }
-
- report(false, "Checked " + files + " files.");
- report(false, "Found " + links + " references to " + anchors + " anchors "
- + "in " + allFiles.size() + " files and " + allURIs.size() + " other URIs.");
- report(!missingFiles.isEmpty(), "%6d missing files", missingFiles.size());
- report(duplicateIds > 0, "%6d duplicate ids", duplicateIds);
- report(missingIds > 0, "%6d missing ids", missingIds);
-
- Map<String, Integer> schemeCounts = new TreeMap<>();
- Map<String, Integer> hostCounts = new TreeMap<>(new HostComparator());
- for (URI uri : allURIs.keySet()) {
- String scheme = uri.getScheme();
- if (scheme != null) {
- schemeCounts.put(scheme, schemeCounts.computeIfAbsent(scheme, s -> 0) + 1);
- }
- String host = uri.getHost();
- if (host != null) {
- hostCounts.put(host, hostCounts.computeIfAbsent(host, h -> 0) + 1);
- }
- }
-
- if (schemeCounts.size() > 0) {
- report(false, "Schemes");
- schemeCounts.forEach((s, n) -> report(!isSchemeOK(s), "%6d %s", n, s));
- }
-
- if (hostCounts.size() > 0) {
- report(false, "Hosts");
- hostCounts.forEach((h, n) -> report(false, "%6d %s", n, h));
- }
- }
-
- private void report(String message, Object... args) {
- out.println(String.format(message, args));
- }
-
- private void report(boolean highlight, String message, Object... args) {
- out.print(highlight ? "* " : " ");
- out.println(String.format(message, args));
- }
-
- private void reportMissingFile(Path file) {
- report("%s", relativePath(file));
- IDTable table = allFiles.get(file);
- Set<Path> refs = new TreeSet<>();
- for (ID id : table.map.values()) {
- if (id.references != null) {
- for (Position p : id.references) {
- refs.add(p.path);
- }
- }
- }
- int n = 0;
- int MAX_REFS = 10;
- for (Path ref : refs) {
- report(" in " + relativePath(ref));
- if (++n == MAX_REFS) {
- report(" ... and %d more", refs.size() - n);
- break;
- }
- }
- }
-
- @Override
- public void startFile(Path path) {
- currFile = path.toAbsolutePath().normalize();
- currTable = allFiles.computeIfAbsent(currFile, p -> new IDTable(p));
- html5 = false;
- files++;
- }
-
- @Override
- public void endFile() {
- currTable.check();
- }
-
- @Override
- public void docType(String doctype) {
- html5 = doctype.matches("(?i)<\\?doctype\\s+html>");
- }
-
- @Override @SuppressWarnings("fallthrough")
- public void startElement(String name, Map<String, String> attrs, boolean selfClosing) {
- int line = getLineNumber();
- switch (name) {
- case "a":
- String nameAttr = html5 ? null : attrs.get("name");
- if (nameAttr != null) {
- foundAnchor(line, nameAttr);
- }
- // fallthrough
- case "link":
- String href = attrs.get("href");
- if (href != null) {
- foundReference(line, href);
- }
- break;
- }
-
- String idAttr = attrs.get("id");
- if (idAttr != null) {
- foundAnchor(line, idAttr);
- }
- }
-
- @Override
- public void endElement(String name) { }
-
- private void foundAnchor(int line, String name) {
- currTable.addID(line, name);
- }
-
- private void foundReference(int line, String ref) {
- links++;
- try {
- URI uri = new URI(ref);
- if (uri.isAbsolute()) {
- foundReference(line, uri);
- } else {
- Path p;
- String uriPath = uri.getPath();
- if (uriPath == null || uriPath.isEmpty()) {
- p = currFile;
- } else {
- p = currFile.getParent().resolve(uriPath).normalize();
- }
- foundReference(line, p, uri.getFragment());
- }
- } catch (URISyntaxException e) {
- error(currFile, line, "invalid URI: " + e);
- }
- }
-
- private void foundReference(int line, Path p, String fragment) {
- IDTable t = allFiles.computeIfAbsent(p, key -> new IDTable(key));
- t.addReference(fragment, currFile, line);
- }
-
- private void foundReference(int line, URI uri) {
- if (!isSchemeOK(uri.getScheme())) {
- error(currFile, line, "bad scheme in URI");
- badSchemes++;
- }
-
- String fragment = uri.getFragment();
- try {
- URI noFrag = new URI(uri.toString().replaceAll("#\\Q" + fragment + "\\E$", ""));
- IDTable t = allURIs.computeIfAbsent(noFrag, key -> new IDTable(key.toString()));
- t.addReference(fragment, currFile, line);
- } catch (URISyntaxException e) {
- throw new Error(e);
- }
- }
-
- private boolean isSchemeOK(String uriScheme) {
- if (uriScheme == null) {
- return true;
- }
-
- switch (uriScheme) {
- case "file":
- case "ftp":
- case "http":
- case "https":
- case "javascript":
- case "mailto":
- return true;
-
- default:
- return false;
- }
- }
-
- private List<Path> getMissingFiles() {
- return allFiles.entrySet().stream()
- .filter(e -> !Files.exists(e.getKey()))
- .map(e -> e.getKey())
- .collect(Collectors.toList());
- }
-
- @Override
- protected void error(Path file, int lineNumber, String message) {
- super.error(relativePath(file), lineNumber, message);
- errors++;
- }
-
- @Override
- protected void error(Path file, int lineNumber, Throwable t) {
- super.error(relativePath(file), lineNumber, t);
- errors++;
- }
-
- private Path relativePath(Path path) {
- return path.startsWith(currDir) ? currDir.relativize(path) : path;
- }
-
- /**
- * A position in a file, as identified by a file name and line number.
- */
- static class Position implements Comparable<Position> {
- Path path;
- int line;
-
- Position(Path path, int line) {
- this.path = path;
- this.line = line;
- }
-
- @Override
- public int compareTo(Position o) {
- int v = path.compareTo(o.path);
- return v != 0 ? v : Integer.compare(line, o.line);
- }
-
- @Override
- public boolean equals(Object obj) {
- if (this == obj) {
- return true;
- } else if (obj == null || getClass() != obj.getClass()) {
- return false;
- } else {
- final Position other = (Position) obj;
- return Objects.equals(this.path, other.path)
- && this.line == other.line;
- }
- }
-
- @Override
- public int hashCode() {
- return Objects.hashCode(path) * 37 + line;
- }
- }
-
- /**
- * Infor for an ID within an HTML file, and a set of positions that reference it.
- */
- static class ID {
- boolean declared;
- Set<Position> references;
-
- Set<Position> getReferences() {
- return (references) == null ? Collections.emptySet() : references;
- }
- }
-
- /**
- * A table for the set of IDs in an HTML file.
- */
- class IDTable {
- private String name;
- private boolean checked;
- private final Map<String, ID> map = new HashMap<>();
-
- IDTable(Path p) {
- this(relativePath(p).toString());
- }
-
- IDTable(String name) {
- this.name = name;
- }
-
- void addID(int line, String name) {
- if (checked) {
- throw new IllegalStateException("Adding ID after file has been read");
- }
- Objects.requireNonNull(name);
- ID id = map.computeIfAbsent(name, x -> new ID());
- if (id.declared) {
- error(currFile, line, "name already declared: " + name);
- duplicateIds++;
- } else {
- id.declared = true;
- }
- }
-
- void addReference(String name, Path from, int line) {
- if (checked) {
- if (name != null) {
- ID id = map.get(name);
- if (id == null || !id.declared) {
- error(from, line, "id not found: " + this.name + "#" + name);
- }
- }
- } else {
- ID id = map.computeIfAbsent(name, x -> new ID());
- if (id.references == null) {
- id.references = new TreeSet<>();
- }
- id.references.add(new Position(from, line));
- }
- }
-
- void check() {
- map.forEach((name, id) -> {
- if (name != null && !id.declared) {
- //log.error(currFile, 0, "id not declared: " + name);
- for (Position ref : id.references) {
- error(ref.path, ref.line, "id not found: " + this.name + "#" + name);
- }
- missingIds++;
- }
- });
- checked = true;
- }
- }
-
- static class URIComparator implements Comparator<URI> {
- final HostComparator hostComparator = new HostComparator();
-
- @Override
- public int compare(URI o1, URI o2) {
- if (o1.isOpaque() || o2.isOpaque()) {
- return o1.compareTo(o2);
- }
- String h1 = o1.getHost();
- String h2 = o2.getHost();
- String s1 = o1.getScheme();
- String s2 = o2.getScheme();
- if (h1 == null || h1.isEmpty() || s1 == null || s1.isEmpty()
- || h2 == null || h2.isEmpty() || s2 == null || s2.isEmpty()) {
- return o1.compareTo(o2);
- }
- int v = hostComparator.compare(h1, h2);
- if (v != 0) {
- return v;
- }
- v = s1.compareTo(s2);
- if (v != 0) {
- return v;
- }
- return o1.compareTo(o2);
- }
- }
-
- static class HostComparator implements Comparator<String> {
- @Override
- public int compare(String h1, String h2) {
- List<String> l1 = new ArrayList<>(Arrays.asList(h1.split("\\.")));
- Collections.reverse(l1);
- String r1 = String.join(".", l1);
- List<String> l2 = new ArrayList<>(Arrays.asList(h2.split("\\.")));
- Collections.reverse(l2);
- String r2 = String.join(".", l2);
- return r1.compareTo(r2);
- }
- }
-
- }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/langtools/jdk/javadoc/lib/javadoc/tester/LinkChecker.java Tue Jan 15 14:18:44 2019 -0800
@@ -0,0 +1,521 @@
+/*
+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package javadoc.tester;
+
+import java.io.PrintStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+/**
+ * A class to check the links in a set of HTML files.
+ *
+ * <p>Anchors are collected from {@code id} attributes (and, while a file is
+ * not known to be HTML5, from {@code name} attributes of {@code <a>} elements).
+ * References are collected from {@code href} attributes of {@code <a>} and
+ * {@code <link>} elements. Duplicate anchors, references to undeclared anchors,
+ * malformed URIs and URIs with an unexpected scheme are reported as errors.
+ */
+public class LinkChecker extends HtmlChecker {
+
+    /** ID tables for local files, keyed by the path of the file. */
+    private final Map<Path, IDTable> allFiles;
+    /** ID tables for absolute URIs, keyed by the URI without its fragment. */
+    private final Map<URI, IDTable> allURIs;
+
+    private int links;
+    private int duplicateIds;
+    private int missingIds;
+
+    private IDTable currTable;
+    private boolean html5;
+
+    LinkChecker(PrintStream out, Function<Path,String> fileReader) {
+        super(out, fileReader);
+        allFiles = new HashMap<>();
+        allURIs = new HashMap<>();
+    }
+
+    /**
+     * Prints a summary of what was found: missing files, external URIs,
+     * overall counts, and the URI schemes and hosts that were referenced.
+     * Lines that indicate a problem are highlighted with a leading {@code *}.
+     */
+    @Override
+    public void report() {
+        List<Path> missingFiles = getMissingFiles();
+        if (!missingFiles.isEmpty()) {
+            report("Missing files: (" + missingFiles.size() + ")");
+            missingFiles.stream()
+                    .sorted()
+                    .forEach(this::reportMissingFile);
+        }
+
+        if (!allURIs.isEmpty()) {
+            report(false, "External URLs:");
+            allURIs.keySet().stream()
+                    .sorted(new URIComparator())
+                    .forEach(uri -> report(false, " %s", uri.toString()));
+        }
+
+        int anchors = 0;
+        for (IDTable t : allFiles.values()) {
+            anchors += t.countReferenced();
+        }
+        for (IDTable t : allURIs.values()) {
+            anchors += t.countReferenced();
+        }
+
+        report(false, "Checked " + files + " files.");
+        report(false, "Found " + links + " references to " + anchors + " anchors "
+                + "in " + allFiles.size() + " files and " + allURIs.size() + " other URIs.");
+        report(!missingFiles.isEmpty(), "%6d missing files", missingFiles.size());
+        report(duplicateIds > 0, "%6d duplicate ids", duplicateIds);
+        report(missingIds > 0, "%6d missing ids", missingIds);
+
+        Map<String, Integer> schemeCounts = new TreeMap<>();
+        Map<String, Integer> hostCounts = new TreeMap<>(new HostComparator());
+        for (URI uri : allURIs.keySet()) {
+            String scheme = uri.getScheme();
+            if (scheme != null) {
+                schemeCounts.merge(scheme, 1, Integer::sum);
+            }
+            String host = uri.getHost();
+            if (host != null) {
+                hostCounts.merge(host, 1, Integer::sum);
+            }
+        }
+
+        if (!schemeCounts.isEmpty()) {
+            report(false, "Schemes");
+            schemeCounts.forEach((s, n) -> report(!isSchemeOK(s), "%6d %s", n, s));
+        }
+
+        if (!hostCounts.isEmpty()) {
+            report(false, "Hosts");
+            hostCounts.forEach((h, n) -> report(false, "%6d %s", n, h));
+        }
+    }
+
+    /** Prints a formatted message on a line of its own. */
+    private void report(String message, Object... args) {
+        out.println(String.format(message, args));
+    }
+
+    /**
+     * Prints a formatted message on a line of its own, preceded by a marker
+     * when {@code highlight} is set.
+     */
+    private void report(boolean highlight, String message, Object... args) {
+        out.print(highlight ? "* " : " ");
+        out.println(String.format(message, args));
+    }
+
+    /**
+     * Prints the name of a missing file, followed by the files that refer to
+     * it. At most {@code maxRefs} referring files are listed; any others are
+     * summarized by a count.
+     */
+    private void reportMissingFile(Path file) {
+        report("%s", relativePath(file));
+        IDTable table = allFiles.get(file);
+        Set<Path> refs = new TreeSet<>();
+        for (ID id : table.map.values()) {
+            for (Position p : id.getReferences()) {
+                refs.add(p.path);
+            }
+        }
+        final int maxRefs = 10;
+        int n = 0;
+        for (Path ref : refs) {
+            if (n == maxRefs) {
+                // Only mention "more" when something really was left out.
+                report(" ... and %d more", refs.size() - n);
+                break;
+            }
+            // Pass the path as an argument, not as part of the format string,
+            // so that a '%' in a file name cannot be taken as a format specifier.
+            report(" in %s", relativePath(ref));
+            n++;
+        }
+    }
+
+    /**
+     * Starts a new file: selects the ID table for the current file, creating
+     * it if necessary, and resets the per-file HTML5 flag.
+     *
+     * NOTE(review): the table is looked up with currFile (not declared in this
+     * class) rather than with the path argument; this assumes the caller has
+     * already set currFile for the new file -- confirm against HtmlChecker.
+     */
+    @Override
+    public void startFile(Path path) {
+        currTable = allFiles.computeIfAbsent(currFile, p -> new IDTable(p));
+        html5 = false;
+    }
+
+    /** Ends the current file by checking its saved references against its IDs. */
+    @Override
+    public void endFile() {
+        currTable.check();
+    }
+
+    /**
+     * Records whether the current file declares itself as HTML5.
+     *
+     * NOTE(review): the pattern requires the text to start with a literal
+     * {@code <?doctype}, whereas an HTML doctype declaration is normally
+     * written {@code <!DOCTYPE html>} -- confirm what the caller passes in,
+     * and whether this flag can ever be set.
+     */
+    @Override
+    public void docType(String doctype) {
+        html5 = doctype.matches("(?i)<\\?doctype\\s+html>");
+    }
+
+    /**
+     * Records the anchors and references found on an element.
+     * An {@code <a>} element may declare an anchor with {@code name} (ignored
+     * once the file is known to be HTML5); both {@code <a>} and {@code <link>}
+     * may refer to another location with {@code href}; any element may declare
+     * an anchor with {@code id}.
+     */
+    @Override @SuppressWarnings("fallthrough")
+    public void startElement(String name, Map<String, String> attrs, boolean selfClosing) {
+        int line = getLineNumber();
+        switch (name) {
+            case "a":
+                String nameAttr = html5 ? null : attrs.get("name");
+                if (nameAttr != null) {
+                    foundAnchor(line, nameAttr);
+                }
+                // fallthrough
+            case "link":
+                String href = attrs.get("href");
+                if (href != null) {
+                    foundReference(line, href);
+                }
+                break;
+        }
+
+        String idAttr = attrs.get("id");
+        if (idAttr != null) {
+            foundAnchor(line, idAttr);
+        }
+    }
+
+    @Override
+    public void endElement(String name) { }
+
+    /** Declares an anchor in the current file. */
+    private void foundAnchor(int line, String name) {
+        currTable.addID(line, name);
+    }
+
+    /**
+     * Records a reference given as the text of an {@code href} attribute.
+     * Absolute URIs are tracked per URI; anything else is resolved against
+     * the directory of the current file and tracked per file.
+     */
+    private void foundReference(int line, String ref) {
+        links++;
+        try {
+            URI uri = new URI(ref);
+            if (uri.isAbsolute()) {
+                foundReference(line, uri);
+            } else {
+                Path p;
+                String uriPath = uri.getPath();
+                if (uriPath == null || uriPath.isEmpty()) {
+                    // A reference with no path points into the current file.
+                    p = currFile;
+                } else {
+                    p = currFile.getParent().resolve(uriPath).normalize();
+                }
+                foundReference(line, p, uri.getFragment());
+            }
+        } catch (URISyntaxException e) {
+            error(currFile, line, "invalid URI: " + e);
+        }
+    }
+
+    /** Records a reference to a fragment (possibly null) of a local file. */
+    private void foundReference(int line, Path p, String fragment) {
+        IDTable t = allFiles.computeIfAbsent(p, key -> new IDTable(key));
+        t.addReference(fragment, currFile, line);
+    }
+
+    /**
+     * Records a reference to an absolute URI. References are grouped by the
+     * URI with any fragment removed; the fragment itself is the referenced ID.
+     */
+    private void foundReference(int line, URI uri) {
+        if (!isSchemeOK(uri.getScheme())) {
+            error(currFile, line, "bad scheme in URI");
+        }
+
+        // Cut the fragment off the raw text of the URI. getFragment() returns
+        // the decoded fragment, which need not appear literally in toString()
+        // when the fragment contains escaped characters, and is null when there
+        // is no fragment at all.
+        String text = uri.toString();
+        int hash = text.indexOf('#');
+        try {
+            URI noFrag = (hash < 0) ? uri : new URI(text.substring(0, hash));
+            IDTable t = allURIs.computeIfAbsent(noFrag, key -> new IDTable(key.toString()));
+            t.addReference(uri.getFragment(), currFile, line);
+        } catch (URISyntaxException e) {
+            throw new Error(e);
+        }
+    }
+
+    /**
+     * Returns whether a URI scheme is one that is expected in links.
+     * A null scheme (that is, no scheme at all) is accepted.
+     */
+    private boolean isSchemeOK(String uriScheme) {
+        if (uriScheme == null) {
+            return true;
+        }
+
+        switch (uriScheme) {
+            case "file":
+            case "ftp":
+            case "http":
+            case "https":
+            case "javascript":
+            case "mailto":
+                return true;
+
+            default:
+                return false;
+        }
+    }
+
+    /** Returns the known local files that do not exist. */
+    private List<Path> getMissingFiles() {
+        return allFiles.keySet().stream()
+                .filter(p -> !Files.exists(p))
+                .collect(Collectors.toList());
+    }
+
+    /**
+     * A position in a file, as identified by a file name and line number.
+     */
+    static class Position implements Comparable<Position> {
+        Path path;
+        int line;
+
+        Position(Path path, int line) {
+            this.path = path;
+            this.line = line;
+        }
+
+        @Override
+        public int compareTo(Position o) {
+            int v = path.compareTo(o.path);
+            return v != 0 ? v : Integer.compare(line, o.line);
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            if (this == obj) {
+                return true;
+            } else if (obj == null || getClass() != obj.getClass()) {
+                return false;
+            } else {
+                final Position other = (Position) obj;
+                return Objects.equals(this.path, other.path)
+                        && this.line == other.line;
+            }
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hashCode(path) * 37 + line;
+        }
+    }
+
+    /**
+     * Info for an ID within an HTML file, and a set of positions that reference it.
+     */
+    static class ID {
+        boolean declared;
+        Set<Position> references;
+
+        /** Returns the positions that reference this ID; never null. */
+        Set<Position> getReferences() {
+            return (references == null) ? Collections.emptySet() : references;
+        }
+    }
+
+    /**
+     * A table for the set of IDs in an HTML file.
+     */
+    class IDTable {
+        private final String name;
+        private boolean checked;
+        private final Map<String, ID> map = new HashMap<>();
+
+        IDTable(Path p) {
+            this(relativePath(p).toString());
+        }
+
+        IDTable(String name) {
+            this.name = name;
+        }
+
+        /**
+         * Declares an ID, reporting an error if it was already declared.
+         *
+         * @throws IllegalStateException if the table has already been checked
+         */
+        void addID(int line, String name) {
+            if (checked) {
+                throw new IllegalStateException("Adding ID after file has been read");
+            }
+            Objects.requireNonNull(name);
+            ID id = map.computeIfAbsent(name, x -> new ID());
+            if (id.declared) {
+                error(currFile, line, "name already declared: " + name);
+                duplicateIds++;
+            } else {
+                id.declared = true;
+            }
+        }
+
+        /**
+         * Records a reference to an ID in this table. A null name stands for
+         * a reference to the file or URI as a whole. If the table has not yet
+         * been checked, the reference is saved until {@link #check()} is
+         * called; otherwise a reference to an undeclared ID is reported now.
+         */
+        void addReference(String name, Path from, int line) {
+            if (checked) {
+                if (name != null) {
+                    ID id = map.get(name);
+                    if (id == null || !id.declared) {
+                        error(from, line, "id not found: " + this.name + "#" + name);
+                    }
+                }
+            } else {
+                ID id = map.computeIfAbsent(name, x -> new ID());
+                if (id.references == null) {
+                    id.references = new TreeSet<>();
+                }
+                id.references.add(new Position(from, line));
+            }
+        }
+
+        /**
+         * Reports an error for each saved reference to an ID that was never
+         * declared, and marks the table as checked.
+         */
+        void check() {
+            map.forEach((name, id) -> {
+                if (name != null && !id.declared) {
+                    for (Position ref : id.getReferences()) {
+                        error(ref.path, ref.line, "id not found: " + this.name + "#" + name);
+                    }
+                    missingIds++;
+                }
+            });
+            checked = true;
+        }
+
+        /** Returns the number of entries that have at least one reference. */
+        long countReferenced() {
+            return map.values().stream()
+                    .filter(id -> !id.getReferences().isEmpty())
+                    .count();
+        }
+    }
+
+    /**
+     * Orders URIs by host (see {@link HostComparator}), then by scheme, then
+     * by the natural ordering of URIs. URIs that are opaque, or that lack a
+     * host or scheme, are ordered by the natural ordering alone.
+     */
+    static class URIComparator implements Comparator<URI> {
+        final HostComparator hostComparator = new HostComparator();
+
+        @Override
+        public int compare(URI o1, URI o2) {
+            if (o1.isOpaque() || o2.isOpaque()) {
+                return o1.compareTo(o2);
+            }
+            String h1 = o1.getHost();
+            String h2 = o2.getHost();
+            String s1 = o1.getScheme();
+            String s2 = o2.getScheme();
+            if (h1 == null || h1.isEmpty() || s1 == null || s1.isEmpty()
+                    || h2 == null || h2.isEmpty() || s2 == null || s2.isEmpty()) {
+                return o1.compareTo(o2);
+            }
+            int v = hostComparator.compare(h1, h2);
+            if (v != 0) {
+                return v;
+            }
+            v = s1.compareTo(s2);
+            if (v != 0) {
+                return v;
+            }
+            return o1.compareTo(o2);
+        }
+    }
+
+    /**
+     * Orders host names by their dot-separated components taken in reverse
+     * order, so that hosts sharing trailing components sort together.
+     */
+    static class HostComparator implements Comparator<String> {
+        @Override
+        public int compare(String h1, String h2) {
+            return reverseComponents(h1).compareTo(reverseComponents(h2));
+        }
+
+        /** Returns a host name with its dot-separated components reversed. */
+        private static String reverseComponents(String host) {
+            List<String> parts = new ArrayList<>(Arrays.asList(host.split("\\.")));
+            Collections.reverse(parts);
+            return String.join(".", parts);
+        }
+    }
+
+}