Add fixuppandoc/Main.java for JDK-8224257 JDK-8224257-branch
authorjjg
Mon, 20 May 2019 17:52:51 -0700
branchJDK-8224257-branch
changeset 57363 ac8238552c85
parent 57362 b836d0fda509
child 57365 40c87d67e9f2
Add fixuppandoc/Main.java for JDK-8224257
make/jdk/src/classes/build/tools/fixuppandoc/Main.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/jdk/src/classes/build/tools/fixuppandoc/Main.java	Mon May 20 17:52:51 2019 -0700
@@ -0,0 +1,915 @@
+package build.tools.fixuppandoc;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.io.Reader;
+import java.io.Writer;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+/**
+ * Fixup HTML generated by pandoc.
+ *
+ * <h2>{@code <html>}</h2>
+ *
+ * Replace the existing element with {@code <html lang="en">}, removing references to XML.
+ *
+ * <h2>{@code <main>}</h2>
+ *
+ * {@code <main>} is inserted if palpable content is found that is not with a
+ * section such as {@code header},  {@code footer},  {@code aside}.
+ *
+ * {@code </main>} is inserted if {@code <main>} was inserted and a section
+ * is started that should not be included in the main section.
+ *
+ * <h2>Tables: row headings</h2>
+ *
+ * {@code scope="row"} is added to the {@code <td>} elements in the first
+ * column whose cell contents are all different and therefore which can be
+ * used to identify the row. In case of ambiguity, a column containing
+ * a {@code <th>} whose contents begin <em>name</em> is preferred.
+ *
+ *
+ * <h2>{@code <meta name="generator">}</h2>
+ *
+ * Update the content string, to indicate it has been processed by this program.
+ *
+ */
+public class Main {
+    /**
+     * Runs the program.
+     *
+     * <pre>
+     *     java build.tools.fixuphtml.Main [-o output-file] [input-file]
+     * </pre>
+     *
+     * If no input file is specified, the program will read from standard input.
+     * If no output file is specified, the program will write to standard output.
+     * Any error messages will be written to the standard error stream.
+     *
+     * @param args the command-line arguments
+     */
+    public static void main(String... args) {
+        try {
+            new Main().run(args);
+        } catch (IOException | IllegalArgumentException e) {
+            System.err.println(e);
+            System.exit(1);
+        } catch (Throwable t) {
+            t.printStackTrace(System.err);
+            System.exit(1);
+        }
+    }
+
+    private void run(String... args) throws IOException {
+        Path inFile = null;
+        Path outFile = null;
+
+        for (int i = 0; i < args.length; i++) {
+            String arg = args[i];
+            if (arg.equals("-o") && i + 1 < args.length) {
+                outFile = Path.of(args[++i]);
+            } else if (arg.startsWith("-")) {
+                throw new IllegalArgumentException(arg);
+            } else if (inFile == null) {
+                inFile = Path.of(arg);
+            } else {
+                throw new IllegalArgumentException(arg);
+            }
+        }
+
+        new Fixup().run(inFile, outFile);
+    }
+
+    /**
+     * A class to read HTML, copying input to output, modifying
+     * fragments as needed.
+     */
+    class Fixup extends HtmlParser {
+        /** The output stream. */
+        PrintWriter out;
+
+        /** A stream for reporting errors. */
+        PrintStream err = System.err;
+
+        /**
+         * Flag to indicate when {@code <main>} is permitted around palpable content.
+         * Set within {@code <body>}; disabled within elements in which {@code <main>}
+         * is not permitted.
+         */
+        boolean allowMain = false;
+
+        /**
+         * Flag to indicate that {@code <main>} is required.
+         * Set on {@code <body>}; reset when {@code <main>} is either found or generated.
+         */
+        boolean needMain = false;
+
+        /**
+         * Flag to indicate that {@code </main>} is required.
+         * Set if {@code <main>} is generated.
+         * Reset when a start or end element is found that requires that {@code </main>}
+         * needs to be generated if necessary.
+         */
+        boolean needEndMain = false;
+
+        /**
+         * Handler for {@code <table>} elements.
+         */
+        Table table;
+
+        /**
+         * Run the program, copying an input file to an output file.
+         * If the input file is {@code null}, input is read from the standard input.
+         * If the output file is {@code null}, output is written to the standard output.
+         *
+         * @param inFile the input file
+         * @param outFile the output file
+         * @throws IOException if an IO error occurs
+         */
+        void run(Path inFile, Path outFile) throws IOException {
+            try (Writer out = openWriter(outFile)) {
+                this.out = new PrintWriter(out);
+                if (inFile != null) {
+                    read(inFile);
+                } else {
+                    read(new BufferedReader(new InputStreamReader(System.in)));
+                }
+            }
+        }
+
+        /**
+         * Returns a writer for a file, or for the standard output if the file is {@code null}.
+         *
+         * @param file the file
+         * @return the writer
+         * @throws IOException if an IO error occurs
+         */
+        private Writer openWriter(Path file) throws IOException {
+            if (file != null) {
+                return Files.newBufferedWriter(file);
+            } else {
+                return new BufferedWriter(new OutputStreamWriter(System.out) {
+                    @Override
+                    public void close() throws IOException {
+                        flush();
+                    }
+                });
+            }
+        }
+
+        @Override
+        protected void error(Path file, int lineNumber, String message) {
+            err.print(file == null ? "<stdin>" : file);
+            if (lineNumber > 0) {
+                err.print(":");
+                err.print(lineNumber);
+            }
+            err.print(": ");
+            err.println(message);
+        }
+
+        @Override
+        protected void error(Path file, int lineNumber, Throwable t) {
+            error(file, lineNumber, t.toString());
+            t.printStackTrace(err);
+        }
+
+        /**
+         * The buffer in which input is stored until an appropriate action can be determined.
+         * Using the buffer ensures that the output exactly matches the input, except where
+         * it is intentionally modified.
+         */
+        private StringBuilder buffer = new StringBuilder();
+
+        @Override
+        public int nextChar() throws IOException {
+            if (ch > 0) {
+                buffer.append((char) ch);
+            }
+            return super.nextChar();
+        }
+
+        @Override
+        protected void doctype(String s) {
+            flushBuffer();
+        }
+
+        @Override
+        protected void startElement(String name, Map<String,String> attrs, boolean selfClosing) {
+            switch (name) {
+                case "html":
+                    // replace the existing <html> fragment
+                    out.write("<html lang=\"en\">");
+                    buffer.setLength(0);
+                    break;
+
+                case "meta":
+                    // update the meta-data for the generator
+                    if (Objects.equals(attrs.get("name"), "generator")) {
+                        out.write(buffer.toString()
+                                .replaceAll("(content=\"[^\"]*)(\")", "$1,fixuphtml$2"));
+                        buffer.setLength(0);
+                    }
+                    break;
+
+                case "article":
+                case "aside":
+                case "footer":
+                case "header":
+                case "nav":
+                    // starting one of these elements will terminate <main> if one is being
+                    // inserted
+                    if (needEndMain) {
+                        out.write("</main>");
+                        needEndMain = false;
+                    }
+                    // <main> is not permitted within these elements
+                    allowMain = false;
+                    break;
+
+                case "body":
+                    // within <body>, <main> is both permitted and required
+                    allowMain = true;
+                    needMain = true;
+                    break;
+
+                case "main":
+                    // an explicit <main> found in the input; no need to add one
+                    needMain = false;
+                    break;
+
+                case "table":
+                    // The entire content of a <table> is buffered, until it can be
+                    // determined in which column of the table contains the cells
+                    // that can be used to identify the row.
+                    if (table == null) {
+                        table = new Table();
+                    } else {
+                        // tables containing nested tables are not updated
+                        table.simple = false;
+                    }
+                    table.nestDepth++;
+                    break;
+
+                case "thead":
+                case "tbody":
+                    if (table != null) {
+                        table.endCell();
+                    }
+                    break;
+
+                case "tr":
+                    if (table != null) {
+                        table.endCell();
+                        table.nextCellColumnIndex = 0;
+                    }
+                    break;
+
+                case "td":
+                case "th":
+                    if (table != null) {
+                        if (attrs.containsKey("rowspan")
+                                || attrs.containsKey("colspan")
+                                || attrs.containsKey("scope")) {
+                            // tables containing spanning cells and tables that already
+                            // contain scope attributes are not updated
+                            table.simple = false;
+                        }
+                        table.startCell(name);
+                    }
+                    break;
+            }
+
+            // by default, the content is deemed to be palpable content, and so
+            // insert <main> if it is permitted and one is still required,
+            // while also ensuring that it does not appear before <body>
+            if (allowMain && needMain && !name.equals("body")) {
+                out.write("<main>");
+                needMain = false;
+                needEndMain = true;
+            }
+
+            flushBuffer();
+        }
+
+        @Override
+        protected void endElement(String name) {
+            switch (name) {
+                case "article":
+                case "aside":
+                case "footer":
+                case "header":
+                case "nav":
+                    // The code does not handle nested elements of these kinds, but could.
+                    // So, assuming they are not nested, ending these elements implies
+                    // that <main> is once again permitted.
+                    allowMain = true;
+                    break;
+
+                case "body":
+                    // The document is nearly done; insert <main> if needed
+                    if (needEndMain) {
+                        out.write("</main>");
+                        needEndMain = false;
+                    }
+                    break;
+
+                case "table":
+                    // if the table is finished, analyze it and write it out
+                    if (table != null) {
+                        if (--table.nestDepth == 0) {
+                            table.add(buffer.toString());
+                            table.write(out);
+                            table = null;
+                            buffer.setLength(0);
+                        }
+                    }
+                    break;
+
+                case "thead":
+                case "tbody":
+                case "tr":
+                case "td":
+                case "th":
+                    // ending any of these elements implicity or explicitly ends the
+                    // current cell
+                    table.endCell();
+                    break;
+
+            }
+            flushBuffer();
+        }
+
+        @Override
+        protected void content(String content) {
+            if (table != null) {
+                table.content(content);
+            } else if (allowMain && needMain && !content.isBlank()) {
+                // insert <main> if required and if we have palpable content
+                out.write("<main>");
+                needMain = false;
+                needEndMain = true;
+            }
+            flushBuffer();
+        }
+
+        @Override
+        protected void comment(String comment) {
+            flushBuffer();
+        }
+
+        /**
+         * Flushes the buffer, either by adding it into a table, if one is
+         * in progress, or by writing it out.
+         */
+        private void flushBuffer() {
+            String s = buffer.toString();
+            if (table != null) {
+                table.add(s);
+            } else {
+                out.write(s);
+            }
+            buffer.setLength(0);
+
+        }
+    }
+
+    /**
+     * Storage for the content of a {@code <table>} element} until we can determine
+     * whether we should add {@code scope="row"} to the cells in a given column,
+     * and if so, which column.
+     *
+     * The column with the highest number of unique entries is selected;
+     * in case of ambiguity, a column whose heading begins "name" is chosen.
+     *
+     * Only "simple" tables are supported. Tables with any of the following
+     * features are not considered "simple" and will not be modified:
+     * <ul>
+     *     <li>Tables containing nested tables</li>
+     *     <li>Tables containing cells that use "rowspan" and "colspan" attributes</li>
+     *     <li>Tables containing cells that already use "scope" attributes</li>
+     * </ul>
+     */
+    class Table {
+        /**
+         * A fragment of HTML in this table.
+         */
+        class Entry {
+            /** The fragment. */
+            final String html;
+            /** The column for a {@code <td>} fragment, or -1. */
+            final int column;
+
+            Entry(String html, int column) {
+                this.html = html;
+                this.column = column;
+            }
+        }
+
+        /** Whether or not this is a "simple" table. */
+        boolean simple = true;
+
+        /** The nesting depth of the current table, within enclosing tables. */
+        int nestDepth;
+
+        /** A list of the HTML fragments that make up this table. */
+        List<Entry> entries;
+
+        /** The plain text contents of each column, used to determine the primary column. */
+        List<Set<String>> columnContents;
+
+        /** The column index of the next cell to be found. */
+        int nextCellColumnIndex;
+
+        /** A flag to mark the start of a {@code <td>} cell. */
+        boolean startTDCell;
+
+        /** The column index of the current cell, or -1 if not in a cell. */
+        int currCellColumnIndex;
+
+        /** The plain text contents of the current column. */
+        Set<String> currColumnContents;
+
+        /** The plain text content of the current cell. */
+        StringBuilder currCellContent;
+
+        /** The kind ({@code th} or {@code td}) of the current cell. */
+        String currCellKind;
+
+        /**
+         * The index of the column, if any, containing a heading beginning "name".
+         * This column is given preferential treatment when deciding the primary column.
+         */
+        int nameColumn;
+
+        Table() {
+            entries = new ArrayList<>();
+            columnContents = new ArrayList<>();
+        }
+
+        void startCell(String name) {
+            endCell();
+            startTDCell = name.equals("td");
+            currCellColumnIndex = nextCellColumnIndex++;
+            currColumnContents = getColumn(currCellColumnIndex);
+            currCellContent = new StringBuilder();
+            currCellKind = name;
+        }
+
+        void endCell() {
+            if (currCellContent != null) {
+                String c = currCellContent.toString().trim();
+                if (Objects.equals(currCellKind, "th")
+                        && c.toLowerCase(Locale.US).startsWith("name")) {
+                    nameColumn = currCellColumnIndex;
+                }
+                currColumnContents.add(c);
+                currCellContent = null;
+                currCellColumnIndex = -1;
+                currColumnContents = null;
+            }
+        }
+
+        void content(String content) {
+            if (currCellContent != null) {
+                currCellContent.append(content);
+            }
+        }
+
+        void add(String html) {
+            int index = startTDCell ? currCellColumnIndex : -1;
+            entries.add(new Entry(html, index));
+            startTDCell = false;
+        }
+
+        void write(PrintWriter out) {
+            int max = -1;
+            int maxIndex = -1;
+            int index = 0;
+            for (Set<String> c : columnContents) {
+                if (c.size() > max || c.size() == max && index == nameColumn) {
+                    max = c.size();
+                    maxIndex = index;
+                }
+                index++;
+            }
+            for (Entry e : entries) {
+                if (simple && e.column == maxIndex) {
+                    out.write(e.html.substring(0, e.html.length() - 1));
+                    out.write(" scope=\"row\">");
+                } else {
+                    out.write(e.html);
+                }
+            }
+        }
+
+        private Set<String> getColumn(int index) {
+            while (columnContents.size() <= index) {
+                columnContents.add(new LinkedHashSet<>());
+            }
+
+            return columnContents.get(index);
+        }
+    }
+
+    /**
+     * A basic HTML parser.
+     * Override the protected methods as needed to get notified of significant items
+     * in any file that is read.
+     */
+    abstract class HtmlParser {
+
+        private Path file;
+        private Reader in;
+        protected int ch;
+        private int lineNumber;
+        private boolean inScript;
+        private boolean xml;
+
+        /**
+         * Read a file.
+         * @param file the file
+         */
+        void read(Path file) {
+            try (Reader r = Files.newBufferedReader(file)) {
+                this.file = file;
+                read(r);
+            } catch (IOException e) {
+                error(file, -1, e);
+            }
+        }
+
+        HtmlParser() { }
+
+        /**
+         * Read a stream.
+         * @param r the stream
+         */
+        void read(Reader r) {
+            try {
+                this.in = r;
+                StringBuilder content = new StringBuilder();
+
+                startFile(file);
+                try {
+                    lineNumber = 1;
+                    xml = false;
+                    nextChar();
+
+                    while (ch != -1) {
+                        if (ch == '<') {
+                            content(content.toString());
+                            content.setLength(0);
+                            html();
+                        } else {
+                            content.append((char) ch);
+                            if (ch == '\n') {
+                                content(content.toString());
+                                content.setLength(0);
+                            }
+                            nextChar();
+                        }
+                    }
+                } finally {
+                    endFile();
+                }
+            } catch (IOException e) {
+                error(file, lineNumber, e);
+            } catch (Throwable t) {
+                error(file, lineNumber, t);
+                t.printStackTrace(System.err);
+            }
+        }
+
+        protected int getLineNumber() {
+            return lineNumber;
+        }
+
+        /**
+         * Called when a file has been opened, before parsing begins.
+         * This is always the first notification when reading a file.
+         * This implementation does nothing.
+         *
+         * @param file the file
+         */
+        protected void startFile(Path file) { }
+
+        /**
+         * Called when the parser has finished reading a file.
+         * This is always the last notification when reading a file,
+         * unless any errors occur while closing the file.
+         * This implementation does nothing.
+         */
+        protected void endFile() { }
+
+        /**
+         * Called when a doctype declaration is found, at the beginning of the file.
+         * This implementation does nothing.
+         * @param s the doctype declaration
+         */
+        protected void doctype(String s) { }
+
+        /**
+         * Called when the opening tag of an HTML element is encountered.
+         * This implementation does nothing.
+         * @param name the name of the tag
+         * @param attrs the attribute
+         * @param selfClosing whether or not this is a self-closing tag
+         */
+        protected void startElement(String name, Map<String,String> attrs, boolean selfClosing) { }
+
+        /**
+         * Called when the closing tag of an HTML tag is encountered.
+         * This implementation does nothing.
+         * @param name the name of the tag
+         */
+        protected void endElement(String name) { }
+
+        /**
+         * Called for sequences of character content.
+         * @param content the character content
+         */
+        protected void content(String content) { }
+
+        /**
+         * Called for sequences of comment.
+         * @param comment the comment
+         */
+        protected void comment(String comment) { }
+
+        /**
+         * Called when an error has been encountered.
+         * @param file the file being read
+         * @param lineNumber the line number of line containing the error
+         * @param message a description of the error
+         */
+        protected abstract void error(Path file, int lineNumber, String message);
+
+        /**
+         * Called when an exception has been encountered.
+         * @param file the file being read
+         * @param lineNumber the line number of the line being read when the exception was found
+         * @param t the exception
+         */
+        protected abstract void error(Path file, int lineNumber, Throwable t);
+
+        protected int nextChar() throws IOException {
+            ch = in.read();
+            if (ch == '\n')
+                lineNumber++;
+            return ch;
+        }
+
+        /**
+         * Read the start or end of an HTML tag, or an HTML comment
+         * {@literal <identifier attrs> } or {@literal </identifier> }
+         * @throws java.io.IOException if there is a problem reading the file
+         */
+        protected void html() throws IOException {
+            nextChar();
+            if (isIdentifierStart((char) ch)) {
+                String name = readIdentifier().toLowerCase(Locale.US);
+                Map<String,String> attrs = htmlAttrs();
+                if (attrs != null) {
+                    boolean selfClosing = false;
+                    if (ch == '/') {
+                        nextChar();
+                        selfClosing = true;
+                    }
+                    if (ch == '>') {
+                        nextChar();
+                        startElement(name, attrs, selfClosing);
+                        if (name.equals("script")) {
+                            inScript = true;
+                        }
+                        return;
+                    }
+                }
+            } else if (ch == '/') {
+                nextChar();
+                if (isIdentifierStart((char) ch)) {
+                    String name = readIdentifier().toLowerCase(Locale.US);
+                    skipWhitespace();
+                    if (ch == '>') {
+                        nextChar();
+                        endElement(name);
+                        if (name.equals("script")) {
+                            inScript = false;
+                        }
+                        return;
+                    }
+                }
+            } else if (ch == '!') {
+                nextChar();
+                if (ch == '-') {
+                    nextChar();
+                    if (ch == '-') {
+                        nextChar();
+                        StringBuilder comment = new StringBuilder();
+                        while (ch != -1) {
+                            int dash = 0;
+                            while (ch == '-') {
+                                dash++;
+                                comment.append(ch);
+                                nextChar();
+                            }
+                            // Strictly speaking, a comment should not contain "--"
+                            // so dash > 2 is an error, dash == 2 implies ch == '>'
+                            // See http://www.w3.org/TR/html-markup/syntax.html#syntax-comments
+                            // for more details.
+                            if (dash >= 2 && ch == '>') {
+                                comment.setLength(comment.length() - 2);
+                                comment(comment.toString());
+                                nextChar();
+                                return;
+                            }
+
+                            comment.append(ch);
+                            nextChar();
+                        }
+                    }
+                } else if (ch == '[') {
+                    nextChar();
+                    if (ch == 'C') {
+                        nextChar();
+                        if (ch == 'D') {
+                            nextChar();
+                            if (ch == 'A') {
+                                nextChar();
+                                if (ch == 'T') {
+                                    nextChar();
+                                    if (ch == 'A') {
+                                        nextChar();
+                                        if (ch == '[') {
+                                            while (true) {
+                                                nextChar();
+                                                if (ch == ']') {
+                                                    nextChar();
+                                                    if (ch == ']') {
+                                                        nextChar();
+                                                        if (ch == '>') {
+                                                            nextChar();
+                                                            return;
+                                                        }
+                                                    }
+                                                }
+                                            }
+
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                } else {
+                    StringBuilder sb = new StringBuilder();
+                    while (ch != -1 && ch != '>') {
+                        sb.append((char) ch);
+                        nextChar();
+                    }
+                    Pattern p = Pattern.compile("(?is)doctype\\s+html\\s?.*");
+                    String s = sb.toString();
+                    if (p.matcher(s).matches()) {
+                        doctype(s);
+                        return;
+                    }
+                }
+            } else if (ch == '?') {
+                nextChar();
+                if (ch == 'x') {
+                    nextChar();
+                    if (ch == 'm') {
+                        nextChar();
+                        if (ch == 'l') {
+                            Map<String,String> attrs = htmlAttrs();
+                            if (ch == '?') {
+                                nextChar();
+                                if (ch == '>') {
+                                    nextChar();
+                                    xml = true;
+                                    return;
+                                }
+                            }
+                        }
+                    }
+
+                }
+            }
+
+            if (!inScript) {
+                error(file, lineNumber, "bad html");
+            }
+        }
+
+        /**
+         * Read a series of HTML attributes, terminated by {@literal > }.
+         * Each attribute is of the form {@literal identifier[=value] }.
+         * "value" may be unquoted, single-quoted, or double-quoted.
+         */
+        private Map<String,String> htmlAttrs() throws IOException {
+            Map<String, String> map = new LinkedHashMap<>();
+            skipWhitespace();
+
+            while (isIdentifierStart((char) ch)) {
+                String name = readAttributeName().toLowerCase(Locale.US);
+                skipWhitespace();
+                String value = null;
+                if (ch == '=') {
+                    nextChar();
+                    skipWhitespace();
+                    if (ch == '\'' || ch == '"') {
+                        char quote = (char) ch;
+                        nextChar();
+                        StringBuilder sb = new StringBuilder();
+                        while (ch != -1 && ch != quote) {
+                            sb.append((char) ch);
+                            nextChar();
+                        }
+                        value = sb.toString() // hack to replace common entities
+                                .replace("&lt;", "<")
+                                .replace("&gt;", ">")
+                                .replace("&amp;", "&");
+                        nextChar();
+                    } else {
+                        StringBuilder sb = new StringBuilder();
+                        while (ch != -1 && !isUnquotedAttrValueTerminator((char) ch)) {
+                            sb.append((char) ch);
+                            nextChar();
+                        }
+                        value = sb.toString();
+                    }
+                    skipWhitespace();
+                }
+                map.put(name, value);
+            }
+
+            return map;
+        }
+
+        private boolean isIdentifierStart(char ch) {
+            return Character.isUnicodeIdentifierStart(ch);
+        }
+
+        private String readIdentifier() throws IOException {
+            StringBuilder sb = new StringBuilder();
+            sb.append((char) ch);
+            nextChar();
+            while (ch != -1 && Character.isUnicodeIdentifierPart(ch)) {
+                sb.append((char) ch);
+                nextChar();
+            }
+            return sb.toString();
+        }
+
+        private String readAttributeName() throws IOException {
+            StringBuilder sb = new StringBuilder();
+            sb.append((char) ch);
+            nextChar();
+            while (ch != -1 && Character.isUnicodeIdentifierPart(ch)
+                    || ch == '-'
+                    || (xml || sb.toString().startsWith("xml")) && ch == ':') {
+                sb.append((char) ch);
+                nextChar();
+            }
+            return sb.toString();
+        }
+
+        private boolean isWhitespace(char ch) {
+            return Character.isWhitespace(ch);
+        }
+
+        private void skipWhitespace() throws IOException {
+            while (isWhitespace((char) ch)) {
+                nextChar();
+            }
+        }
+
+        private boolean isUnquotedAttrValueTerminator(char ch) {
+            switch (ch) {
+                case '\f': case '\n': case '\r': case '\t':
+                case ' ':
+                case '"': case '\'': case '`':
+                case '=': case '<': case '>':
+                    return true;
+                default:
+                    return false;
+            }
+        }
+    }
+
+}