langtools/test/tools/doclint/tidy/util/Main.java
author vasya
Mon, 14 Dec 2015 20:18:19 +0100
changeset 34752 9c262a013456
parent 20263 e7ce67bbe293
child 43028 7af3f09fcd6c
permissions -rw-r--r--
8145342: Some copyright notices are inconsistently and ill formatted Summary: Adjusted formatting of copyright notices. Reviewed-by: darcy, alundblad

/*
 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package tidystats;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Generate statistics from the files generated by tidy.sh.
 *
 * <p>The tidy.sh script is used to run tidy on all the HTML files
 * in a directory, creating files in a new directory, and for each
 * HTML file, it writes the console output from tidy into a file
 * beside the fixed up file, with an additional .tidy extension.
 *
 * <p>This program will scan a directory for *.tidy files and
 * analyze the messages reported by tidy, in order to generate a
 * report with statistics on the various messages that were
 * reported by tidy.
 *
 * <p>Typical usage:
 * <pre>
 * $ bash /path/to/tidy.sh /path/to/htmldir
 * $ javac -d /path/to/classes /path/to/Main.java
 * $ java -cp /path/to/classes tidystats.Main /path/to/htmldir.tidy
 * </pre>
 *
 * <p>Internally, the program works by matching lines in the *.tidy
 * files against a series of regular expressions that are used to
 * categorize the messages.  The set of regular expressions was
 * empirically determined by running the program on the output from
 * running tidy.sh on all the generated JDK documentation. It is
 * possible that tidy may generate more/different messages on other
 * doc sets, in which case, the set of regexes in the program should
 * be updated.
 */
public class Main {
    public static void main(String... args) throws IOException {
        new Main().run(args);
    }

    void run(String... args) throws IOException {
        FileSystem fs = FileSystems.getDefault();
        List<Path> paths = new ArrayList<>();

        int i;
        for (i = 0; i < args.length; i++) {
            String arg = args[i];
            if (arg.startsWith("-"))
                throw new IllegalArgumentException(arg);
            else
                break;
        }

        for ( ; i < args.length; i++) {
            Path p = fs.getPath(args[i]);
            paths.add(p);
        }

        for (Path p: paths) {
            scan(p);
        }

        print("%6d files read", files);
        print("%6d files had no errors or warnings", ok);
        print("%6d files reported \"Not all warnings/errors were shown.\"", overflow);
        print("%6d errors found", errs);
        print("%6d warnings found", warns);
        print("%6d recommendations to use CSS", css);
        print("");

        Map<Integer, Set<String>> sortedCounts = new TreeMap<>(
                new Comparator<Integer>() {
                    @Override
                    public int compare(Integer o1, Integer o2) {
                        return o2.compareTo(o1);
                    }
                });

        for (Map.Entry<Pattern, Integer> e: counts.entrySet()) {
            Pattern p = e.getKey();
            Integer n = e.getValue();
            Set<String> set = sortedCounts.get(n);
            if (set == null)
                sortedCounts.put(n, (set = new TreeSet<>()));
            set.add(p.toString());
        }

        for (Map.Entry<Integer, Set<String>> e: sortedCounts.entrySet()) {
            for (String p: e.getValue()) {
                if (p.startsWith(".*")) p = p.substring(2);
                print("%6d: %s", e.getKey(), p);
            }
        }
    }

    void scan(Path p) throws IOException {
        if (Files.isDirectory(p)) {
            for (Path c: Files.newDirectoryStream(p)) {
                scan(c);
            }
        } else if (isTidyFile(p)) {
            scan(Files.readAllLines(p, Charset.defaultCharset()));
        }
    }

    boolean isTidyFile(Path p) {
        return Files.isRegularFile(p) && p.getFileName().toString().endsWith(".tidy");
    }

    void scan(List<String> lines) {
        Matcher m;
        files++;
        for (String line: lines) {
            if (okPattern.matcher(line).matches()) {
                ok++;
            } else if ((m = countPattern.matcher(line)).matches()) {
                warns += Integer.valueOf(m.group(1));
                errs += Integer.valueOf(m.group(2));
                if (m.group(3) != null)
                    overflow++;
            } else if ((m = guardPattern.matcher(line)).matches()) {
                boolean found = false;
                for (Pattern p: patterns) {
                    if ((m = p.matcher(line)).matches()) {
                        found = true;
                        count(p);
                        break;
                    }
                }
                if (!found)
                    System.err.println("Unrecognized line: " + line);
            } else if (cssPattern.matcher(line).matches()) {
                css++;
            }
        }
    }

    Map<Pattern, Integer> counts = new HashMap<>();
    void count(Pattern p) {
        Integer i = counts.get(p);
        counts.put(p, (i == null) ? 1 : i + 1);
    }

    void print(String format, Object... args) {
        System.out.println(String.format(format, args));
    }

    Pattern okPattern = Pattern.compile("No warnings or errors were found.");
    Pattern countPattern = Pattern.compile("([0-9]+) warnings, ([0-9]+) errors were found!.*?(Not all warnings/errors were shown.)?");
    Pattern cssPattern = Pattern.compile("You are recommended to use CSS.*");
    Pattern guardPattern = Pattern.compile("line [0-9]+ column [0-9]+ - (Error|Warning):.*");

    Pattern[] patterns = {
        Pattern.compile(".*Error: <.*> is not recognized!"),
        Pattern.compile(".*Error: missing quote mark for attribute value"),
        Pattern.compile(".*Warning: <.*> anchor \".*\" already defined"),
        Pattern.compile(".*Warning: <.*> attribute \".*\" has invalid value \".*\""),
        Pattern.compile(".*Warning: <.*> attribute \".*\" lacks value"),
        Pattern.compile(".*Warning: <.*> attribute \".*\" lacks value"),
        Pattern.compile(".*Warning: <.*> attribute with missing trailing quote mark"),
        Pattern.compile(".*Warning: <.*> dropping value \".*\" for repeated attribute \".*\""),
        Pattern.compile(".*Warning: <.*> inserting \".*\" attribute"),
        Pattern.compile(".*Warning: <.*> is probably intended as </.*>"),
        Pattern.compile(".*Warning: <.*> isn't allowed in <.*> elements"),
        Pattern.compile(".*Warning: <.*> lacks \".*\" attribute"),
        Pattern.compile(".*Warning: <.*> missing '>' for end of tag"),
        Pattern.compile(".*Warning: <.*> proprietary attribute \".*\""),
        Pattern.compile(".*Warning: <.*> unexpected or duplicate quote mark"),
        Pattern.compile(".*Warning: <a> cannot copy name attribute to id"),
        Pattern.compile(".*Warning: <a> escaping malformed URI reference"),
        Pattern.compile(".*Warning: <blockquote> proprietary attribute \"pre\""),
        Pattern.compile(".*Warning: discarding unexpected <.*>"),
        Pattern.compile(".*Warning: discarding unexpected </.*>"),
        Pattern.compile(".*Warning: entity \".*\" doesn't end in ';'"),
        Pattern.compile(".*Warning: inserting implicit <.*>"),
        Pattern.compile(".*Warning: inserting missing 'title' element"),
        Pattern.compile(".*Warning: missing <!DOCTYPE> declaration"),
        Pattern.compile(".*Warning: missing <.*>"),
        Pattern.compile(".*Warning: missing </.*> before <.*>"),
        Pattern.compile(".*Warning: nested emphasis <.*>"),
        Pattern.compile(".*Warning: plain text isn't allowed in <.*> elements"),
        Pattern.compile(".*Warning: replacing <p> by <br>"),
        Pattern.compile(".*Warning: replacing invalid numeric character reference .*"),
        Pattern.compile(".*Warning: replacing unexpected .* by </.*>"),
        Pattern.compile(".*Warning: trimming empty <.*>"),
        Pattern.compile(".*Warning: unescaped & or unknown entity \".*\""),
        Pattern.compile(".*Warning: unescaped & which should be written as &amp;"),
        Pattern.compile(".*Warning: using <br> in place of <p>")
    };

    int files;
    int ok;
    int warns;
    int errs;
    int css;
    int overflow;
}