make/jdk/src/classes/build/tools/publicsuffixlist/GeneratePublicSuffixList.java
author weijun
Tue, 26 Jun 2018 18:55:48 +0800
changeset 50788 6274aee1f692
permissions -rw-r--r--
8201815: Use Mozilla Public Suffix List Reviewed-by: michaelm, erikj, ihse
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
50788
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
     1
/*
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
     2
 * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved.
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
     4
 *
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
     7
 * published by the Free Software Foundation.  Oracle designates this
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
     8
 * particular file as subject to the "Classpath" exception as provided
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
     9
 * by Oracle in the LICENSE file that accompanied this code.
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    10
 *
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    11
 * This code is distributed in the hope that it will be useful, but WITHOUT
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    12
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    13
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    14
 * version 2 for more details (a copy is included in the LICENSE file that
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    15
 * accompanied this code).
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    16
 *
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    17
 * You should have received a copy of the GNU General Public License version
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    18
 * 2 along with this work; if not, write to the Free Software Foundation,
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    19
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    20
 *
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    21
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    22
 * or visit www.oracle.com if you need additional information or have any
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    23
 * questions.
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    24
 */
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    25
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    26
package build.tools.publicsuffixlist;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    27
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    28
import java.io.BufferedReader;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    29
import java.io.BufferedWriter;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    30
import java.io.FileInputStream;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    31
import java.io.FileOutputStream;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    32
import java.io.InputStreamReader;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    33
import java.io.IOException;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    34
import java.io.OutputStreamWriter;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    35
import java.nio.file.attribute.FileTime;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    36
import java.util.HashMap;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    37
import java.util.LinkedList;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    38
import java.util.List;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    39
import java.util.Map;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    40
import java.util.Set;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    41
import java.util.regex.Pattern;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    42
import java.util.stream.Collectors;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    43
import java.util.zip.ZipEntry;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    44
import java.util.zip.ZipOutputStream;
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    45
6274aee1f692 8201815: Use Mozilla Public Suffix List
weijun
parents:
diff changeset
    46
/**
 * This tool takes the original Mozilla public suffix rule list as input
 * and slices it into a set of files, one for each top-level domain.
 * Each file contains only the rules for that domain. Lines containing comments
 * or only whitespace are not copied, and a rule line is only read up to its
 * first whitespace character. Each of these files is then combined
 * into the target zipfile.
 *
 * Every line written to a per-TLD file consists of one type character
 * (0x00 for rules found before the "BEGIN PRIVATE DOMAINS" marker, 0x01 for
 * rules found after it), followed by the rule text and a single '\n'.
 *
 * Usage: java GeneratePublicSuffixList mozilla_file destination_zipfile
 */
public final class GeneratePublicSuffixList {
    // patterns
    private static final String COMMENT = "//";
    private static final String BEGIN_PRIVATE = "// ===BEGIN PRIVATE DOMAINS===";
    // a single whitespace character; used to cut a rule line at its first whitespace
    private static final Pattern WHITESPACE = Pattern.compile("\\s");
    private static final byte ICANN = 0x00;
    private static final byte PRIVATE = 0x01;

    /** One rule from the list together with the section (ICANN/PRIVATE) it came from. */
    private static final class Domain {
        final String name;
        final byte type;
        Domain(String name, byte type) {
            this.name = name;
            this.type = type;
        }
    }

    /**
     * Entry point: reads the rule list named by {@code args[0]} and writes the
     * per-TLD zip archive to {@code args[1]}.
     *
     * @param args exactly two elements: input_file and output_file
     * @throws IllegalArgumentException if the argument count is not 2
     * @throws Exception if reading the input or writing the archive fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            throw new IllegalArgumentException("2 args required: input_file output_file");
        }
        try (FileInputStream fis = new FileInputStream(args[0]);
             ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(args[1])))
        {
            // The reader and writer wrap streams owned by the try-with-resources
            // statement above, so they are not closed separately.
            BufferedReader br =
                new BufferedReader(new InputStreamReader(fis, "UTF-8"));

            // have a list of rules now
            List<Domain> domains = readDomains(br);

            // Map of TLD names to rules with the same TLD
            Map<String, List<Domain>> rules = addDomains(domains);

            // stream for writing the file contents
            BufferedWriter bw =
                new BufferedWriter(new OutputStreamWriter(zos, "UTF-8"));

            // now output each map entry to its own file,
            // whose filename is the TLD
            writeRules(zos, bw, rules);
        }
    }

    /**
     * Parses the rule list, dropping comments and blank lines.
     *
     * @param br reader positioned at the start of the rule list
     * @return the rules in file order, each tagged ICANN or PRIVATE
     * @throws IOException if reading fails
     */
    private static List<Domain> readDomains(BufferedReader br) throws IOException {
        List<Domain> domains = new LinkedList<>();
        byte type = ICANN;
        String line;
        while ((line = br.readLine()) != null) {
            if (line.startsWith(COMMENT)) {
                // everything after this marker belongs to the private section
                if (line.startsWith(BEGIN_PRIVATE)) {
                    type = PRIVATE;
                }
                continue;
            }
            // A line is only significant up to its first whitespace character,
            // so trailing blanks or notes must not become part of the rule
            // (and, through getTLD, part of a zip entry name). With limit 2 the
            // first array element is always the text before the first match.
            String rule = WHITESPACE.split(line, 2)[0];
            if (rule.isEmpty()) {
                // blank line, whitespace-only line, or line starting with whitespace
                continue;
            }
            domains.add(new Domain(rule, type));
        }
        return domains;
    }

    /**
     * Groups rules by their top-level domain, preserving the input order
     * within each group.
     *
     * @param domains rules to group
     * @return map from TLD to the rules ending in that TLD
     */
    private static Map<String, List<Domain>> addDomains(List<Domain> domains) {
        Map<String, List<Domain>> rules = new HashMap<>();
        for (Domain domain : domains) {
            rules.computeIfAbsent(getTLD(domain.name), k -> new LinkedList<>())
                 .add(domain);
        }
        return rules;
    }

    /**
     * Writes one zip entry per TLD, in sorted TLD order, each containing the
     * rules of that TLD.
     *
     * @param zos   archive receiving the entries
     * @param bw    writer layered on top of {@code zos}
     * @param rules map from TLD to its rules
     * @throws IOException if writing fails
     */
    private static void writeRules(ZipOutputStream zos, BufferedWriter bw,
                                   Map<String, List<Domain>> rules)
                                   throws IOException {
        // Sort keys for deterministic output
        List<String> tlds = rules.keySet().stream().sorted().collect(Collectors.toList());
        for (String tld : tlds) {
            List<Domain> entries = rules.get(tld);
            ZipEntry ze = new ZipEntry(tld);
            // fixed timestamp so the archive does not depend on build time
            ze.setLastModifiedTime(FileTime.fromMillis(0));
            zos.putNextEntry(ze);
            for (Domain entry : entries) {
                bw.write(entry.type);
                bw.write(entry.name, 0, entry.name.length());
                // '\n' rather than newLine(): newLine() emits the line.separator
                // system property, which would make the archive bytes differ
                // between build platforms.
                bw.write('\n');
            }
            // push buffered characters into the current entry before the next
            // putNextEntry() call starts a new one
            bw.flush();
        }
    }

    /**
     * Returns the text after the last '.' of a rule, or the whole rule when
     * it contains no '.'.
     */
    private static String getTLD(String line) {
        int dotIndex = line.lastIndexOf('.');
        return (dotIndex == -1) ? line : line.substring(dotIndex + 1);
    }
}