50788
|
1 |
/*
 * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
|
|
25 |
|
|
26 |
package build.tools.publicsuffixlist;
|
|
27 |
|
|
28 |
import java.io.BufferedReader;
|
|
29 |
import java.io.BufferedWriter;
|
|
30 |
import java.io.FileInputStream;
|
|
31 |
import java.io.FileOutputStream;
|
|
32 |
import java.io.InputStreamReader;
|
|
33 |
import java.io.IOException;
|
|
34 |
import java.io.OutputStreamWriter;
|
|
35 |
import java.nio.file.attribute.FileTime;
|
|
36 |
import java.util.HashMap;
|
|
37 |
import java.util.LinkedList;
|
|
38 |
import java.util.List;
|
|
39 |
import java.util.Map;
|
|
40 |
import java.util.Set;
|
|
41 |
import java.util.regex.Pattern;
|
|
42 |
import java.util.stream.Collectors;
|
|
43 |
import java.util.zip.ZipEntry;
|
|
44 |
import java.util.zip.ZipOutputStream;
|
|
45 |
|
|
46 |
/**
 * This tool takes the original Mozilla public suffix rule list as input
 * and slices it into a set of files, one for each top-level domain.
 * Each file contains only the rules for that domain. Lines containing comments
 * or only whitespace are not copied. Each of these files is then combined
 * into the target zipfile.
 *
 * <p>Every rule is written on its own line as a single type character
 * ({@code 0x00} for rules read before the
 * {@code // ===BEGIN PRIVATE DOMAINS===} marker, {@code 0x01} for rules read
 * after it) immediately followed by the rule text.
 *
 * Usage: java GeneratePublicSuffixList mozilla_file destination_zipfile
 */
public final class GeneratePublicSuffixList {
    // patterns
    private static final String COMMENT = "//";
    private static final String BEGIN_PRIVATE = "// ===BEGIN PRIVATE DOMAINS===";
    // a rule is only read up to the first whitespace character of its line
    private static final Pattern WHITESPACE = Pattern.compile("\\s");

    // rule type markers written in front of every rule
    private static final byte ICANN = 0x00;
    private static final byte PRIVATE = 0x01;

    /** A single rule together with the section (type) it was read from. */
    private static class Domain {
        final String name;
        final byte type;

        Domain(String name, byte type) {
            this.name = name;
            this.type = type;
        }
    }

    /**
     * Entry point: reads the rule list named by {@code args[0]} and writes
     * the per-TLD zip file named by {@code args[1]}.
     *
     * @param args exactly two elements: input file and output zip file
     * @throws IllegalArgumentException if {@code args} does not have two elements
     * @throws Exception if reading the input or writing the zip file fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            throw new IllegalArgumentException("2 args required: input_file output_file");
        }

        // Parse the whole input before the output file is opened, so a
        // read failure does not leave a truncated zip file behind.
        List<Domain> domains;
        try (FileInputStream fis = new FileInputStream(args[0]);
             BufferedReader br =
                 new BufferedReader(new InputStreamReader(fis, "UTF-8")))
        {
            domains = readDomains(br);
        }

        // Map of TLD names to rules with the same TLD
        Map<String, List<Domain>> rules = addDomains(domains);

        try (ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(args[1]))) {
            // Writer for the entry contents. It is flushed by writeRules()
            // after every entry, and closing zos finishes the archive.
            BufferedWriter bw =
                new BufferedWriter(new OutputStreamWriter(zos, "UTF-8"));

            // now output each map entry to its own file,
            // whose filename is the TLD
            writeRules(zos, bw, rules);
        }
    }

    /**
     * Reads all rules from the given reader, in input order.
     *
     * <p>Lines starting with {@code //} are comments and are skipped. The
     * {@code // ===BEGIN PRIVATE DOMAINS===} comment switches the type of
     * all following rules to {@code PRIVATE}. Each remaining line is cut at
     * its first whitespace character, and lines that are empty after the cut
     * (blank, whitespace-only or indented lines) are skipped.
     *
     * @param br reader positioned at the start of the rule list
     * @return the rules that were read, each tagged with its type
     * @throws IOException if reading fails
     */
    private static List<Domain> readDomains(BufferedReader br) throws IOException {
        List<Domain> domains = new LinkedList<>();
        byte type = ICANN;
        String line;
        while ((line = br.readLine()) != null) {
            if (line.startsWith(COMMENT)) {
                if (line.startsWith(BEGIN_PRIVATE)) {
                    type = PRIVATE;
                }
                continue;
            }
            // With a limit of 2 the first element is everything in front of
            // the first whitespace character (or the whole line if none).
            String rule = WHITESPACE.split(line, 2)[0];
            if (rule.isEmpty()) {
                continue;
            }
            domains.add(new Domain(rule, type));
        }
        return domains;
    }

    /**
     * Groups the rules by their TLD, keeping the input order within a group.
     *
     * @param domains the rules to group
     * @return map from TLD to the rules ending in that TLD
     */
    private static Map<String, List<Domain>> addDomains(List<Domain> domains) {
        Map<String, List<Domain>> rules = new HashMap<>();
        for (Domain domain : domains) {
            rules.computeIfAbsent(getTLD(domain.name), k -> new LinkedList<>())
                 .add(domain);
        }
        return rules;
    }

    /**
     * Writes one zip entry per TLD, named after the TLD, containing that
     * TLD's rules.
     *
     * @param zos   the zip stream that receives the entries
     * @param bw    a writer layered on top of {@code zos}
     * @param rules map from TLD to its rules
     * @throws IOException if writing fails
     */
    private static void writeRules(ZipOutputStream zos, BufferedWriter bw,
                                   Map<String, List<Domain>> rules)
            throws IOException {
        // Sort keys for deterministic output
        List<String> tlds = rules.keySet().stream().sorted().collect(Collectors.toList());
        for (String tld : tlds) {
            ZipEntry ze = new ZipEntry(tld);
            // fixed timestamp, so the entry metadata does not vary between runs
            ze.setLastModifiedTime(FileTime.fromMillis(0));
            zos.putNextEntry(ze);
            for (Domain entry : rules.get(tld)) {
                // write(int) emits the type as a single character
                bw.write(entry.type);
                bw.write(entry.name, 0, entry.name.length());
                bw.newLine();
            }
            // push buffered characters into the current entry before it ends
            bw.flush();
            zos.closeEntry();
        }
    }

    /**
     * Returns the text after the last {@code '.'} of the rule, or the whole
     * rule if it contains no {@code '.'}.
     */
    private static String getTLD(String line) {
        int dotIndex = line.lastIndexOf('.');
        return (dotIndex == -1) ? line : line.substring(dotIndex + 1);
    }
}
|