19 package org.sleuthkit.autopsy.coreutils;
21 import java.io.BufferedReader;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.io.InputStreamReader;
25 import java.nio.charset.StandardCharsets;
26 import java.util.HashMap;
27 import java.util.List;
28 import java.util.stream.Collectors;
29 import java.util.stream.Stream;
30 import org.apache.commons.lang3.StringUtils;
/**
 * Package-private tokenizer that matches domain names against a trie of
 * suffix rules read from the {@code public_suffix_list.dat} class resource.
 * Instances are obtained through {@code getInstance()}; {@code getDomain}
 * performs the lookup.
 */
36 class DomainTokenizer {
/**
 * A node of the suffix trie. Each node is itself a map from a single domain
 * token (one dot-separated label) to the child node for that token.
 */
43 private static class DomainCategory extends HashMap<String, DomainCategory> {
// Register the node 'cat' as this node's child under 'childKey'.
// NOTE(review): the enclosing method header and the creation of 'cat' are
// not visible in this excerpt.
49 this.put(childKey, cat);
// Separator placed between tokens when a domain is re-assembled with String.join.
57 private static final String JOINER =
".";
// Regex form of the separator (an escaped '.'), used with String.split.
59 private static final String DELIMITER =
"\\" + JOINER;
// Trie key consulted by getDomain when no child exists for the exact token.
61 private static final String WILDCARD =
"*";
// Prefix prepended to a token when getDomain looks for an exception entry in the trie.
62 private static final String EXCEPTION_PREFIX =
"!";
// Name of the class resource holding the suffix rules; it is resolved relative
// to DomainTokenizer.class via getResourceAsStream.
67 private static final String DOMAIN_LIST =
"public_suffix_list.dat";
// Lines of the suffix list whose trimmed text starts with this token are not added to the trie.
70 private static final String COMMENT_TOKEN =
"//";
// Cached instance handed out by getInstance(); null until one has been stored.
73 private static DomainTokenizer categorizer = null;
/**
 * Returns the DomainTokenizer held in the static {@code categorizer} field.
 *
 * NOTE(review): the body of the null branch is not visible in this excerpt;
 * presumably it populates {@code categorizer} from the suffix list - confirm.
 * NOTE(review): the method is not declared synchronized and the field is not
 * volatile; confirm whether concurrent first calls are possible.
 *
 * @return the tokenizer instance
 * @throws IOException declared by the signature; presumably raised when the
 *                     suffix list cannot be read - confirm
 */
81 static DomainTokenizer getInstance() throws IOException {
// Only the first call (no cached instance yet) enters this branch.
82 if (categorizer == null) {
/**
 * Builds a new DomainTokenizer from the suffix-list resource: the resource is
 * read line by line as UTF-8 and every line that is neither blank nor a
 * comment is passed to {@code addDomainSuffix}.
 *
 * NOTE(review): the end of the method (including its return statement) is
 * not visible in this excerpt.
 *
 * @throws IOException if reading the resource fails
 */
95 private static DomainTokenizer load() throws IOException {
// NOTE(review): getResourceAsStream returns null when the resource is missing;
// 'is' is handed straight to InputStreamReader with no null check in between.
96 try (InputStream is = DomainTokenizer.class.getResourceAsStream(DOMAIN_LIST);
97 InputStreamReader isReader =
new InputStreamReader(is, StandardCharsets.UTF_8);
98 BufferedReader reader =
new BufferedReader(isReader)) {
// Local instance being populated; the name is the same as the static field 'categorizer'.
100 DomainTokenizer categorizer =
new DomainTokenizer();
// NOTE(review): ready() reports whether a read would block, not whether the
// end of input was reached; readLine() returning null is the usual end test.
101 while (reader.ready()) {
102 String line = reader.readLine();
103 String trimmed = line.trim();
// Skip blank lines and lines that start with the comment token.
104 if (!StringUtils.isBlank(trimmed) && !trimmed.startsWith(COMMENT_TOKEN)) {
105 categorizer.addDomainSuffix(trimmed);
/**
 * Private constructor: instances can only be created from inside this class
 * (load() creates one with {@code new DomainTokenizer()}).
 */
113 private DomainTokenizer() {
// Root node of the suffix trie; addDomainSuffix inserts below it and
// getDomain starts its lookup from it.
117 private final DomainCategory trie =
new DomainCategory();
/**
 * Inserts one suffix rule into the trie.
 *
 * The suffix is lower-cased, trimmed and split on '.'; its tokens are then
 * walked from last to first, so the right-most label sits directly under the
 * trie root and each label to its left is one level deeper.
 *
 * NOTE(review): the bodies of the two blank checks are not visible in this
 * excerpt; presumably a blank suffix returns early and a blank token is
 * skipped - confirm.
 *
 * @param domainSuffix the suffix rule to add
 */
125 private void addDomainSuffix(String domainSuffix) {
126 if (StringUtils.isBlank(domainSuffix)) {
130 String[] tokens = domainSuffix.toLowerCase().trim().split(DELIMITER);
// Start at the root and descend one level per token.
132 DomainCategory cat = trie;
// Iterate right-to-left: the last label of the suffix is processed first.
133 for (
int i = tokens.length - 1; i >= 0; i--) {
134 String token = tokens[i];
135 if (StringUtils.isBlank(token)) {
// Move to the child for this token via getOrAddChild.
139 cat = cat.getOrAddChild(tokens[i]);
/**
 * Matches a domain name against the suffix trie and returns a trimmed form.
 *
 * The input is lower-cased, split on '.', and blank tokens are dropped. The
 * remaining tokens are matched right-to-left against the trie: for each
 * token an exception entry (EXCEPTION_PREFIX + token) is checked first, then
 * the exact token, then the WILDCARD entry.
 *
 * NOTE(review): the bodies of several branches (blank input, exception hit,
 * no match at all, and "idx is still the last token") are not visible in
 * this excerpt, so their results are not documented here.
 *
 * @param domain the domain or host name to process
 * @return on the final path shown, the tokens from index max(0, idx) through
 *         the last token, joined with '.'
 */
153 String getDomain(String domain) {
154 if (StringUtils.isBlank(domain)) {
// Lower-case, split on '.', and discard blank tokens.
158 List<String> tokens = Stream.of(domain.toLowerCase().split(DELIMITER))
159 .filter(StringUtils::isNotBlank)
160 .collect(Collectors.toList());
// idx walks from the last token towards the first.
162 int idx = tokens.size() - 1;
163 DomainCategory cat = trie;
165 for (; idx >= 0; idx--) {
// Is there an exception entry ("!" + token) under the current node?
169 if (cat.get(EXCEPTION_PREFIX + tokens.get(idx)) != null) {
// Look for a child keyed by the exact token.
173 DomainCategory newCat = cat.get(tokens.get(idx));
176 if (newCat == null) {
// No exact child: fall back to the wildcard entry, if any.
179 newCat = cat.get(WILDCARD);
180 if (newCat == null) {
// True when idx never moved off the last token.
189 if (idx == tokens.size() - 1) {
// Clamp idx to 0 in case the loop ran past the first token (idx == -1).
192 int minIndex = Math.max(0, idx);
193 List<String> subList = tokens.subList(minIndex, tokens.size());
194 return String.join(JOINER, subList);
// NOTE(review): this signature appears detached from its body in this
// excerpt. addDomainSuffix invokes getOrAddChild on a DomainCategory node
// and assigns the returned node; presumably the method belongs to
// DomainCategory and returns the child stored under childKey - confirm.
DomainCategory getOrAddChild(String childKey)