Autopsy  4.20.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
DefaultPriorityDomainCategorizer.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2021 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.recentactivity;
20 
21 import java.io.IOException;
22 import java.util.Arrays;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.stream.Collectors;
26 import java.util.stream.Stream;
27 import org.apache.commons.lang.StringUtils;
28 import org.openide.util.NbBundle.Messages;
32 
37 @Messages({
38  "DefaultPriorityDomainCategorizer_searchEngineCategory=Search Engine"
39 })
41 
42  // taken from https://www.google.com/supported_domains
 // Google search domains. These entries are merged into DOMAIN_LOOKUP, where
 // getCategory matches them exactly against the lower-cased host after a
 // leading "www" label has been dropped.
43  private static final List<String> GOOGLE_DOMAINS = Arrays.asList("google.com", "google.ad", "google.ae", "google.com.af", "google.com.ag", "google.com.ai", "google.al", "google.am", "google.co.ao", "google.com.ar", "google.as", "google.at", "google.com.au", "google.az", "google.ba", "google.com.bd", "google.be", "google.bf", "google.bg", "google.com.bh", "google.bi", "google.bj", "google.com.bn", "google.com.bo", "google.com.br", "google.bs", "google.bt", "google.co.bw", "google.by", "google.com.bz", "google.ca", "google.cd", "google.cf", "google.cg", "google.ch", "google.ci", "google.co.ck", "google.cl", "google.cm", "google.cn", "google.com.co", "google.co.cr", "google.com.cu", "google.cv", "google.com.cy", "google.cz", "google.de", "google.dj", "google.dk", "google.dm", "google.com.do", "google.dz", "google.com.ec", "google.ee", "google.com.eg", "google.es", "google.com.et", "google.fi", "google.com.fj", "google.fm", "google.fr", "google.ga", "google.ge", "google.gg", "google.com.gh", "google.com.gi", "google.gl", "google.gm", "google.gr", "google.com.gt", "google.gy", "google.com.hk", "google.hn", "google.hr", "google.ht", "google.hu", "google.co.id", "google.ie", "google.co.il", "google.im", "google.co.in", "google.iq", "google.is", "google.it", "google.je", "google.com.jm", "google.jo", "google.co.jp", "google.co.ke", "google.com.kh", "google.ki", "google.kg", "google.co.kr", "google.com.kw", "google.kz", "google.la", "google.com.lb", "google.li", "google.lk", "google.co.ls", "google.lt", "google.lu", "google.lv", "google.com.ly", "google.co.ma", "google.md", "google.me", "google.mg", "google.mk", "google.ml", "google.com.mm", "google.mn", "google.ms", "google.com.mt", "google.mu", "google.mv", "google.mw", "google.com.mx", "google.com.my", "google.co.mz", "google.com.na", "google.com.ng", "google.com.ni", "google.ne", "google.nl", "google.no", "google.com.np", "google.nr", "google.nu", "google.co.nz", "google.com.om", "google.com.pa", "google.com.pe", 
"google.com.pg", "google.com.ph", "google.com.pk", "google.pl", "google.pn", "google.com.pr", "google.ps", "google.pt", "google.com.py", "google.com.qa", "google.ro", "google.ru", "google.rw", "google.com.sa", "google.com.sb", "google.sc", "google.se", "google.com.sg", "google.sh", "google.si", "google.sk", "google.com.sl", "google.sn", "google.so", "google.sm", "google.sr", "google.st", "google.com.sv", "google.td", "google.tg", "google.co.th", "google.com.tj", "google.tl", "google.tm", "google.tn", "google.to", "google.com.tr", "google.tt", "google.com.tw", "google.co.tz", "google.com.ua", "google.co.ug", "google.co.uk", "google.com.uy", "google.co.uz", "google.com.vc", "google.co.ve", "google.vg", "google.co.vi", "google.com.vn", "google.vu", "google.ws", "google.rs", "google.co.za", "google.co.zm", "google.co.zw", "google.cat");
44 
45  // taken from https://www.yahoo.com/everything/world
46  private static final List<String> YAHOO_DOMAINS = Arrays.asList("espanol.yahoo.com", "au.yahoo.com", "be.yahoo.com", "fr-be.yahoo.com", "br.yahoo.com", "ca.yahoo.com", "espanol.yahoo.com", "espanol.yahoo.com", "de.yahoo.com", "es.yahoo.com", "espanol.yahoo.com", "fr.yahoo.com", "in.yahoo.com", "id.yahoo.com", "ie.yahoo.com", "it.yahoo.com", "en-maktoob.yahoo.com", "malaysia.yahoo.com", "espanol.yahoo.com", "nz.yahoo.com", "espanol.yahoo.com", "ph.yahoo.com", "qc.yahoo.com", "ro.yahoo.com", "sg.yahoo.com", "za.yahoo.com", "se.yahoo.com", "uk.yahoo.com", "yahoo.com", "espanol.yahoo.com", "vn.yahoo.com", "gr.yahoo.com", "maktoob.yahoo.com", "yahoo.com", "hk.yahoo.com", "tw.yahoo.com", "yahoo.co.jp");
47 
48  private static final List<String> OTHER_SEARCH_ENGINES = Arrays.asList(
49  "bing.com",
50  "baidu.com",
51  "sogou.com",
52  "soso.com",
53  "duckduckgo.com",
54  "swisscows.com",
55  "gibiru.com",
56  "cutestat.com",
57  "youdao.com",
58  "biglobe.ne.jp",
59  "givewater.com",
60  "ekoru.org",
61  "ecosia.org",
62  // according to https://en.wikipedia.org/wiki/Yandex
63  "yandex.ru",
64  "yandex.com"
65  );
66 
67  private static final String WWW_PREFIX = "www";
68 
69  private static final Map<String, String> DOMAIN_LOOKUP
70  = Stream.of(GOOGLE_DOMAINS, YAHOO_DOMAINS, OTHER_SEARCH_ENGINES)
71  .flatMap((lst) -> lst.stream())
72  .collect(Collectors.toMap((k) -> k, (k) -> Bundle.DefaultPriorityDomainCategorizer_searchEngineCategory(), (v1, v2) -> v1));
73 
74  @Override
75  public void initialize() throws DomainCategorizerException {
76  }
77 
78  @Override
79  public DomainCategory getCategory(String domain, String host) throws DomainCategorizerException {
80 
81  String hostToUse = StringUtils.isBlank(host) ? domain : host;
82 
83  if (StringUtils.isBlank(hostToUse)) {
84  return null;
85  }
86 
87  List<String> domainWords = Stream.of(hostToUse.toLowerCase().split("\\."))
88  .filter(StringUtils::isNotBlank)
89  .map(String::trim)
90  .collect(Collectors.toList());
91 
92  String sanitizedDomain = domainWords.stream()
93  // skip first word segment if 'www'
94  .skip(domainWords.size() > 0 && WWW_PREFIX.equals(domainWords.get(0)) ? 1 : 0)
95  .collect(Collectors.joining("."));
96 
97  String category = DOMAIN_LOOKUP.get(sanitizedDomain);
98  return category == null ? null : new DomainCategory(sanitizedDomain, category);
99  }
100 
101  @Override
102  public void close() throws IOException {
103  }
104 }

Copyright © 2012-2022 Basis Technology. Generated on: Tue Aug 1 2023
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.