19 package org.sleuthkit.autopsy.url.analytics.domaincategorization;
21 import com.fasterxml.jackson.annotation.JsonCreator;
22 import com.fasterxml.jackson.annotation.JsonGetter;
23 import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
24 import com.fasterxml.jackson.annotation.JsonProperty;
25 import com.fasterxml.jackson.core.type.TypeReference;
26 import com.fasterxml.jackson.databind.ObjectMapper;
28 import java.io.IOException;
29 import java.nio.file.Path;
30 import java.nio.file.Paths;
31 import java.sql.Connection;
32 import java.sql.DriverManager;
33 import java.sql.PreparedStatement;
34 import java.sql.ResultSet;
35 import java.sql.SQLException;
36 import java.sql.Statement;
37 import java.util.ArrayList;
38 import java.util.Arrays;
39 import java.util.Collections;
40 import java.util.List;
41 import java.util.logging.Level;
42 import java.util.logging.Logger;
43 import java.util.stream.Collectors;
44 import java.util.stream.IntStream;
45 import java.util.stream.Stream;
46 import org.apache.commons.lang3.StringUtils;
47 import org.apache.commons.lang3.tuple.Pair;
55 class WebCategoriesDataModel
implements AutoCloseable {
60 @JsonIgnoreProperties(ignoreUnknown =
true)
61 static class CustomCategorizationJsonDto {
63 private final String category;
64 private final List<String> domains;
73 CustomCategorizationJsonDto(
74 @JsonProperty(
"category") String category,
75 @JsonProperty(
"domains") List<String> domains) {
76 this.category = category;
77 this.domains = domains == null
78 ? Collections.emptyList()
79 :
new ArrayList<>(domains);
87 @JsonGetter(
"category")
88 String getCategory() {
97 @JsonGetter(
"domains")
98 List<String> getDomains() {
103 private static final int MAX_CAT_SIZE = 300;
104 private static final int MAX_DOMAIN_SIZE = 255;
106 private static final String ROOT_FOLDER =
"DomainCategorization";
107 private static final String FILE_REL_PATH =
"custom_list.db";
108 private static final String JDBC_SQLITE_PREFIX =
"jdbc:sqlite:";
109 private static final String TABLE_NAME =
"domain_suffix";
110 private static final String SUFFIX_COLUMN =
"suffix";
111 private static final String CATEGORY_COLUMN =
"category";
113 private static final Logger logger = Logger.getLogger(WebCategoriesDataModel.class.getName());
114 private static WebCategoriesDataModel instance;
121 static int getMaxDomainSuffixLength() {
122 return MAX_DOMAIN_SIZE;
130 static int getMaxCategoryLength() {
131 return MAX_DOMAIN_SIZE;
139 private static File getDefaultPath() {
140 String configDir = PlatformUtil.getUserConfigDirectory();
141 if (configDir == null || !
new File(configDir).exists()) {
142 logger.log(Level.WARNING,
"Unable to find UserConfigDirectory");
146 Path subDirPath = Paths.get(configDir, ROOT_FOLDER);
147 File subDir = subDirPath.toFile();
148 if (!subDir.exists() && !subDir.mkdirs()) {
149 logger.log(Level.WARNING,
"There was an issue creating custom domain config at: {0}", subDirPath.toString());
152 return Paths.get(configDir, ROOT_FOLDER, FILE_REL_PATH).toFile();
162 static String getNormalizedCategory(String category) {
163 if (category == null) {
167 String trimmedCategory = category.trim();
169 return trimmedCategory.substring(0, Math.min(trimmedCategory.length(), MAX_CAT_SIZE));
179 static String getNormalizedSuffix(String domainSuffix) {
180 if (domainSuffix == null) {
184 String sanitized = Stream.of(domainSuffix.split(
"\\."))
188 .replaceAll(
"[^0-9a-zA-Z\\-]",
"")
190 .replaceAll(
"^\\-*(.+?)?\\-*$",
"$1");
192 .filter(StringUtils::isNotEmpty)
193 .collect(Collectors.joining(
"."));
195 return sanitized.substring(0, Math.min(sanitized.length(), MAX_DOMAIN_SIZE)).toLowerCase();
203 static WebCategoriesDataModel getInstance() {
204 if (instance == null) {
205 instance =
new WebCategoriesDataModel();
211 private final File sqlitePath;
212 private Connection dbConn = null;
217 private WebCategoriesDataModel() {
218 this(getDefaultPath());
227 WebCategoriesDataModel(File sqlitePath) {
228 this.sqlitePath = sqlitePath;
236 synchronized void initialize() throws SQLException {
237 String url = JDBC_SQLITE_PREFIX + sqlitePath.getAbsolutePath();
238 if (this.dbConn != null) {
243 this.dbConn = DriverManager.getConnection(url);
246 try (Statement turnOffWal = dbConn.createStatement()) {
247 turnOffWal.execute(
"PRAGMA journal_mode=OFF");
251 try (Statement createDomainsTable = dbConn.createStatement()) {
252 createDomainsTable.execute(
253 " CREATE TABLE IF NOT EXISTS " + TABLE_NAME +
" (\n"
254 +
" " + SUFFIX_COLUMN +
" VARCHAR(" + MAX_DOMAIN_SIZE +
") PRIMARY KEY,\n"
255 +
" " + CATEGORY_COLUMN +
" VARCHAR(" + MAX_CAT_SIZE +
")\n"
256 +
" ) WITHOUT ROWID");
265 synchronized boolean isInitialized() {
266 return this.dbConn != null;
276 List<DomainCategory> getJsonEntries(File jsonInput)
throws IOException {
277 if (jsonInput == null) {
278 logger.log(Level.WARNING,
"No valid file provided.");
279 return Collections.emptyList();
282 ObjectMapper mapper =
new ObjectMapper();
283 List<CustomCategorizationJsonDto> customCategorizations = mapper.readValue(jsonInput,
new TypeReference<List<CustomCategorizationJsonDto>>() {
286 Stream<CustomCategorizationJsonDto> categoryStream = (customCategorizations != null) ? customCategorizations.stream() : Stream.empty();
288 return categoryStream
289 .filter(c -> c != null && c.getCategory() != null && c.getDomains() != null)
290 .flatMap(c -> c.getDomains().stream()
291 .map(WebCategoriesDataModel::getNormalizedSuffix)
292 .filter(StringUtils::isNotBlank)
293 .map(d ->
new DomainCategory(d, getNormalizedCategory(c.getCategory()))))
294 .collect(Collectors.toList());
305 synchronized void exportToJson(File jsonOutput)
throws SQLException, IOException {
306 if (jsonOutput == null) {
307 logger.log(Level.WARNING,
"Null file provided.");
311 if (!isInitialized()) {
316 List<Pair<String, String>> categoryDomains =
new ArrayList<>();
317 try (Statement domainSelect = dbConn.createStatement();
318 ResultSet resultSet = domainSelect.executeQuery(
319 "SELECT " + SUFFIX_COLUMN +
", " + CATEGORY_COLUMN +
" FROM " + TABLE_NAME +
" ORDER BY " + SUFFIX_COLUMN)) {
321 while (resultSet.next()) {
322 categoryDomains.add(Pair.of(resultSet.getString(CATEGORY_COLUMN), resultSet.getString(SUFFIX_COLUMN)));
327 List<CustomCategorizationJsonDto> categories
328 = categoryDomains.stream()
329 .collect(Collectors.toMap(
331 p ->
new ArrayList<>(Arrays.asList(p.getValue())),
338 .map(entry ->
new CustomCategorizationJsonDto(entry.getKey(), entry.getValue()))
339 .collect(Collectors.toList());
342 ObjectMapper mapper =
new ObjectMapper();
343 mapper.writerWithDefaultPrettyPrinter().writeValue(jsonOutput, categories);
354 synchronized boolean deleteRecord(String domainSuffix)
throws SQLException, IllegalArgumentException {
355 if (StringUtils.isBlank(domainSuffix)) {
356 throw new IllegalArgumentException(
"Expected non-empty domain suffix");
359 if (!isInitialized()) {
363 try (PreparedStatement suffixDelete = dbConn.prepareStatement(
364 "DELETE FROM " + TABLE_NAME +
" WHERE LOWER(" + SUFFIX_COLUMN +
") = LOWER(?)", Statement.RETURN_GENERATED_KEYS);) {
366 suffixDelete.setString(1, getNormalizedSuffix(domainSuffix));
367 return suffixDelete.executeUpdate() > 0;
380 synchronized boolean insertUpdateSuffix(DomainCategory entry)
throws SQLException, IllegalStateException, IllegalArgumentException {
381 if (entry == null || StringUtils.isBlank(getNormalizedCategory(entry.getCategory())) || StringUtils.isBlank(getNormalizedSuffix(entry.getHostSuffix()))) {
382 throw new IllegalArgumentException(
"Expected non-empty, valid category and domain suffix.");
385 if (!isInitialized()) {
389 try (PreparedStatement insertUpdate = dbConn.prepareStatement(
390 "INSERT OR REPLACE INTO " + TABLE_NAME +
"(" + SUFFIX_COLUMN +
", " + CATEGORY_COLUMN +
") VALUES (?, ?)",
391 Statement.RETURN_GENERATED_KEYS)) {
393 insertUpdate.setString(1, getNormalizedSuffix(entry.getHostSuffix()));
394 insertUpdate.setString(2, getNormalizedCategory(entry.getCategory()));
395 return insertUpdate.executeUpdate() > 0;
405 synchronized List<DomainCategory> getRecords() throws SQLException {
406 if (!isInitialized()) {
410 List<DomainCategory> entries =
new ArrayList<>();
412 try (Statement domainSelect = dbConn.createStatement();
413 ResultSet resultSet = domainSelect.executeQuery(
414 "SELECT " + SUFFIX_COLUMN +
", " + CATEGORY_COLUMN +
" FROM " + TABLE_NAME +
" ORDER BY " + SUFFIX_COLUMN)) {
416 while (resultSet.next()) {
417 entries.add(
new DomainCategory(
418 resultSet.getString(SUFFIX_COLUMN),
419 resultSet.getString(CATEGORY_COLUMN)));
426 private static final String GET_DOMAIN_SUFFIX_QUERY
427 =
"SELECT " + SUFFIX_COLUMN +
", " + CATEGORY_COLUMN
428 +
" FROM " + TABLE_NAME +
" WHERE " + SUFFIX_COLUMN +
" = ?";
437 synchronized DomainCategory getRecordBySuffix(String domainSuffix)
throws SQLException {
438 if (!isInitialized()) {
442 try (PreparedStatement domainSelect = dbConn.prepareStatement(GET_DOMAIN_SUFFIX_QUERY)) {
443 domainSelect.setString(1, domainSuffix);
445 try (ResultSet resultSet = domainSelect.executeQuery()) {
446 if (resultSet.next()) {
447 return new DomainCategory(
448 resultSet.getString(SUFFIX_COLUMN),
449 resultSet.getString(CATEGORY_COLUMN));
458 private static final String BASE_QUERY_FMT_STR
459 =
"SELECT " + SUFFIX_COLUMN +
", " + CATEGORY_COLUMN +
" FROM " + TABLE_NAME
460 +
" WHERE suffix IN (%s) ORDER BY LENGTH(" + SUFFIX_COLUMN +
") DESC LIMIT 1";
470 synchronized DomainCategory getLongestSuffixRecord(List<String> suffixes)
throws SQLException {
471 if (suffixes == null) {
475 if (!isInitialized()) {
479 String questionMarks = IntStream.range(0, suffixes.size())
480 .mapToObj((num) ->
"?")
481 .collect(Collectors.joining(
","));
483 try (PreparedStatement stmt = dbConn.prepareStatement(String.format(BASE_QUERY_FMT_STR, questionMarks))) {
484 for (
int i = 0; i < suffixes.size(); i++) {
485 stmt.setString(i + 1, suffixes.get(i));
488 try (ResultSet resultSet = stmt.executeQuery()) {
489 if (resultSet.next()) {
490 String suffix = resultSet.getString(SUFFIX_COLUMN);
491 String category = resultSet.getString(CATEGORY_COLUMN);
492 return new DomainCategory(suffix, category);
508 DomainCategory getMatchingRecord(String host)
throws SQLException {
509 return getLongestSuffixRecord(getSuffixes(host));
520 private List<String> getSuffixes(String host) {
525 List<String> hostTokens = Arrays.asList(host.split(
"\\."));
526 List<String> hostSegmentations =
new ArrayList<>();
528 for (
int i = 0; i < hostTokens.size(); i++) {
529 String searchString = String.join(
".", hostTokens.subList(i, hostTokens.size()));
530 hostSegmentations.add(searchString);
533 return hostSegmentations;
537 public synchronized void close() throws SQLException {
538 if (dbConn != null) {