Autopsy  4.20.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
WebCategoriesDataModel.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2021 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.url.analytics.domaincategorization;
20 
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonGetter;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
50 
55 class WebCategoriesDataModel implements AutoCloseable {
56 
60  @JsonIgnoreProperties(ignoreUnknown = true)
61  static class CustomCategorizationJsonDto {
62 
63  private final String category;
64  private final List<String> domains;
65 
72  @JsonCreator
73  CustomCategorizationJsonDto(
74  @JsonProperty("category") String category,
75  @JsonProperty("domains") List<String> domains) {
76  this.category = category;
77  this.domains = domains == null
78  ? Collections.emptyList()
79  : new ArrayList<>(domains);
80  }
81 
87  @JsonGetter("category")
88  String getCategory() {
89  return category;
90  }
91 
97  @JsonGetter("domains")
98  List<String> getDomains() {
99  return domains;
100  }
101  }
102 
103  private static final int MAX_CAT_SIZE = 300;
104  private static final int MAX_DOMAIN_SIZE = 255;
105 
106  private static final String ROOT_FOLDER = "DomainCategorization";
107  private static final String FILE_REL_PATH = "custom_list.db";
108  private static final String JDBC_SQLITE_PREFIX = "jdbc:sqlite:";
109  private static final String TABLE_NAME = "domain_suffix";
110  private static final String SUFFIX_COLUMN = "suffix";
111  private static final String CATEGORY_COLUMN = "category";
112 
113  private static final Logger logger = Logger.getLogger(WebCategoriesDataModel.class.getName());
114  private static WebCategoriesDataModel instance;
115 
121  static int getMaxDomainSuffixLength() {
122  return MAX_DOMAIN_SIZE;
123  }
124 
130  static int getMaxCategoryLength() {
131  return MAX_DOMAIN_SIZE;
132  }
133 
139  private static File getDefaultPath() {
140  String configDir = PlatformUtil.getUserConfigDirectory();
141  if (configDir == null || !new File(configDir).exists()) {
142  logger.log(Level.WARNING, "Unable to find UserConfigDirectory");
143  return null;
144  }
145 
146  Path subDirPath = Paths.get(configDir, ROOT_FOLDER);
147  File subDir = subDirPath.toFile();
148  if (!subDir.exists() && !subDir.mkdirs()) {
149  logger.log(Level.WARNING, "There was an issue creating custom domain config at: {0}", subDirPath.toString());
150  }
151 
152  return Paths.get(configDir, ROOT_FOLDER, FILE_REL_PATH).toFile();
153  }
154 
162  static String getNormalizedCategory(String category) {
163  if (category == null) {
164  return "";
165  }
166 
167  String trimmedCategory = category.trim();
168 
169  return trimmedCategory.substring(0, Math.min(trimmedCategory.length(), MAX_CAT_SIZE));
170  }
171 
179  static String getNormalizedSuffix(String domainSuffix) {
180  if (domainSuffix == null) {
181  return "";
182  }
183 
184  String sanitized = Stream.of(domainSuffix.split("\\."))
185  .map(s -> {
186  return s
187  // alphanumeric and hyphen
188  .replaceAll("[^0-9a-zA-Z\\-]", "")
189  // no leading or trailing hyphen
190  .replaceAll("^\\-*(.+?)?\\-*$", "$1");
191  })
192  .filter(StringUtils::isNotEmpty)
193  .collect(Collectors.joining("."));
194 
195  return sanitized.substring(0, Math.min(sanitized.length(), MAX_DOMAIN_SIZE)).toLowerCase();
196  }
197 
203  static WebCategoriesDataModel getInstance() {
204  if (instance == null) {
205  instance = new WebCategoriesDataModel();
206  }
207 
208  return instance;
209  }
210 
211  private final File sqlitePath;
212  private Connection dbConn = null;
213 
217  private WebCategoriesDataModel() {
218  this(getDefaultPath());
219  }
220 
227  WebCategoriesDataModel(File sqlitePath) {
228  this.sqlitePath = sqlitePath;
229  }
230 
236  synchronized void initialize() throws SQLException {
237  String url = JDBC_SQLITE_PREFIX + sqlitePath.getAbsolutePath();
238  if (this.dbConn != null) {
239  this.dbConn.close();
240  this.dbConn = null;
241  }
242 
243  this.dbConn = DriverManager.getConnection(url);
244 
245  // speed up operations by turning off WAL
246  try (Statement turnOffWal = dbConn.createStatement()) {
247  turnOffWal.execute("PRAGMA journal_mode=OFF");
248  }
249 
250  // create table if it doesn't exist
251  try (Statement createDomainsTable = dbConn.createStatement()) {
252  createDomainsTable.execute(
253  " CREATE TABLE IF NOT EXISTS " + TABLE_NAME + " (\n"
254  + " " + SUFFIX_COLUMN + " VARCHAR(" + MAX_DOMAIN_SIZE + ") PRIMARY KEY,\n"
255  + " " + CATEGORY_COLUMN + " VARCHAR(" + MAX_CAT_SIZE + ")\n"
256  + " ) WITHOUT ROWID");
257  }
258  }
259 
265  synchronized boolean isInitialized() {
266  return this.dbConn != null;
267  }
268 
276  List<DomainCategory> getJsonEntries(File jsonInput) throws IOException {
277  if (jsonInput == null) {
278  logger.log(Level.WARNING, "No valid file provided.");
279  return Collections.emptyList();
280  }
281 
282  ObjectMapper mapper = new ObjectMapper();
283  List<CustomCategorizationJsonDto> customCategorizations = mapper.readValue(jsonInput, new TypeReference<List<CustomCategorizationJsonDto>>() {
284  });
285 
286  Stream<CustomCategorizationJsonDto> categoryStream = (customCategorizations != null) ? customCategorizations.stream() : Stream.empty();
287 
288  return categoryStream
289  .filter(c -> c != null && c.getCategory() != null && c.getDomains() != null)
290  .flatMap(c -> c.getDomains().stream()
291  .map(WebCategoriesDataModel::getNormalizedSuffix)
292  .filter(StringUtils::isNotBlank)
293  .map(d -> new DomainCategory(d, getNormalizedCategory(c.getCategory()))))
294  .collect(Collectors.toList());
295 
296  }
297 
305  synchronized void exportToJson(File jsonOutput) throws SQLException, IOException {
306  if (jsonOutput == null) {
307  logger.log(Level.WARNING, "Null file provided.");
308  return;
309  }
310 
311  if (!isInitialized()) {
312  initialize();
313  }
314 
315  // retrieve items from the database
316  List<Pair<String, String>> categoryDomains = new ArrayList<>();
317  try (Statement domainSelect = dbConn.createStatement();
318  ResultSet resultSet = domainSelect.executeQuery(
319  "SELECT " + SUFFIX_COLUMN + ", " + CATEGORY_COLUMN + " FROM " + TABLE_NAME + " ORDER BY " + SUFFIX_COLUMN)) {
320 
321  while (resultSet.next()) {
322  categoryDomains.add(Pair.of(resultSet.getString(CATEGORY_COLUMN), resultSet.getString(SUFFIX_COLUMN)));
323  }
324  }
325 
326  // aggregate data appropriately into CustomCategorizationJsonDto
327  List<CustomCategorizationJsonDto> categories
328  = categoryDomains.stream()
329  .collect(Collectors.toMap(
330  p -> p.getKey(),
331  p -> new ArrayList<>(Arrays.asList(p.getValue())),
332  (p1, p2) -> {
333  p1.addAll(p2);
334  return p1;
335  }
336  ))
337  .entrySet().stream()
338  .map(entry -> new CustomCategorizationJsonDto(entry.getKey(), entry.getValue()))
339  .collect(Collectors.toList());
340 
341  // write to disk
342  ObjectMapper mapper = new ObjectMapper();
343  mapper.writerWithDefaultPrettyPrinter().writeValue(jsonOutput, categories);
344  }
345 
354  synchronized boolean deleteRecord(String domainSuffix) throws SQLException, IllegalArgumentException {
355  if (StringUtils.isBlank(domainSuffix)) {
356  throw new IllegalArgumentException("Expected non-empty domain suffix");
357  }
358 
359  if (!isInitialized()) {
360  initialize();
361  }
362 
363  try (PreparedStatement suffixDelete = dbConn.prepareStatement(
364  "DELETE FROM " + TABLE_NAME + " WHERE LOWER(" + SUFFIX_COLUMN + ") = LOWER(?)", Statement.RETURN_GENERATED_KEYS);) {
365 
366  suffixDelete.setString(1, getNormalizedSuffix(domainSuffix));
367  return suffixDelete.executeUpdate() > 0;
368  }
369  }
370 
380  synchronized boolean insertUpdateSuffix(DomainCategory entry) throws SQLException, IllegalStateException, IllegalArgumentException {
381  if (entry == null || StringUtils.isBlank(getNormalizedCategory(entry.getCategory())) || StringUtils.isBlank(getNormalizedSuffix(entry.getHostSuffix()))) {
382  throw new IllegalArgumentException("Expected non-empty, valid category and domain suffix.");
383  }
384 
385  if (!isInitialized()) {
386  initialize();
387  }
388 
389  try (PreparedStatement insertUpdate = dbConn.prepareStatement(
390  "INSERT OR REPLACE INTO " + TABLE_NAME + "(" + SUFFIX_COLUMN + ", " + CATEGORY_COLUMN + ") VALUES (?, ?)",
391  Statement.RETURN_GENERATED_KEYS)) {
392 
393  insertUpdate.setString(1, getNormalizedSuffix(entry.getHostSuffix()));
394  insertUpdate.setString(2, getNormalizedCategory(entry.getCategory()));
395  return insertUpdate.executeUpdate() > 0;
396  }
397  }
398 
405  synchronized List<DomainCategory> getRecords() throws SQLException {
406  if (!isInitialized()) {
407  initialize();
408  }
409 
410  List<DomainCategory> entries = new ArrayList<>();
411 
412  try (Statement domainSelect = dbConn.createStatement();
413  ResultSet resultSet = domainSelect.executeQuery(
414  "SELECT " + SUFFIX_COLUMN + ", " + CATEGORY_COLUMN + " FROM " + TABLE_NAME + " ORDER BY " + SUFFIX_COLUMN)) {
415 
416  while (resultSet.next()) {
417  entries.add(new DomainCategory(
418  resultSet.getString(SUFFIX_COLUMN),
419  resultSet.getString(CATEGORY_COLUMN)));
420  }
421  }
422  return entries;
423 
424  }
425 
426  private static final String GET_DOMAIN_SUFFIX_QUERY
427  = "SELECT " + SUFFIX_COLUMN + ", " + CATEGORY_COLUMN
428  + " FROM " + TABLE_NAME + " WHERE " + SUFFIX_COLUMN + " = ?";
429 
437  synchronized DomainCategory getRecordBySuffix(String domainSuffix) throws SQLException {
438  if (!isInitialized()) {
439  initialize();
440  }
441 
442  try (PreparedStatement domainSelect = dbConn.prepareStatement(GET_DOMAIN_SUFFIX_QUERY)) {
443  domainSelect.setString(1, domainSuffix);
444 
445  try (ResultSet resultSet = domainSelect.executeQuery()) {
446  if (resultSet.next()) {
447  return new DomainCategory(
448  resultSet.getString(SUFFIX_COLUMN),
449  resultSet.getString(CATEGORY_COLUMN));
450  } else {
451  return null;
452  }
453  }
454  }
455  }
456 
457  // get the suffix and category from the main table and gets the longest matching suffix.
458  private static final String BASE_QUERY_FMT_STR
459  = "SELECT " + SUFFIX_COLUMN + ", " + CATEGORY_COLUMN + " FROM " + TABLE_NAME
460  + " WHERE suffix IN (%s) ORDER BY LENGTH(" + SUFFIX_COLUMN + ") DESC LIMIT 1";
461 
470  synchronized DomainCategory getLongestSuffixRecord(List<String> suffixes) throws SQLException {
471  if (suffixes == null) {
472  return null;
473  }
474 
475  if (!isInitialized()) {
476  initialize();
477  }
478 
479  String questionMarks = IntStream.range(0, suffixes.size())
480  .mapToObj((num) -> "?")
481  .collect(Collectors.joining(","));
482 
483  try (PreparedStatement stmt = dbConn.prepareStatement(String.format(BASE_QUERY_FMT_STR, questionMarks))) {
484  for (int i = 0; i < suffixes.size(); i++) {
485  stmt.setString(i + 1, suffixes.get(i));
486  }
487 
488  try (ResultSet resultSet = stmt.executeQuery()) {
489  if (resultSet.next()) {
490  String suffix = resultSet.getString(SUFFIX_COLUMN);
491  String category = resultSet.getString(CATEGORY_COLUMN);
492  return new DomainCategory(suffix, category);
493  }
494  }
495  }
496 
497  return null;
498  }
499 
508  DomainCategory getMatchingRecord(String host) throws SQLException {
509  return getLongestSuffixRecord(getSuffixes(host));
510  }
511 
520  private List<String> getSuffixes(String host) {
521  if (host == null) {
522  return null;
523  }
524 
525  List<String> hostTokens = Arrays.asList(host.split("\\."));
526  List<String> hostSegmentations = new ArrayList<>();
527 
528  for (int i = 0; i < hostTokens.size(); i++) {
529  String searchString = String.join(".", hostTokens.subList(i, hostTokens.size()));
530  hostSegmentations.add(searchString);
531  }
532 
533  return hostSegmentations;
534  }
535 
536  @Override
537  public synchronized void close() throws SQLException {
538  if (dbConn != null) {
539  dbConn.close();
540  dbConn = null;
541  }
542  }
543 }

Copyright © 2012-2022 Basis Technology. Generated on: Tue Aug 1 2023
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.