19 package org.sleuthkit.autopsy.recentactivity;
21 import java.net.MalformedURLException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.Collection;
26 import java.util.Collections;
27 import java.util.Comparator;
28 import java.util.HashSet;
29 import java.util.List;
31 import java.util.logging.Level;
33 import java.util.regex.Matcher;
34 import java.util.regex.Pattern;
35 import java.util.stream.Collectors;
36 import java.util.stream.Stream;
37 import org.apache.commons.lang.StringUtils;
38 import org.openide.util.Lookup;
39 import org.openide.util.NbBundle.Messages;
47 import org.
sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
49 import org.
sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
62 "DomainCategoryRunner_moduleName_text=DomainCategoryRunner",
63 "DomainCategoryRunner_Progress_Message_Domain_Types=Finding Domain Types",
64 "DomainCategoryRunner_parentModuleName=Recent Activity"
66 class DomainCategoryRunner extends Extract {
71 private static final String URL_REGEX_SCHEME =
"(((?<scheme>[^:\\/?#]+):?)?\\/\\/)";
73 private static final String URL_REGEX_USERINFO =
"((?<userinfo>[^\\/?#@]*)@)";
74 private static final String URL_REGEX_HOST =
"(?<host>[^\\/\\.?#:]*\\.[^\\/?#:]*)";
75 private static final String URL_REGEX_PORT =
"(:(?<port>[0-9]{1,5}))";
76 private static final String URL_REGEX_AUTHORITY = String.format(
"(%s?%s?%s?\\/?)", URL_REGEX_USERINFO, URL_REGEX_HOST, URL_REGEX_PORT);
78 private static final String URL_REGEX_PATH =
"(?<path>([^?#]*)(\\?([^#]*))?(#(.*))?)";
80 private static final String URL_REGEX_STR = String.format(
"^\\s*%s?%s?%s?", URL_REGEX_SCHEME, URL_REGEX_AUTHORITY, URL_REGEX_PATH);
81 private static final Pattern URL_REGEX = Pattern.compile(URL_REGEX_STR);
83 private static int DATETIME_ACCESSED_TYPEID = ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED.getTypeID();
84 private static int URL_TYPEID = ATTRIBUTE_TYPE.TSK_URL.getTypeID();
86 private static final Logger logger = Logger.getLogger(DomainCategoryRunner.class.getName());
89 private static final String CUSTOM_CATEGORIZER_PATH =
"org.sleuthkit.autopsy.url.analytics.domaincategorization.CustomWebCategorizer";
92 private static final List<BlackboardArtifact.Type> DOMAIN_CATEGORIZATION_TYPES = Stream.of(
93 BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_BOOKMARK,
94 BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_CACHE,
95 BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_COOKIE,
96 BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_DOWNLOAD,
97 BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_HISTORY,
98 BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY)
99 .map(BlackboardArtifact.Type::new)
100 .collect(Collectors.toList());
110 private static long getTimeOrZero(Map<Integer, BlackboardAttribute> attrMap,
int attrTypeId) {
111 if (attrMap == null) {
115 BlackboardAttribute attr = attrMap.get(attrTypeId);
116 return attr == null ? 0 : attr.getValueLong();
127 private static String getStringOrEmpty(Map<Integer, BlackboardAttribute> attrMap,
int attrTypeId) {
128 if (attrMap == null) {
132 BlackboardAttribute attr = attrMap.get(attrTypeId);
133 String attrStr = attr == null ?
"" : attr.getValueString();
134 return attrStr == null ?
"" : attrStr;
/**
 * Comparator used by findDomainTypes() to sort artifacts. It compares the
 * TSK_DATETIME_ACCESSED values first, then the TSK_URL values ignoring case,
 * and finally the artifact ids.
 *
 * NOTE(review): the statements inside the two "!= 0" branches and after the
 * catch block are not visible in this listing, so the sort direction for
 * time and url, and the result returned on a TskCoreException, cannot be
 * confirmed here. The "try {" opener and the ".stream()" calls also appear
 * to be missing.
 */
140 private static final Comparator<BlackboardArtifact> ARTIFACT_COMPARATOR = (a, b) -> {
// attributes of each artifact keyed by attribute type id
142 Map<Integer, BlackboardAttribute> attrMapA = null;
143 Map<Integer, BlackboardAttribute> attrMapB = null;
// when two attributes share a type id, the first one encountered is kept
146 attrMapA = a.getAttributes()
148 .collect(Collectors.toMap(attr -> attr.getAttributeType().getTypeID(), attr -> attr, (attr1, attr2) -> attr1));
150 attrMapB = b.getAttributes()
152 .collect(Collectors.toMap(attr -> attr.getAttributeType().getTypeID(), attr -> attr, (attr1, attr2) -> attr1));
154 }
catch (TskCoreException ex) {
155 logger.log(Level.WARNING,
"There was an error fetching attributes for artifacts", ex);
// first key: accessed time (0 when the attribute or the map is missing)
160 int timeCompare = Long.compare(getTimeOrZero(attrMapA, DATETIME_ACCESSED_TYPEID), getTimeOrZero(attrMapB, DATETIME_ACCESSED_TYPEID));
161 if (timeCompare != 0) {
// second key: url, case-insensitive (empty string when missing)
167 int urlCompare = getStringOrEmpty(attrMapA, URL_TYPEID).compareToIgnoreCase(getStringOrEmpty(attrMapB, URL_TYPEID));
168 if (urlCompare != 0) {
// last resort: artifact id
173 return Long.compare(a.getId(), b.getId());
// Data source whose artifacts are analyzed; assigned in process().
176 private Content dataSource;
// Ingest job context, consulted for cancellation; assigned in process().
177 private IngestJobContext context;
// Categorizers consulted in order by findCategory(); replaced in configExtractor().
178 private List<DomainCategorizer> domainProviders = Collections.emptyList();
183 DomainCategoryRunner() {
194 private String getHost(String urlString) {
198 URL url =
new URL(urlString);
200 host = url.getHost();
202 }
catch (MalformedURLException ignore) {
207 if (StringUtils.isBlank(host)) {
208 Matcher m = URL_REGEX.matcher(urlString);
210 host = m.group(
"host");
224 private DomainCategory findCategory(String domain, String host) {
225 List<DomainCategorizer> safeProviders = domainProviders == null ? Collections.emptyList() : domainProviders;
226 for (DomainCategorizer provider : safeProviders) {
227 DomainCategory result;
229 result = provider.getCategory(domain, host);
230 if (result != null) {
233 }
catch (DomainCategorizerException ex) {
234 logger.log(Level.WARNING,
"There was an error processing results with " + provider.getClass().getCanonicalName(), ex);
258 ArtifactHost(AbstractFile abstractFile, String host, String domain) {
259 this.abstractFile = abstractFile;
261 this.domain = domain;
267 AbstractFile getAbstractFile() {
295 private ArtifactHost getDomainAndHost(BlackboardArtifact artifact)
throws TskCoreException {
297 AbstractFile file = tskCase.getAbstractFileById(artifact.getObjectID());
303 BlackboardAttribute urlAttr = artifact.getAttribute(
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_URL));
304 String urlString = null;
306 if (urlAttr != null) {
307 urlString = urlAttr.getValueString();
308 if (StringUtils.isNotBlank(urlString)) {
309 host = getHost(urlString);
314 BlackboardAttribute domainAttr = artifact.getAttribute(
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN));
315 String domainString = null;
316 if (domainAttr != null) {
317 domainString = domainAttr.getValueString();
320 boolean hasDomain = StringUtils.isNotBlank(domainString);
321 boolean hasHost = StringUtils.isNotBlank(host);
324 if (!hasDomain && !hasHost) {
326 }
else if (!hasDomain) {
327 domainString = NetworkUtils.extractDomain(host);
328 }
else if (!hasHost) {
332 return new ArtifactHost(file, host.toLowerCase(), domainString.toLowerCase());
344 private static boolean isDuplicateOrAdd(Set<String> items, String item) {
345 if (StringUtils.isBlank(item)) {
347 }
else if (items.contains(item)) {
/**
 * Fetches the data source's artifacts of the DOMAIN_CATEGORIZATION_TYPES,
 * sorts them with ARTIFACT_COMPARATOR and, for each one, determines its
 * host/domain, looks up a category via findCategory() and calls
 * addCategoryArtifact(). A TskCoreException is logged at SEVERE level.
 *
 * NOTE(review): the bodies of the "if" statements inside the loop (presumably
 * break/continue), the "try {" opener, the increment of artifactsAnalyzed and
 * the closing braces are not visible in this listing.
 */
360 private void findDomainTypes() {
// counters reported in the summary log message at the end
361 int artifactsAnalyzed = 0;
362 int domainTypeInstancesFound = 0;
// hosts already examined
365 Set<String> hostsSeen =
new HashSet<>();
// host suffixes already examined
368 Set<String> hostSuffixesSeen =
new HashSet<>();
370 List<BlackboardArtifact> listArtifacts = currentCase.getSleuthkitCase().getBlackboard().getArtifacts(
371 DOMAIN_CATEGORIZATION_TYPES,
372 Arrays.asList(dataSource.getId()));
374 logger.log(Level.INFO,
"Processing {0} blackboard artifacts.", listArtifacts.size());
375 Collections.sort(listArtifacts, ARTIFACT_COMPARATOR);
377 for (BlackboardArtifact artifact : listArtifacts) {
// cancellation is checked once per artifact
379 if (context.dataSourceIngestIsCancelled()) {
// no usable host/domain, or host already seen
385 ArtifactHost curArtHost = getDomainAndHost(artifact);
386 if (curArtHost == null || isDuplicateOrAdd(hostsSeen, curArtHost.getHost())) {
// no provider returned a category
394 DomainCategory domainEntryFound = findCategory(curArtHost.getDomain(), curArtHost.getHost());
395 if (domainEntryFound == null) {
// category entry lacks a host suffix or a category name
400 String hostSuffix = domainEntryFound.getHostSuffix();
401 String domainCategory = domainEntryFound.getCategory();
402 if (StringUtils.isBlank(hostSuffix) || StringUtils.isBlank(domainCategory)) {
407 domainTypeInstancesFound++;
// host suffix already seen
409 if (isDuplicateOrAdd(hostSuffixesSeen, hostSuffix)) {
414 addCategoryArtifact(curArtHost, domainCategory);
416 }
catch (TskCoreException e) {
417 logger.log(Level.SEVERE,
"Encountered error retrieving artifacts for messaging domains", e);
// NOTE(review): whether the summary below is in an else branch is not visible here
419 if (context.dataSourceIngestIsCancelled()) {
420 logger.info(
"Operation terminated by user.");
422 logger.log(Level.INFO, String.format(
"Extracted %s distinct messaging domain(s) from the blackboard. "
423 +
"Of the %s artifact(s) with valid hosts, %s url(s) contained messaging domain suffix.",
424 hostSuffixesSeen.size(), artifactsAnalyzed, domainTypeInstancesFound));
435 private void addCategoryArtifact(ArtifactHost artHost, String domainCategory)
throws TskCoreException {
436 String moduleName = Bundle.DomainCategoryRunner_parentModuleName();
437 Collection<BlackboardAttribute> bbattributes = Arrays.asList(
438 new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN, moduleName, artHost.getDomain()),
439 new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_HOST, moduleName, artHost.getHost()),
440 new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_NAME, moduleName, domainCategory)
442 postArtifact(createArtifactWithAttributes(ARTIFACT_TYPE.TSK_WEB_CATEGORIZATION, artHost.getAbstractFile(), bbattributes));
446 public void process(Content dataSource, IngestJobContext context, DataSourceIngestModuleProgress progressBar) {
447 this.dataSource = dataSource;
448 this.context = context;
450 progressBar.progress(Bundle.DomainCategoryRunner_Progress_Message_Domain_Types());
451 this.findDomainTypes();
455 void configExtractor() throws IngestModule.IngestModuleException {
457 Collection<? extends DomainCategorizer> lookupCollection = Lookup.getDefault().lookupAll(DomainCategorizer.class);
458 Collection<? extends DomainCategorizer> lookupList = (lookupCollection == null) ?
459 Collections.emptyList() :
463 List<DomainCategorizer> foundProviders =
new ArrayList<>();
467 .filter(categorizer -> categorizer.getClass().getName().contains(CUSTOM_CATEGORIZER_PATH))
469 .ifPresent((provider) -> foundProviders.add(provider));
472 foundProviders.add(
new DefaultPriorityDomainCategorizer());
477 .filter(categorizer -> categorizer != null)
478 .filter(categorizer -> {
479 String className = categorizer.getClass().getName();
480 return !className.contains(CUSTOM_CATEGORIZER_PATH) &&
481 !className.equals(DefaultPriorityDomainCategorizer.class.getName()) &&
482 !className.equals(DefaultDomainCategorizer.class.getName());
484 .sorted((a, b) -> a.getClass().getName().compareToIgnoreCase(b.getClass().getName()))
485 .forEach(foundProviders::add);
488 foundProviders.add(
new DefaultDomainCategorizer());
490 for (DomainCategorizer provider : foundProviders) {
492 provider.initialize();
493 }
catch (DomainCategorizerException ex) {
494 throw new IngestModule.IngestModuleException(
"There was an error instantiating the provider: " +
495 provider.getClass().getSimpleName(), ex);
499 this.domainProviders = foundProviders;
503 public void complete() {
504 if (this.domainProviders != null) {
505 for (DomainCategorizer provider : this.domainProviders) {
508 }
catch (Exception ex) {
509 logger.log(Level.WARNING,
"There was an error closing " + provider.getClass().getName(), ex);
514 logger.info(
"Domain categorization completed.");
final AbstractFile abstractFile