19 package org.sleuthkit.autopsy.recentactivity;
21 import java.net.MalformedURLException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.Collection;
26 import java.util.Collections;
27 import java.util.Comparator;
28 import java.util.HashSet;
29 import java.util.List;
31 import java.util.logging.Level;
33 import java.util.regex.Matcher;
34 import java.util.regex.Pattern;
35 import java.util.stream.Collectors;
36 import java.util.stream.Stream;
37 import org.apache.commons.lang.StringUtils;
38 import org.openide.util.Lookup;
39 import org.openide.util.NbBundle.Messages;
48 import org.
sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
// Display strings for this module. They are read through Bundle accessors
// elsewhere in the class: the module name in the constructor, the progress
// message in process(), and the parent module name in addCategoryArtifact().
61 "DomainCategoryRunner_moduleName_text=Domain Category Analyzer",
62 "DomainCategoryRunner_Progress_Message_Domain_Types=Finding Domain Types",
63 "DomainCategoryRunner_parentModuleName=Recent Activity"
// Extract subclass that resolves the host/domain of web artifacts, asks the
// configured DomainCategorizer providers for a category, and records matches
// as TSK_WEB_CATEGORIZATION artifacts.
65 class DomainCategoryRunner extends Extract {
// The constants below are fragments of one regular expression, compiled into
// URL_REGEX, which getHost() uses to read the named group "host" from a URL
// string when java.net.URL did not yield a host.

// Optional scheme (named group "scheme", optionally followed by ':') and then
// a mandatory double slash.
70 private static final String URL_REGEX_SCHEME =
"(((?<scheme>[^:\\/?#]+):?)?\\/\\/)";
// User info (named group "userinfo") terminated by '@'.
72 private static final String URL_REGEX_USERINFO =
"((?<userinfo>[^\\/?#@]*)@)";
// Host (named group "host"). The pattern requires at least one '.': a run
// without dots, a literal dot, then any remaining host characters.
73 private static final String URL_REGEX_HOST =
"(?<host>[^\\/\\.?#:]*\\.[^\\/?#:]*)";
// ':' followed by a port of one to five digits (named group "port").
74 private static final String URL_REGEX_PORT =
"(:(?<port>[0-9]{1,5}))";
// Authority: user info, host and port are each individually optional, followed
// by an optional trailing slash.
75 private static final String URL_REGEX_AUTHORITY = String.format(
"(%s?%s?%s?\\/?)", URL_REGEX_USERINFO, URL_REGEX_HOST, URL_REGEX_PORT);
// Everything after the authority (named group "path"): the path itself, an
// optional '?'-prefixed query and an optional '#'-prefixed fragment.
77 private static final String URL_REGEX_PATH =
"(?<path>([^?#]*)(\\?([^#]*))?(#(.*))?)";
// Full expression: anchored at the start of input, tolerates leading
// whitespace, and treats scheme, authority and path as optional.
79 private static final String URL_REGEX_STR = String.format(
"^\\s*%s?%s?%s?", URL_REGEX_SCHEME, URL_REGEX_AUTHORITY, URL_REGEX_PATH);
// Compiled once and shared by every getHost() call.
80 private static final Pattern URL_REGEX = Pattern.compile(URL_REGEX_STR);
82 private static int DATETIME_ACCESSED_TYPEID = ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED.getTypeID();
83 private static int URL_TYPEID = ATTRIBUTE_TYPE.TSK_URL.getTypeID();
// Logger shared by the static helpers and instance methods of this class.
85 private static final Logger logger = Logger.getLogger(DomainCategoryRunner.class.getName());
// Class-name fragment identifying the custom web categorizer. startUp() matches
// it with String.contains() against provider class names, both to select that
// provider and to exclude it from the list of remaining providers.
88 private static final String CUSTOM_CATEGORIZER_PATH =
"org.sleuthkit.autopsy.url.analytics.domaincategorization.CustomWebCategorizer";
// Artifact types that findDomainTypes() requests from the blackboard. Each
// ARTIFACT_TYPE enum constant is wrapped in a BlackboardArtifact.Type.
91 private static final List<BlackboardArtifact.Type> DOMAIN_CATEGORIZATION_TYPES = Stream.of(
92 BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_BOOKMARK,
93 BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_CACHE,
94 BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_COOKIE,
95 BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_DOWNLOAD,
96 BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_HISTORY,
97 BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY)
98 .map(BlackboardArtifact.Type::new)
99 .collect(Collectors.toList());
// Ingest job context: assigned in the constructor and polled for cancellation
// in findDomainTypes().
100 private final IngestJobContext context;
/**
 * Reads the long value of the attribute stored under the given type id.
 *
 * @param attrMap    Map of attribute type id to attribute. A null map is
 *                   handled by a guard whose body is not visible in this
 *                   excerpt (presumably it returns 0; confirm).
 * @param attrTypeId The attribute type id to look up.
 *
 * @return The attribute's long value, or 0 when the map has no entry for the
 *         type id.
 */
111 private static long getTimeOrZero(Map<Integer, BlackboardAttribute> attrMap,
int attrTypeId) {
112 if (attrMap == null) {
116 BlackboardAttribute attr = attrMap.get(attrTypeId);
117 return attr == null ? 0 : attr.getValueLong();
/**
 * Reads the string value of the attribute stored under the given type id,
 * never returning null for a non-null map.
 *
 * @param attrMap    Map of attribute type id to attribute. A null map is
 *                   handled by a guard whose body is not visible in this
 *                   excerpt (presumably it returns ""; confirm).
 * @param attrTypeId The attribute type id to look up.
 *
 * @return The attribute's string value, or the empty string when the map has
 *         no entry for the type id or the attribute's string value is null.
 */
129 private static String getStringOrEmpty(Map<Integer, BlackboardAttribute> attrMap,
int attrTypeId) {
130 if (attrMap == null) {
134 BlackboardAttribute attr = attrMap.get(attrTypeId);
// First normalize a missing attribute, then a null value inside a present one.
135 String attrStr = attr == null ?
"" : attr.getValueString();
136 return attrStr == null ?
"" : attrStr;
// Ordering applied to the artifact list in findDomainTypes(). Artifacts are
// compared by TSK_DATETIME_ACCESSED (ascending; a missing value counts as 0),
// then by TSK_URL ignoring case (a missing value counts as ""), and finally by
// artifact id. The bodies of the two "!= 0" branches are not visible in this
// excerpt; presumably each returns its comparison result (confirm).
// NOTE(review): the attribute maps are rebuilt on every comparison.
142 private static final Comparator<BlackboardArtifact> ARTIFACT_COMPARATOR = (a, b) -> {
// Left null when the attribute fetch fails; both lookup helpers test for a
// null map.
144 Map<Integer, BlackboardAttribute> attrMapA = null;
145 Map<Integer, BlackboardAttribute> attrMapB = null;
// Index each artifact's attributes by type id, keeping the first attribute
// when a type id occurs more than once. The call between getAttributes() and
// collect() is not visible here.
148 attrMapA = a.getAttributes()
150 .collect(Collectors.toMap(attr -> attr.getAttributeType().getTypeID(), attr -> attr, (attr1, attr2) -> attr1));
152 attrMapB = b.getAttributes()
154 .collect(Collectors.toMap(attr -> attr.getAttributeType().getTypeID(), attr -> attr, (attr1, attr2) -> attr1));
156 }
catch (TskCoreException ex) {
157 logger.log(Level.WARNING,
"There was an error fetching attributes for artifacts", ex);
// Primary key: accessed time.
162 int timeCompare = Long.compare(getTimeOrZero(attrMapA, DATETIME_ACCESSED_TYPEID), getTimeOrZero(attrMapB, DATETIME_ACCESSED_TYPEID));
163 if (timeCompare != 0) {
// Secondary key: URL, case-insensitive.
169 int urlCompare = getStringOrEmpty(attrMapA, URL_TYPEID).compareToIgnoreCase(getStringOrEmpty(attrMapB, URL_TYPEID));
170 if (urlCompare != 0) {
// Final tie-breaker: artifact id.
175 return Long.compare(a.getId(), b.getId());
// Data source being processed; assigned in process() and read by
// findDomainTypes() to restrict the artifact query.
178 private Content dataSource;
// Categorizers consulted in order by findCategory(); replaced in startUp() and
// iterated again in shutDown(). Starts out as an empty list.
179 private List<DomainCategorizer> domainProviders = Collections.emptyList();
/**
 * Creates the runner, passing the bundle-provided module name and the ingest
 * job context to the superclass constructor and keeping the context locally.
 *
 * @param context The ingest job context; later polled for cancellation.
 */
184 DomainCategoryRunner(IngestJobContext context) {
185 super(Bundle.DomainCategoryRunner_moduleName_text(), context);
186 this.context = context;
/**
 * Extracts the host portion of a URL string.
 *
 * The string is first parsed with java.net.URL. If that yields a blank host,
 * or the URL is malformed, the "host" named group of URL_REGEX is used
 * instead. The declaration of host, the check that the matcher actually
 * matched, and the return statement are not visible in this excerpt.
 *
 * @param urlString The URL text to inspect.
 *
 * @return Presumably the extracted host (confirm against the full source).
 */
197 private String getHost(String urlString) {
201 URL url =
new URL(urlString);
203 host = url.getHost();
205 }
// A malformed URL is expected input here; the regex fallback below handles it.
catch (MalformedURLException ignore) {
// Fallback for strings java.net.URL could not turn into a host.
210 if (StringUtils.isBlank(host)) {
211 Matcher m = URL_REGEX.matcher(urlString);
213 host = m.group(
"host");
/**
 * Asks each configured provider, in list order, for the category of the given
 * domain and host.
 *
 * A provider that throws DomainCategorizerException is logged at WARNING
 * level. The body of the non-null result branch and the final return are not
 * visible in this excerpt; presumably the first non-null result is returned
 * (confirm). The caller checks the return value for null.
 *
 * @param domain The domain to categorize.
 * @param host   The host to categorize.
 *
 * @return The category found by a provider.
 */
228 private DomainCategory findCategory(String domain, String host) {
// Guard against a null provider list.
229 List<DomainCategorizer> safeProviders = domainProviders == null ? Collections.emptyList() : domainProviders;
230 for (DomainCategorizer provider : safeProviders) {
231 DomainCategory result;
233 result = provider.getCategory(domain, host);
234 if (result != null) {
237 }
catch (DomainCategorizerException ex) {
238 logger.log(Level.WARNING,
"There was an error processing results with " + provider.getClass().getCanonicalName(), ex);
// Fragments of the ArtifactHost holder type, which groups a file with a host
// and a domain. Its class header, field declarations, the host assignment and
// the remaining accessors are not visible in this excerpt.
/**
 * Stores the file, host and domain of one artifact.
 *
 * @param abstractFile The file associated with the artifact.
 * @param host         The host; callers in this file pass it lower-cased.
 * @param domain       The domain; callers in this file pass it lower-cased.
 */
264 ArtifactHost(AbstractFile abstractFile, String host, String domain) {
265 this.abstractFile = abstractFile;
267 this.domain = domain;
// Accessor for the file; used by addCategoryArtifact() as the artifact's owner.
273 AbstractFile getAbstractFile() {
/**
 * Resolves the file, host and domain associated with a web artifact.
 *
 * The host is derived from the artifact's TSK_URL attribute through getHost()
 * and the domain is read from the TSK_DOMAIN attribute. When the domain is
 * blank but a host exists, the domain is computed with
 * NetworkUtils.extractDomain(host). The branch bodies for "neither present"
 * and "host missing" are not visible in this excerpt, and neither is the
 * declaration of host. The caller null-checks the result, so a null return on
 * some path is presumed (confirm).
 *
 * NOTE(review): toLowerCase() is called without an explicit Locale, so the
 * result depends on the JVM default locale.
 *
 * @param artifact The artifact to inspect.
 *
 * @return An ArtifactHost carrying the file plus lower-cased host and domain.
 *
 * @throws TskCoreException Declared by this method; presumably raised by the
 *                          case and attribute lookups (confirm).
 */
303 private ArtifactHost getDomainAndHost(BlackboardArtifact artifact)
throws TskCoreException {
// tskCase is not declared in the visible source; presumably inherited.
305 AbstractFile file = tskCase.getAbstractFileById(artifact.getObjectID());
// Host: taken from the URL attribute when it is present and non-blank.
311 BlackboardAttribute urlAttr = artifact.getAttribute(
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_URL));
312 String urlString = null;
314 if (urlAttr != null) {
315 urlString = urlAttr.getValueString();
316 if (StringUtils.isNotBlank(urlString)) {
317 host = getHost(urlString);
// Domain: taken from the domain attribute when it is present.
322 BlackboardAttribute domainAttr = artifact.getAttribute(
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN));
323 String domainString = null;
324 if (domainAttr != null) {
325 domainString = domainAttr.getValueString();
328 boolean hasDomain = StringUtils.isNotBlank(domainString);
329 boolean hasHost = StringUtils.isNotBlank(host);
// Fill in whichever of the two values is missing.
332 if (!hasDomain && !hasHost) {
334 }
else if (!hasDomain) {
335 domainString = NetworkUtils.extractDomain(host);
336 }
else if (!hasHost) {
340 return new ArtifactHost(file, host.toLowerCase(), domainString.toLowerCase());
/**
 * Checks an item against a set of items already seen.
 *
 * Only the conditions are visible in this excerpt: a blank item and an item
 * already contained in the set are handled by separate branches whose bodies
 * are missing. Judging by the method name and its callers, the method
 * presumably reports true for an item already in the set and otherwise adds
 * it (confirm against the full source).
 *
 * @param items The set of items seen so far.
 * @param item  The item to check.
 *
 * @return A boolean that callers treat as "skip this item" when true.
 */
353 private static boolean isDuplicateOrAdd(Set<String> items, String item) {
354 if (StringUtils.isBlank(item)) {
356 }
else if (items.contains(item)) {
/**
 * Scans the web artifacts of the current data source and records a
 * categorization artifact for hosts that a provider can categorize.
 *
 * Artifacts of the DOMAIN_CATEGORIZATION_TYPES are fetched for the data source
 * and sorted with ARTIFACT_COMPARATOR. For each artifact the loop checks for
 * cancellation, resolves host and domain, checks the host against the hosts
 * already seen, asks findCategory() for a category, requires a non-blank host
 * suffix and category, checks the suffix against the suffixes already seen,
 * and finally calls addCategoryArtifact(). The bodies of the skip branches,
 * the opening of the try block and the increment of artifactsAnalyzed are not
 * visible in this excerpt. A TskCoreException is logged at SEVERE level.
 *
 * NOTE(review): the log messages below still say "messaging domain" although
 * this method handles domain categories in general.
 */
369 private void findDomainTypes() {
370 int artifactsAnalyzed = 0;
371 int domainTypeInstancesFound = 0;
// Hosts already looked up, so each host is categorized at most once.
374 Set<String> hostsSeen =
new HashSet<>();
// Host suffixes already checked before addCategoryArtifact() is called; the
// size of this set is reported in the summary log line.
377 Set<String> hostSuffixesSeen =
new HashSet<>();
// currentCase is not declared in the visible source; presumably inherited.
379 List<BlackboardArtifact> listArtifacts = currentCase.getSleuthkitCase().getBlackboard().getArtifacts(
380 DOMAIN_CATEGORIZATION_TYPES,
381 Arrays.asList(dataSource.getId()));
383 logger.log(Level.INFO,
"Processing {0} blackboard artifacts.", listArtifacts.size());
384 Collections.sort(listArtifacts, ARTIFACT_COMPARATOR);
386 for (BlackboardArtifact artifact : listArtifacts) {
// Cancellation is checked once per artifact.
388 if (context.dataSourceIngestIsCancelled()) {
// Artifacts with no usable host/domain, or with a host already seen, take this branch.
394 ArtifactHost curArtHost = getDomainAndHost(artifact);
395 if (curArtHost == null || isDuplicateOrAdd(hostsSeen, curArtHost.getHost())) {
403 DomainCategory domainEntryFound = findCategory(curArtHost.getDomain(), curArtHost.getHost());
404 if (domainEntryFound == null) {
// A usable result needs both a host suffix and a category.
409 String hostSuffix = domainEntryFound.getHostSuffix();
410 String domainCategory = domainEntryFound.getCategory();
411 if (StringUtils.isBlank(hostSuffix) || StringUtils.isBlank(domainCategory)) {
// Counted before the suffix check, so repeated suffixes are included.
416 domainTypeInstancesFound++;
418 if (isDuplicateOrAdd(hostSuffixesSeen, hostSuffix)) {
423 addCategoryArtifact(curArtHost, domainCategory);
425 }
catch (TskCoreException e) {
426 logger.log(Level.SEVERE,
"Encountered error retrieving artifacts for messaging domains", e);
428 if (context.dataSourceIngestIsCancelled()) {
429 logger.info(
"Operation terminated by user.");
// Summary: distinct suffixes, artifacts analyzed, categorized instances.
431 logger.log(Level.INFO, String.format(
"Extracted %s distinct messaging domain(s) from the blackboard. "
432 +
"Of the %s artifact(s) with valid hosts, %s url(s) contained messaging domain suffix.",
433 hostSuffixesSeen.size(), artifactsAnalyzed, domainTypeInstancesFound));
/**
 * Creates and posts a TSK_WEB_CATEGORIZATION artifact for a categorized host.
 *
 * The artifact is created on the file held by artHost and carries three
 * attributes: TSK_DOMAIN, TSK_HOST, and TSK_NAME holding the category. All
 * three are attributed to the parent module name from the bundle.
 * createArtifactWithAttributes() and postArtifact() are not declared in the
 * visible source; presumably they are inherited.
 *
 * @param artHost        File, host and domain of the source artifact.
 * @param domainCategory The category to record.
 *
 * @throws TskCoreException Declared by this method; presumably raised while
 *                          creating the artifact (confirm).
 */
444 private void addCategoryArtifact(ArtifactHost artHost, String domainCategory)
throws TskCoreException {
445 String moduleName = Bundle.DomainCategoryRunner_parentModuleName();
446 Collection<BlackboardAttribute> bbattributes = Arrays.asList(
447 new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN, moduleName, artHost.getDomain()),
448 new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_HOST, moduleName, artHost.getHost()),
449 new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_NAME, moduleName, domainCategory)
451 postArtifact(createArtifactWithAttributes(BlackboardArtifact.Type.TSK_WEB_CATEGORIZATION, artHost.getAbstractFile(), bbattributes));
/**
 * Runs domain categorization for one data source: stores the data source,
 * updates the progress bar with the bundle's progress message, and calls
 * findDomainTypes().
 *
 * @param dataSource  The data source whose web artifacts are analyzed.
 * @param progressBar The progress reporter that receives the status message.
 */
455 public void process(Content dataSource, DataSourceIngestModuleProgress progressBar) {
456 this.dataSource = dataSource;
457 progressBar.progress(Bundle.DomainCategoryRunner_Progress_Message_Domain_Types());
458 this.findDomainTypes();
/**
 * Discovers, orders and initializes the domain categorizers.
 *
 * Providers are looked up through the default Lookup and assembled in this
 * order: the provider whose class name contains CUSTOM_CATEGORIZER_PATH (if
 * one is found), a new DefaultPriorityDomainCategorizer, all other non-null
 * providers sorted by class name ignoring case, and a new
 * DefaultDomainCategorizer last. Every provider is then initialized and the
 * list is stored in domainProviders. The stream sources and the call that
 * produces the value passed to ifPresent() are not visible in this excerpt.
 *
 * @throws IngestModule.IngestModuleException If a provider fails to
 *                                            initialize; the original
 *                                            DomainCategorizerException is
 *                                            kept as the cause.
 */
462 void startUp() throws IngestModule.IngestModuleException {
464 Collection<? extends DomainCategorizer> lookupCollection = Lookup.getDefault().lookupAll(DomainCategorizer.class);
// A null lookup result is replaced by an empty list.
465 Collection<? extends DomainCategorizer> lookupList = (lookupCollection == null)
466 ? Collections.emptyList()
470 List<DomainCategorizer> foundProviders =
new ArrayList<>();
// 1. The custom categorizer, if present, is consulted first.
474 .filter(categorizer -> categorizer.getClass().getName().contains(CUSTOM_CATEGORIZER_PATH))
476 .ifPresent((provider) -> foundProviders.add(provider));
// 2. The built-in priority categorizer.
479 foundProviders.add(
new DefaultPriorityDomainCategorizer());
// 3. Any other discovered providers, excluding the custom and the two default
//    classes handled explicitly, in case-insensitive class name order.
484 .filter(categorizer -> categorizer != null)
485 .filter(categorizer -> {
486 String className = categorizer.getClass().getName();
487 return !className.contains(CUSTOM_CATEGORIZER_PATH)
488 && !className.equals(DefaultPriorityDomainCategorizer.class.getName())
489 && !className.equals(DefaultDomainCategorizer.class.getName());
491 .sorted((a, b) -> a.getClass().getName().compareToIgnoreCase(b.getClass().getName()))
492 .forEach(foundProviders::add);
// 4. The built-in default categorizer is consulted last.
495 foundProviders.add(
new DefaultDomainCategorizer());
// A provider that fails to initialize aborts start-up.
497 for (DomainCategorizer provider : foundProviders) {
499 provider.initialize();
500 }
catch (DomainCategorizerException ex) {
501 throw new IngestModule.IngestModuleException(
"There was an error instantiating the provider: "
502 + provider.getClass().getSimpleName(), ex);
506 this.domainProviders = foundProviders;
/**
 * Releases the domain categorizers at the end of ingest.
 *
 * Each provider is handled inside its own try block; any exception is logged
 * at WARNING level. The call made inside the try block is not visible in this
 * excerpt; judging by the log message it closes the provider (confirm).
 */
510 public void shutDown() {
511 if (this.domainProviders != null) {
512 for (DomainCategorizer provider : this.domainProviders) {
515 }
// Broad catch: the failure is logged rather than propagated.
catch (Exception ex) {
516 logger.log(Level.WARNING,
"There was an error closing " + provider.getClass().getName(), ex);
final AbstractFile abstractFile