19 package org.sleuthkit.autopsy.discovery.search;
21 import java.sql.ResultSet;
22 import java.sql.SQLException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.HashMap;
26 import java.util.HashSet;
27 import java.util.Iterator;
28 import java.util.List;
31 import java.util.logging.Level;
32 import org.openide.util.NbBundle;
47 import java.util.StringJoiner;
49 import static org.
sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_CATEGORIZATION;
125 static class DataSourceAttribute
extends AttributeType {
128 public DiscoveryKeyUtils.GroupKey getGroupKey(Result result) {
129 return new DiscoveryKeyUtils.DataSourceGroupKey(result);
136 static class FileTypeAttribute
extends AttributeType {
139 public DiscoveryKeyUtils.GroupKey getGroupKey(Result file) {
140 return new DiscoveryKeyUtils.FileTypeGroupKey(file);
148 static class DomainCategoryAttribute
extends AttributeType {
151 public DiscoveryKeyUtils.GroupKey getGroupKey(Result result) {
152 return new DiscoveryKeyUtils.DomainCategoryGroupKey(result);
156 public void addAttributeToResults(List<Result> results, SleuthkitCase caseDb,
157 CentralRepository centralRepoDb)
throws DiscoveryException {
159 Map<String, Set<String>> domainsToCategories = getDomainsWithWebCategories(caseDb);
160 for (Result result : results) {
161 if (result instanceof ResultDomain) {
162 ResultDomain domain = (ResultDomain) result;
163 domain.addWebCategories(domainsToCategories.get(domain.getDomain()));
166 }
catch (TskCoreException | InterruptedException ex) {
167 throw new DiscoveryException(
"Error fetching TSK_WEB_CATEGORY artifacts from the database", ex);
176 private Map<String, Set<String>> getDomainsWithWebCategories(SleuthkitCase caseDb)
throws TskCoreException, InterruptedException {
177 Map<String, Set<String>> domainToCategory =
new HashMap<>();
179 for (BlackboardArtifact artifact : caseDb.getBlackboardArtifacts(TSK_WEB_CATEGORIZATION)) {
180 if (Thread.currentThread().isInterrupted()) {
181 throw new InterruptedException();
183 BlackboardAttribute webCategory = artifact.getAttribute(
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_NAME));
184 BlackboardAttribute domain = artifact.getAttribute(
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN));
185 if (webCategory != null && domain != null) {
186 String domainDisplayName = domain.getValueString().trim().toLowerCase();
187 if (!domainToCategory.containsKey(domainDisplayName)) {
188 domainToCategory.put(domainDisplayName,
new HashSet<>());
190 domainToCategory.get(domainDisplayName).add(webCategory.getValueString());
193 return domainToCategory;
200 static class KeywordListAttribute
extends AttributeType {
203 public DiscoveryKeyUtils.GroupKey getGroupKey(Result file) {
204 return new DiscoveryKeyUtils.KeywordListGroupKey((ResultFile) file);
208 public void addAttributeToResults(List<Result> results, SleuthkitCase caseDb,
209 CentralRepository centralRepoDb)
throws DiscoveryException {
213 String selectQuery =
createSetNameClause(results, BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT.getTypeID(),
214 BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME.getTypeID());
216 SetKeywordListNamesCallback callback =
new SetKeywordListNamesCallback(results);
218 caseDb.getCaseDbAccessManager().select(selectQuery, callback);
219 }
catch (TskCoreException ex) {
220 throw new DiscoveryException(
"Error looking up keyword list attributes", ex);
231 List<Result> resultFiles;
239 this.resultFiles = resultFiles;
246 Map<Long, ResultFile> tempMap =
new HashMap<>();
247 for (
Result result : resultFiles) {
257 Long objId = rs.getLong(
"object_id");
258 String keywordListName = rs.getString(
"set_name");
262 }
catch (SQLException ex) {
263 logger.log(Level.SEVERE,
"Unable to get object_id or set_name from result set", ex);
266 }
catch (SQLException ex) {
267 logger.log(Level.SEVERE,
"Failed to get keyword list names", ex);
283 final Map<String, List<ResultDomain>> resultDomainTable =
new HashMap<>();
286 final String domainValue = domainInstance.getDomain();
288 final List<ResultDomain> bucket = resultDomainTable.getOrDefault(normalizedDomain,
new ArrayList<>());
289 bucket.add(domainInstance);
290 resultDomainTable.put(normalizedDomain, bucket);
292 logger.log(Level.INFO, String.format(
"Domain [%s] failed normalization, skipping...", domainInstance.getDomain()));
295 return resultDomainTable;
304 StringJoiner joiner =
new StringJoiner(
", ");
305 for (String value : values) {
306 joiner.add(
"'" + value +
"'");
308 return joiner.toString();
314 static class PreviouslyNotableAttribute
extends AttributeType {
316 static final int DOMAIN_BATCH_SIZE = 500;
324 public void addAttributeToResults(List<Result> results, SleuthkitCase caseDb,
325 CentralRepository centralRepoDb)
throws DiscoveryException {
327 if (centralRepoDb != null) {
328 processFilesWithCr(results, centralRepoDb);
332 private void processFilesWithCr(List<Result> results, CentralRepository centralRepo)
throws DiscoveryException {
334 List<ResultDomain> domainsBatch =
new ArrayList<>();
335 for (Result result : results) {
337 domainsBatch.add((ResultDomain) result);
338 if (domainsBatch.size() == DOMAIN_BATCH_SIZE) {
339 queryPreviouslyNotable(domainsBatch, centralRepo);
340 domainsBatch.clear();
345 queryPreviouslyNotable(domainsBatch, centralRepo);
348 private void queryPreviouslyNotable(List<ResultDomain> domainsBatch, CentralRepository centralRepo)
throws DiscoveryException {
349 if (domainsBatch.isEmpty()) {
354 final CorrelationAttributeInstance.Type attributeType = centralRepo.getCorrelationTypeById(CorrelationAttributeInstance.DOMAIN_TYPE_ID);
355 final Map<String, List<ResultDomain>> resultDomainTable =
organizeByValue(domainsBatch, attributeType);
356 final String values =
createCSV(resultDomainTable.keySet());
358 final String tableName = CentralRepoDbUtil.correlationTypeToInstanceTableName(attributeType);
359 final String domainFrequencyQuery =
" value AS domain_name "
360 +
"FROM " + tableName +
" "
361 +
"WHERE value IN (" + values +
") "
362 +
"AND known_status = " + TskData.FileKnown.BAD.getFileKnownValue();
364 final DomainPreviouslyNotableCallback previouslyNotableCallback =
new DomainPreviouslyNotableCallback(resultDomainTable);
365 centralRepo.processSelectClause(domainFrequencyQuery, previouslyNotableCallback);
367 if (previouslyNotableCallback.getCause() != null) {
368 throw previouslyNotableCallback.getCause();
370 }
catch (CentralRepoException | SQLException ex) {
371 throw new DiscoveryException(
"Fatal exception encountered querying the CR.", ex);
387 while (resultSet.next()) {
388 String domain = resultSet.getString(
"domain_name");
389 List<ResultDomain> domainInstances = domainLookup.get(domain);
391 domainInstance.markAsPreviouslyNotableInCR();
394 }
catch (SQLException ex) {
402 SQLException getCause() {
411 static class FrequencyAttribute
extends AttributeType {
413 static final int BATCH_SIZE = 50;
415 static final int DOMAIN_BATCH_SIZE = 500;
418 public DiscoveryKeyUtils.GroupKey getGroupKey(Result file) {
419 return new DiscoveryKeyUtils.FrequencyGroupKey(file);
423 public void addAttributeToResults(List<Result> results, SleuthkitCase caseDb,
424 CentralRepository centralRepoDb)
throws DiscoveryException {
425 if (centralRepoDb == null) {
426 for (Result result : results) {
432 processResultFilesForCR(results, centralRepoDb);
444 private void processResultFilesForCR(List<Result> results,
445 CentralRepository centralRepoDb)
throws DiscoveryException {
446 List<ResultFile> currentFiles =
new ArrayList<>();
447 Set<String> hashesToLookUp =
new HashSet<>();
448 List<ResultDomain> domainsToQuery =
new ArrayList<>();
449 for (Result result : results) {
452 if (result.
getKnown() == TskData.FileKnown.KNOWN) {
457 ResultFile file = (ResultFile) result;
458 if (file.getFirstInstance().getMd5Hash() != null
459 && !file.getFirstInstance().getMd5Hash().isEmpty()) {
460 hashesToLookUp.add(file.getFirstInstance().getMd5Hash());
461 currentFiles.add(file);
464 if (hashesToLookUp.size() >= BATCH_SIZE) {
467 hashesToLookUp.clear();
468 currentFiles.clear();
471 domainsToQuery.add((ResultDomain) result);
472 if (domainsToQuery.size() == DOMAIN_BATCH_SIZE) {
475 domainsToQuery.clear();
495 if (domainsToQuery.isEmpty()) {
500 final Map<String, List<ResultDomain>> resultDomainTable =
organizeByValue(domainsToQuery, attributeType);
501 final String values =
createCSV(resultDomainTable.keySet());
503 final String domainFrequencyQuery =
" value AS domain_name, COUNT(value) AS frequency FROM"
504 +
"(SELECT DISTINCT case_id, value FROM "
506 +
" WHERE value IN ("
508 +
")) AS foo GROUP BY value";
511 centralRepository.processSelectClause(domainFrequencyQuery, frequencyCallback);
513 if (frequencyCallback.getCause() != null) {
514 throw frequencyCallback.getCause();
541 while (resultSet.next()) {
542 String domain = resultSet.getString(
"domain_name");
543 Long frequency = resultSet.getLong(
"frequency");
545 List<ResultDomain> domainInstances = domainLookup.get(domain);
550 }
catch (SQLException ex) {
560 SQLException getCause() {
571 private final List<ResultFile>
files;
579 this.files =
new ArrayList<>(
files);
586 while (resultSet.next()) {
587 String hash = resultSet.getString(1);
588 int count = resultSet.getInt(2);
589 for (Iterator<ResultFile> iterator = files.iterator(); iterator.hasNext();) {
602 }
catch (SQLException ex) {
603 logger.log(Level.WARNING,
"Error getting frequency counts from Central Repository", ex);
618 return new DiscoveryKeyUtils.HashHitsGroupKey((ResultFile) result);
622 public void addAttributeToResults(List<Result> results, SleuthkitCase caseDb,
623 CentralRepository centralRepoDb)
throws DiscoveryException {
627 String selectQuery =
createSetNameClause(results, BlackboardArtifact.ARTIFACT_TYPE.TSK_HASHSET_HIT.getTypeID(),
628 BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME.getTypeID());
630 HashSetNamesCallback callback =
new HashSetNamesCallback(results);
632 caseDb.getCaseDbAccessManager().select(selectQuery, callback);
633 }
catch (TskCoreException ex) {
634 throw new DiscoveryException(
"Error looking up hash set attributes", ex);
644 List<Result> results;
652 this.results = results;
659 Map<Long, ResultFile> tempMap =
new HashMap<>();
660 for (
Result result : results) {
670 Long objId = rs.getLong(
"object_id");
671 String hashSetName = rs.getString(
"set_name");
673 tempMap.get(objId).addHashSetName(hashSetName);
675 }
catch (SQLException ex) {
676 logger.log(Level.SEVERE,
"Unable to get object_id or set_name from result set", ex);
679 }
catch (SQLException ex) {
680 logger.log(Level.SEVERE,
"Failed to get hash set names", ex);
689 static class InterestingItemAttribute
extends AttributeType {
697 public void addAttributeToResults(List<Result> results, SleuthkitCase caseDb,
698 CentralRepository centralRepoDb)
throws DiscoveryException {
702 String selectQuery =
createSetNameClause(results, BlackboardArtifact.ARTIFACT_TYPE.TSK_INTERESTING_FILE_HIT.getTypeID(),
703 BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME.getTypeID());
705 InterestingFileSetNamesCallback callback =
new InterestingFileSetNamesCallback(results);
707 caseDb.getCaseDbAccessManager().select(selectQuery, callback);
708 }
catch (TskCoreException ex) {
709 throw new DiscoveryException(
"Error looking up interesting file set attributes", ex);
720 List<Result> results;
729 this.results = results;
736 Map<Long, ResultFile> tempMap =
new HashMap<>();
737 for (
Result result : results) {
747 Long objId = rs.getLong(
"object_id");
748 String setName = rs.getString(
"set_name");
750 tempMap.get(objId).addInterestingSetName(setName);
752 }
catch (SQLException ex) {
753 logger.log(Level.SEVERE,
"Unable to get object_id or set_name from result set", ex);
756 }
catch (SQLException ex) {
757 logger.log(Level.SEVERE,
"Failed to get interesting file set names", ex);
766 static class LastActivityDateAttribute
extends AttributeType {
778 static class FirstActivityDateAttribute
extends AttributeType {
781 public DiscoveryKeyUtils.GroupKey getGroupKey(Result result) {
782 return new DiscoveryKeyUtils.FirstActivityDateGroupKey(result);
791 static class PageViewsAttribute
extends AttributeType {
794 public DiscoveryKeyUtils.GroupKey getGroupKey(Result result) {
795 return new DiscoveryKeyUtils.PageViewsGroupKey(result);
802 static class ObjectDetectedAttribute
extends AttributeType {
805 public DiscoveryKeyUtils.GroupKey getGroupKey(Result file) {
806 return new DiscoveryKeyUtils.ObjectDetectedGroupKey((ResultFile) file);
810 public void addAttributeToResults(List<Result> results, SleuthkitCase caseDb,
811 CentralRepository centralRepoDb)
throws DiscoveryException {
815 String selectQuery =
createSetNameClause(results, BlackboardArtifact.ARTIFACT_TYPE.TSK_OBJECT_DETECTED.getTypeID(),
816 BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DESCRIPTION.getTypeID());
818 ObjectDetectedNamesCallback callback =
new ObjectDetectedNamesCallback(results);
820 caseDb.getCaseDbAccessManager().select(selectQuery, callback);
821 }
catch (TskCoreException ex) {
822 throw new DiscoveryException(
"Error looking up object detected attributes", ex);
833 List<Result> results;
841 this.results = results;
848 Map<Long, ResultFile> tempMap =
new HashMap<>();
849 for (
Result result : results) {
859 Long objId = rs.getLong(
"object_id");
860 String setName = rs.getString(
"set_name");
862 tempMap.get(objId).addObjectDetectedName(setName);
864 }
catch (SQLException ex) {
865 logger.log(Level.SEVERE,
"Unable to get object_id or set_name from result set", ex);
868 }
catch (SQLException ex) {
869 logger.log(Level.SEVERE,
"Failed to get object detected names", ex);
886 public void addAttributeToResults(List<Result> results, SleuthkitCase caseDb,
887 CentralRepository centralRepoDb)
throws DiscoveryException {
890 for (Result result : results) {
894 ResultFile file = (ResultFile) result;
895 List<ContentTag> contentTags = caseDb.getContentTagsByContent(file.getFirstInstance());
897 for (ContentTag tag : contentTags) {
898 result.
addTagName(tag.getName().getDisplayName());
901 }
catch (TskCoreException ex) {
902 throw new DiscoveryException(
"Error looking up file tag attributes", ex);
911 "DiscoveryAttributes.GroupingAttributeType.fileType.displayName=File Type",
912 "DiscoveryAttributes.GroupingAttributeType.frequency.displayName=Past Occurrences",
913 "DiscoveryAttributes.GroupingAttributeType.keywordList.displayName=Keyword",
914 "DiscoveryAttributes.GroupingAttributeType.size.displayName=File Size",
915 "DiscoveryAttributes.GroupingAttributeType.datasource.displayName=Data Source",
916 "DiscoveryAttributes.GroupingAttributeType.parent.displayName=Parent Folder",
917 "DiscoveryAttributes.GroupingAttributeType.hash.displayName=Hash Set",
918 "DiscoveryAttributes.GroupingAttributeType.interestingItem.displayName=Interesting Item",
919 "DiscoveryAttributes.GroupingAttributeType.tag.displayName=Tag",
920 "DiscoveryAttributes.GroupingAttributeType.object.displayName=Object Detected",
921 "DiscoveryAttributes.GroupingAttributeType.lastDate.displayName=Final Activity Date",
922 "DiscoveryAttributes.GroupingAttributeType.firstDate.displayName=First Activity Date",
923 "DiscoveryAttributes.GroupingAttributeType.pageViews.displayName=Page Views",
924 "DiscoveryAttributes.GroupingAttributeType.none.displayName=None",
925 "DiscoveryAttributes.GroupingAttributeType.previouslyNotable.displayName=Previous Notability",
926 "DiscoveryAttributes.GroupingAttributeType.webCategory.displayName=Domain Category"})
929 FREQUENCY(
new FrequencyAttribute(), Bundle.DiscoveryAttributes_GroupingAttributeType_frequency_displayName()),
930 KEYWORD_LIST_NAME(
new KeywordListAttribute(), Bundle.DiscoveryAttributes_GroupingAttributeType_keywordList_displayName()),
931 DATA_SOURCE(
new DataSourceAttribute(), Bundle.DiscoveryAttributes_GroupingAttributeType_datasource_displayName()),
933 HASH_LIST_NAME(
new HashHitsAttribute(), Bundle.DiscoveryAttributes_GroupingAttributeType_hash_displayName()),
934 INTERESTING_ITEM_SET(
new InterestingItemAttribute(), Bundle.DiscoveryAttributes_GroupingAttributeType_interestingItem_displayName()),
935 FILE_TAG(
new FileTagAttribute(), Bundle.DiscoveryAttributes_GroupingAttributeType_tag_displayName()),
936 OBJECT_DETECTED(
new ObjectDetectedAttribute(), Bundle.DiscoveryAttributes_GroupingAttributeType_object_displayName()),
937 LAST_ACTIVITY_DATE(
new LastActivityDateAttribute(), Bundle.DiscoveryAttributes_GroupingAttributeType_lastDate_displayName()),
938 FIRST_ACTIVITY_DATE(
new FirstActivityDateAttribute(), Bundle.DiscoveryAttributes_GroupingAttributeType_firstDate_displayName()),
939 PAGE_VIEWS(
new PageViewsAttribute(), Bundle.DiscoveryAttributes_GroupingAttributeType_pageViews_displayName()),
940 NO_GROUPING(
new NoGroupingAttribute(), Bundle.DiscoveryAttributes_GroupingAttributeType_none_displayName()),
941 PREVIOUSLY_NOTABLE(
new PreviouslyNotableAttribute(), Bundle.DiscoveryAttributes_GroupingAttributeType_previouslyNotable_displayName()),
942 DOMAIN_CATEGORY(
new DomainCategoryAttribute(), Bundle.DiscoveryAttributes_GroupingAttributeType_webCategory_displayName());
956 this.attributeType = attributeType;
957 this.displayName = displayName;
971 return attributeType;
980 return Arrays.asList(FILE_SIZE, FREQUENCY, PARENT_PATH, OBJECT_DETECTED, HASH_LIST_NAME, INTERESTING_ITEM_SET);
990 return Arrays.asList(PAGE_VIEWS, FREQUENCY, LAST_ACTIVITY_DATE, FIRST_ACTIVITY_DATE, PREVIOUSLY_NOTABLE, DOMAIN_CATEGORY);
992 return Arrays.asList(PAGE_VIEWS, LAST_ACTIVITY_DATE, FIRST_ACTIVITY_DATE, DOMAIN_CATEGORY);
1007 if (hashesToLookUp.isEmpty()) {
1011 String hashes = String.join(
"','", hashesToLookUp);
1012 hashes =
"'" + hashes +
"'";
1017 String selectClause =
" value, COUNT(value) FROM "
1018 +
"(SELECT DISTINCT case_id, value FROM " + tableName
1019 +
" WHERE value IN ("
1021 +
")) AS foo GROUP BY value";
1027 logger.log(Level.WARNING,
"Error getting frequency counts from Central Repository", ex);
1049 String objIdList =
"";
1050 for (
Result result : results) {
1055 if (!objIdList.isEmpty()) {
1063 return "blackboard_artifacts.obj_id AS object_id, blackboard_attributes.value_text AS set_name "
1064 +
"FROM blackboard_artifacts "
1065 +
"INNER JOIN blackboard_attributes ON blackboard_artifacts.artifact_id=blackboard_attributes.artifact_id "
1066 +
"WHERE blackboard_attributes.artifact_type_id=\'" + artifactTypeID +
"\' "
1067 +
"AND blackboard_attributes.attribute_type_id=\'" + setNameAttrID +
"\' "
1068 +
"AND blackboard_artifacts.obj_id IN (" + objIdList
static String createSetNameClause(List< Result > results, int artifactTypeID, int setNameAttrID)
AttributeType getAttributeType()
DiscoveryKeyUtils.GroupKey getGroupKey(Result file)
DomainPreviouslyNotableCallback(Map< String, List< ResultDomain >> domainLookup)
static List< GroupingAttributeType > getOptionsForGroupingForDomains()
DomainFrequencyCallback(Map< String, List< ResultDomain >> domainLookup)
SearchData.Frequency getFrequency()
void process(ResultSet rs)
abstract TskData.FileKnown getKnown()
static final Logger logger
void process(ResultSet resultSet)
static void computeFrequency(Set< String > hashesToLookUp, List< ResultFile > currentFiles, CentralRepository centralRepoDb)
void addTagName(String tagName)
static Frequency fromCount(long count)
abstract SearchData.Type getType()
FrequencyCallback(List< ResultFile > files)
GroupingAttributeType(AttributeType attributeType, String displayName)
static List< GroupingAttributeType > getOptionsForGroupingForFiles()
final void setFrequency(SearchData.Frequency frequency)
static String correlationTypeToInstanceTableName(CorrelationAttributeInstance.Type type)
static String normalize(CorrelationAttributeInstance.Type attributeType, String data)
AbstractFile getFirstInstance()
void process(ResultSet resultSet)
DiscoveryKeyUtils.GroupKey getGroupKey(Result result)
void addAttributeToResults(List< Result > results, SleuthkitCase caseDb, CentralRepository centralRepoDb)
static final int DOMAIN_TYPE_ID
void process(ResultSet rs)
abstract DiscoveryKeyUtils.GroupKey getGroupKey(Result result)
final List< ResultFile > files
final Map< String, List< ResultDomain > > domainLookup
void process(ResultSet rs)
void addKeywordListName(String keywordListName)
final Map< String, List< ResultDomain > > domainLookup
void process(ResultSet rs)
static Map< String, List< ResultDomain > > organizeByValue(List< ResultDomain > domainsBatch, CorrelationAttributeInstance.Type attributeType)
synchronized static Logger getLogger(String name)
CorrelationAttributeInstance.Type getCorrelationTypeById(int typeId)
void process(ResultSet resultSet)
static String createCSV(Set< String > values)
void processSelectClause(String selectClause, InstanceTableCallback instanceTableCallback)
static void queryDomainFrequency(List< ResultDomain > domainsToQuery, CentralRepository centralRepository)
static final int FILES_TYPE_ID
final AttributeType attributeType
static boolean isEnabled()