19 package org.sleuthkit.autopsy.keywordsearch;
21 import com.google.common.base.CharMatcher;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.List;
27 import java.util.logging.Level;
28 import java.util.regex.Matcher;
29 import java.util.regex.Pattern;
30 import org.apache.commons.lang3.StringUtils;
31 import org.apache.commons.lang3.math.NumberUtils;
32 import org.apache.commons.validator.routines.DomainValidator;
33 import org.apache.solr.client.solrj.SolrQuery;
34 import org.apache.solr.client.solrj.SolrQuery.SortClause;
35 import org.apache.solr.client.solrj.SolrRequest;
36 import org.apache.solr.client.solrj.response.QueryResponse;
37 import org.apache.solr.common.SolrDocument;
38 import org.apache.solr.common.SolrDocumentList;
39 import org.apache.solr.common.params.CursorMarkParams;
40 import org.openide.util.NbBundle;
51 import org.
sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
/**
 * Keyword search query that sends a regular expression to the Solr index
 * (see performQuery) and then re-matches the returned document text with
 * java.util.regex to extract the individual hits (see createKeywordHits).
 * It also contains the helpers that turn a hit into a keyword-hit analysis
 * result or, for card-number keywords, a credit card account instance.
 */
71 final class RegexQuery
implements KeywordSearchQuery {
// Logger used by both the instance and the static methods of this class.
73 public static final Logger LOGGER = Logger.getLogger(RegexQuery.class.getName());
// Escape sequences that validate() searches for in the keyword string.
// NOTE(review): going by the name, these are regex constructs the search
// backend cannot handle - confirm against validate()'s full body.
85 private static final CharSequence[] UNSUPPORTED_CHARS = {
"\\d",
"\\D",
"\\w",
"\\W",
"\\s",
"\\S",
"\\n",
86 "\\t",
"\\r",
"\\f",
"\\a",
"\\e",
"\\v",
"\\V",
"\\h",
"\\H",
"\\p"};
// Page size handed to SolrQuery.setRows() for each cursor-mark request.
88 private static final int MAX_RESULTS_PER_CURSOR_MARK = 512;
// Minimum hit length required before the top-level-domain check in
// createKeywordHits is attempted.
89 private static final int MIN_EMAIL_ADDR_LENGTH = 8;
// One-character string for code point 171 (U+00AB, the left-pointing double
// angle quotation mark).
90 private static final String SNIPPET_DELIMITER = String.valueOf(Character.toChars(171));
// Candidate card number: a leading digit 2-6 followed by 11 to 18 further
// digits, each optionally preceded by one space or hyphen (12-19 digits in
// total). The whole match is exposed as the named group "ccn".
102 static final Pattern CREDIT_CARD_NUM_PATTERN
103 = Pattern.compile(
"(?<ccn>[2-6]([ -]?[0-9]){11,18})");
// Track 1 layout. Visible named groups: accountNumber (same shape as "ccn"),
// name (2-26 characters other than '^'), expiration (4 digits, or a '^' in
// its place), serviceCode (3 digits, or a '^' in its place) and
// discretionary (any run of characters other than '?').
// NOTE(review): only part of the concatenated pattern is visible here; the
// leading sentinel, separators and closing groups should be checked in the
// full source.
104 static final Pattern CREDIT_CARD_TRACK1_PATTERN = Pattern.compile(
116 +
"(?<accountNumber>[2-6]([ -]?[0-9]){11,18})"
118 +
"(?<name>[^^]{2,26})"
120 +
"(?:(?:\\^|(?<expiration>\\d{4}))"
121 +
"(?:(?:\\^|(?<serviceCode>\\d{3}))"
122 +
"(?:(?<discretionary>[^?]*)"
// Track 2 layout. Visible named groups: accountNumber, expiration (4 digits),
// serviceCode (3 digits) and discretionary (any run of characters other than
// ':', ';', '<', '=', '>' and '?').
// NOTE(review): only part of the concatenated pattern is visible here.
126 static final Pattern CREDIT_CARD_TRACK2_PATTERN = Pattern.compile(
137 +
"(?<accountNumber>[2-6]([ -]?[0-9]){11,18})"
139 +
"(?:(?<expiration>\\d{4})"
140 +
"(?:(?<serviceCode>\\d{3})"
141 +
"(?:(?<discretionary>[^:;<=>?]*)"
// Attribute type used by createCCNAccount to record the Solr document id of
// a hit found in unused or unallocated block files.
145 static final BlackboardAttribute.Type KEYWORD_SEARCH_DOCUMENT_ID =
new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
// Filters registered through addFilter().
147 private final List<KeywordQueryFilter> filters =
new ArrayList<>();
// Keyword list and keyword supplied to the constructor.
148 private final KeywordList keywordList;
149 private final Keyword originalKeyword;
// Search term of originalKeyword, captured once in the constructor.
150 private final String keywordString;
// True when keywordString itself starts / ends with ".*"; performQuery then
// does not add its own wildcard on that side.
151 private final boolean queryStringContainsWildcardPrefix;
152 private final boolean queryStringContainsWildcardSuffix;
// escapedQuery is assigned by escape() from
// KeywordSearchUtil.escapeLuceneQuery(keywordString).
// NOTE(review): 'escaped' is presumably the flag behind isEscaped() - confirm.
154 private boolean escaped;
155 private String escapedQuery;
// Solr field to search; performQuery falls back to CONTENT_STR when null.
156 private String field = Server.Schema.CONTENT_STR.toString();
/**
 * Creates a regex query for a single keyword.
 *
 * @param keywordList the keyword list stored with this query
 * @param keyword     the keyword to search for; its search term becomes
 *                    the query string and is inspected for a leading or
 *                    trailing ".*"
 */
164 RegexQuery(KeywordList keywordList, Keyword keyword) {
165 this.keywordList = keywordList;
166 this.originalKeyword = keyword;
167 this.keywordString = keyword.getSearchTerm();
// Remember whether the user already supplied wildcards so that
// performQuery does not add a second one on that side.
169 this.queryStringContainsWildcardPrefix = this.keywordString.startsWith(
".*");
170 this.queryStringContainsWildcardSuffix = this.keywordString.endsWith(
".*");
174 public KeywordList getKeywordList() {
/**
 * Checks whether the keyword string can be used as a regex query. The
 * visible checks are: the string is not empty, it compiles as a Java
 * pattern with UNICODE_CHARACTER_CLASS, and it is scanned for every entry
 * of UNSUPPORTED_CHARS.
 *
 * NOTE(review): the return statements are not visible; presumably each
 * failed check yields false - confirm.
 */
179 public boolean validate() {
180 if (keywordString.isEmpty()) {
// Compiled only to detect syntax errors; the resulting Pattern is discarded.
185 Pattern.compile(keywordString, Pattern.UNICODE_CHARACTER_CLASS);
191 for (CharSequence c : UNSUPPORTED_CHARS) {
192 if (keywordString.contains(c)) {
197 }
// PatternSyntaxException is a subclass of IllegalArgumentException, so a
// malformed expression lands here.
catch (IllegalArgumentException ex) {
/**
 * Runs the keyword as a Solr regex query, pages through the matching
 * documents with cursor marks, extracts the hits from each document via
 * createKeywordHits and files them in a QueryResults object under a
 * Keyword built from the matched text.
 *
 * @throws NoOpenCoreException declared by this method; the throwing call is
 *                             not identifiable from the visible lines
 */
203 public QueryResults performQuery() throws NoOpenCoreException {
205 final Server solrServer = KeywordSearch.getServer();
206 SolrQuery solrQuery =
new SolrQuery();
// No leading ".*" is added when the term already starts with ".*" or is
// anchored with "^".
225 boolean skipWildcardPrefix = queryStringContainsWildcardPrefix || getQueryString().startsWith(
"^");
// Likewise for the trailing side; a final "$" only counts as an anchor when
// it is not the escaped form "\$".
226 boolean skipWildcardSuffix = queryStringContainsWildcardSuffix
227 || (getQueryString().endsWith(
"$") && (!getQueryString().endsWith(
"\\$")));
// Literal terms are sent in their Lucene-escaped form.
243 String queryString = (originalKeyword.searchTermIsLiteral() ? getEscapedQueryString() : getQueryString());
// The query is lower-cased for index schema versions 2.1 and above.
// NOTE(review): presumably because the indexed text is lower-cased from
// that schema version on - confirm against the schema.
244 double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
245 if (indexSchemaVersion >= 2.1) {
246 queryString = queryString.toLowerCase();
// The query has the form <field>:/<regex>/ with ".*" added on each side
// unless skipped above.
249 solrQuery.setQuery((field == null ? Server.Schema.CONTENT_STR.toString() : field) +
":/"
250 + (skipWildcardPrefix ?
"" :
".*")
254 + (skipWildcardSuffix ?
"" :
".*") +
"/");
// Request only the fields that createKeywordHits reads.
257 solrQuery.setFields(Server.Schema.CONTENT_STR.toString(), Server.Schema.ID.toString(), Server.Schema.CHUNK_SIZE.toString());
// Each filter's string form is added as a Solr filter query.
260 .map(KeywordQueryFilter::toString)
261 .forEach(solrQuery::addFilterQuery);
// Cursor-mark paging: fixed page size and an ascending sort on document id.
263 solrQuery.setRows(MAX_RESULTS_PER_CURSOR_MARK);
265 solrQuery.setSort(SortClause.asc(Server.Schema.ID.toString()));
267 String cursorMark = CursorMarkParams.CURSOR_MARK_START;
268 SolrDocumentList resultList;
269 boolean allResultsProcessed =
false;
270 QueryResults results =
new QueryResults(
this);
272 while (!allResultsProcessed) {
274 solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
275 QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
276 resultList = response.getResults();
278 for (SolrDocument resultDoc : resultList) {
280 List<KeywordHit> keywordHits = createKeywordHits(resultDoc);
281 for (KeywordHit hit : keywordHits) {
// Hits are grouped under a Keyword built from the text that actually
// matched, keeping the original keyword's list name and original term.
282 Keyword keywordInstance =
new Keyword(hit.getHit(),
true,
true, originalKeyword.getListName(), originalKeyword.getOriginalTerm());
283 List<KeywordHit> hitsForKeyword = results.getResults(keywordInstance);
284 if (hitsForKeyword == null) {
285 hitsForKeyword =
new ArrayList<>();
286 results.addResult(keywordInstance, hitsForKeyword);
288 hitsForKeyword.add(hit);
290 }
catch (TskCoreException ex) {
291 LOGGER.log(Level.SEVERE,
"Error creating keyword hits", ex);
// An unchanged cursor mark means the last page has been consumed.
295 String nextCursorMark = response.getNextCursorMark();
296 if (cursorMark.equals(nextCursorMark)) {
297 allResultsProcessed =
true;
299 cursorMark = nextCursorMark;
300 }
catch (KeywordSearchModuleException ex) {
301 LOGGER.log(Level.SEVERE,
"Error executing Regex Solr Query: " + keywordString, ex);
// NOTE(review): ex.getCause() can be null, in which case this line throws
// a NullPointerException instead of notifying the user - consider a guard.
302 MessageNotifyUtil.Notify.error(NbBundle.getMessage(Server.class,
"Server.query.exception.msg", keywordString), ex.getCause().getMessage());
/**
 * Extracts the keyword hits contained in one Solr document by re-running
 * the search as a java.util.regex match over the document's CONTENT_STR
 * values.
 *
 * @param solrDoc a document returned by the Solr query; its ID, CHUNK_SIZE
 *                and CONTENT_STR fields are read
 *
 * @return the hits collected for this document
 *
 * @throws TskCoreException if any Throwable is raised while processing the
 *                          document
 */
309 private List<KeywordHit> createKeywordHits(SolrDocument solrDoc)
throws TskCoreException {
// Hit strings already seen in this document, consulted before a new hit is
// recorded.
311 final HashMap<String, String> keywordsFoundInThisDocument =
new HashMap<>();
313 List<KeywordHit> hits =
new ArrayList<>();
314 final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
315 final Integer chunkSize = (Integer) solrDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
317 final Collection<Object> content_str = solrDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
319 String searchPattern;
320 if (originalKeyword.searchTermIsLiteral()) {
// Literal term: quote it and let the match grow over adjacent word
// characters, periods and apostrophes on both sides.
334 searchPattern =
"[\\w[\\.']]*" + Pattern.quote(keywordString.toLowerCase()) +
"[\\w[\\.']]*";
// The keyword string is used directly as the expression.
336 searchPattern = keywordString;
339 final Pattern pattern = Pattern.compile(searchPattern, Pattern.CASE_INSENSITIVE);
342 for (Object content_obj : content_str) {
343 String content = (String) content_obj;
344 Matcher hitMatcher = pattern.matcher(content);
// find(offset) resets the matcher and searches from the given index.
347 while (hitMatcher.find(offset)) {
// NOTE(review): a hit starting at or beyond chunkSize is presumably inside
// the overlap with the next chunk and is skipped - confirm.
352 if (chunkSize != null && hitMatcher.start() >= chunkSize) {
356 String hit = hitMatcher.group();
361 if (
"".equals(hit)) {
// The next search starts where this match ended.
365 offset = hitMatcher.end();
366 final ATTRIBUTE_TYPE artifactAttributeType = originalKeyword.getArtifactAttributeType();
// Phone number / IP address searches without a user-supplied trailing
// wildcard: a single leading character that is not a digit (for phone
// numbers, not a digit and not '(') and a single trailing non-digit are
// removed from the hit.
374 if (!queryStringContainsWildcardSuffix
375 && (artifactAttributeType == ATTRIBUTE_TYPE.TSK_PHONE_NUMBER
376 || artifactAttributeType == ATTRIBUTE_TYPE.TSK_IP_ADDRESS)) {
377 if (artifactAttributeType == ATTRIBUTE_TYPE.TSK_PHONE_NUMBER) {
379 hit = hit.replaceAll(
"^[^0-9\\(]",
"");
382 hit = hit.replaceAll(
"^[^0-9]",
"");
385 hit = hit.replaceAll(
"[^0-9]$",
"");
// Literal searches: strip boundary characters from both ends of the hit.
410 if (originalKeyword.searchTermIsLiteral()) {
411 hit = hit.replaceAll(
"^" + KeywordSearchList.BOUNDARY_CHARACTERS +
"*",
"");
412 hit = hit.replaceAll(KeywordSearchList.BOUNDARY_CHARACTERS +
"*$",
"");
427 if (keywordsFoundInThisDocument.containsKey(hit)) {
430 keywordsFoundInThisDocument.put(hit, hit);
// Keywords without an artifact attribute type need no extra validation.
432 if (artifactAttributeType == null) {
433 hits.add(
new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit));
435 switch (artifactAttributeType) {
// Accept the hit only when it is long enough and the text from its last
// '.' onwards is a valid top-level domain.
442 if (hit.length() >= MIN_EMAIL_ADDR_LENGTH
443 && DomainValidator.getInstance(
true).isValidTld(hit.substring(hit.lastIndexOf(
'.')))) {
444 hits.add(
new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit));
448 case TSK_CARD_NUMBER:
// Shrink the matching region from the right, down to 12 characters, looking
// for a candidate that passes CreditCardValidator.isValidCCN.
454 Matcher ccnMatcher = CREDIT_CARD_NUM_PATTERN.matcher(hit);
456 for (
int rLength = hit.length(); rLength >= 12; rLength--) {
457 ccnMatcher.region(0, rLength);
458 if (ccnMatcher.find()) {
459 final String group = ccnMatcher.group(
"ccn");
460 if (CreditCardValidator.isValidCCN(group)) {
461 hits.add(
new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit));
468 hits.add(
new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit));
474 }
// Catches Throwable, so Errors as well as exceptions are converted into a
// TskCoreException that names the document.
// NOTE(review): only the message is carried over; the original Throwable
// is not attached as the cause - confirm this is intentional.
catch (Throwable error) {
483 throw new TskCoreException(
"Failed to create keyword hits for Solr document id " + docId +
" due to " + error.getMessage());
/**
 * Registers an additional filter for this query by appending it to the
 * internal filter list.
 *
 * @param filter the filter to add
 */
489 public void addFilter(KeywordQueryFilter filter) {
490 this.filters.add(filter);
494 public void setField(String field) {
499 public void setSubstringQuery() {
/**
 * Computes the Lucene-escaped form of the keyword string, unless
 * isEscaped() already reports true.
 */
503 synchronized public void escape() {
504 if (isEscaped() ==
false) {
505 escapedQuery = KeywordSearchUtil.escapeLuceneQuery(keywordString);
511 synchronized public boolean isEscaped() {
516 public boolean isLiteral() {
/**
 * Returns the unescaped query string.
 *
 * @return the search term of the original keyword
 */
521 public String getQueryString() {
522 return originalKeyword.getSearchTerm();
526 synchronized public String getEscapedQueryString() {
527 if (
false == isEscaped()) {
/**
 * Instance variant: delegates to the static overload, supplying this
 * query's original keyword.
 *
 * @param content      the content the hit was found in
 * @param foundKeyword the keyword that was actually matched
 * @param hit          the hit to create an artifact for
 * @param snippet      preview text for the hit, may be null
 * @param listName     name of the keyword list, may be blank
 * @param ingestJobId  passed through to the static overload
 *
 * @return whatever the static overload returns
 */
550 public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
551 return createKeywordHitArtifact(content, originalKeyword, foundKeyword, hit, snippet, listName, ingestJobId);
/**
 * Builds the attributes for a keyword hit and creates a TSK_KEYWORD_HIT
 * analysis result on the given content. Keywords whose artifact attribute
 * type is TSK_CARD_NUMBER are handed to createCCNAccount instead.
 *
 * @param content      the content the hit was found in; a null value is
 *                     logged as a warning
 * @param originalKW   the keyword that was searched for
 * @param foundKeyword the keyword that was actually matched
 * @param hit          the hit to create an artifact for
 * @param snippet      preview text for the hit, may be null
 * @param listName     name of the keyword list, may be blank
 * @param ingestJobId  forwarded to createCCNAccount
 *
 * @return the analysis result created for the hit
 *         NOTE(review): the values returned on the null-content,
 *         card-number and exception paths are not visible - confirm.
 */
555 public static BlackboardArtifact createKeywordHitArtifact(Content content, Keyword originalKW, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
556 final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
558 if (content == null) {
559 LOGGER.log(Level.WARNING,
"Error adding artifact for keyword hit to blackboard");
// Card numbers become credit card account instances rather than plain
// keyword hits.
566 if (originalKW.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
567 createCCNAccount(content, originalKW, foundKeyword, hit, snippet, listName, ingestJobId);
575 Collection<BlackboardAttribute> attributes =
new ArrayList<>();
// The matched term is stored lower-cased.
577 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm().toLowerCase()));
// The original search term is recorded as well, unless the keyword is both
// a whole-word and a literal search.
578 if(!originalKW.searchTermIsWholeWord() || !originalKW.searchTermIsLiteral()) {
579 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, originalKW.getSearchTerm()));
582 if (StringUtils.isNotBlank(listName)) {
583 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
585 if (snippet != null) {
586 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
// Link back to the source artifact when the hit carries an artifact id.
589 hit.getArtifactID().ifPresent(artifactID
590 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
// Search type: SUBSTRING for literal keywords; the REGEX variant below is
// added on the other path.
593 if (originalKW.searchTermIsLiteral()) {
594 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
596 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
// The result is scored as likely notable and uses the list name as its
// configuration argument.
600 return content.newAnalysisResult(
601 BlackboardArtifact.Type.TSK_KEYWORD_HIT, Score.SCORE_LIKELY_NOTABLE,
602 null, listName, null, attributes)
603 .getAnalysisResult();
604 }
catch (TskCoreException e) {
605 LOGGER.log(Level.SEVERE,
"Error adding bb attributes for terms search artifact", e);
/**
 * Creates a credit card account instance for a card-number keyword hit.
 * The hit's snippet is parsed with the track 1 and track 2 patterns, bank
 * identification details are looked up from the account number, and the
 * resulting attributes are passed to the communications manager.
 *
 * @param content      the content the hit was found in
 * @param originalKW   the keyword that was searched for; must have the
 *                     TSK_CARD_NUMBER artifact attribute type
 * @param foundKeyword the keyword that was actually matched (used in log
 *                     messages)
 * @param hit          the hit whose snippet is parsed
 * @param snippet      preview text for the hit, may be null
 * @param listName     name of the keyword list, may be blank
 * @param ingestJobId  forwarded to createAccountFileInstance
 */
610 private static void createCCNAccount(Content content, Keyword originalKW, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
612 final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
// Guard against being called for a keyword that is not a card number.
614 if (originalKW.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
615 LOGGER.log(Level.SEVERE,
"Keyword hit is not a credit card number");
623 List<BlackboardAttribute> attributes =
new ArrayList<>();
// Track 1 is parsed before track 2; the helpers only store an attribute
// type that is not in the map yet, so track 1 values take precedence.
625 Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap =
new HashMap<>();
626 Matcher matcher = CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
627 if (matcher.find()) {
628 parseTrack1Data(parsedTrackAttributeMap, matcher);
630 matcher = CREDIT_CARD_TRACK2_PATTERN.matcher(hit.getSnippet());
631 if (matcher.find()) {
632 parseTrack2Data(parsedTrackAttributeMap, matcher);
// Without a parsed account number there is nothing to attach the account
// to; the failure is logged with the artifact id or the content id.
634 final BlackboardAttribute ccnAttribute = parsedTrackAttributeMap.get(
new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_CARD_NUMBER));
635 if (ccnAttribute == null || StringUtils.isBlank(ccnAttribute.getValueString())) {
637 if (hit.isArtifactHit()) {
638 LOGGER.log(Level.SEVERE, String.format(
"Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getArtifactID().get()));
641 LOGGER.log(Level.SEVERE, String.format(
"Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getContentID()));
642 }
catch (TskCoreException ex) {
643 LOGGER.log(Level.SEVERE, String.format(
"Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s' ", foundKeyword.getSearchTerm(), hit.getSnippet()));
644 LOGGER.log(Level.SEVERE,
"There was a error getting contentID for keyword hit.", ex);
649 attributes.addAll(parsedTrackAttributeMap.values());
// The first eight digits of the account number are used as the bank
// identification number for the lookup.
655 final int bin = Integer.parseInt(ccnAttribute.getValueString().substring(0, 8));
656 CreditCards.BankIdentificationNumber binInfo = CreditCards.getBINInfo(bin);
657 if (binInfo != null) {
// Each optional detail is added only when the lookup provides it.
658 binInfo.getScheme().ifPresent(scheme
659 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_SCHEME, MODULE_NAME, scheme)));
660 binInfo.getCardType().ifPresent(cardType
661 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_TYPE, MODULE_NAME, cardType)));
662 binInfo.getBrand().ifPresent(brand
663 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BRAND_NAME, MODULE_NAME, brand)));
664 binInfo.getBankName().ifPresent(bankName
665 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BANK_NAME, MODULE_NAME, bankName)));
666 binInfo.getBankPhoneNumber().ifPresent(phoneNumber
667 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PHONE_NUMBER, MODULE_NAME, phoneNumber)));
668 binInfo.getBankURL().ifPresent(url
669 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_URL, MODULE_NAME, url)));
670 binInfo.getCountry().ifPresent(country
671 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_COUNTRY, MODULE_NAME, country)));
672 binInfo.getBankCity().ifPresent(city
673 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CITY, MODULE_NAME, city)));
// For unused or unallocated block files the Solr document id is recorded
// with the hit.
680 if (content instanceof AbstractFile) {
681 AbstractFile file = (AbstractFile) content;
682 if (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS
683 || file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) {
684 attributes.add(
new BlackboardAttribute(KEYWORD_SEARCH_DOCUMENT_ID, MODULE_NAME, hit.getSolrDocumentId()));
688 if (StringUtils.isNotBlank(listName)) {
689 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
691 if (snippet != null) {
692 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
695 hit.getArtifactID().ifPresent(artifactID
696 -> attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
// Card number hits are always recorded with the REGEX search type.
699 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
// Create the account instance in the current case, keyed by the parsed
// account number.
705 Case.getCurrentCaseThrows().getSleuthkitCase().getCommunicationsManager().createAccountFileInstance(Account.Type.CREDIT_CARD,
706 ccnAttribute.getValueString(), MODULE_NAME, content, attributes, ingestJobId);
707 }
catch (TskCoreException | NoCurrentCaseException ex) {
708 LOGGER.log(Level.SEVERE,
"Error creating CCN account instance", ex);
/**
 * Copies the track 2 fields from a successful match into the attribute
 * map: account number, expiration, service code, discretionary data and
 * LRC. Attribute types already present in the map are left untouched.
 *
 * NOTE(review): the "LRC" named group is not part of the pattern fragments
 * visible in this file section - confirm both track patterns define it.
 *
 * @param attributesMap map to add the parsed attributes to
 * @param matcher       a matcher that has already matched one of the
 *                      track patterns
 */
721 static private void parseTrack2Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributesMap, Matcher matcher) {
722 addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_NUMBER,
"accountNumber", matcher);
723 addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_EXPIRATION,
"expiration", matcher);
724 addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_SERVICE_CODE,
"serviceCode", matcher);
725 addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_DISCRETIONARY,
"discretionary", matcher);
726 addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_LRC,
"LRC", matcher);
/**
 * Copies the track 1 fields from a successful match into the attribute
 * map: everything parseTrack2Data extracts, plus the "name" group stored
 * as TSK_NAME_PERSON.
 *
 * @param attributeMap map to add the parsed attributes to
 * @param matcher      a matcher that has already matched the track 1
 *                     pattern
 */
738 static private void parseTrack1Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, Matcher matcher) {
739 parseTrack2Data(attributeMap, matcher);
740 addAttributeIfNotAlreadyCaptured(attributeMap, ATTRIBUTE_TYPE.TSK_NAME_PERSON,
"name", matcher);
/**
 * Stores the value of a named regex group as an attribute, unless the map
 * already holds an attribute of that type. Blank values are not stored.
 *
 * @param attributeMap map to add the attribute to
 * @param attrType     attribute type to create
 * @param groupName    name of the capture group to read from the matcher
 * @param matcher      a matcher that has already matched
 */
755 static private void addAttributeIfNotAlreadyCaptured(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) {
756 BlackboardAttribute.Type type =
new BlackboardAttribute.Type(attrType);
758 if (!attributeMap.containsKey(type)) {
759 String value = matcher.group(groupName);
// Card numbers: keep the text exactly as matched under TSK_KEYWORD, then
// strip spaces and hyphens before storing the number itself.
// NOTE(review): MODULE_NAME has no visible declaration in this scope -
// confirm where it is defined.
760 if (attrType.equals(ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) {
761 attributeMap.put(
new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD),
762 new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, value));
763 value = CharMatcher.anyOf(
" -").removeFrom(value);
766 if (StringUtils.isNotBlank(value)) {
767 attributeMap.put(type,
new BlackboardAttribute(attrType, MODULE_NAME, value));