19 package org.sleuthkit.autopsy.keywordsearch;
21 import com.google.common.base.CharMatcher;
22 import com.google.common.collect.ArrayListMultimap;
23 import com.google.common.collect.ListMultimap;
24 import java.util.ArrayList;
25 import java.util.Collection;
26 import java.util.HashMap;
27 import java.util.HashSet;
28 import java.util.List;
31 import java.util.logging.Level;
32 import java.util.regex.Matcher;
33 import java.util.regex.Pattern;
34 import org.apache.commons.lang.StringUtils;
35 import org.apache.commons.validator.routines.DomainValidator;
36 import org.apache.solr.client.solrj.SolrQuery;
37 import org.apache.solr.client.solrj.SolrQuery.SortClause;
38 import org.apache.solr.client.solrj.SolrRequest;
39 import org.apache.solr.client.solrj.response.QueryResponse;
40 import org.apache.solr.common.SolrDocument;
41 import org.apache.solr.common.SolrDocumentList;
42 import org.apache.solr.common.params.CursorMarkParams;
43 import org.openide.util.NbBundle;
75 final class RegexQuery
implements KeywordSearchQuery {
77 public static final Logger LOGGER = Logger.getLogger(RegexQuery.class.getName());
78 private final List<KeywordQueryFilter> filters =
new ArrayList<>();
80 private final KeywordList keywordList;
81 private final Keyword originalKeyword;
82 private String field = Server.Schema.CONTENT_STR.toString();
83 private final String keywordString;
84 static final private int MAX_RESULTS_PER_CURSOR_MARK = 512;
85 private boolean escaped;
86 private String escapedQuery;
88 private final int MIN_EMAIL_ADDR_LENGTH = 8;
90 private final ListMultimap<Keyword, KeywordHit> hitsMultiMap = ArrayListMultimap.create();
100 private static final CharSequence[] UNSUPPORTED_CHARS = {
"\\d",
"\\D",
"\\w",
"\\W",
"\\s",
"\\S",
"\\n",
101 "\\t",
"\\r",
"\\f",
"\\a",
"\\e",
"\\v",
"\\V",
"\\h",
"\\H",
"\\p"};
103 private boolean queryStringContainsWildcardPrefix =
false;
104 private boolean queryStringContainsWildcardSuffix =
false;
/**
 * Creates a regular expression query for a single keyword.
 *
 * @param keywordList the keyword list the keyword belongs to
 * @param keyword     the keyword whose search term is the regular expression
 */
RegexQuery(KeywordList keywordList, Keyword keyword) {
    this.keywordList = keywordList;
    this.originalKeyword = keyword;
    this.keywordString = keyword.getSearchTerm();

    // Remember whether the expression already carries a leading/trailing
    // ".*" so that the Solr query builder does not add a second one.
    this.queryStringContainsWildcardPrefix = this.keywordString.startsWith(".*");
    this.queryStringContainsWildcardSuffix = this.keywordString.endsWith(".*");
}
/**
 * Accessor for the keyword list associated with this query.
 * NOTE(review): the method body is not visible in this excerpt; presumably
 * it returns the keywordList field assigned in the constructor - confirm.
 *
 * @return a KeywordList
 */
127 public KeywordList getKeywordList() {
/**
 * Checks whether the keyword string is usable as a regular expression query.
 * The visible steps are: an empty-string check, a trial compilation with
 * java.util.regex.Pattern, and a scan for the sequences listed in
 * UNSUPPORTED_CHARS.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably each failing check yields false - confirm.
 *
 * @return a boolean validity verdict
 */
132 public boolean validate() {
// An empty keyword string is handled before any regex work is attempted.
133 if (keywordString.isEmpty()) {
// Trial compilation: Pattern.compile throws PatternSyntaxException (a
// subclass of IllegalArgumentException) for a malformed expression, which
// the catch clause below handles.
138 Pattern.compile(keywordString, Pattern.UNICODE_CHARACTER_CLASS);
// Plain substring search for each escape sequence in UNSUPPORTED_CHARS.
144 for (CharSequence c : UNSUPPORTED_CHARS) {
145 if (keywordString.contains(c)) {
150 }
catch (IllegalArgumentException ex) {
/**
 * Runs the regular expression against the Solr index, paging through the
 * matching documents with a cursor mark, and collects the hits found in each
 * document into hitsMultiMap before copying them into a QueryResults.
 * NOTE(review): several lines (try blocks, the stream source, the return)
 * are not visible in this excerpt.
 *
 * @return a QueryResults built from the collected hits
 * @throws NoOpenCoreException declared by the signature; the throwing call
 *                             is not identifiable from the visible lines
 */
156 public QueryResults performQuery() throws NoOpenCoreException {
158 final Server solrServer = KeywordSearch.getServer();
159 SolrQuery solrQuery =
new SolrQuery();
// Build "<field>:/<regex>/". A ".*" is added on either side unless the
// keyword string already starts/ends with one (flags set in the constructor).
// Falls back to the CONTENT_STR field when field is null.
178 solrQuery.setQuery((field == null ? Server.Schema.CONTENT_STR.toString() : field) +
":/"
179 + (queryStringContainsWildcardPrefix ?
"" :
".*") + getQueryString()
180 + (queryStringContainsWildcardSuffix ?
"" :
".*") +
"/");
// Only these three fields are requested back; createKeywordHits reads all of them.
183 solrQuery.setFields(Server.Schema.CONTENT_STR.toString(), Server.Schema.ID.toString(), Server.Schema.CHUNK_SIZE.toString());
// Each KeywordQueryFilter's string form is added to the query as a filter query.
186 .map(KeywordQueryFilter::toString)
187 .forEach(solrQuery::addFilterQuery);
189 solrQuery.setRows(MAX_RESULTS_PER_CURSOR_MARK);
// Results are sorted ascending on the ID field.
// NOTE(review): presumably required for cursor-mark paging - confirm against the Solr docs.
191 solrQuery.setSort(SortClause.asc(Server.Schema.ID.toString()));
193 String cursorMark = CursorMarkParams.CURSOR_MARK_START;
194 SolrDocumentList resultList ;
195 boolean allResultsProcessed =
false;
// One iteration per page of results.
197 while (!allResultsProcessed) {
199 solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
200 QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
201 resultList = response.getResults();
203 for (SolrDocument resultDoc : resultList) {
205 List<KeywordHit> keywordHits = createKeywordHits(resultDoc);
// Hits are keyed by a new Keyword built from the matched text, carrying over
// the list name and original term of the keyword being searched.
206 for (KeywordHit hit : keywordHits) {
207 hitsMultiMap.put(
new Keyword(hit.getHit(),
true,
true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), hit);
209 }
catch (TskException ex) {
// The paging loop ends when Solr hands back the same cursor mark that was sent.
214 String nextCursorMark = response.getNextCursorMark();
215 if (cursorMark.equals(nextCursorMark)) {
216 allResultsProcessed =
true;
218 cursorMark = nextCursorMark;
219 }
catch (KeywordSearchModuleException ex) {
// NOTE(review): ex.getCause() may be null, in which case the notification
// call below would throw a NullPointerException - confirm. Also confirm that
// the loop terminates after this handler runs; no update of
// allResultsProcessed is visible here.
220 LOGGER.log(Level.SEVERE,
"Error executing Regex Solr Query: " + keywordString, ex);
221 MessageNotifyUtil.Notify.error(NbBundle.getMessage(Server.class,
"Server.query.exception.msg", keywordString), ex.getCause().getMessage());
// Copy every keyword and its hits from the multimap into the result object.
224 QueryResults results =
new QueryResults(
this);
225 for (Keyword k : hitsMultiMap.keySet()) {
226 results.addResult(k, hitsMultiMap.get(k));
/**
 * Re-runs the regular expression with java.util.regex over the CONTENT_STR
 * values of one Solr document and turns each accepted match into a
 * KeywordHit with a short context snippet.
 * NOTE(review): several lines (the declaration of offset, the bodies of the
 * filtering branches) are not visible in this excerpt.
 *
 * @param solrDoc the Solr document to scan; its ID, CHUNK_SIZE and
 *                CONTENT_STR fields are read
 * @return the hits created for this document
 * @throws TskException declared by the signature; the throwing call is not
 *                      identifiable from the visible lines
 */
231 private List<KeywordHit> createKeywordHits(SolrDocument solrDoc)
throws TskException {
233 List<KeywordHit> hits =
new ArrayList<>();
234 final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
// chunkSize may be null; the check inside the match loop guards for that.
235 final Integer chunkSize = (Integer) solrDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
237 final Collection<Object> content_str = solrDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
// NOTE(review): compiled without Pattern.UNICODE_CHARACTER_CLASS, whereas
// validate() compiles with it - confirm the difference is intentional.
239 final Pattern pattern = Pattern.compile(keywordString);
240 for (Object content_obj : content_str) {
241 String content = (String) content_obj;
242 Matcher hitMatcher = pattern.matcher(content);
// Each search restarts from offset, which is moved to the end of the previous match below.
245 while (hitMatcher.find(offset)) {
246 StringBuilder snippet =
new StringBuilder();
// Matches that start at or beyond chunkSize get special handling.
// NOTE(review): the branch body is not visible; presumably such matches are skipped - confirm.
251 if (chunkSize != null && hitMatcher.start() >= chunkSize) {
255 String hit = hitMatcher.group();
257 offset = hitMatcher.end();
// Boundary clean-up for phone number and IP address keywords, applied only
// when the keyword string does not itself end in ".*".
265 if (!queryStringContainsWildcardSuffix
266 && (originalKeyword.getArtifactAttributeType() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PHONE_NUMBER
267 || originalKeyword.getArtifactAttributeType() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_IP_ADDRESS)) {
268 if (originalKeyword.getArtifactAttributeType() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PHONE_NUMBER) {
// Drops one leading character that is neither a digit nor "(".
270 hit = hit.replaceAll(
"^[^0-9\\(]",
"");
// Drops one leading non-digit character.
// NOTE(review): presumably the else-branch for IP addresses - confirm.
273 hit = hit.replaceAll(
"^[^0-9]",
"");
// Drops one trailing non-digit character.
276 hit = hit.replaceAll(
"[^0-9]$",
"");
// Email keywords: a hit is tested for minimum length and for a valid
// top-level domain after the last '.'.
// NOTE(review): if the hit contains no '.', lastIndexOf returns -1 and
// substring(-1) throws StringIndexOutOfBoundsException (only reachable when
// the length test passes) - confirm the regex guarantees a '.'.
279 if (originalKeyword.getArtifactAttributeType() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_EMAIL) {
282 if (hit.length() < MIN_EMAIL_ADDR_LENGTH
283 || !DomainValidator.getInstance(
true).isValidTld(hit.substring(hit.lastIndexOf(
'.')))) {
// Card number keywords: the "ccn" group is extracted, spaces and dashes are
// removed, and the result is put through the Luhn check.
292 if (originalKeyword.getArtifactAttributeType() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
293 Matcher ccnMatcher = CREDIT_CARD_NUM_PATTERN.matcher(hit);
294 if (ccnMatcher.find()) {
295 final String ccn = CharMatcher.anyOf(
" -").removeFrom(ccnMatcher.group(
"ccn"));
296 if (
false == TermsComponentQuery.CREDIT_CARD_NUM_LUHN_CHECK.isValid(ccn)) {
// Snippet: up to 20 characters before the match, then the delimiter code
// point 171 (U+00AB), then up to 20 characters after the match.
// NOTE(review): because both substring bounds are clamped to length - 1 and
// the end bound is exclusive, the last character of content never appears in
// the trailing context - confirm whether that is intended.
308 int maxIndex = content.length() - 1;
309 snippet.append(content.substring(Integer.max(0, hitMatcher.start() - 20), Integer.max(0, hitMatcher.start())));
310 snippet.appendCodePoint(171);
312 snippet.appendCodePoint(171);
313 snippet.append(content.substring(Integer.min(maxIndex, hitMatcher.end()), Integer.min(maxIndex, hitMatcher.end() + 20)));
315 hits.add(
new KeywordHit(docId, snippet.toString(), hit));
322 public void addFilter(KeywordQueryFilter filter) {
323 this.filters.add(filter);
/**
 * Setter taking a field name.
 * NOTE(review): the method body is not visible in this excerpt; presumably
 * it stores the value in the field member that performQuery() uses as the
 * Solr field to search - confirm.
 *
 * @param field the field name
 */
327 public void setField(String field) {
/**
 * Switches this query to substring matching.
 * NOTE(review): the method body is not visible in this excerpt, so what (if
 * anything) it changes cannot be stated from here - confirm.
 */
332 public void setSubstringQuery() {
/**
 * Computes the escaped form of the keyword string if that has not been done
 * yet. Synchronized on this instance.
 * NOTE(review): any statement following the assignment (for example one
 * updating the escaped flag) is not visible in this excerpt - confirm.
 */
336 synchronized public void escape() {
337 if (isEscaped() ==
false) {
// Lucene-escape the raw keyword string and cache the result.
338 escapedQuery = KeywordSearchUtil.escapeLuceneQuery(keywordString);
/**
 * Reports whether this query has been escaped. Synchronized on this instance.
 * NOTE(review): the method body is not visible in this excerpt; presumably
 * it returns the escaped field - confirm.
 *
 * @return a boolean escape-state flag
 */
344 synchronized public boolean isEscaped() {
/**
 * Reports whether this query is a literal (non-regex) query.
 * NOTE(review): the method body is not visible in this excerpt, so the
 * returned value cannot be stated from here - confirm.
 *
 * @return a boolean
 */
349 public boolean isLiteral() {
354 public String getQueryString() {
355 return originalKeyword.getSearchTerm();
/**
 * Accessor for the escaped query string. Synchronized on this instance.
 * NOTE(review): only the not-yet-escaped check is visible in this excerpt;
 * presumably it triggers escape() and the method then returns escapedQuery -
 * confirm.
 *
 * @return a String
 */
359 synchronized public String getEscapedQueryString() {
// Taken when the query has not been escaped yet.
360 if (
false == isEscaped()) {
376 private String getDocumentIds(Keyword keyword, KeywordHit hit) {
377 Set<String> documentIds =
new HashSet<>();
379 for (KeywordHit h : hitsMultiMap.get(keyword)) {
382 if (h.getSolrObjectId() == hit.getSolrObjectId() && !documentIds.contains(h.getSolrDocumentId())) {
383 documentIds.add(h.getSolrDocumentId());
387 return StringUtils.join(documentIds,
",");
/**
 * Builds the blackboard attributes for one keyword hit and attaches them to a
 * newly created artifact: a TSK_KEYWORD_HIT artifact when the original
 * keyword is not a card-number keyword, otherwise a TSK_ACCOUNT artifact
 * enriched with parsed track data and BIN details.
 * NOTE(review): many control-flow lines (try, else, return) are not visible
 * in this excerpt, so failure-path return values cannot be stated from here.
 *
 * @param foundKeyword the keyword that was matched
 * @param hit          the hit to record
 * @param snippet      preview text; added as an attribute only when non-null
 * @param listName     keyword list name; added as an attribute only when not blank
 * @return a KeywordCachedArtifact wrapping the new artifact
 */
407 public KeywordCachedArtifact writeSingleFileHitsToBlackBoard(Keyword foundKeyword, KeywordHit hit, String snippet, String listName) {
408 final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
416 BlackboardArtifact newArtifact;
417 Collection<BlackboardAttribute> attributes =
new ArrayList<>();
// Ordinary keyword hit: record the matched term and the regular expression,
// then create a TSK_KEYWORD_HIT artifact on the hit's content.
418 if (originalKeyword.getArtifactAttributeType() != BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
419 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm()));
420 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, getQueryString()));
422 newArtifact = hit.getContent().newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT);
424 }
catch (TskCoreException ex) {
425 LOGGER.log(Level.SEVERE,
"Error adding artifact for keyword hit to blackboard", ex);
// Card-number path: mark the account type as credit card.
433 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ACCOUNT_TYPE, MODULE_NAME, Account.Type.CREDIT_CARD.name()));
// Parse the hit's snippet with the track 1 pattern first, then the track 2
// pattern; the parse helpers populate this map.
434 Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap =
new HashMap<>();
435 Matcher matcher = TermsComponentQuery.CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
436 if (matcher.find()) {
437 parseTrack1Data(parsedTrackAttributeMap, matcher);
439 matcher = CREDIT_CARD_TRACK2_PATTERN.matcher(hit.getSnippet());
440 if (matcher.find()) {
441 parseTrack2Data(parsedTrackAttributeMap, matcher);
// Without a non-blank card number the failure is logged, with a message
// that depends on whether the hit is an artifact hit.
// NOTE(review): the else line is not visible; presumably the second log call is the non-artifact branch - confirm.
443 final BlackboardAttribute ccnAttribute = parsedTrackAttributeMap.get(
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER));
444 if (ccnAttribute == null || StringUtils.isBlank(ccnAttribute.getValueString())) {
445 if (hit.isArtifactHit()) {
446 LOGGER.log(Level.SEVERE, String.format(
"Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getArtifact().getArtifactID()));
448 LOGGER.log(Level.SEVERE, String.format(
"Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getContent().getId()));
452 attributes.addAll(parsedTrackAttributeMap.values());
// The first eight characters of the card number are used as the BIN lookup key.
// NOTE(review): substring(0, 8) throws for values shorter than eight
// characters and parseInt throws for non-digits - confirm the patterns
// guarantee at least eight leading digits.
458 final int bin = Integer.parseInt(ccnAttribute.getValueString().substring(0, 8));
459 CreditCards.BankIdentificationNumber binInfo = CreditCards.getBINInfo(bin);
// Each optional BIN detail that is present becomes its own attribute.
460 if (binInfo != null) {
461 binInfo.getScheme().ifPresent(scheme
462 -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_SCHEME, MODULE_NAME, scheme)));
463 binInfo.getCardType().ifPresent(cardType
464 -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_TYPE, MODULE_NAME, cardType)));
465 binInfo.getBrand().ifPresent(brand
466 -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_BRAND_NAME, MODULE_NAME, brand)));
467 binInfo.getBankName().ifPresent(bankName
468 -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_BANK_NAME, MODULE_NAME, bankName)));
469 binInfo.getBankPhoneNumber().ifPresent(phoneNumber
470 -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PHONE_NUMBER, MODULE_NAME, phoneNumber)));
471 binInfo.getBankURL().ifPresent(url
472 -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_URL, MODULE_NAME, url)));
473 binInfo.getCountry().ifPresent(country
474 -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_COUNTRY, MODULE_NAME, country)));
475 binInfo.getBankCity().ifPresent(city
476 -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CITY, MODULE_NAME, city)));
// For hits in unused or unallocated block files, the Solr document id is
// recorded as well.
484 if (hit.getContent() instanceof AbstractFile) {
485 AbstractFile file = (AbstractFile) hit.getContent();
486 if (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS
487 || file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) {
488 attributes.add(
new BlackboardAttribute(KEYWORD_SEARCH_DOCUMENT_ID, MODULE_NAME, hit.getSolrDocumentId()));
496 newArtifact = hit.getContent().newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_ACCOUNT);
497 }
catch (TskCoreException ex) {
498 LOGGER.log(Level.SEVERE,
"Error adding artifact for account to blackboard", ex);
// Attributes added for both artifact kinds.
503 if (StringUtils.isNotBlank(listName)) {
504 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
506 if (snippet != null) {
507 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
509 if (hit.isArtifactHit()) {
510 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, hit.getArtifact().getArtifactID()));
// NOTE(review): the search type is persisted as the enum's ordinal(), so the
// stored value depends on the declaration order of KeywordSearch.QueryType.
513 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
// Attach all attributes to the artifact and mirror them into the result wrapper.
516 newArtifact.addAttributes(attributes);
517 KeywordCachedArtifact writeResult =
new KeywordCachedArtifact(newArtifact);
518 writeResult.add(attributes);
520 }
catch (TskCoreException e) {
521 LOGGER.log(Level.SEVERE,
"Error adding bb attributes for terms search artifact", e);
534 static private void parseTrack2Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributesMap, Matcher matcher) {
535 addAttributeIfNotAlreadyCaptured(attributesMap, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER,
"accountNumber", matcher);
536 addAttributeIfNotAlreadyCaptured(attributesMap, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_EXPIRATION,
"expiration", matcher);
537 addAttributeIfNotAlreadyCaptured(attributesMap, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_SERVICE_CODE,
"serviceCode", matcher);
538 addAttributeIfNotAlreadyCaptured(attributesMap, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_DISCRETIONARY,
"discretionary", matcher);
539 addAttributeIfNotAlreadyCaptured(attributesMap, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_LRC,
"LRC", matcher);
551 static private void parseTrack1Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, Matcher matcher) {
552 parseTrack2Data(attributeMap, matcher);
553 addAttributeIfNotAlreadyCaptured(attributeMap, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_NAME_PERSON,
"name", matcher);
567 static private void addAttributeIfNotAlreadyCaptured(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, BlackboardAttribute.ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) {
568 BlackboardAttribute.Type type =
new BlackboardAttribute.Type(attrType);
569 attributeMap.computeIfAbsent(type, (BlackboardAttribute.Type t) -> {
570 String value = matcher.group(groupName);
571 if (attrType.equals(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) {
572 attributeMap.put(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD),
573 new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, value));
574 value = CharMatcher.anyOf(
" -").removeFrom(value);
576 if (StringUtils.isNotBlank(value)) {
577 return new BlackboardAttribute(attrType, MODULE_NAME, value);