Autopsy  4.20.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
RegexQuery.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2018 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import com.google.common.base.CharMatcher;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.List;
26 import java.util.Map;
27 import java.util.logging.Level;
28 import java.util.regex.Matcher;
29 import java.util.regex.Pattern;
30 import org.apache.commons.lang3.StringUtils;
31 import org.apache.commons.lang3.math.NumberUtils;
32 import org.apache.commons.validator.routines.DomainValidator;
33 import org.apache.solr.client.solrj.SolrQuery;
34 import org.apache.solr.client.solrj.SolrQuery.SortClause;
35 import org.apache.solr.client.solrj.SolrRequest;
36 import org.apache.solr.client.solrj.response.QueryResponse;
37 import org.apache.solr.common.SolrDocument;
38 import org.apache.solr.common.SolrDocumentList;
39 import org.apache.solr.common.params.CursorMarkParams;
40 import org.openide.util.NbBundle;
46 import static org.sleuthkit.autopsy.keywordsearch.KeywordSearchSettings.MODULE_NAME;
47 import org.sleuthkit.datamodel.AbstractFile;
48 import org.sleuthkit.datamodel.Account;
49 import org.sleuthkit.datamodel.BlackboardArtifact;
50 import org.sleuthkit.datamodel.BlackboardAttribute;
51 import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
52 import org.sleuthkit.datamodel.Content;
53 import org.sleuthkit.datamodel.Score;
54 import org.sleuthkit.datamodel.TskCoreException;
55 import org.sleuthkit.datamodel.TskData;
56 
71 final class RegexQuery implements KeywordSearchQuery {
72 
// Logger shared by the instance and static methods of this class.
73  public static final Logger LOGGER = Logger.getLogger(RegexQuery.class.getName());
74 
/*
 * Escape sequences that validate() refuses to accept in a search term.
 * Per the note in validate(), these are valid Lucene regex characters but
 * behave differently than users may expect (e.g. \d matches the letter 'd'
 * rather than a digit).
 */
85  private static final CharSequence[] UNSUPPORTED_CHARS = {"\\d", "\\D", "\\w", "\\W", "\\s", "\\S", "\\n",
86  "\\t", "\\r", "\\f", "\\a", "\\e", "\\v", "\\V", "\\h", "\\H", "\\p"}; //NON-NLS
87 
// Number of rows requested from Solr for each cursor page in performQuery().
88  private static final int MAX_RESULTS_PER_CURSOR_MARK = 512;
// Email address hits shorter than this are discarded in createKeywordHits().
89  private static final int MIN_EMAIL_ADDR_LENGTH = 8;
// The character with code point 171; not referenced by the code visible in this file.
90  private static final String SNIPPET_DELIMITER = String.valueOf(Character.toChars(171));
91 
92  /*
93  * The following fields are part of the initial implementation of credit
94  * card account search and should be factored into another class when time
95  * permits.
96  */
/*
 * Matches a 12-19 digit number whose first digit is 2-6, allowing a single
 * space or dash before each subsequent digit. The whole number is captured
 * in the named group "ccn".
 */
102  static final Pattern CREDIT_CARD_NUM_PATTERN
103  = Pattern.compile("(?<ccn>[2-6]([ -]?[0-9]){11,18})");
104  static final Pattern CREDIT_CARD_TRACK1_PATTERN = Pattern.compile(
105  /*
106  * Track 1 is alphanumeric.
107  *
108  * This regex matches 12-19 digit CCNs embedded in a track 1 formatted
109  * string. This regex matches (and extracts groups) even if the
110  * entire track is not present, as long as the part that is present
111  * conforms to the track format.
112  */
113  "(?:" //begin nested optional group //NON-NLS
114  + "%?" //optional start sentinel: % //NON-NLS
115  + "B)?" //format code //NON-NLS
116  + "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
117  + "\\^" //separator //NON-NLS
118  + "(?<name>[^^]{2,26})" //2-26 character name, not containing ^ //NON-NLS
119  + "(?:\\^" //separator //NON-NLS
120  + "(?:(?:\\^|(?<expiration>\\d{4}))" //separator or 4 digit expiration YYMM //NON-NLS
121  + "(?:(?:\\^|(?<serviceCode>\\d{3}))"//separator or 3 digit service code //NON-NLS
122  + "(?:(?<discretionary>[^?]*)" // discretionary data not containing the end sentinel ? //NON-NLS
123  + "(?:\\?" // end sentinel: ? //NON-NLS
124  + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
125  + "?)?)?)?)?)?");//close nested optional groups //NON-NLS
126  static final Pattern CREDIT_CARD_TRACK2_PATTERN = Pattern.compile(
127  /*
128  * Track 2 is numeric plus six punctuation symbols :;<=>?
129  *
130  * This regex matches 12-19 digit CCNs embedded in a track 2 formatted
131  * string. This regex matches (and extracts groups) even if the
132  * entire track is not present, as long as the part that is present
133  * conforms to the track format.
134  *
135  */
136  "[:;<=>?]?" //(optional)start sentinel //NON-NLS
137  + "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
138  + "(?:[:;<=>?]" //separator //NON-NLS
139  + "(?:(?<expiration>\\d{4})" //4 digit expiration date YYMM //NON-NLS
140  + "(?:(?<serviceCode>\\d{3})" //3 digit service code //NON-NLS
141  + "(?:(?<discretionary>[^:;<=>?]*)" //discretionary data, not containing punctuation marks //NON-NLS
142  + "(?:[:;<=>?]" //end sentinel //NON-NLS
143  + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
144  + "?)?)?)?)?)?"); //close nested optional groups //NON-NLS
// Attribute type under which createCCNAccount() records the Solr document id of a hit.
145  static final BlackboardAttribute.Type KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
146 
// Filters added through addFilter(); each is sent to Solr as a filter query by performQuery().
147  private final List<KeywordQueryFilter> filters = new ArrayList<>();
// The keyword list supplied at construction; returned by getKeywordList().
148  private final KeywordList keywordList;
149  private final Keyword originalKeyword; // The keyword (regular expression or literal term) this query searches for.
// The search term of originalKeyword, captured once at construction.
150  private final String keywordString;
// True when keywordString starts with ".*" (the user supplied a leading wildcard).
151  private final boolean queryStringContainsWildcardPrefix;
// True when keywordString ends with ".*" (the user supplied a trailing wildcard).
152  private final boolean queryStringContainsWildcardSuffix;
153 
// Set to true by escape() once escapedQuery has been computed.
154  private boolean escaped;
// Lucene-escaped form of keywordString; null until escape() has run.
155  private String escapedQuery;
// Name of the Solr field to search; setField() replaces the default.
156  private String field = Server.Schema.CONTENT_STR.toString();
157 
164  RegexQuery(KeywordList keywordList, Keyword keyword) {
165  this.keywordList = keywordList;
166  this.originalKeyword = keyword;
167  this.keywordString = keyword.getSearchTerm();
168 
169  this.queryStringContainsWildcardPrefix = this.keywordString.startsWith(".*");
170  this.queryStringContainsWildcardSuffix = this.keywordString.endsWith(".*");
171  }
172 
173  @Override
174  public KeywordList getKeywordList() {
175  return keywordList;
176  }
177 
178  @Override
179  public boolean validate() {
180  if (keywordString.isEmpty()) {
181  return false;
182  }
183  try {
184  // First we perform regular Java regex validation to catch errors.
185  Pattern.compile(keywordString, Pattern.UNICODE_CHARACTER_CLASS);
186 
187  // Then we check for the set of Java predefined and POSIX character
188  // classes. While they are valid Lucene regex characters, they will
189  // behave differently than users may expect. E.g. the regex \d\d\d
190  // will not find 3 digits but will instead find a sequence of 3 'd's.
191  for (CharSequence c : UNSUPPORTED_CHARS) {
192  if (keywordString.contains(c)) {
193  return false;
194  }
195  }
196  return true;
197  } catch (IllegalArgumentException ex) {
198  return false;
199  }
200  }
201 
202  @Override
203  public QueryResults performQuery() throws NoOpenCoreException {
204 
205  final Server solrServer = KeywordSearch.getServer();
206  SolrQuery solrQuery = new SolrQuery();
207 
208  /*
209  * The provided regular expression may include wildcards at the
210  * beginning and/or end. These wildcards are used to indicate that the
211  * user wants to find hits for the regex that are embedded within other
212  * characters. For example, if we are given .*127.0.0.1.* as a regular
213  * expression, this will produce hits for: (a) " 127.0.0.1 " as a
214  * standalone token (surrounded by whitespace). (b) "abc127.0.0.1def"
215  * where the IP address is surrounded by other characters.
216  *
217  * If we are given this type of regex, we do not need to add our own
218  * wildcards to anchor the query. Otherwise, we need to add wildcard
219  * anchors because Lucene string regex searches default to using ^ and $
220  * to match the entire string.
221  */
222  // We construct the query by surrounding it with slashes (to indicate it is
223  // a regular expression search) and .* as anchors (if the query doesn't
224  // already have them). We do not add .* if there is a boundary character.
225  boolean skipWildcardPrefix = queryStringContainsWildcardPrefix || getQueryString().startsWith("^");
226  boolean skipWildcardSuffix = queryStringContainsWildcardSuffix
227  || (getQueryString().endsWith("$") && (!getQueryString().endsWith("\\$")));
228 
243  String queryString = (originalKeyword.searchTermIsLiteral() ? getEscapedQueryString() : getQueryString());
244  double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
245  if (indexSchemaVersion >= 2.1) {
246  queryString = queryString.toLowerCase();
247  }
248 
249  solrQuery.setQuery((field == null ? Server.Schema.CONTENT_STR.toString() : field) + ":/"
250  + (skipWildcardPrefix ? "" : ".*")
251  // if the query is for a substring (i.e. literal search term) we want
252  // to escape characters such as ()[]-.
253  + queryString
254  + (skipWildcardSuffix ? "" : ".*") + "/");
255 
256  // Set the fields we want to have returned by the query.
257  solrQuery.setFields(Server.Schema.CONTENT_STR.toString(), Server.Schema.ID.toString(), Server.Schema.CHUNK_SIZE.toString());
258 
259  filters.stream()
260  .map(KeywordQueryFilter::toString)
261  .forEach(solrQuery::addFilterQuery);
262 
263  solrQuery.setRows(MAX_RESULTS_PER_CURSOR_MARK);
264  // Setting the sort order is necessary for cursor based paging to work.
265  solrQuery.setSort(SortClause.asc(Server.Schema.ID.toString()));
266 
267  String cursorMark = CursorMarkParams.CURSOR_MARK_START;
268  SolrDocumentList resultList;
269  boolean allResultsProcessed = false;
270  QueryResults results = new QueryResults(this);
271 
272  while (!allResultsProcessed) {
273  try {
274  solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
275  QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
276  resultList = response.getResults();
277 
278  for (SolrDocument resultDoc : resultList) {
279  try {
280  List<KeywordHit> keywordHits = createKeywordHits(resultDoc);
281  for (KeywordHit hit : keywordHits) {
282  Keyword keywordInstance = new Keyword(hit.getHit(), true, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm());
283  List<KeywordHit> hitsForKeyword = results.getResults(keywordInstance);
284  if (hitsForKeyword == null) {
285  hitsForKeyword = new ArrayList<>();
286  results.addResult(keywordInstance, hitsForKeyword);
287  }
288  hitsForKeyword.add(hit);
289  }
290  } catch (TskCoreException ex) {
291  LOGGER.log(Level.SEVERE, "Error creating keyword hits", ex); //NON-NLS
292  }
293  }
294 
295  String nextCursorMark = response.getNextCursorMark();
296  if (cursorMark.equals(nextCursorMark)) {
297  allResultsProcessed = true;
298  }
299  cursorMark = nextCursorMark;
300  } catch (KeywordSearchModuleException ex) {
301  LOGGER.log(Level.SEVERE, "Error executing Regex Solr Query: " + keywordString, ex); //NON-NLS
302  MessageNotifyUtil.Notify.error(NbBundle.getMessage(Server.class, "Server.query.exception.msg", keywordString), ex.getCause().getMessage());
303  }
304  }
305 
306  return results;
307  }
308 
309  private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) throws TskCoreException {
310 
311  final HashMap<String, String> keywordsFoundInThisDocument = new HashMap<>();
312 
313  List<KeywordHit> hits = new ArrayList<>();
314  final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
315  final Integer chunkSize = (Integer) solrDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
316 
317  final Collection<Object> content_str = solrDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
318 
319  String searchPattern;
320  if (originalKeyword.searchTermIsLiteral()) {
334  searchPattern = "[\\w[\\.']]*" + Pattern.quote(keywordString.toLowerCase()) + "[\\w[\\.']]*";
335  } else {
336  searchPattern = keywordString;
337  }
338 
339  final Pattern pattern = Pattern.compile(searchPattern, Pattern.CASE_INSENSITIVE);
340 
341  try {
342  for (Object content_obj : content_str) {
343  String content = (String) content_obj;
344  Matcher hitMatcher = pattern.matcher(content);
345  int offset = 0;
346 
347  while (hitMatcher.find(offset)) {
348 
349  // If the location of the hit is beyond this chunk (i.e. it
350  // exists in the overlap region), we skip the hit. It will
351  // show up again as a hit in the chunk following this one.
352  if (chunkSize != null && hitMatcher.start() >= chunkSize) {
353  break;
354  }
355 
356  String hit = hitMatcher.group();
357 
361  if ("".equals(hit)) {
362  break;
363  }
364 
365  offset = hitMatcher.end();
366  final ATTRIBUTE_TYPE artifactAttributeType = originalKeyword.getArtifactAttributeType();
367 
368  // We attempt to reduce false positives for phone numbers and IP address hits
369  // by querying Solr for hits delimited by a set of known boundary characters.
370  // See KeywordSearchList.PHONE_NUMBER_REGEX for an example.
371  // Because of this the hits may contain an extra character at the beginning or end that
372  // needs to be chopped off, unless the user has supplied their own wildcard suffix
373  // as part of the regex.
374  if (!queryStringContainsWildcardSuffix
375  && (artifactAttributeType == ATTRIBUTE_TYPE.TSK_PHONE_NUMBER
376  || artifactAttributeType == ATTRIBUTE_TYPE.TSK_IP_ADDRESS)) {
377  if (artifactAttributeType == ATTRIBUTE_TYPE.TSK_PHONE_NUMBER) {
378  // For phone numbers replace all non numeric characters (except "(") at the start of the hit.
379  hit = hit.replaceAll("^[^0-9\\(]", "");
380  } else {
381  // Replace all non numeric characters at the start of the hit.
382  hit = hit.replaceAll("^[^0-9]", "");
383  }
384  // Replace all non numeric at the end of the hit.
385  hit = hit.replaceAll("[^0-9]$", "");
386 
387  if (offset > 1) {
388  /*
389  * NOTE: our IP and phone number regex patterns look for
390  * boundary characters immediately before and after
391  * the keyword hit. After a match, Java pattern
392  * mather re-starts at the first character not
393  * matched by the previous match. This basically
394  * requires two boundary characters to be present
395  * between each pattern match. To mitigate this we
396  * are resetting the offest one character back.
397  */
398  offset--;
399  }
400  }
401 
410  if (originalKeyword.searchTermIsLiteral()) {
411  hit = hit.replaceAll("^" + KeywordSearchList.BOUNDARY_CHARACTERS + "*", "");
412  hit = hit.replaceAll(KeywordSearchList.BOUNDARY_CHARACTERS + "*$", "");
413  }
414 
423  hit = hit.intern();
424 
425  // We will only create one KeywordHit instance per document for
426  // a given hit.
427  if (keywordsFoundInThisDocument.containsKey(hit)) {
428  continue;
429  }
430  keywordsFoundInThisDocument.put(hit, hit);
431 
432  if (artifactAttributeType == null) {
433  hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit));
434  } else {
435  switch (artifactAttributeType) {
436  case TSK_EMAIL:
437  /*
438  * Reduce false positives by eliminating email
439  * address hits that are either too short or are
440  * not for valid top level domains.
441  */
442  if (hit.length() >= MIN_EMAIL_ADDR_LENGTH
443  && DomainValidator.getInstance(true).isValidTld(hit.substring(hit.lastIndexOf('.')))) {
444  hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit));
445  }
446 
447  break;
448  case TSK_CARD_NUMBER:
449  /*
450  * If searching for credit card account numbers,
451  * do extra validation on the term and discard
452  * it if it does not pass.
453  */
454  Matcher ccnMatcher = CREDIT_CARD_NUM_PATTERN.matcher(hit);
455 
456  for (int rLength = hit.length(); rLength >= 12; rLength--) {
457  ccnMatcher.region(0, rLength);
458  if (ccnMatcher.find()) {
459  final String group = ccnMatcher.group("ccn");
460  if (CreditCardValidator.isValidCCN(group)) {
461  hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit));
462  }
463  }
464  }
465 
466  break;
467  default:
468  hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit));
469  break;
470  }
471  }
472  }
473  }
474  } catch (Throwable error) {
475  /*
476  * NOTE: Matcher.find() is known to throw StackOverflowError in rare
477  * cases (see JIRA-2700). StackOverflowError is an error, not an
478  * exception, and therefore needs to be caught as a Throwable. When
479  * this occurs we should re-throw the error as TskCoreException so
480  * that it is logged by the calling method and move on to the next
481  * Solr document.
482  */
483  throw new TskCoreException("Failed to create keyword hits for Solr document id " + docId + " due to " + error.getMessage());
484  }
485  return hits;
486  }
487 
488  @Override
489  public void addFilter(KeywordQueryFilter filter) {
490  this.filters.add(filter);
491  }
492 
493  @Override
494  public void setField(String field) {
495  this.field = field;
496  }
497 
498  @Override
499  public void setSubstringQuery() {
500  }
501 
502  @Override
503  synchronized public void escape() {
504  if (isEscaped() == false) {
505  escapedQuery = KeywordSearchUtil.escapeLuceneQuery(keywordString);
506  escaped = true;
507  }
508  }
509 
510  @Override
511  synchronized public boolean isEscaped() {
512  return escaped;
513  }
514 
515  @Override
516  public boolean isLiteral() {
517  return false;
518  }
519 
520  @Override
521  public String getQueryString() {
522  return originalKeyword.getSearchTerm();
523  }
524 
525  @Override
526  synchronized public String getEscapedQueryString() {
527  if (false == isEscaped()) {
528  escape();
529  }
530  return escapedQuery;
531  }
532 
549  @Override
550  public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
551  return createKeywordHitArtifact(content, originalKeyword, foundKeyword, hit, snippet, listName, ingestJobId);
552  }
553 
554 
555  public static BlackboardArtifact createKeywordHitArtifact(Content content, Keyword originalKW, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
556  final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
557 
558  if (content == null) {
559  LOGGER.log(Level.WARNING, "Error adding artifact for keyword hit to blackboard"); //NON-NLS
560  return null;
561  }
562 
563  /*
564  * Credit Card number hits are handled differently
565  */
566  if (originalKW.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
567  createCCNAccount(content, originalKW, foundKeyword, hit, snippet, listName, ingestJobId);
568  return null;
569  }
570 
571  /*
572  * Create a "plain vanilla" keyword hit artifact with keyword and regex
573  * attributes
574  */
575  Collection<BlackboardAttribute> attributes = new ArrayList<>();
576 
577  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm().toLowerCase()));
578  if(!originalKW.searchTermIsWholeWord() || !originalKW.searchTermIsLiteral()) {
579  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, originalKW.getSearchTerm()));
580  }
581 
582  if (StringUtils.isNotBlank(listName)) {
583  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
584  }
585  if (snippet != null) {
586  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
587  }
588 
589  hit.getArtifactID().ifPresent(artifactID
590  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
591  );
592 
593  if (originalKW.searchTermIsLiteral()) {
594  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
595  } else {
596  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
597  }
598 
599  try {
600  return content.newAnalysisResult(
601  BlackboardArtifact.Type.TSK_KEYWORD_HIT, Score.SCORE_LIKELY_NOTABLE,
602  null, listName, null, attributes)
603  .getAnalysisResult();
604  } catch (TskCoreException e) {
605  LOGGER.log(Level.SEVERE, "Error adding bb attributes for terms search artifact", e); //NON-NLS
606  return null;
607  }
608  }
609 
610  private static void createCCNAccount(Content content, Keyword originalKW, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
611 
612  final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
613 
614  if (originalKW.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
615  LOGGER.log(Level.SEVERE, "Keyword hit is not a credit card number"); //NON-NLS
616  return;
617  }
618  /*
619  * Create a credit card account with attributes parsed from the snippet
620  * for the hit and looked up based on the parsed bank identifcation
621  * number.
622  */
623  List<BlackboardAttribute> attributes = new ArrayList<>();
624 
625  Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap = new HashMap<>();
626  Matcher matcher = CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
627  if (matcher.find()) {
628  parseTrack1Data(parsedTrackAttributeMap, matcher);
629  }
630  matcher = CREDIT_CARD_TRACK2_PATTERN.matcher(hit.getSnippet());
631  if (matcher.find()) {
632  parseTrack2Data(parsedTrackAttributeMap, matcher);
633  }
634  final BlackboardAttribute ccnAttribute = parsedTrackAttributeMap.get(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_CARD_NUMBER));
635  if (ccnAttribute == null || StringUtils.isBlank(ccnAttribute.getValueString())) {
636 
637  if (hit.isArtifactHit()) {
638  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS
639  } else {
640  try {
641  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getContentID())); //NON-NLS
642  } catch (TskCoreException ex) {
643  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s' ", foundKeyword.getSearchTerm(), hit.getSnippet())); //NON-NLS
644  LOGGER.log(Level.SEVERE, "There was a error getting contentID for keyword hit.", ex); //NON-NLS
645  }
646  }
647  return;
648  }
649  attributes.addAll(parsedTrackAttributeMap.values());
650 
651  /*
652  * Look up the bank name, scheme, etc. attributes for the bank
653  * indentification number (BIN).
654  */
655  final int bin = Integer.parseInt(ccnAttribute.getValueString().substring(0, 8));
656  CreditCards.BankIdentificationNumber binInfo = CreditCards.getBINInfo(bin);
657  if (binInfo != null) {
658  binInfo.getScheme().ifPresent(scheme
659  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_SCHEME, MODULE_NAME, scheme)));
660  binInfo.getCardType().ifPresent(cardType
661  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_TYPE, MODULE_NAME, cardType)));
662  binInfo.getBrand().ifPresent(brand
663  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BRAND_NAME, MODULE_NAME, brand)));
664  binInfo.getBankName().ifPresent(bankName
665  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BANK_NAME, MODULE_NAME, bankName)));
666  binInfo.getBankPhoneNumber().ifPresent(phoneNumber
667  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PHONE_NUMBER, MODULE_NAME, phoneNumber)));
668  binInfo.getBankURL().ifPresent(url
669  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_URL, MODULE_NAME, url)));
670  binInfo.getCountry().ifPresent(country
671  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_COUNTRY, MODULE_NAME, country)));
672  binInfo.getBankCity().ifPresent(city
673  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CITY, MODULE_NAME, city)));
674  }
675 
676  /*
677  * If the hit is from unused or unallocated space, record the Solr
678  * document id to support showing just the chunk that contained the hit.
679  */
680  if (content instanceof AbstractFile) {
681  AbstractFile file = (AbstractFile) content;
682  if (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS
683  || file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) {
684  attributes.add(new BlackboardAttribute(KEYWORD_SEARCH_DOCUMENT_ID, MODULE_NAME, hit.getSolrDocumentId()));
685  }
686  }
687 
688  if (StringUtils.isNotBlank(listName)) {
689  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
690  }
691  if (snippet != null) {
692  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
693  }
694 
695  hit.getArtifactID().ifPresent(artifactID
696  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
697  );
698 
699  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
700 
701  /*
702  * Create an account instance.
703  */
704  try {
705  Case.getCurrentCaseThrows().getSleuthkitCase().getCommunicationsManager().createAccountFileInstance(Account.Type.CREDIT_CARD,
706  ccnAttribute.getValueString(), MODULE_NAME, content, attributes, ingestJobId);
707  } catch (TskCoreException | NoCurrentCaseException ex) {
708  LOGGER.log(Level.SEVERE, "Error creating CCN account instance", ex); //NON-NLS
709  }
710 
711  }
712 
721  static private void parseTrack2Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributesMap, Matcher matcher) {
722  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_NUMBER, "accountNumber", matcher);
723  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_EXPIRATION, "expiration", matcher);
724  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_SERVICE_CODE, "serviceCode", matcher);
725  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_DISCRETIONARY, "discretionary", matcher);
726  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_LRC, "LRC", matcher);
727  }
728 
738  static private void parseTrack1Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, Matcher matcher) {
739  parseTrack2Data(attributeMap, matcher);
740  addAttributeIfNotAlreadyCaptured(attributeMap, ATTRIBUTE_TYPE.TSK_NAME_PERSON, "name", matcher);
741  }
742 
755  static private void addAttributeIfNotAlreadyCaptured(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) {
756  BlackboardAttribute.Type type = new BlackboardAttribute.Type(attrType);
757 
758  if (!attributeMap.containsKey(type)) {
759  String value = matcher.group(groupName);
760  if (attrType.equals(ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) {
761  attributeMap.put(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD),
762  new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, value));
763  value = CharMatcher.anyOf(" -").removeFrom(value);
764  }
765 
766  if (StringUtils.isNotBlank(value)) {
767  attributeMap.put(type, new BlackboardAttribute(attrType, MODULE_NAME, value));
768  }
769  }
770  }
771 }

Copyright © 2012-2022 Basis Technology. Generated on: Tue Aug 1 2023
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.