Autopsy  4.19.3
Graphical digital forensics platform for The Sleuth Kit and other tools.
RegexQuery.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2018 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import com.google.common.base.CharMatcher;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.List;
26 import java.util.Map;
27 import java.util.logging.Level;
28 import java.util.regex.Matcher;
29 import java.util.regex.Pattern;
30 import org.apache.commons.lang3.StringUtils;
31 import org.apache.commons.lang3.math.NumberUtils;
32 import org.apache.commons.validator.routines.DomainValidator;
33 import org.apache.solr.client.solrj.SolrQuery;
34 import org.apache.solr.client.solrj.SolrQuery.SortClause;
35 import org.apache.solr.client.solrj.SolrRequest;
36 import org.apache.solr.client.solrj.response.QueryResponse;
37 import org.apache.solr.common.SolrDocument;
38 import org.apache.solr.common.SolrDocumentList;
39 import org.apache.solr.common.params.CursorMarkParams;
40 import org.openide.util.NbBundle;
46 import static org.sleuthkit.autopsy.keywordsearch.KeywordSearchSettings.MODULE_NAME;
47 import org.sleuthkit.datamodel.AbstractFile;
48 import org.sleuthkit.datamodel.Account;
49 import org.sleuthkit.datamodel.AccountFileInstance;
50 import org.sleuthkit.datamodel.BlackboardArtifact;
51 import org.sleuthkit.datamodel.BlackboardAttribute;
52 import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
53 import org.sleuthkit.datamodel.Content;
54 import org.sleuthkit.datamodel.Score;
55 import org.sleuthkit.datamodel.TskCoreException;
56 import org.sleuthkit.datamodel.TskData;
57 
72 final class RegexQuery implements KeywordSearchQuery {
73 
74  public static final Logger LOGGER = Logger.getLogger(RegexQuery.class.getName());
75 
86  private static final CharSequence[] UNSUPPORTED_CHARS = {"\\d", "\\D", "\\w", "\\W", "\\s", "\\S", "\\n",
87  "\\t", "\\r", "\\f", "\\a", "\\e", "\\v", "\\V", "\\h", "\\H", "\\p"}; //NON-NLS
88 
89  private static final int MAX_RESULTS_PER_CURSOR_MARK = 512;
90  private static final int MIN_EMAIL_ADDR_LENGTH = 8;
91  private static final String SNIPPET_DELIMITER = String.valueOf(Character.toChars(171));
92 
93  /*
94  * The following fields are part of the initial implementation of credit
95  * card account search and should be factored into another class when time
96  * permits.
97  */
103  static final Pattern CREDIT_CARD_NUM_PATTERN
104  = Pattern.compile("(?<ccn>[2-6]([ -]?[0-9]){11,18})");
105  static final Pattern CREDIT_CARD_TRACK1_PATTERN = Pattern.compile(
106  /*
107  * Track 1 is alphanumeric.
108  *
109  * This regex matches 12-19 digit ccns embeded in a track 1 formated
110  * string. This regex matches (and extracts groups) even if the
111  * entire track is not present as long as the part that is conforms
112  * to the track format.
113  */
114  "(?:" //begin nested optinal group //NON-NLS
115  + "%?" //optional start sentinal: % //NON-NLS
116  + "B)?" //format code //NON-NLS
117  + "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
118  + "\\^" //separator //NON-NLS
119  + "(?<name>[^^]{2,26})" //2-26 charachter name, not containing ^ //NON-NLS
120  + "(?:\\^" //separator //NON-NLS
121  + "(?:(?:\\^|(?<expiration>\\d{4}))" //separator or 4 digit expiration YYMM //NON-NLS
122  + "(?:(?:\\^|(?<serviceCode>\\d{3}))"//separator or 3 digit service code //NON-NLS
123  + "(?:(?<discretionary>[^?]*)" // discretionary data not containing separator //NON-NLS
124  + "(?:\\?" // end sentinal: ? //NON-NLS
125  + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
126  + "?)?)?)?)?)?");//close nested optional groups //NON-NLS
127  static final Pattern CREDIT_CARD_TRACK2_PATTERN = Pattern.compile(
128  /*
129  * Track 2 is numeric plus six punctuation symbolls :;<=>?
130  *
131  * This regex matches 12-19 digit ccns embeded in a track 2 formated
132  * string. This regex matches (and extracts groups) even if the
133  * entire track is not present as long as the part that is conforms
134  * to the track format.
135  *
136  */
137  "[:;<=>?]?" //(optional)start sentinel //NON-NLS
138  + "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
139  + "(?:[:;<=>?]" //separator //NON-NLS
140  + "(?:(?<expiration>\\d{4})" //4 digit expiration date YYMM //NON-NLS
141  + "(?:(?<serviceCode>\\d{3})" //3 digit service code //NON-NLS
142  + "(?:(?<discretionary>[^:;<=>?]*)" //discretionary data, not containing punctuation marks //NON-NLS
143  + "(?:[:;<=>?]" //end sentinel //NON-NLS
144  + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
145  + "?)?)?)?)?)?"); //close nested optional groups //NON-NLS
146  static final BlackboardAttribute.Type KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
147 
148  private final List<KeywordQueryFilter> filters = new ArrayList<>();
149  private final KeywordList keywordList;
150  private final Keyword originalKeyword; // The regular expression originalKeyword used to perform the search.
151  private final String keywordString;
152  private final boolean queryStringContainsWildcardPrefix;
153  private final boolean queryStringContainsWildcardSuffix;
154 
155  private boolean escaped;
156  private String escapedQuery;
157  private String field = Server.Schema.CONTENT_STR.toString();
158 
165  RegexQuery(KeywordList keywordList, Keyword keyword) {
166  this.keywordList = keywordList;
167  this.originalKeyword = keyword;
168  this.keywordString = keyword.getSearchTerm();
169 
170  this.queryStringContainsWildcardPrefix = this.keywordString.startsWith(".*");
171  this.queryStringContainsWildcardSuffix = this.keywordString.endsWith(".*");
172  }
173 
174  @Override
175  public KeywordList getKeywordList() {
176  return keywordList;
177  }
178 
179  @Override
180  public boolean validate() {
181  if (keywordString.isEmpty()) {
182  return false;
183  }
184  try {
185  // First we perform regular Java regex validation to catch errors.
186  Pattern.compile(keywordString, Pattern.UNICODE_CHARACTER_CLASS);
187 
188  // Then we check for the set of Java predefined and POSIX character
189  // classes. While they are valid Lucene regex characters, they will
190  // behave differently than users may expect. E.g. the regex \d\d\d
191  // will not find 3 digits but will instead find a sequence of 3 'd's.
192  for (CharSequence c : UNSUPPORTED_CHARS) {
193  if (keywordString.contains(c)) {
194  return false;
195  }
196  }
197  return true;
198  } catch (IllegalArgumentException ex) {
199  return false;
200  }
201  }
202 
203  @Override
204  public QueryResults performQuery() throws NoOpenCoreException {
205 
206  final Server solrServer = KeywordSearch.getServer();
207  SolrQuery solrQuery = new SolrQuery();
208 
209  /*
210  * The provided regular expression may include wildcards at the
211  * beginning and/or end. These wildcards are used to indicate that the
212  * user wants to find hits for the regex that are embedded within other
213  * characters. For example, if we are given .*127.0.0.1.* as a regular
214  * expression, this will produce hits for: (a) " 127.0.0.1 " as a
215  * standalone token (surrounded by whitespace). (b) "abc127.0.0.1def"
216  * where the IP address is surrounded by other characters.
217  *
218  * If we are given this type of regex, we do not need to add our own
219  * wildcards to anchor the query. Otherwise, we need to add wildcard
220  * anchors because Lucene string regex searches default to using ^ and $
221  * to match the entire string.
222  */
223  // We construct the query by surrounding it with slashes (to indicate it is
224  // a regular expression search) and .* as anchors (if the query doesn't
225  // already have them). We do not add .* if there is a boundary character.
226  boolean skipWildcardPrefix = queryStringContainsWildcardPrefix || getQueryString().startsWith("^");
227  boolean skipWildcardSuffix = queryStringContainsWildcardSuffix
228  || (getQueryString().endsWith("$") && (!getQueryString().endsWith("\\$")));
229 
244  String queryString = (originalKeyword.searchTermIsLiteral() ? getEscapedQueryString() : getQueryString());
245  double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
246  if (indexSchemaVersion >= 2.1) {
247  queryString = queryString.toLowerCase();
248  }
249 
250  solrQuery.setQuery((field == null ? Server.Schema.CONTENT_STR.toString() : field) + ":/"
251  + (skipWildcardPrefix ? "" : ".*")
252  // if the query is for a substring (i.e. literal search term) we want
253  // to escape characters such as ()[]-.
254  + queryString
255  + (skipWildcardSuffix ? "" : ".*") + "/");
256 
257  // Set the fields we want to have returned by the query.
258  solrQuery.setFields(Server.Schema.CONTENT_STR.toString(), Server.Schema.ID.toString(), Server.Schema.CHUNK_SIZE.toString());
259 
260  filters.stream()
261  .map(KeywordQueryFilter::toString)
262  .forEach(solrQuery::addFilterQuery);
263 
264  solrQuery.setRows(MAX_RESULTS_PER_CURSOR_MARK);
265  // Setting the sort order is necessary for cursor based paging to work.
266  solrQuery.setSort(SortClause.asc(Server.Schema.ID.toString()));
267 
268  String cursorMark = CursorMarkParams.CURSOR_MARK_START;
269  SolrDocumentList resultList;
270  boolean allResultsProcessed = false;
271  QueryResults results = new QueryResults(this);
272 
273  while (!allResultsProcessed) {
274  try {
275  solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
276  QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
277  resultList = response.getResults();
278 
279  for (SolrDocument resultDoc : resultList) {
280  try {
281  List<KeywordHit> keywordHits = createKeywordHits(resultDoc);
282  for (KeywordHit hit : keywordHits) {
283  Keyword keywordInstance = new Keyword(hit.getHit(), true, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm());
284  List<KeywordHit> hitsForKeyword = results.getResults(keywordInstance);
285  if (hitsForKeyword == null) {
286  hitsForKeyword = new ArrayList<>();
287  results.addResult(keywordInstance, hitsForKeyword);
288  }
289  hitsForKeyword.add(hit);
290  }
291  } catch (TskCoreException ex) {
292  LOGGER.log(Level.SEVERE, "Error creating keyword hits", ex); //NON-NLS
293  }
294  }
295 
296  String nextCursorMark = response.getNextCursorMark();
297  if (cursorMark.equals(nextCursorMark)) {
298  allResultsProcessed = true;
299  }
300  cursorMark = nextCursorMark;
301  } catch (KeywordSearchModuleException ex) {
302  LOGGER.log(Level.SEVERE, "Error executing Regex Solr Query: " + keywordString, ex); //NON-NLS
303  MessageNotifyUtil.Notify.error(NbBundle.getMessage(Server.class, "Server.query.exception.msg", keywordString), ex.getCause().getMessage());
304  }
305  }
306 
307  return results;
308  }
309 
310  private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) throws TskCoreException {
311 
312  final HashMap<String, String> keywordsFoundInThisDocument = new HashMap<>();
313 
314  List<KeywordHit> hits = new ArrayList<>();
315  final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
316  final Integer chunkSize = (Integer) solrDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
317 
318  final Collection<Object> content_str = solrDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
319 
320  String searchPattern;
321  if (originalKeyword.searchTermIsLiteral()) {
335  searchPattern = "[\\w[\\.']]*" + Pattern.quote(keywordString.toLowerCase()) + "[\\w[\\.']]*";
336  } else {
337  searchPattern = keywordString;
338  }
339 
340  final Pattern pattern = Pattern.compile(searchPattern, Pattern.CASE_INSENSITIVE);
341 
342  try {
343  for (Object content_obj : content_str) {
344  String content = (String) content_obj;
345  Matcher hitMatcher = pattern.matcher(content);
346  int offset = 0;
347 
348  while (hitMatcher.find(offset)) {
349 
350  // If the location of the hit is beyond this chunk (i.e. it
351  // exists in the overlap region), we skip the hit. It will
352  // show up again as a hit in the chunk following this one.
353  if (chunkSize != null && hitMatcher.start() >= chunkSize) {
354  break;
355  }
356 
357  String hit = hitMatcher.group();
358 
362  if ("".equals(hit)) {
363  break;
364  }
365 
366  offset = hitMatcher.end();
367  final ATTRIBUTE_TYPE artifactAttributeType = originalKeyword.getArtifactAttributeType();
368 
369  // We attempt to reduce false positives for phone numbers and IP address hits
370  // by querying Solr for hits delimited by a set of known boundary characters.
371  // See KeywordSearchList.PHONE_NUMBER_REGEX for an example.
372  // Because of this the hits may contain an extra character at the beginning or end that
373  // needs to be chopped off, unless the user has supplied their own wildcard suffix
374  // as part of the regex.
375  if (!queryStringContainsWildcardSuffix
376  && (artifactAttributeType == ATTRIBUTE_TYPE.TSK_PHONE_NUMBER
377  || artifactAttributeType == ATTRIBUTE_TYPE.TSK_IP_ADDRESS)) {
378  if (artifactAttributeType == ATTRIBUTE_TYPE.TSK_PHONE_NUMBER) {
379  // For phone numbers replace all non numeric characters (except "(") at the start of the hit.
380  hit = hit.replaceAll("^[^0-9\\(]", "");
381  } else {
382  // Replace all non numeric characters at the start of the hit.
383  hit = hit.replaceAll("^[^0-9]", "");
384  }
385  // Replace all non numeric at the end of the hit.
386  hit = hit.replaceAll("[^0-9]$", "");
387 
388  if (offset > 1) {
389  /*
390  * NOTE: our IP and phone number regex patterns look for
391  * boundary characters immediately before and after
392  * the keyword hit. After a match, Java pattern
393  * mather re-starts at the first character not
394  * matched by the previous match. This basically
395  * requires two boundary characters to be present
396  * between each pattern match. To mitigate this we
397  * are resetting the offest one character back.
398  */
399  offset--;
400  }
401  }
402 
411  if (originalKeyword.searchTermIsLiteral()) {
412  hit = hit.replaceAll("^" + KeywordSearchList.BOUNDARY_CHARACTERS + "*", "");
413  hit = hit.replaceAll(KeywordSearchList.BOUNDARY_CHARACTERS + "*$", "");
414  }
415 
424  hit = hit.intern();
425 
426  // We will only create one KeywordHit instance per document for
427  // a given hit.
428  if (keywordsFoundInThisDocument.containsKey(hit)) {
429  continue;
430  }
431  keywordsFoundInThisDocument.put(hit, hit);
432 
433  if (artifactAttributeType == null) {
434  hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit));
435  } else {
436  switch (artifactAttributeType) {
437  case TSK_EMAIL:
438  /*
439  * Reduce false positives by eliminating email
440  * address hits that are either too short or are
441  * not for valid top level domains.
442  */
443  if (hit.length() >= MIN_EMAIL_ADDR_LENGTH
444  && DomainValidator.getInstance(true).isValidTld(hit.substring(hit.lastIndexOf('.')))) {
445  hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit));
446  }
447 
448  break;
449  case TSK_CARD_NUMBER:
450  /*
451  * If searching for credit card account numbers,
452  * do extra validation on the term and discard
453  * it if it does not pass.
454  */
455  Matcher ccnMatcher = CREDIT_CARD_NUM_PATTERN.matcher(hit);
456 
457  for (int rLength = hit.length(); rLength >= 12; rLength--) {
458  ccnMatcher.region(0, rLength);
459  if (ccnMatcher.find()) {
460  final String group = ccnMatcher.group("ccn");
461  if (CreditCardValidator.isValidCCN(group)) {
462  hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit));
463  }
464  }
465  }
466 
467  break;
468  default:
469  hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit));
470  break;
471  }
472  }
473  }
474  }
475  } catch (Throwable error) {
476  /*
477  * NOTE: Matcher.find() is known to throw StackOverflowError in rare
478  * cases (see JIRA-2700). StackOverflowError is an error, not an
479  * exception, and therefore needs to be caught as a Throwable. When
480  * this occurs we should re-throw the error as TskCoreException so
481  * that it is logged by the calling method and move on to the next
482  * Solr document.
483  */
484  throw new TskCoreException("Failed to create keyword hits for Solr document id " + docId + " due to " + error.getMessage());
485  }
486  return hits;
487  }
488 
502  private String makeSnippet(String content, Matcher hitMatcher, String hit) {
503  // Get the snippet from the document.
504  int maxIndex = content.length() - 1;
505  final int end = hitMatcher.end();
506  final int start = hitMatcher.start();
507 
508  return content.substring(Integer.max(0, start - 20), Integer.max(0, start))
509  + SNIPPET_DELIMITER + hit + SNIPPET_DELIMITER
510  + content.substring(Integer.min(maxIndex, end), Integer.min(maxIndex, end + 20));
511  }
512 
513  @Override
514  public void addFilter(KeywordQueryFilter filter) {
515  this.filters.add(filter);
516  }
517 
518  @Override
519  public void setField(String field) {
520  this.field = field;
521  }
522 
523  @Override
524  public void setSubstringQuery() {
525  }
526 
527  @Override
528  synchronized public void escape() {
529  if (isEscaped() == false) {
530  escapedQuery = KeywordSearchUtil.escapeLuceneQuery(keywordString);
531  escaped = true;
532  }
533  }
534 
535  @Override
536  synchronized public boolean isEscaped() {
537  return escaped;
538  }
539 
540  @Override
541  public boolean isLiteral() {
542  return false;
543  }
544 
545  @Override
546  public String getQueryString() {
547  return originalKeyword.getSearchTerm();
548  }
549 
550  @Override
551  synchronized public String getEscapedQueryString() {
552  if (false == isEscaped()) {
553  escape();
554  }
555  return escapedQuery;
556  }
557 
574  @Override
575  public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
576  final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
577 
578  if (content == null) {
579  LOGGER.log(Level.WARNING, "Error adding artifact for keyword hit to blackboard"); //NON-NLS
580  return null;
581  }
582 
583  /*
584  * Credit Card number hits are handled differently
585  */
586  if (originalKeyword.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
587  createCCNAccount(content, foundKeyword, hit, snippet, listName, ingestJobId);
588  return null;
589  }
590 
591  /*
592  * Create a "plain vanilla" keyword hit artifact with keyword and regex
593  * attributes
594  */
595  Collection<BlackboardAttribute> attributes = new ArrayList<>();
596 
597  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm()));
598  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, getQueryString()));
599 
600  if (StringUtils.isNotBlank(listName)) {
601  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
602  }
603  if (snippet != null) {
604  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
605  }
606 
607  hit.getArtifactID().ifPresent(artifactID
608  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
609  );
610 
611  if (originalKeyword.searchTermIsLiteral()) {
612  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
613  } else {
614  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
615  }
616 
617  try {
618  return content.newAnalysisResult(
619  BlackboardArtifact.Type.TSK_KEYWORD_HIT, Score.SCORE_LIKELY_NOTABLE,
620  null, listName, null, attributes)
621  .getAnalysisResult();
622  } catch (TskCoreException e) {
623  LOGGER.log(Level.SEVERE, "Error adding bb attributes for terms search artifact", e); //NON-NLS
624  return null;
625  }
626  }
627 
628  private void createCCNAccount(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
629 
630  final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
631 
632  if (originalKeyword.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
633  LOGGER.log(Level.SEVERE, "Keyword hit is not a credit card number"); //NON-NLS
634  return;
635  }
636  /*
637  * Create a credit card account with attributes parsed from the snippet
638  * for the hit and looked up based on the parsed bank identifcation
639  * number.
640  */
641  List<BlackboardAttribute> attributes = new ArrayList<>();
642 
643  Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap = new HashMap<>();
644  Matcher matcher = CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
645  if (matcher.find()) {
646  parseTrack1Data(parsedTrackAttributeMap, matcher);
647  }
648  matcher = CREDIT_CARD_TRACK2_PATTERN.matcher(hit.getSnippet());
649  if (matcher.find()) {
650  parseTrack2Data(parsedTrackAttributeMap, matcher);
651  }
652  final BlackboardAttribute ccnAttribute = parsedTrackAttributeMap.get(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_CARD_NUMBER));
653  if (ccnAttribute == null || StringUtils.isBlank(ccnAttribute.getValueString())) {
654 
655  if (hit.isArtifactHit()) {
656  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS
657  } else {
658  try {
659  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getContentID())); //NON-NLS
660  } catch (TskCoreException ex) {
661  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s' ", foundKeyword.getSearchTerm(), hit.getSnippet())); //NON-NLS
662  LOGGER.log(Level.SEVERE, "There was a error getting contentID for keyword hit.", ex); //NON-NLS
663  }
664  }
665  return;
666  }
667  attributes.addAll(parsedTrackAttributeMap.values());
668 
669  /*
670  * Look up the bank name, scheme, etc. attributes for the bank
671  * indentification number (BIN).
672  */
673  final int bin = Integer.parseInt(ccnAttribute.getValueString().substring(0, 8));
674  CreditCards.BankIdentificationNumber binInfo = CreditCards.getBINInfo(bin);
675  if (binInfo != null) {
676  binInfo.getScheme().ifPresent(scheme
677  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_SCHEME, MODULE_NAME, scheme)));
678  binInfo.getCardType().ifPresent(cardType
679  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_TYPE, MODULE_NAME, cardType)));
680  binInfo.getBrand().ifPresent(brand
681  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BRAND_NAME, MODULE_NAME, brand)));
682  binInfo.getBankName().ifPresent(bankName
683  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BANK_NAME, MODULE_NAME, bankName)));
684  binInfo.getBankPhoneNumber().ifPresent(phoneNumber
685  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PHONE_NUMBER, MODULE_NAME, phoneNumber)));
686  binInfo.getBankURL().ifPresent(url
687  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_URL, MODULE_NAME, url)));
688  binInfo.getCountry().ifPresent(country
689  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_COUNTRY, MODULE_NAME, country)));
690  binInfo.getBankCity().ifPresent(city
691  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CITY, MODULE_NAME, city)));
692  }
693 
694  /*
695  * If the hit is from unused or unallocated space, record the Solr
696  * document id to support showing just the chunk that contained the hit.
697  */
698  if (content instanceof AbstractFile) {
699  AbstractFile file = (AbstractFile) content;
700  if (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS
701  || file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) {
702  attributes.add(new BlackboardAttribute(KEYWORD_SEARCH_DOCUMENT_ID, MODULE_NAME, hit.getSolrDocumentId()));
703  }
704  }
705 
706  if (StringUtils.isNotBlank(listName)) {
707  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
708  }
709  if (snippet != null) {
710  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
711  }
712 
713  hit.getArtifactID().ifPresent(artifactID
714  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
715  );
716 
717  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
718 
719  /*
720  * Create an account instance.
721  */
722  try {
723  Case.getCurrentCaseThrows().getSleuthkitCase().getCommunicationsManager().createAccountFileInstance(Account.Type.CREDIT_CARD,
724  ccnAttribute.getValueString(), MODULE_NAME, content, attributes, ingestJobId);
725  } catch (TskCoreException | NoCurrentCaseException ex) {
726  LOGGER.log(Level.SEVERE, "Error creating CCN account instance", ex); //NON-NLS
727  }
728 
729  }
730 
739  static private void parseTrack2Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributesMap, Matcher matcher) {
740  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_NUMBER, "accountNumber", matcher);
741  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_EXPIRATION, "expiration", matcher);
742  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_SERVICE_CODE, "serviceCode", matcher);
743  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_DISCRETIONARY, "discretionary", matcher);
744  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_LRC, "LRC", matcher);
745  }
746 
756  static private void parseTrack1Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, Matcher matcher) {
757  parseTrack2Data(attributeMap, matcher);
758  addAttributeIfNotAlreadyCaptured(attributeMap, ATTRIBUTE_TYPE.TSK_NAME_PERSON, "name", matcher);
759  }
760 
773  static private void addAttributeIfNotAlreadyCaptured(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) {
774  BlackboardAttribute.Type type = new BlackboardAttribute.Type(attrType);
775 
776  if (!attributeMap.containsKey(type)) {
777  String value = matcher.group(groupName);
778  if (attrType.equals(ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) {
779  attributeMap.put(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD),
780  new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, value));
781  value = CharMatcher.anyOf(" -").removeFrom(value);
782  }
783 
784  if (StringUtils.isNotBlank(value)) {
785  attributeMap.put(type, new BlackboardAttribute(attrType, MODULE_NAME, value));
786  }
787  }
788  }
789 }

Copyright © 2012-2022 Basis Technology. Generated on: Tue Oct 4 2022
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.