Autopsy  4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
TermsComponentQuery.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2017 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import com.google.common.base.CharMatcher;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Set;
29 import java.util.logging.Level;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import org.apache.commons.lang.StringUtils;
33 import org.apache.commons.validator.routines.checkdigit.LuhnCheckDigit;
34 import org.apache.solr.client.solrj.SolrQuery;
35 import org.apache.solr.client.solrj.response.TermsResponse.Term;
47 
54 final class TermsComponentQuery implements KeywordSearchQuery {
55 
56  private static final Logger LOGGER = Logger.getLogger(TermsComponentQuery.class.getName());
57  private static final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
58  private static final String SEARCH_HANDLER = "/terms"; //NON-NLS
59  private static final String SEARCH_FIELD = Server.Schema.TEXT.toString();
60  private static final int TERMS_SEARCH_TIMEOUT = 90 * 1000; // Milliseconds
61  private static final String CASE_INSENSITIVE = "case_insensitive"; //NON-NLS
62  private static final boolean DEBUG_FLAG = Version.Type.DEVELOPMENT.equals(Version.getBuildType());
63  private static final int MAX_TERMS_QUERY_RESULTS = 20000;
64  private final KeywordList keywordList;
65  private final Keyword originalKeyword;
66  private String searchTerm;
67  private boolean searchTermIsEscaped;
68  private final List<KeywordQueryFilter> filters = new ArrayList<>(); // THIS APPEARS TO BE UNUSED
69 
70  /*
71  * The following fields are part of the initial implementation of credit
72  * card account search and should be factored into another class when time
73  * permits.
74  */
75  static final Pattern CREDIT_CARD_NUM_PATTERN = Pattern.compile("(?<ccn>[3-6]([ -]?[0-9]){11,18})"); //12-19 digits, with possible single spaces or dashes in between. First digit is 3,4,5, or 6 //NON-NLS
76  static final LuhnCheckDigit CREDIT_CARD_NUM_LUHN_CHECK = new LuhnCheckDigit();
77  static final Pattern CREDIT_CARD_TRACK1_PATTERN = Pattern.compile(
78  /*
79  * Track 1 is alphanumeric.
80  *
81  * This regex matches 12-19 digit ccns embeded in a track 1 formated
82  * string. This regex matches (and extracts groups) even if the
83  * entire track is not present as long as the part that is conforms
84  * to the track format.
85  */
86  "(?:" //begin nested optinal group //NON-NLS
87  + "%?" //optional start sentinal: % //NON-NLS
88  + "B)?" //format code //NON-NLS
89  + "(?<accountNumber>[3-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 3,4,5, or 6 //NON-NLS
90  + "\\^" //separator //NON-NLS
91  + "(?<name>[^^]{2,26})" //2-26 charachter name, not containing ^ //NON-NLS
92  + "(?:\\^" //separator //NON-NLS
93  + "(?:(?:\\^|(?<expiration>\\d{4}))" //separator or 4 digit expiration YYMM //NON-NLS
94  + "(?:(?:\\^|(?<serviceCode>\\d{3}))"//separator or 3 digit service code //NON-NLS
95  + "(?:(?<discretionary>[^?]*)" // discretionary data not containing separator //NON-NLS
96  + "(?:\\?" // end sentinal: ? //NON-NLS
97  + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
98  + "?)?)?)?)?)?");//close nested optional groups //NON-NLS
99  static final Pattern CREDIT_CARD_TRACK2_PATTERN = Pattern.compile(
100  /*
101  * Track 2 is numeric plus six punctuation symbolls :;<=>?
102  *
103  * This regex matches 12-19 digit ccns embeded in a track 2 formated
104  * string. This regex matches (and extracts groups) even if the
105  * entire track is not present as long as the part that is conforms
106  * to the track format.
107  *
108  */
109  "[:;<=>?]?" //(optional)start sentinel //NON-NLS
110  + "(?<accountNumber>[3-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 3,4,5, or 6 //NON-NLS
111  + "(?:[:;<=>?]" //separator //NON-NLS
112  + "(?:(?<expiration>\\d{4})" //4 digit expiration date YYMM //NON-NLS
113  + "(?:(?<serviceCode>\\d{3})" //3 digit service code //NON-NLS
114  + "(?:(?<discretionary>[^:;<=>?]*)" //discretionary data, not containing punctuation marks //NON-NLS
115  + "(?:[:;<=>?]" //end sentinel //NON-NLS
116  + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
117  + "?)?)?)?)?)?"); //close nested optional groups //NON-NLS
118  static final BlackboardAttribute.Type KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
119 
/**
 * Constructs a terms component query for a single keyword. The working
 * search term is initialized to the keyword's search term, unmodified.
 *
 * TODO: Why are both the list and the keyword added to the state of this
 * object?
 *
 * TODO: Why is the search term not escaped and given substring wildcards,
 * if needed, here in the constructor?
 *
 * @param keywordList The keyword list stored with this query and returned
 *                    by getKeywordList().
 * @param keyword     The keyword to search for; kept as the original
 *                    keyword of this query.
 */
TermsComponentQuery(KeywordList keywordList, Keyword keyword) {
    this.originalKeyword = keyword;
    this.keywordList = keywordList;
    this.searchTerm = this.originalKeyword.getSearchTerm();
}
141 
148  @Override
149  public KeywordList getKeywordList() {
150  return keywordList;
151  }
152 
159  @Override
160  public String getQueryString() {
161  return originalKeyword.getSearchTerm();
162  }
163 
171  @Override
172  public boolean isLiteral() {
173  return false;
174  }
175 
180  @Override
181  public void setSubstringQuery() {
182  searchTerm = ".*" + searchTerm + ".*";
183  }
184 
188  @Override
189  public void escape() {
190  searchTerm = Pattern.quote(originalKeyword.getSearchTerm());
191  searchTermIsEscaped = true;
192  }
193 
199  @Override
200  public boolean isEscaped() {
201  return searchTermIsEscaped;
202  }
203 
210  @Override
211  public String getEscapedQueryString() {
212  return this.searchTerm;
213  }
214 
220  @Override
221  public boolean validate() {
222  if (searchTerm.isEmpty()) {
223  return false;
224  }
225  try {
226  Pattern.compile(searchTerm);
227  return true;
228  } catch (IllegalArgumentException ex) {
229  return false;
230  }
231  }
232 
239  @Override
240  public void setField(String field) {
241  }
242 
248  // TODO: Document this better.
249  @Override
250  public void addFilter(KeywordQueryFilter filter) {
251  this.filters.add(filter);
252  }
253 
264  @Override
265  public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
266  /*
267  * Do a query using the Solr terms component to find any terms in the
268  * index that match the regex.
269  */
270  final SolrQuery termsQuery = new SolrQuery();
271  termsQuery.setRequestHandler(SEARCH_HANDLER);
272  termsQuery.setTerms(true);
273  termsQuery.setTermsRegexFlag(CASE_INSENSITIVE);
274  termsQuery.setTermsRegex(searchTerm);
275  termsQuery.addTermsField(SEARCH_FIELD);
276  termsQuery.setTimeAllowed(TERMS_SEARCH_TIMEOUT);
277  termsQuery.setShowDebugInfo(DEBUG_FLAG);
278  termsQuery.setTermsLimit(MAX_TERMS_QUERY_RESULTS);
279  List<Term> terms = KeywordSearch.getServer().queryTerms(termsQuery).getTerms(SEARCH_FIELD);
280  /*
281  * Do a term query for each term that matched the regex.
282  */
283  QueryResults results = new QueryResults(this);
284  for (Term term : terms) {
285  /*
286  * If searching for credit card account numbers, do a Luhn check on
287  * the term and discard it if it does not pass.
288  */
289  if (originalKeyword.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
290  Matcher matcher = CREDIT_CARD_NUM_PATTERN.matcher(term.getTerm());
291  matcher.find();
292  final String ccn = CharMatcher.anyOf(" -").removeFrom(matcher.group("ccn"));
293  if (false == CREDIT_CARD_NUM_LUHN_CHECK.isValid(ccn)) {
294  continue;
295  }
296  }
297 
298  /*
299  * Do an ordinary query with the escaped term and convert the query
300  * results into a single list of keyword hits without duplicates.
301  *
302  * Note that the filters field appears to be unused. There is an old
303  * comment here, what does it mean? "Note: we can't set filter query
304  * on terms query but setting filter query on fileResults query will
305  * yield the same result." The filter is NOT being added to the term
306  * query.
307  */
308  String escapedTerm = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());
309  LuceneQuery termQuery = new LuceneQuery(keywordList, new Keyword(escapedTerm, true, true));
310  filters.forEach(termQuery::addFilter); // This appears to be unused
311  QueryResults termQueryResult = termQuery.performQuery();
312  Set<KeywordHit> termHits = new HashSet<>();
313  for (Keyword word : termQueryResult.getKeywords()) {
314  termHits.addAll(termQueryResult.getResults(word));
315  }
316  results.addResult(new Keyword(term.getTerm(), false, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), new ArrayList<>(termHits));
317  }
318  return results;
319  }
320 
335  // TODO: Are we actually making meaningful use of the KeywordCachedArtifact
336  // class?
337  @Override
338  public KeywordCachedArtifact writeSingleFileHitsToBlackBoard(Keyword foundKeyword, KeywordHit hit, String snippet, String listName) {
339  /*
340  * Create either a "plain vanilla" keyword hit artifact with keyword and
341  * regex attributes, or a credit card account artifact with attributes
342  * parsed from from the snippet for the hit and looked up based on the
343  * parsed bank identifcation number.
344  */
345  BlackboardArtifact newArtifact;
346  Collection<BlackboardAttribute> attributes = new ArrayList<>();
347  if (originalKeyword.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
348  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm()));
349  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, originalKeyword.getSearchTerm()));
350 
351  try {
352  newArtifact = hit.getContent().newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
353 
354  } catch (TskCoreException ex) {
355  LOGGER.log(Level.SEVERE, "Error adding artifact for keyword hit to blackboard", ex); //NON-NLS
356  return null;
357  }
358  } else {
359  /*
360  * Parse the credit card account attributes from the snippet for the
361  * hit.
362  */
363  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ACCOUNT_TYPE, MODULE_NAME, Account.Type.CREDIT_CARD.name()));
364  Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap = new HashMap<>();
365  Matcher matcher = CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
366  if (matcher.find()) {
367  parseTrack1Data(parsedTrackAttributeMap, matcher);
368  }
369  matcher = CREDIT_CARD_TRACK2_PATTERN.matcher(hit.getSnippet());
370  if (matcher.find()) {
371  parseTrack2Data(parsedTrackAttributeMap, matcher);
372  }
373  final BlackboardAttribute ccnAttribute = parsedTrackAttributeMap.get(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_CARD_NUMBER));
374  if (ccnAttribute == null || StringUtils.isBlank(ccnAttribute.getValueString())) {
375  if (hit.isArtifactHit()) {
376  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", searchTerm, hit.getSnippet(), hit.getArtifact().getArtifactID())); //NON-NLS
377  } else {
378  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", searchTerm, hit.getSnippet(), hit.getContent().getId())); //NON-NLS
379  }
380  return null;
381  }
382  attributes.addAll(parsedTrackAttributeMap.values());
383 
384  /*
385  * Look up the bank name, scheme, etc. attributes for the bank
386  * indentification number (BIN).
387  */
388  final int bin = Integer.parseInt(ccnAttribute.getValueString().substring(0, 8));
389  CreditCards.BankIdentificationNumber binInfo = CreditCards.getBINInfo(bin);
390  if (binInfo != null) {
391  binInfo.getScheme().ifPresent(scheme
392  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_SCHEME, MODULE_NAME, scheme)));
393  binInfo.getCardType().ifPresent(cardType
394  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_TYPE, MODULE_NAME, cardType)));
395  binInfo.getBrand().ifPresent(brand
396  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BRAND_NAME, MODULE_NAME, brand)));
397  binInfo.getBankName().ifPresent(bankName
398  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BANK_NAME, MODULE_NAME, bankName)));
399  binInfo.getBankPhoneNumber().ifPresent(phoneNumber
400  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PHONE_NUMBER, MODULE_NAME, phoneNumber)));
401  binInfo.getBankURL().ifPresent(url
402  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_URL, MODULE_NAME, url)));
403  binInfo.getCountry().ifPresent(country
404  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_COUNTRY, MODULE_NAME, country)));
405  binInfo.getBankCity().ifPresent(city
406  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CITY, MODULE_NAME, city)));
407  }
408 
409  /*
410  * If the hit is from unused or unallocated space, record the Solr
411  * document id to support showing just the chunk that contained the
412  * hit.
413  */
414  if (hit.getContent() instanceof AbstractFile) {
415  AbstractFile file = (AbstractFile) hit.getContent();
416  if (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS
417  || file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) {
418  attributes.add(new BlackboardAttribute(KEYWORD_SEARCH_DOCUMENT_ID, MODULE_NAME, hit.getSolrDocumentId()));
419  }
420  }
421 
422  /*
423  * Create an account artifact.
424  */
425  try {
426  newArtifact = hit.getContent().newArtifact(ARTIFACT_TYPE.TSK_ACCOUNT);
427  } catch (TskCoreException ex) {
428  LOGGER.log(Level.SEVERE, "Error adding artifact for account to blackboard", ex); //NON-NLS
429  return null;
430  }
431  }
432 
433  if (StringUtils.isNotBlank(listName)) {
434  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
435  }
436  if (snippet != null) {
437  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
438  }
439  if (hit.isArtifactHit()) {
440  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, hit.getArtifact().getArtifactID()));
441  }
442 
443  // TermsComponentQuery is now being used exclusively for substring searches.
444  attributes.add(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
445 
446  try {
447  newArtifact.addAttributes(attributes);
448  KeywordCachedArtifact writeResult = new KeywordCachedArtifact(newArtifact);
449  writeResult.add(attributes);
450  return writeResult;
451  } catch (TskCoreException e) {
452  LOGGER.log(Level.SEVERE, "Error adding bb attributes for terms search artifact", e); //NON-NLS
453  return null;
454  }
455  }
456 
465  static private void parseTrack2Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributesMap, Matcher matcher) {
466  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_NUMBER, "accountNumber", matcher);
467  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_EXPIRATION, "expiration", matcher);
468  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_SERVICE_CODE, "serviceCode", matcher);
469  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_DISCRETIONARY, "discretionary", matcher);
470  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_LRC, "LRC", matcher);
471  }
472 
482  static private void parseTrack1Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, Matcher matcher) {
483  parseTrack2Data(attributeMap, matcher);
484  addAttributeIfNotAlreadyCaptured(attributeMap, ATTRIBUTE_TYPE.TSK_NAME_PERSON, "name", matcher);
485  }
486 
498  static private void addAttributeIfNotAlreadyCaptured(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) {
499  BlackboardAttribute.Type type = new BlackboardAttribute.Type(attrType);
500  attributeMap.computeIfAbsent(type, (BlackboardAttribute.Type t) -> {
501  String value = matcher.group(groupName);
502  if (attrType.equals(ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) {
503  value = CharMatcher.anyOf(" -").removeFrom(value);
504  }
505  if (StringUtils.isNotBlank(value)) {
506  return new BlackboardAttribute(attrType, MODULE_NAME, value);
507  }
508  return null;
509  });
510  }
511 
512 }

Copyright © 2012-2016 Basis Technology. Generated on: Mon Apr 24 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.