Autopsy  4.4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
TermsComponentQuery.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2017 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import com.google.common.base.CharMatcher;
22 import java.util.ArrayList;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Set;
29 import java.util.logging.Level;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import org.apache.commons.lang.StringUtils;
33 import org.apache.solr.client.solrj.SolrQuery;
34 import org.apache.solr.client.solrj.response.TermsResponse.Term;
38 import org.sleuthkit.datamodel.AbstractFile;
39 import org.sleuthkit.datamodel.Account;
40 import org.sleuthkit.datamodel.BlackboardArtifact;
41 import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
42 import org.sleuthkit.datamodel.BlackboardAttribute;
43 import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
44 import org.sleuthkit.datamodel.Content;
45 import org.sleuthkit.datamodel.TskCoreException;
46 import org.sleuthkit.datamodel.TskData;
47 
54 final class TermsComponentQuery implements KeywordSearchQuery {
55 
 // Class-wide logger; used below to report SEVERE failures while creating
 // artifacts and attributes.
56  private static final Logger LOGGER = Logger.getLogger(TermsComponentQuery.class.getName());
 // Passed to every BlackboardAttribute this class constructs.
57  private static final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
 // Request handler set on the SolrQuery built in performQuery().
58  private static final String SEARCH_HANDLER = "/terms"; //NON-NLS
 // Index field added to the terms query and used to read the matching terms
 // back out of the response.
59  private static final String SEARCH_FIELD = Server.Schema.TEXT.toString();
 // Passed to SolrQuery.setTimeAllowed() in performQuery().
60  private static final int TERMS_SEARCH_TIMEOUT = 90 * 1000; // Milliseconds
 // Passed to SolrQuery.setTermsRegexFlag() in performQuery().
61  private static final String CASE_INSENSITIVE = "case_insensitive"; //NON-NLS
 // True only when the build type equals Version.Type.DEVELOPMENT; passed to
 // SolrQuery.setShowDebugInfo().
62  private static final boolean DEBUG_FLAG = Version.Type.DEVELOPMENT.equals(Version.getBuildType());
 // Passed to SolrQuery.setTermsLimit() in performQuery().
63  private static final int MAX_TERMS_QUERY_RESULTS = 20000;
64 
 // Keyword list given to the constructor; returned by getKeywordList() and
 // handed to each per-term LuceneQuery.
65  private final KeywordList keywordList;
 // Keyword given to the constructor; its search term is never modified and
 // is what getQueryString() and escape() read.
66  private final Keyword originalKeyword;
 // Filters collected by addFilter() and forwarded to every per-term
 // LuceneQuery in performQuery().
67  private final List<KeywordQueryFilter> filters = new ArrayList<>(); // THIS APPEARS TO BE UNUSED
68 
 // Working regex sent to Solr; starts as the keyword's search term and is
 // rewritten by setSubstringQuery() and escape().
69  private String searchTerm;
 // Set to true by escape(); reported by isEscaped().
70  private boolean searchTermIsEscaped;
71 
72  /*
73  * The following fields are part of the initial implementation of credit
74  * card account search and should be factored into another class when time
75  * permits.
76  */
 // Matches one digit in the range 2-6 followed by 11 to 18 further digits,
 // each optionally preceded by a single space or dash. The whole run is
 // captured in the named group "ccn", which performQuery() reads before
 // validating the number.
82  static final Pattern CREDIT_CARD_NUM_PATTERN =
83  Pattern.compile("(?<ccn>[2-6]([ -]?[0-9]){11,18})");
 // Named groups: accountNumber, name, expiration, serviceCode, discretionary,
 // LRC. Only accountNumber and name are mandatory for a match; the others sit
 // inside nested optional groups and may be null.
84  static final Pattern CREDIT_CARD_TRACK1_PATTERN = Pattern.compile(
85  /*
86  * Track 1 is alphanumeric.
87  *
88  * This regex matches 12-19 digit ccns embedded in a track 1 formatted
89  * string. This regex matches (and extracts groups) even if the
90  * entire track is not present as long as the part that is conforms
91  * to the track format.
92  */
93  "(?:" //begin nested optional group //NON-NLS
94  + "%?" //optional start sentinel: % //NON-NLS
95  + "B)?" //format code //NON-NLS
96  + "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
97  + "\\^" //separator //NON-NLS
98  + "(?<name>[^^]{2,26})" //2-26 character name, not containing ^ //NON-NLS
99  + "(?:\\^" //separator //NON-NLS
100  + "(?:(?:\\^|(?<expiration>\\d{4}))" //separator or 4 digit expiration YYMM //NON-NLS
101  + "(?:(?:\\^|(?<serviceCode>\\d{3}))"//separator or 3 digit service code //NON-NLS
102  + "(?:(?<discretionary>[^?]*)" // discretionary data not containing separator //NON-NLS
103  + "(?:\\?" // end sentinel: ? //NON-NLS
104  + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
105  + "?)?)?)?)?)?");//close nested optional groups //NON-NLS
 // Named groups: accountNumber, expiration, serviceCode, discretionary, LRC.
 // Only accountNumber is mandatory for a match; the others sit inside nested
 // optional groups and may be null.
106  static final Pattern CREDIT_CARD_TRACK2_PATTERN = Pattern.compile(
107  /*
108  * Track 2 is numeric plus six punctuation symbols :;<=>?
109  *
110  * This regex matches 12-19 digit ccns embedded in a track 2 formatted
111  * string. This regex matches (and extracts groups) even if the
112  * entire track is not present as long as the part that is conforms
113  * to the track format.
114  *
115  */
116  "[:;<=>?]?" //(optional)start sentinel //NON-NLS
117  + "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
118  + "(?:[:;<=>?]" //separator //NON-NLS
119  + "(?:(?<expiration>\\d{4})" //4 digit expiration date YYMM //NON-NLS
120  + "(?:(?<serviceCode>\\d{3})" //3 digit service code //NON-NLS
121  + "(?:(?<discretionary>[^:;<=>?]*)" //discretionary data, not containing punctuation marks //NON-NLS
122  + "(?:[:;<=>?]" //end sentinel //NON-NLS
123  + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
124  + "?)?)?)?)?)?"); //close nested optional groups //NON-NLS
 // Attribute type used in writeSingleFileHitsToBlackBoard() to record the
 // hit's Solr document id for files of type UNUSED_BLOCKS or UNALLOC_BLOCKS.
125  static final BlackboardAttribute.Type KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
126 
127 
/**
 * Creates a query for one keyword. The working search term is initialised
 * from the keyword's search term, unescaped and without substring
 * wildcards; callers adjust it afterwards through {@link #escape()} and
 * {@link #setSubstringQuery()}.
 *
 * @param keywordList The keyword list to associate with this query.
 * @param keyword     The keyword whose search term is used as the regex.
 */
TermsComponentQuery(KeywordList keywordList, Keyword keyword) {
    // TODO: Why is both the list and the keyword added to the state of this
    // object?
    // TODO: Why is the search term not escaped and given substring wildcards,
    // if needed, here in the constructor?
    this.searchTerm = keyword.getSearchTerm();
    this.originalKeyword = keyword;
    this.keywordList = keywordList;
}
149 
156  @Override
157  public KeywordList getKeywordList() {
158  return keywordList;
159  }
160 
167  @Override
168  public String getQueryString() {
169  return originalKeyword.getSearchTerm();
170  }
171 
179  @Override
180  public boolean isLiteral() {
181  return false;
182  }
183 
188  @Override
189  public void setSubstringQuery() {
190  searchTerm = ".*" + searchTerm + ".*";
191  }
192 
196  @Override
197  public void escape() {
198  searchTerm = Pattern.quote(originalKeyword.getSearchTerm());
199  searchTermIsEscaped = true;
200  }
201 
207  @Override
208  public boolean isEscaped() {
209  return searchTermIsEscaped;
210  }
211 
218  @Override
219  public String getEscapedQueryString() {
220  return this.searchTerm;
221  }
222 
228  @Override
229  public boolean validate() {
230  if (searchTerm.isEmpty()) {
231  return false;
232  }
233  try {
234  Pattern.compile(searchTerm);
235  return true;
236  } catch (IllegalArgumentException ex) {
237  return false;
238  }
239  }
240 
247  @Override
248  public void setField(String field) {
249  }
250 
256  // TODO: Document this better.
257  @Override
258  public void addFilter(KeywordQueryFilter filter) {
259  this.filters.add(filter);
260  }
261 
272  @Override
273  public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
274  /*
275  * Do a query using the Solr terms component to find any terms in the
276  * index that match the regex.
277  */
278  final SolrQuery termsQuery = new SolrQuery();
279  termsQuery.setRequestHandler(SEARCH_HANDLER);
280  termsQuery.setTerms(true);
281  termsQuery.setTermsRegexFlag(CASE_INSENSITIVE);
282  termsQuery.setTermsRegex(searchTerm);
283  termsQuery.addTermsField(SEARCH_FIELD);
284  termsQuery.setTimeAllowed(TERMS_SEARCH_TIMEOUT);
285  termsQuery.setShowDebugInfo(DEBUG_FLAG);
286  termsQuery.setTermsLimit(MAX_TERMS_QUERY_RESULTS);
287  List<Term> terms = KeywordSearch.getServer().queryTerms(termsQuery).getTerms(SEARCH_FIELD);
288  /*
289  * Do a term query for each term that matched the regex.
290  */
291  QueryResults results = new QueryResults(this);
292  for (Term term : terms) {
293  /*
294  * If searching for credit card account numbers, do a Luhn check on
295  * the term and discard it if it does not pass.
296  */
297  if (originalKeyword.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
298  Matcher matcher = CREDIT_CARD_NUM_PATTERN.matcher(term.getTerm());
299  if (false == matcher.find()
300  || false == CreditCardValidator.isValidCCN(matcher.group("ccn"))) {
301  continue;
302  }
303  }
304 
305  /*
306  * Do an ordinary query with the escaped term and convert the query
307  * results into a single list of keyword hits without duplicates.
308  *
309  * Note that the filters field appears to be unused. There is an old
310  * comment here, what does it mean? "Note: we can't set filter query
311  * on terms query but setting filter query on fileResults query will
312  * yield the same result." The filter is NOT being added to the term
313  * query.
314  */
315  String escapedTerm = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());
316  LuceneQuery termQuery = new LuceneQuery(keywordList, new Keyword(escapedTerm, true, true));
317  filters.forEach(termQuery::addFilter); // This appears to be unused
318  QueryResults termQueryResult = termQuery.performQuery();
319  Set<KeywordHit> termHits = new HashSet<>();
320  for (Keyword word : termQueryResult.getKeywords()) {
321  termHits.addAll(termQueryResult.getResults(word));
322  }
323  results.addResult(new Keyword(term.getTerm(), false, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), new ArrayList<>(termHits));
324  }
325  return results;
326  }
327 
328  @Override
329  public BlackboardArtifact writeSingleFileHitsToBlackBoard(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName) {
330  /*
331  * Create either a "plain vanilla" keyword hit artifact with keyword and
332  * regex attributes, or a credit card account artifact with attributes
333  * parsed from from the snippet for the hit and looked up based on the
334  * parsed bank identifcation number.
335  */
336  BlackboardArtifact newArtifact;
337  Collection<BlackboardAttribute> attributes = new ArrayList<>();
338  if (originalKeyword.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
339  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm()));
340  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, originalKeyword.getSearchTerm()));
341 
342  try {
343  newArtifact = content.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
344 
345  } catch (TskCoreException ex) {
346  LOGGER.log(Level.SEVERE, "Error adding artifact for keyword hit to blackboard", ex); //NON-NLS
347  return null;
348  }
349  } else {
350  /*
351  * Parse the credit card account attributes from the snippet for the
352  * hit.
353  */
354  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ACCOUNT_TYPE, MODULE_NAME, Account.Type.CREDIT_CARD.name()));
355  Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap = new HashMap<>();
356  Matcher matcher = CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
357  if (matcher.find()) {
358  parseTrack1Data(parsedTrackAttributeMap, matcher);
359  }
360  matcher = CREDIT_CARD_TRACK2_PATTERN.matcher(hit.getSnippet());
361  if (matcher.find()) {
362  parseTrack2Data(parsedTrackAttributeMap, matcher);
363  }
364  final BlackboardAttribute ccnAttribute = parsedTrackAttributeMap.get(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_CARD_NUMBER));
365  if (ccnAttribute == null || StringUtils.isBlank(ccnAttribute.getValueString())) {
366  if (hit.isArtifactHit()) {
367  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", searchTerm, hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS
368  } else {
369  LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", searchTerm, hit.getSnippet(), hit.getContentID())); //NON-NLS
370  }
371  return null;
372  }
373  attributes.addAll(parsedTrackAttributeMap.values());
374 
375  /*
376  * Look up the bank name, scheme, etc. attributes for the bank
377  * indentification number (BIN).
378  */
379  final int bin = Integer.parseInt(ccnAttribute.getValueString().substring(0, 8));
380  CreditCards.BankIdentificationNumber binInfo = CreditCards.getBINInfo(bin);
381  if (binInfo != null) {
382  binInfo.getScheme().ifPresent(scheme
383  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_SCHEME, MODULE_NAME, scheme)));
384  binInfo.getCardType().ifPresent(cardType
385  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_TYPE, MODULE_NAME, cardType)));
386  binInfo.getBrand().ifPresent(brand
387  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BRAND_NAME, MODULE_NAME, brand)));
388  binInfo.getBankName().ifPresent(bankName
389  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BANK_NAME, MODULE_NAME, bankName)));
390  binInfo.getBankPhoneNumber().ifPresent(phoneNumber
391  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PHONE_NUMBER, MODULE_NAME, phoneNumber)));
392  binInfo.getBankURL().ifPresent(url
393  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_URL, MODULE_NAME, url)));
394  binInfo.getCountry().ifPresent(country
395  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_COUNTRY, MODULE_NAME, country)));
396  binInfo.getBankCity().ifPresent(city
397  -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CITY, MODULE_NAME, city)));
398  }
399 
400  /*
401  * If the hit is from unused or unallocated space, record the Solr
402  * document id to support showing just the chunk that contained the
403  * hit.
404  */
405  if (content instanceof AbstractFile) {
406  AbstractFile file = (AbstractFile) content;
407  if (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS
408  || file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) {
409  attributes.add(new BlackboardAttribute(KEYWORD_SEARCH_DOCUMENT_ID, MODULE_NAME, hit.getSolrDocumentId()));
410  }
411  }
412 
413  /*
414  * Create an account artifact.
415  */
416  try {
417  newArtifact = content.newArtifact(ARTIFACT_TYPE.TSK_ACCOUNT);
418  } catch (TskCoreException ex) {
419  LOGGER.log(Level.SEVERE, "Error adding artifact for account to blackboard", ex); //NON-NLS
420  return null;
421  }
422  }
423 
424  if (StringUtils.isNotBlank(listName)) {
425  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
426  }
427  if (snippet != null) {
428  attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
429  }
430 
431  hit.getArtifactID().ifPresent(
432  artifactID -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
433  );
434 
435  // TermsComponentQuery is now being used exclusively for substring searches.
436  attributes.add(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
437 
438  try {
439  newArtifact.addAttributes(attributes);
440  return newArtifact;
441  } catch (TskCoreException e) {
442  LOGGER.log(Level.SEVERE, "Error adding bb attributes for terms search artifact", e); //NON-NLS
443  return null;
444  }
445  }
446 
455  static private void parseTrack2Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributesMap, Matcher matcher) {
456  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_NUMBER, "accountNumber", matcher);
457  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_EXPIRATION, "expiration", matcher);
458  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_SERVICE_CODE, "serviceCode", matcher);
459  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_DISCRETIONARY, "discretionary", matcher);
460  addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_LRC, "LRC", matcher);
461  }
462 
472  static private void parseTrack1Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, Matcher matcher) {
473  parseTrack2Data(attributeMap, matcher);
474  addAttributeIfNotAlreadyCaptured(attributeMap, ATTRIBUTE_TYPE.TSK_NAME_PERSON, "name", matcher);
475  }
476 
488  static private void addAttributeIfNotAlreadyCaptured(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) {
489  BlackboardAttribute.Type type = new BlackboardAttribute.Type(attrType);
490  attributeMap.computeIfAbsent(type, (BlackboardAttribute.Type t) -> {
491  String value = matcher.group(groupName);
492  if (attrType.equals(ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) {
493  value = CharMatcher.anyOf(" -").removeFrom(value);
494  }
495  if (StringUtils.isNotBlank(value)) {
496  return new BlackboardAttribute(attrType, MODULE_NAME, value);
497  }
498  return null;
499  });
500  }
501 
502 }

Copyright © 2012-2016 Basis Technology. Generated on: Fri Sep 29 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.