19 package org.sleuthkit.autopsy.keywordsearch;
 
   21 import com.google.common.base.CharMatcher;
 
   22 import com.google.common.collect.ArrayListMultimap;
 
   23 import com.google.common.collect.ListMultimap;
 
   24 import java.util.ArrayList;
 
   25 import java.util.Collection;
 
   26 import java.util.HashMap;
 
   27 import java.util.HashSet;
 
   28 import java.util.List;
 
   31 import java.util.logging.Level;
 
   32 import java.util.regex.Matcher;
 
   33 import java.util.regex.Pattern;
 
   34 import org.apache.commons.lang.StringUtils;
 
   35 import org.apache.commons.validator.routines.DomainValidator;
 
   36 import org.apache.solr.client.solrj.SolrQuery;
 
   37 import org.apache.solr.client.solrj.SolrQuery.SortClause;
 
   38 import org.apache.solr.client.solrj.SolrRequest;
 
   39 import org.apache.solr.client.solrj.response.QueryResponse;
 
   40 import org.apache.solr.common.SolrDocument;
 
   41 import org.apache.solr.common.SolrDocumentList;
 
   42 import org.apache.solr.common.params.CursorMarkParams;
 
   43 import org.openide.util.NbBundle;
 
   75 final class RegexQuery 
implements KeywordSearchQuery {
 
   77     public static final Logger LOGGER = Logger.getLogger(RegexQuery.class.getName());
 
   78     private final List<KeywordQueryFilter> filters = 
new ArrayList<>();
 
   80     private final KeywordList keywordList;
 
   81     private final Keyword originalKeyword; 
 
   82     private String field = Server.Schema.CONTENT_STR.toString();
 
   83     private final String keywordString;
 
   84     static final private int MAX_RESULTS_PER_CURSOR_MARK = 512;
 
   85     private boolean escaped;
 
   86     private String escapedQuery;
 
   88     private final int MIN_EMAIL_ADDR_LENGTH = 8;
 
   90     private final ListMultimap<Keyword, KeywordHit> hitsMultiMap = ArrayListMultimap.create();
 
  100     private static final CharSequence[] UNSUPPORTED_CHARS = {
"\\d", 
"\\D", 
"\\w", 
"\\W", 
"\\s", 
"\\S", 
"\\n",
 
  101         "\\t", 
"\\r", 
"\\f", 
"\\a", 
"\\e", 
"\\v", 
"\\V", 
"\\h", 
"\\H", 
"\\p"}; 
 
  103     private boolean queryStringContainsWildcardPrefix = 
false;
 
  104     private boolean queryStringContainsWildcardSuffix = 
false;
 
  112     RegexQuery(KeywordList keywordList, Keyword keyword) {
 
  113         this.keywordList = keywordList;
 
  114         this.originalKeyword = keyword;
 
  115         this.keywordString = keyword.getSearchTerm();
 
  117         if (this.keywordString.startsWith(
".*")) {
 
  118             this.queryStringContainsWildcardPrefix = 
true;
 
  121         if (this.keywordString.endsWith(
".*")) {
 
  122             this.queryStringContainsWildcardSuffix = 
true;
 
  /**
   * Gets the keyword list associated with this query.
   *
   * NOTE(review): the method body is not visible in this extract; it
   * presumably returns the keywordList field set by the constructor - confirm.
   *
   * @return a KeywordList
   */
  127     public KeywordList getKeywordList() {
 
  /**
   * Checks whether the search term can be used as a regular expression
   * query. The visible steps are: an empty-term check, a trial compile with
   * java.util.regex.Pattern, and a scan for the sequences listed in
   * UNSUPPORTED_CHARS.
   *
   * NOTE(review): the statements executed inside each branch (the actual
   * return values) are not visible in this extract - confirm against the
   * full file before relying on this description.
   *
   * @return a boolean validity flag
   */
  132     public boolean validate() {
 
  133         if (keywordString.isEmpty()) {
 
              // A syntactically invalid expression makes compile() throw
              // PatternSyntaxException, a subclass of the
              // IllegalArgumentException caught below.
  138             Pattern.compile(keywordString, Pattern.UNICODE_CHARACTER_CLASS);
 
              // Plain substring search for each listed escape sequence.
  144             for (CharSequence c : UNSUPPORTED_CHARS) {
 
  145                 if (keywordString.contains(c)) {
 
  150         } 
catch (IllegalArgumentException ex) {
 
  /**
   * Runs the regular expression against the Solr index, paging through the
   * results with a cursor mark, and collects every hit produced by
   * createKeywordHits() into hitsMultiMap before copying them into a
   * QueryResults object.
   *
   * NOTE(review): several lines of this method (the try openers, the source
   * of the filter stream, the catch bodies and the final return) are not
   * visible in this extract.
   *
   * @return presumably the QueryResults built at the end of the method
   *
   * @throws NoOpenCoreException declared by the signature; the throwing call
   *                             is not identifiable from the visible lines
   */
  156     public QueryResults performQuery() throws NoOpenCoreException {
 
  158         final Server solrServer = KeywordSearch.getServer();
 
  159         SolrQuery solrQuery = 
new SolrQuery();
 
          // Query text has the form <field>:/<regex>/ . A ".*" is added on a
          // side only when the search term does not already have one there.
  178         solrQuery.setQuery((field == null ? Server.Schema.CONTENT_STR.toString() : field) + 
":/" 
  179                 + (queryStringContainsWildcardPrefix ? 
"" : 
".*") + getQueryString()
 
  180                 + (queryStringContainsWildcardSuffix ? 
"" : 
".*") + 
"/");
 
          // Only these three fields are requested back; createKeywordHits()
          // reads exactly these.
  183         solrQuery.setFields(Server.Schema.CONTENT_STR.toString(), Server.Schema.ID.toString(), Server.Schema.CHUNK_SIZE.toString());
 
          // Each filter's string form becomes a Solr filter query.
          // NOTE(review): the stream source line is not visible; presumably
          // it is the `filters` list.
  186                 .map(KeywordQueryFilter::toString)
 
  187                 .forEach(solrQuery::addFilterQuery);
 
  189         solrQuery.setRows(MAX_RESULTS_PER_CURSOR_MARK);
 
          // Solr cursor paging requires an explicit sort; ID is presumably
          // the unique key field.
  191         solrQuery.setSort(SortClause.asc(Server.Schema.ID.toString()));
 
  193         String cursorMark = CursorMarkParams.CURSOR_MARK_START;
 
  194         SolrDocumentList resultList ;
 
  195         boolean allResultsProcessed = 
false;
 
  197         while (!allResultsProcessed) {
 
  199                 solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
 
  200                 QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
 
  201                 resultList = response.getResults();
 
  203                 for (SolrDocument resultDoc : resultList) {
 
  205                         List<KeywordHit> keywordHits = createKeywordHits(resultDoc);
 
  206                         for (KeywordHit hit : keywordHits) {
 
                              // The map key is a new Keyword built from the
                              // matched text, so hits are grouped per distinct
                              // matched string.
  207                             hitsMultiMap.put(
new Keyword(hit.getHit(), 
true, 
true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), hit);
 
                      // NOTE(review): the body of this catch is not visible;
                      // check that the exception is not silently dropped.
  209                     } 
catch (TskException ex) { 
 
                  // Receiving the same cursor mark that was sent means there
                  // are no further pages.
  214                 String nextCursorMark = response.getNextCursorMark();
 
  215                 if (cursorMark.equals(nextCursorMark)) {
 
  216                     allResultsProcessed = 
true;
 
  218                 cursorMark = nextCursorMark;
 
              // NOTE(review): in the visible lines nothing in this handler
              // ends the while loop, so a failing query may be retried
              // forever with the same cursor mark - confirm against the
              // missing lines. ex.getCause() may also be null, which would
              // make the notification call throw NullPointerException.
  219             } 
catch (KeywordSearchModuleException ex) {
 
  220                 LOGGER.log(Level.SEVERE, 
"Error executing Regex Solr Query: " + keywordString, ex); 
 
  221                 MessageNotifyUtil.Notify.error(NbBundle.getMessage(Server.class, 
"Server.query.exception.msg", keywordString), ex.getCause().getMessage());
 
  224         QueryResults results = 
new QueryResults(
this);
 
  225         for (Keyword k : hitsMultiMap.keySet()) {
 
  226             results.addResult(k, hitsMultiMap.get(k));
 
  /**
   * Re-runs the regular expression with java.util.regex over the CONTENT_STR
   * values of one Solr document and builds a KeywordHit (document id,
   * snippet, matched text) for each match that survives the type-specific
   * checks below.
   *
   * NOTE(review): the declaration of `offset` and the statements inside
   * several branches (presumably break / continue / else) are not visible in
   * this extract.
   *
   * @param solrDoc a document returned by the Solr query; its ID, CHUNK_SIZE
   *                and CONTENT_STR fields are read
   *
   * @return presumably the `hits` list built here
   *
   * @throws TskException declared by the signature; the throwing call is not
   *                      identifiable from the visible lines
   */
  231     private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) 
throws TskException {
 
  233         List<KeywordHit> hits = 
new ArrayList<>();
 
  234         final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
 
  235         final Integer chunkSize = (Integer) solrDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
 
  237         final Collection<Object> content_str = solrDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
 
          // NOTE(review): compiled without Pattern.UNICODE_CHARACTER_CLASS,
          // unlike the trial compile in validate() - confirm this is intended.
  239         final Pattern pattern = Pattern.compile(keywordString);
 
  240         for (Object content_obj : content_str) {
 
  241             String content = (String) content_obj;
 
  242             Matcher hitMatcher = pattern.matcher(content);
 
              // find(int) resets the matcher and searches from the given
              // index; `offset` is advanced to the end of each match below.
  245             while (hitMatcher.find(offset)) {
 
  246                 StringBuilder snippet = 
new StringBuilder();
 
                  // The match starts at or beyond the chunk size stored with
                  // the document. NOTE(review): the handling is on lines not
                  // visible here.
  251                 if (chunkSize != null && hitMatcher.start() >= chunkSize) {
 
  255                 String hit = hitMatcher.group();
 
  257                 offset = hitMatcher.end();
 
                  // Phone-number and IP-address keywords whose term does not
                  // end in ".*": strip one leading and one trailing
                  // non-digit character from the match (a leading "(" is
                  // kept for phone numbers).
  265                 if (!queryStringContainsWildcardSuffix
 
  266                         && (originalKeyword.getArtifactAttributeType() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PHONE_NUMBER
 
  267                         || originalKeyword.getArtifactAttributeType() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_IP_ADDRESS)) {
 
  268                     if (originalKeyword.getArtifactAttributeType() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PHONE_NUMBER) {
 
  270                         hit = hit.replaceAll(
"^[^0-9\\(]", 
"");
 
  273                         hit = hit.replaceAll(
"^[^0-9]", 
"");
 
  276                     hit = hit.replaceAll(
"[^0-9]$", 
"");
 
                  // Email keywords: the match is tested for minimum length
                  // and for a valid top-level domain (text from the last '.'
                  // onwards).
  279                 if (originalKeyword.getArtifactAttributeType() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_EMAIL) {
 
  282                     if (hit.length() < MIN_EMAIL_ADDR_LENGTH
 
  283                             || !DomainValidator.getInstance(
true).isValidTld(hit.substring(hit.lastIndexOf(
'.')))) {
 
                  // Card-number keywords: take the "ccn" group, remove spaces
                  // and dashes, then run the Luhn check on the digits.
  292                 if (originalKeyword.getArtifactAttributeType() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
 
  293                     Matcher ccnMatcher = CREDIT_CARD_NUM_PATTERN.matcher(hit);
 
  294                     if (ccnMatcher.find()) {
 
  295                         final String ccn = CharMatcher.anyOf(
" -").removeFrom(ccnMatcher.group(
"ccn"));
 
  296                         if (
false == TermsComponentQuery.CREDIT_CARD_NUM_LUHN_CHECK.isValid(ccn)) {
 
                  // Snippet: up to 20 characters of context before the match,
                  // the match between two U+00AB markers (code point 171),
                  // then up to 20 characters after it.
                  // NOTE(review): maxIndex is length - 1 and substring's end
                  // index is exclusive, so the last character of the content
                  // is never part of the trailing context - confirm intended.
  308                 int maxIndex = content.length() - 1;
 
  309                 snippet.append(content.substring(Integer.max(0, hitMatcher.start() - 20), Integer.max(0, hitMatcher.start())));
 
  310                 snippet.appendCodePoint(171);
 
                  // NOTE(review): the matched text is presumably appended
                  // here on a line not visible in this extract.
  312                 snippet.appendCodePoint(171);
 
  313                 snippet.append(content.substring(Integer.min(maxIndex, hitMatcher.end()), Integer.min(maxIndex, hitMatcher.end() + 20)));
 
  315                 hits.add(
new KeywordHit(docId, snippet.toString(), hit));
 
  322     public void addFilter(KeywordQueryFilter filter) {
 
  323         this.filters.add(filter);
 
  /**
   * Sets the Solr field this query searches.
   *
   * NOTE(review): the method body is not visible in this extract; it
   * presumably assigns the `field` member read by performQuery() - confirm.
   *
   * @param field name of the field to search
   */
  327     public void setField(String field) {
 
  /**
   * Requests substring matching for this query.
   *
   * NOTE(review): the method body is not visible in this extract, so its
   * effect (if any) on a regular expression query cannot be stated here.
   */
  332     public void setSubstringQuery() {
 
  /**
   * Computes the Lucene-escaped form of the search term and stores it in
   * escapedQuery, unless isEscaped() already reports true. The method is
   * synchronized on this instance.
   *
   * NOTE(review): whether the `escaped` flag is updated afterwards is on
   * lines not visible in this extract.
   */
  336     synchronized public void escape() {
 
  337         if (isEscaped() == 
false) {
 
  338             escapedQuery = KeywordSearchUtil.escapeLuceneQuery(keywordString);
 
  /**
   * Reports whether the query string has been escaped. The method is
   * synchronized on this instance.
   *
   * NOTE(review): the method body is not visible in this extract; it
   * presumably returns the `escaped` field - confirm.
   *
   * @return a boolean escaped flag
   */
  344     synchronized public boolean isEscaped() {
 
  /**
   * Reports whether this query is a literal (non-regex) query.
   *
   * NOTE(review): the method body is not visible in this extract, so the
   * returned value cannot be stated here.
   *
   * @return a boolean literal flag
   */
  349     public boolean isLiteral() {
 
  354     public String getQueryString() {
 
  355         return originalKeyword.getSearchTerm();
 
  /**
   * Gets the escaped form of the query string. The method is synchronized
   * on this instance and first tests isEscaped().
   *
   * NOTE(review): the branch body and the return statement are not visible
   * in this extract; presumably escape() is called when needed and
   * escapedQuery is returned - confirm.
   *
   * @return a String holding the escaped query
   */
  359     synchronized public String getEscapedQueryString() {
 
  360         if (
false == isEscaped()) {
 
  376     private String getDocumentIds(Keyword keyword, KeywordHit hit) {
 
  377         Set<String> documentIds = 
new HashSet<>();
 
  379         for (KeywordHit h : hitsMultiMap.get(keyword)) {
 
  382             if (h.getSolrObjectId() == hit.getSolrObjectId() && !documentIds.contains(h.getSolrDocumentId())) {
 
  383                 documentIds.add(h.getSolrDocumentId());
 
  387         return StringUtils.join(documentIds, 
",");
 
  /**
   * Creates a blackboard artifact for one keyword hit and attaches the
   * collected attributes to it. Keywords that are not credit card numbers
   * produce a TSK_KEYWORD_HIT artifact; credit card number keywords produce
   * a TSK_ACCOUNT artifact enriched with parsed track data and BIN details.
   *
   * NOTE(review): the try openers, the else keywords and the return
   * statements of this method are not visible in this extract.
   *
   * @param foundKeyword the keyword that was found; its search term is
   *                     stored as TSK_KEYWORD for non-card keywords
   * @param hit          the hit; supplies the content, snippet, Solr
   *                     document id and (for artifact hits) the artifact
   * @param snippet      preview text stored as TSK_KEYWORD_PREVIEW when not
   *                     null
   * @param listName     stored as TSK_SET_NAME when not blank
   *
   * @return presumably the KeywordCachedArtifact wrapping the new artifact;
   *         the value returned on failure is not visible here
   */
  407     public KeywordCachedArtifact writeSingleFileHitsToBlackBoard(Keyword foundKeyword, KeywordHit hit, String snippet, String listName) {
 
  408         final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
 
  416         BlackboardArtifact newArtifact;
 
  417         Collection<BlackboardAttribute> attributes = 
new ArrayList<>();
 
          // Ordinary keyword: record the found term and the regular
          // expression, then create a TSK_KEYWORD_HIT artifact.
  418         if (originalKeyword.getArtifactAttributeType() != BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
 
  419             attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm()));
 
  420             attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, getQueryString()));
 
  422                 newArtifact = hit.getContent().newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT);
 
  424             } 
catch (TskCoreException ex) {
 
  425                 LOGGER.log(Level.SEVERE, 
"Error adding artifact for keyword hit to blackboard", ex); 
 
              // Credit card keyword (presumably the else branch; the `else`
              // line is not visible): build a credit card account artifact.
  433             attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ACCOUNT_TYPE, MODULE_NAME, Account.Type.CREDIT_CARD.name()));
 
  434             Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap = 
new HashMap<>();
 
              // Track 1 is parsed before track 2. The helper is named
              // addAttributeIfNotAlreadyCaptured, so values found in track 1
              // take precedence.
  435             Matcher matcher = TermsComponentQuery.CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
 
  436             if (matcher.find()) {
 
  437                 parseTrack1Data(parsedTrackAttributeMap, matcher);
 
  439             matcher = CREDIT_CARD_TRACK2_PATTERN.matcher(hit.getSnippet());
 
  440             if (matcher.find()) {
 
  441                 parseTrack2Data(parsedTrackAttributeMap, matcher);
 
  443             final BlackboardAttribute ccnAttribute = parsedTrackAttributeMap.get(
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER));
 
              // No usable account number was parsed: log which kind of hit
              // (artifact or content) failed.
  444             if (ccnAttribute == null || StringUtils.isBlank(ccnAttribute.getValueString())) {
 
  445                 if (hit.isArtifactHit()) {
 
  446                     LOGGER.log(Level.SEVERE, String.format(
"Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getArtifact().getArtifactID())); 
 
  448                     LOGGER.log(Level.SEVERE, String.format(
"Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getContent().getId())); 
 
  452             attributes.addAll(parsedTrackAttributeMap.values());
 
              // The first 8 characters of the account number are the key for
              // the BIN lookup.
              // NOTE(review): ccnAttribute is dereferenced here, so the
              // failure branch above must leave the method on a line not
              // visible in this extract - confirm. substring(0, 8) also
              // throws StringIndexOutOfBoundsException for values shorter
              // than 8 characters.
  458             final int bin = Integer.parseInt(ccnAttribute.getValueString().substring(0, 8));
 
  459             CreditCards.BankIdentificationNumber binInfo = CreditCards.getBINInfo(bin);
 
              // Each BIN detail is optional; only the ones present are added.
  460             if (binInfo != null) {
 
  461                 binInfo.getScheme().ifPresent(scheme
 
  462                         -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_SCHEME, MODULE_NAME, scheme)));
 
  463                 binInfo.getCardType().ifPresent(cardType
 
  464                         -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_TYPE, MODULE_NAME, cardType)));
 
  465                 binInfo.getBrand().ifPresent(brand
 
  466                         -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_BRAND_NAME, MODULE_NAME, brand)));
 
  467                 binInfo.getBankName().ifPresent(bankName
 
  468                         -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_BANK_NAME, MODULE_NAME, bankName)));
 
  469                 binInfo.getBankPhoneNumber().ifPresent(phoneNumber
 
  470                         -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PHONE_NUMBER, MODULE_NAME, phoneNumber)));
 
  471                 binInfo.getBankURL().ifPresent(url
 
  472                         -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_URL, MODULE_NAME, url)));
 
  473                 binInfo.getCountry().ifPresent(country
 
  474                         -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_COUNTRY, MODULE_NAME, country)));
 
  475                 binInfo.getBankCity().ifPresent(city
 
  476                         -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CITY, MODULE_NAME, city)));
 
              // For files of type UNUSED_BLOCKS or UNALLOC_BLOCKS the Solr
              // document id of the hit is recorded as well.
  484             if (hit.getContent() instanceof AbstractFile) {
 
  485                 AbstractFile file = (AbstractFile) hit.getContent();
 
  486                 if (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS
 
  487                         || file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) {
 
  488                     attributes.add(
new BlackboardAttribute(KEYWORD_SEARCH_DOCUMENT_ID, MODULE_NAME, hit.getSolrDocumentId()));
 
  496                 newArtifact = hit.getContent().newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_ACCOUNT);
 
  497             } 
catch (TskCoreException ex) {
 
  498                 LOGGER.log(Level.SEVERE, 
"Error adding artifact for account to blackboard", ex); 
 
          // Attributes common to both artifact kinds.
  503         if (StringUtils.isNotBlank(listName)) {
 
  504             attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
 
  506         if (snippet != null) {
 
  507             attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
 
  509         if (hit.isArtifactHit()) {
 
  510             attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, hit.getArtifact().getArtifactID()));
 
          // The search type is stored as the ordinal of QueryType.REGEX, so
          // the stored number depends on the declaration order of that enum.
  513         attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
 
          // Attach everything to the artifact and mirror the attributes in
          // the cached wrapper.
  516             newArtifact.addAttributes(attributes);
 
  517             KeywordCachedArtifact writeResult = 
new KeywordCachedArtifact(newArtifact);
 
  518             writeResult.add(attributes);
 
  520         } 
catch (TskCoreException e) {
 
  521             LOGGER.log(Level.SEVERE, 
"Error adding bb attributes for terms search artifact", e); 
 
  534     static private void parseTrack2Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributesMap, Matcher matcher) {
 
  535         addAttributeIfNotAlreadyCaptured(attributesMap, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER, 
"accountNumber", matcher);
 
  536         addAttributeIfNotAlreadyCaptured(attributesMap, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_EXPIRATION, 
"expiration", matcher);
 
  537         addAttributeIfNotAlreadyCaptured(attributesMap, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_SERVICE_CODE, 
"serviceCode", matcher);
 
  538         addAttributeIfNotAlreadyCaptured(attributesMap, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_DISCRETIONARY, 
"discretionary", matcher);
 
  539         addAttributeIfNotAlreadyCaptured(attributesMap, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_LRC, 
"LRC", matcher);
 
  551     static private void parseTrack1Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, Matcher matcher) {
 
  552         parseTrack2Data(attributeMap, matcher);
 
  553         addAttributeIfNotAlreadyCaptured(attributeMap, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_NAME_PERSON, 
"name", matcher);
 
  567     static private void addAttributeIfNotAlreadyCaptured(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, BlackboardAttribute.ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) {
 
  568         BlackboardAttribute.Type type = 
new BlackboardAttribute.Type(attrType);
 
  569         attributeMap.computeIfAbsent(type, (BlackboardAttribute.Type t) -> {
 
  570             String value = matcher.group(groupName);
 
  571             if (attrType.equals(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) {
 
  572                 attributeMap.put(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD),
 
  573                         new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, value));
 
  574                 value = CharMatcher.anyOf(
" -").removeFrom(value);
 
  576             if (StringUtils.isNotBlank(value)) {
 
  577                 return new BlackboardAttribute(attrType, MODULE_NAME, value);