19 package org.sleuthkit.autopsy.keywordsearch;
 
   21 import java.util.ArrayList;
 
   22 import java.util.Collection;
 
   23 import java.util.List;
 
   25 import java.util.logging.Level;
 
   26 import org.apache.commons.lang3.StringUtils;
 
   27 import org.apache.commons.lang3.math.NumberUtils;
 
   28 import org.apache.solr.client.solrj.SolrQuery;
 
   29 import org.apache.solr.client.solrj.SolrRequest;
 
   30 import org.apache.solr.client.solrj.SolrRequest.METHOD;
 
   31 import org.apache.solr.client.solrj.response.QueryResponse;
 
   32 import org.apache.solr.common.SolrDocument;
 
   33 import org.apache.solr.common.SolrDocumentList;
 
   34 import org.apache.solr.common.params.CursorMarkParams;
 
   39 import org.
sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
 
   41 import org.
sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
 
   50 class LuceneQuery 
implements KeywordSearchQuery {
 
   52     private static final Logger logger = Logger.getLogger(LuceneQuery.class.getName());
 
   53     private String keywordStringEscaped;
 
   54     private boolean isEscaped;
 
   55     private final Keyword originalKeyword;
 
   56     private final KeywordList keywordList;
 
   57     private final List<KeywordQueryFilter> filters = 
new ArrayList<>();
 
   58     private String field = null;
 
   59     private static final int MAX_RESULTS_PER_CURSOR_MARK = 512;
 
   60     static final int SNIPPET_LENGTH = 50;
 
   61     static final String HIGHLIGHT_FIELD = Server.Schema.TEXT.toString();
 
   63     private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
 
   70     LuceneQuery(KeywordList keywordList, Keyword keyword) {
 
   71         this.keywordList = keywordList;
 
   72         this.originalKeyword = keyword;
 
   73         this.keywordStringEscaped = this.originalKeyword.getSearchTerm();
 
   77     public void addFilter(KeywordQueryFilter filter) {
 
   78         this.filters.add(filter);
 
   82     public void setField(String field) {
 
   87     public void setSubstringQuery() {
 
   90         keywordStringEscaped += 
"*";
 
   94     public void escape() {
 
   95         keywordStringEscaped = KeywordSearchUtil.escapeLuceneQuery(originalKeyword.getSearchTerm());
 
  100     public boolean isEscaped() {
 
  105     public boolean isLiteral() {
 
  106         return originalKeyword.searchTermIsLiteral();
 
  110     public String getEscapedQueryString() {
 
  111         return this.keywordStringEscaped;
 
  115     public String getQueryString() {
 
  116         return this.originalKeyword.getSearchTerm();
 
  120     public KeywordList getKeywordList() {
 
  125     public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
 
  127         final Server solrServer = KeywordSearch.getServer();
 
  128         double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
 
  130         SolrQuery solrQuery = createAndConfigureSolrQuery(KeywordSearchSettings.getShowSnippets());
 
  132         final String strippedQueryString = StringUtils.strip(getQueryString(), 
"\"");
 
  134         String cursorMark = CursorMarkParams.CURSOR_MARK_START;
 
  135         boolean allResultsProcessed = 
false;
 
  136         List<KeywordHit> matches = 
new ArrayList<>();
 
  137         LanguageSpecificContentQueryHelper.QueryResults languageSpecificQueryResults = 
new LanguageSpecificContentQueryHelper.QueryResults();
 
  138         while (!allResultsProcessed) {
 
  139             solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
 
  140             QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
 
  141             SolrDocumentList resultList = response.getResults();
 
  143             Map<String, Map<String, List<String>>> highlightResponse = response.getHighlighting();
 
  145             if (2.2 <= indexSchemaVersion) {
 
  146                 languageSpecificQueryResults.highlighting.putAll(response.getHighlighting());
 
  149             for (SolrDocument resultDoc : resultList) {
 
  150                 if (2.2 <= indexSchemaVersion) {
 
  151                     Object language = resultDoc.getFieldValue(Server.Schema.LANGUAGE.toString());
 
  152                     if (language != null) {
 
  153                         LanguageSpecificContentQueryHelper.updateQueryResults(languageSpecificQueryResults, resultDoc);
 
  164                     final String docId = resultDoc.getFieldValue(Server.Schema.ID.toString()).toString();
 
  165                     final Integer chunkSize = (Integer) resultDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
 
  166                     final Collection<Object> content = resultDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
 
  169                     if (resultDoc.containsKey(Server.Schema.LANGUAGE.toString())) {
 
  173                     if (indexSchemaVersion < 2.0) {
 
  175                         matches.add(createKeywordtHit(highlightResponse, docId));
 
  178                         for (Object content_obj : content) {
 
  179                             String content_str = (String) content_obj;
 
  181                             int firstOccurence = StringUtils.indexOfIgnoreCase(content_str, strippedQueryString);
 
  183                             if (chunkSize == null || chunkSize == 0 || (firstOccurence > -1 && firstOccurence < chunkSize)) {
 
  184                                 matches.add(createKeywordtHit(highlightResponse, docId));
 
  188                 } 
catch (TskException ex) {
 
  189                     throw new KeywordSearchModuleException(ex);
 
  192             String nextCursorMark = response.getNextCursorMark();
 
  193             if (cursorMark.equals(nextCursorMark)) {
 
  194                 allResultsProcessed = 
true;
 
  196             cursorMark = nextCursorMark;
 
  199         List<KeywordHit> mergedMatches;
 
  200         if (2.2 <= indexSchemaVersion) {
 
  201             mergedMatches = LanguageSpecificContentQueryHelper.mergeKeywordHits(matches, originalKeyword, languageSpecificQueryResults);
 
  203             mergedMatches = matches;
 
  206         QueryResults results = 
new QueryResults(
this);
 
  208         results.addResult(
new Keyword(originalKeyword.getSearchTerm(), 
true, 
true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), mergedMatches);
 
  214     public boolean validate() {
 
  215         return StringUtils.isNotBlank(originalKeyword.getSearchTerm());
 
  235     public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName) {
 
  236         final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
 
  238         Collection<BlackboardAttribute> attributes = 
new ArrayList<>();
 
  239         BlackboardArtifact bba;
 
  241             bba = content.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
 
  242         } 
catch (TskCoreException e) {
 
  243             logger.log(Level.WARNING, 
"Error adding bb artifact for keyword hit", e); 
 
  247         if (snippet != null) {
 
  248             attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
 
  250         attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm()));
 
  251         if (StringUtils.isNotBlank(listName)) {
 
  252             attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
 
  255         if (originalKeyword != null) {
 
  256             BlackboardAttribute.ATTRIBUTE_TYPE selType = originalKeyword.getArtifactAttributeType();
 
  257             if (selType != null) {
 
  258                 attributes.add(
new BlackboardAttribute(selType, MODULE_NAME, foundKeyword.getSearchTerm()));
 
  261             if (originalKeyword.searchTermIsWholeWord()) {
 
  262                 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.LITERAL.ordinal()));
 
  264                 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
 
  268         hit.getArtifactID().ifPresent(artifactID
 
  269                 -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
 
  273             bba.addAttributes(attributes); 
 
  275         } 
catch (TskCoreException e) {
 
  276             logger.log(Level.WARNING, 
"Error adding bb attributes to artifact", e); 
 
  289     private SolrQuery createAndConfigureSolrQuery(
boolean snippets) 
throws NoOpenCoreException, KeywordSearchModuleException {
 
  290         double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
 
  292         SolrQuery q = 
new SolrQuery();
 
  293         q.setShowDebugInfo(DEBUG); 
 
  295         String queryStr = originalKeyword.searchTermIsLiteral()
 
  296             ? KeywordSearchUtil.quoteQuery(keywordStringEscaped) : keywordStringEscaped;
 
  301             queryStr = field + 
":" + queryStr;
 
  302             q.setQuery(queryStr);
 
  303         } 
else if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) {
 
  304             q.setQuery(LanguageSpecificContentQueryHelper.expandQueryString(queryStr));
 
  306             q.setQuery(queryStr);
 
  308         q.setRows(MAX_RESULTS_PER_CURSOR_MARK);
 
  310         q.setSort(SolrQuery.SortClause.asc(Server.Schema.ID.toString()));
 
  312         q.setFields(Server.Schema.ID.toString(),
 
  313                 Server.Schema.CHUNK_SIZE.toString(),
 
  314                 Server.Schema.CONTENT_STR.toString());
 
  316         if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) {
 
  317             q.addField(Server.Schema.LANGUAGE.toString());
 
  318             LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywordStringEscaped);
 
  321         for (KeywordQueryFilter filter : filters) {
 
  322             q.addFilterQuery(filter.toString());
 
  326             configurwQueryForHighlighting(q);
 
  338     private static void configurwQueryForHighlighting(SolrQuery q) 
throws NoOpenCoreException {
 
  339         double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
 
  340         if (2.2 <= indexSchemaVersion) {
 
  341             for (Server.Schema field : LanguageSpecificContentQueryHelper.getQueryFields()) {
 
  342                 q.addHighlightField(field.toString());
 
  345             q.addHighlightField(HIGHLIGHT_FIELD);
 
  348         q.setHighlightSnippets(1);
 
  349         q.setHighlightFragsize(SNIPPET_LENGTH);
 
  352         q.setParam(
"hl.useFastVectorHighlighter", 
"on"); 
 
  353         q.setParam(
"hl.tag.pre", 
"«"); 
 
  354         q.setParam(
"hl.tag.post", 
"«"); 
 
  355         q.setParam(
"hl.fragListBuilder", 
"simple"); 
 
  358         q.setParam(
"hl.fragCharSize", Integer.toString(q.getQuery().length())); 
 
  362         q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); 
 
  365     private KeywordHit createKeywordtHit(Map<String, Map<String, List<String>>> highlightResponse, String docId) 
throws TskException {
 
  371         if (KeywordSearchSettings.getShowSnippets()) {
 
  372             List<String> snippetList = highlightResponse.get(docId).get(Server.Schema.TEXT.toString());
 
  374             if (snippetList != null) {
 
  375                 snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
 
  379         return new KeywordHit(docId, snippet, originalKeyword.getSearchTerm());
 
  396     static String querySnippet(String query, 
long solrObjectId, 
boolean isRegex, 
boolean group) 
throws NoOpenCoreException {
 
  397         return querySnippet(query, solrObjectId, 0, isRegex, group);
 
  415     static String querySnippet(String query, 
long solrObjectId, 
int chunkID, 
boolean isRegex, 
boolean group) 
throws NoOpenCoreException {
 
  416         SolrQuery q = 
new SolrQuery();
 
  417         q.setShowDebugInfo(DEBUG); 
 
  421             queryStr = HIGHLIGHT_FIELD + 
":" 
  422                     + (group ? KeywordSearchUtil.quoteQuery(query)
 
  429             queryStr = KeywordSearchUtil.quoteQuery(query);
 
  431         q.setQuery(queryStr);
 
  433         String contentIDStr = (chunkID == 0)
 
  434                 ? Long.toString(solrObjectId)
 
  435                 : Server.getChunkIdString(solrObjectId, chunkID);
 
  436         String idQuery = Server.Schema.ID.toString() + 
":" + KeywordSearchUtil.escapeLuceneQuery(contentIDStr);
 
  437         q.addFilterQuery(idQuery);
 
  439         configurwQueryForHighlighting(q);
 
  441         Server solrServer = KeywordSearch.getServer();
 
  444             QueryResponse response = solrServer.query(q, METHOD.POST);
 
  445             Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
 
  446             Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIDStr);
 
  447             if (responseHighlightID == null) {
 
  450             double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
 
  451             List<String> contentHighlights;
 
  452             if (2.2 <= indexSchemaVersion) {
 
  453                 contentHighlights = LanguageSpecificContentQueryHelper.getHighlights(responseHighlightID).orElse(null);
 
  455                 contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
 
  457             if (contentHighlights == null) {
 
  461                 return EscapeUtil.unEscapeHtml(contentHighlights.get(0)).trim();
 
  463         } 
catch (NoOpenCoreException ex) {
 
  464             logger.log(Level.SEVERE, 
"Error executing Lucene Solr Query: " + query + 
". Solr doc id " + solrObjectId + 
", chunkID " + chunkID, ex); 
 
  466         } 
catch (KeywordSearchModuleException ex) {
 
  467             logger.log(Level.SEVERE, 
"Error executing Lucene Solr Query: " + query + 
". Solr doc id " + solrObjectId + 
", chunkID " + chunkID, ex);