19 package org.sleuthkit.autopsy.keywordsearch;
 
   21 import java.util.ArrayList;
 
   22 import java.util.Collection;
 
   23 import java.util.Collections;
 
   24 import java.util.Comparator;
 
   25 import java.util.List;
 
   28 import java.util.TreeSet;
 
   29 import java.util.logging.Level;
 
   31 import org.apache.solr.client.solrj.SolrQuery;
 
   32 import org.apache.solr.client.solrj.SolrRequest.METHOD;
 
   33 import org.apache.solr.client.solrj.response.QueryResponse;
 
   34 import org.apache.solr.common.SolrDocument;
 
   35 import org.apache.solr.common.SolrDocumentList;
 
   /**
    * KeywordSearchQuery implementation that runs a single keyword against the
    * Solr server obtained from KeywordSearch.getServer() and turns the matching
    * Solr documents into KeywordHit objects.
    */
   51 class LuceneQuery 
implements KeywordSearchQuery {
 
   53     private static final Logger logger = Logger.getLogger(LuceneQuery.class.getName());
 
          // Raw query text, taken from keywordQuery.getQuery() in the constructor.
   54     private final String keywordString; 
 
          // Working copy of the query text: starts equal to keywordString, is
          // replaced by escape() and gets "*" appended by setSubstringQuery().
   55     private String keywordStringEscaped;
 
          // NOTE(review): no assignment to this flag is visible in this listing;
          // presumably set by escape() and returned by isEscaped() - confirm.
   56     private boolean isEscaped;
 
          // Keyword this query was built from; its getType() is consulted when
          // writing blackboard attributes.
   57     private Keyword keywordQuery = null;
 
          // Keyword list handed to the QueryResults created in performQuery().
   58     private KeywordList keywordList = null;
 
          // Filters added through addFilter(); each is applied as a Solr filter
          // query via filter.toString().
   59     private final List<KeywordQueryFilter> filters = 
new ArrayList<>();
 
          // Field name used as a "field:" prefix when the Solr query string is built.
   60     private String field = null;
 
          // Row limit of one Solr request and the step of the paging loop.
   61     private static final int MAX_RESULTS = 20000;
 
          // Value passed to setHighlightFragsize() for snippets.
   62     static final int SNIPPET_LENGTH = 50;
 
          // Both highlight-field constants currently resolve to the same schema
          // field (Server.Schema.TEXT).
   64     static final String HIGHLIGHT_FIELD_LITERAL = Server.Schema.TEXT.toString();
 
   65     static final String HIGHLIGHT_FIELD_REGEX = Server.Schema.TEXT.toString();
 
          // True only for development builds; passed to SolrQuery.setShowDebugInfo().
   69     private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
 
   /**
    * Creates a query for one keyword.
    *
    * @param keywordList  list stored on this query and later passed to the
    *                     QueryResults built by performQuery()
    * @param keywordQuery keyword to search for; must not be null because
    *                     getQuery() is called on it here
    */
   76     public LuceneQuery(KeywordList keywordList, Keyword keywordQuery) {
 
   77         this.keywordList = keywordList;
 
   78         this.keywordQuery = keywordQuery;
 
              // The raw string is cached separately from the Keyword object.
   82         this.keywordString = keywordQuery.getQuery();
 
              // Until escape() or setSubstringQuery() runs, the working copy is the raw text.
   83         this.keywordStringEscaped = this.keywordString;
 
   /**
    * Appends a filter to this query's filter list. The list is read when the
    * Solr query is configured, where each entry is added as a filter query.
    *
    * @param filter filter to add
    */
   87     public void addFilter(KeywordQueryFilter filter) {
 
   88         this.filters.add(filter);
 
   /**
    * Setter taking a field name.
    *
    * NOTE(review): the method body is not visible in this listing; presumably it
    * stores the value in the {@code field} member that prefixes the Solr query
    * string - confirm against the full source.
    *
    * @param field field name
    */
   92     public void setField(String field) {
 
   /**
    * Appends the "*" wildcard to the working query string.
    *
    * Each call appends another "*". The change is applied to
    * keywordStringEscaped only, which escape() reassigns from the raw keyword.
    */
   97     public void setSubstringQuery() {
 
  100         keywordStringEscaped = keywordStringEscaped + 
"*";
 
  /**
   * Replaces the working query string with the result of
   * KeywordSearchUtil.escapeLuceneQuery() applied to the raw keyword.
   *
   * Because the source is the raw keywordString, anything previously appended
   * to keywordStringEscaped is replaced by this assignment.
   *
   * NOTE(review): the listing skips lines after the visible statement, so there
   * may be further statements in this method - confirm.
   */
  104     public void escape() {
 
  105         keywordStringEscaped = KeywordSearchUtil.escapeLuceneQuery(keywordString);
 
  /**
   * Boolean accessor.
   *
   * NOTE(review): the body is not visible in this listing; presumably it returns
   * the {@code isEscaped} field - confirm.
   */
  110     public boolean isEscaped() {
 
  /**
   * Boolean accessor.
   *
   * NOTE(review): the body is not visible in this listing, so the returned value
   * cannot be documented from here - confirm against the full source.
   */
  115     public boolean isLiteral() {
 
  /**
   * Returns the working query string, i.e. the raw keyword as modified by
   * escape() and/or setSubstringQuery().
   *
   * @return current value of keywordStringEscaped
   */
  120     public String getEscapedQueryString() {
 
  121         return this.keywordStringEscaped;
 
  /**
   * Returns the raw keyword text captured in the constructor.
   *
   * @return the unmodified keyword string
   */
  125     public String getQueryString() {
 
  126         return this.keywordString;
 
  /**
   * Runs the query and collects its hits in a QueryResults object.
   *
   * NOTE(review): the return statement is not visible in this listing;
   * presumably {@code results} is returned - confirm.
   *
   * @throws NoOpenCoreException declared by this method and by
   *                             performLuceneQuery(), which it calls
   */
  130     public QueryResults performQuery() throws NoOpenCoreException {
 
  131         QueryResults results = 
new QueryResults(
this, keywordList);
 
              // Snippet generation follows the global keyword-search setting.
  133         boolean showSnippets = KeywordSearchSettings.getShowSnippets();
 
              // All hits are registered under one Keyword built from the raw string.
  134         results.addResult(
new Keyword(keywordString, 
true), performLuceneQuery(showSnippets));
 
  /**
   * Checks that there is something to search for.
   *
   * @return true when the raw keyword string is neither null nor empty
   */
  140     public boolean validate() {
 
  141         return keywordString != null && !keywordString.equals(
"");
 
  /**
   * Creates a TSK_KEYWORD_HIT blackboard artifact on the content of the given
   * hit and attaches the attributes describing the hit.
   *
   * NOTE(review): the {@code try} openers, the statements following the log
   * calls in the catch blocks, and the return statement are not visible in this
   * listing, so the value returned on success or failure cannot be documented
   * from here.
   *
   * @param termHit  matched term; stored as the TSK_KEYWORD attribute
   * @param hit      keyword hit whose content receives the new artifact
   * @param snippet  preview text; skipped when null
   * @param listName keyword set name; skipped when null or empty
   */
  145     public KeywordCachedArtifact writeSingleFileHitsToBlackBoard(String termHit, KeywordHit hit, String snippet, String listName) {
 
  146         final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
 
  148         Collection<BlackboardAttribute> attributes = 
new ArrayList<>();
 
  149         BlackboardArtifact bba;
 
  150         KeywordCachedArtifact writeResult;
 
                  // Create the artifact and wrap it in the cached-artifact holder.
  152             bba = hit.getContent().newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
 
  153             writeResult = 
new KeywordCachedArtifact(bba);
 
  154         } 
catch (Exception e) {
 
  155             logger.log(Level.WARNING, 
"Error adding bb artifact for keyword hit", e); 
 
              // Preview attribute is optional.
  159         if (snippet != null) {
 
  160             attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, snippet));
 
              // The matched term is always recorded.
  162         attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, termHit));
 
              // Set name is recorded only for a non-empty list name.
  163         if ((listName != null) && (listName.equals(
"") == 
false)) {
 
  164             attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME.getTypeID(), MODULE_NAME, listName));
 
              // When the keyword has an attribute type, the term is stored a second
              // time under that type.
  170         if (keywordQuery != null) {
 
  171             BlackboardAttribute.ATTRIBUTE_TYPE selType = keywordQuery.getType();
 
  172             if (selType != null) {
 
  173                 attributes.add(
new BlackboardAttribute(selType.getTypeID(), MODULE_NAME, termHit));
 
              // Hits found in an artifact keep a reference to that artifact's ID.
  177         if (hit.isArtifactHit()) {
 
  178             attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT.getTypeID(), MODULE_NAME, hit.getArtifact().getArtifactID()));
 
                  // The same attribute collection goes to the artifact and to the holder.
  182             bba.addAttributes(attributes); 
 
  183             writeResult.add(attributes);
 
  185         } 
catch (TskException e) {
 
  186             logger.log(Level.WARNING, 
"Error adding bb attributes to artifact", e); 
 
  /**
   * Executes the Solr query in pages of MAX_RESULTS and converts the matching
   * documents into KeywordHit objects.
   *
   * NOTE(review): the {@code try} openers, the bodies of several catch blocks
   * and the return statement are not visible in this listing.
   *
   * @param snippets passed to createAndConfigureSolrQuery() when the SolrQuery
   *                 is built
   * @throws NoOpenCoreException declared by this method
   */
  199     private List<KeywordHit> performLuceneQuery(
boolean snippets) 
throws NoOpenCoreException {
 
  200         List<KeywordHit> matches = 
new ArrayList<>();
 
  201         boolean allMatchesFetched = 
false;
 
  202         final Server solrServer = KeywordSearch.getServer();
 
  204         SolrQuery q = createAndConfigureSolrQuery(snippets);
 
              // One iteration per page; start advances by the page size.
              // NOTE(review): no visible line applies `start` to `q` (e.g. a
              // setStart call) - confirm it happens in the lines missing here.
  207         for (
int start = 0; !allMatchesFetched; start = start + MAX_RESULTS) {
 
  211                 QueryResponse response = solrServer.query(q, METHOD.POST);
 
  212                 SolrDocumentList resultList = response.getResults();
 
                      // Highlighting map: outer key is looked up by document ID,
                      // inner key by field name (see createKeywordtHit).
  215                 Map<String, Map<String, List<String>>> highlightResponse = response.getHighlighting();
 
                      // Reduce the page to the set built by filterOneHitPerDocument().
  218                 Set<SolrDocument> uniqueSolrDocumentsWithHits = filterOneHitPerDocument(resultList);
 
                      // Done once the pages requested so far cover the reported total.
  220                 allMatchesFetched = start + MAX_RESULTS >= resultList.getNumFound();
 
  222                 SleuthkitCase sleuthkitCase;
 
  224                     sleuthkitCase = Case.getCurrentCase().getSleuthkitCase();
 
  225                 } 
catch (IllegalStateException ex) {
 
  230                 for (SolrDocument resultDoc : uniqueSolrDocumentsWithHits) {
 
  231                     KeywordHit contentHit;
 
  233                         contentHit = createKeywordtHit(resultDoc, highlightResponse, sleuthkitCase);
 
  234                     } 
catch (TskException ex) {
 
  237                     matches.add(contentHit);
 
                  // NOTE(review): allMatchesFetched is only updated inside the try
                  // body; if these handlers merely log, the loop would not
                  // terminate after a failure - confirm they rethrow or exit.
  240             } 
catch (NoOpenCoreException ex) {
 
  241                 logger.log(Level.WARNING, 
"Error executing Lucene Solr Query: " + keywordString, ex); 
 
  243             } 
catch (KeywordSearchModuleException ex) {
 
  244                 logger.log(Level.WARNING, 
"Error executing Lucene Solr Query: " + keywordString, ex); 
 
  /**
   * Builds the SolrQuery used by performLuceneQuery(): query string, row
   * limit, returned fields, filter queries and highlighting parameters.
   *
   * NOTE(review): the conditions guarding the field-prefix block and the
   * highlighting block, and the return statement, are not visible in this
   * listing; presumably they test {@code field != null} and {@code snippets}
   * respectively - confirm.
   *
   * @param snippets flag received from performLuceneQuery()
   */
  257     private SolrQuery createAndConfigureSolrQuery(
boolean snippets) {
 
  258         SolrQuery q = 
new SolrQuery();
 
  259         q.setShowDebugInfo(DEBUG); 
 
              // The working query string is passed through quoteQuery() first.
  261         final String groupedQuery = KeywordSearchUtil.quoteQuery(keywordStringEscaped);
 
  262         String theQueryStr = groupedQuery;
 
                  // Field-restricted form: "<field>:<groupedQuery>".
  265             StringBuilder sb = 
new StringBuilder();
 
  266             sb.append(field).append(
":").append(groupedQuery);
 
  267             theQueryStr = sb.toString();
 
  269         q.setQuery(theQueryStr);
 
  270         q.setRows(MAX_RESULTS);
 
              // Only the ID field is requested back from Solr.
  272         q.setFields(Server.Schema.ID.toString());
 
  274         for (KeywordQueryFilter filter : filters) {
 
  275             q.addFilterQuery(filter.toString());
 
                  // Highlighting: one fragment per document from the TEXT field.
  279             q.addHighlightField(Server.Schema.TEXT.toString());
 
  282             q.setHighlightSnippets(1);
 
  283             q.setHighlightFragsize(SNIPPET_LENGTH);
 
  286             q.setParam(
"hl.useFastVectorHighlighter", 
"on"); 
 
                  // NOTE(review): the pre and post tags are the same string, so the
                  // start and end of a highlighted match are marked identically -
                  // confirm this is intended.
  287             q.setParam(
"hl.tag.pre", 
"«"); 
 
  288             q.setParam(
"hl.tag.post", 
"«"); 
 
  289             q.setParam(
"hl.fragListBuilder", 
"simple"); 
 
                  // fragCharSize is set to the length of the final query string.
  292             q.setParam(
"hl.fragCharSize", Integer.toString(theQueryStr.length())); 
 
  296             q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); 
 
  /**
   * Reduces a Solr result list to a set in which documents that compare equal
   * under SolrDocumentComparatorIgnoresChunkId appear only once.
   *
   * The supplied list is sorted in place, so the caller's list order changes.
   *
   * @param resultList documents returned by Solr; every document must carry
   *                   the ID field because its value is dereferenced
   * @return a TreeSet ordered by SolrDocumentComparatorIgnoresChunkId
   */
  309     private Set<SolrDocument> filterOneHitPerDocument(SolrDocumentList resultList) {
 
              // Sort by the ID field's string form (String.compareTo order) so the
              // insertion order into the set below is deterministic.
  313         Collections.sort(resultList, 
new Comparator<SolrDocument>() {
 
  315             public int compare(SolrDocument left, SolrDocument right) {
 
  317                 String leftID = left.getFieldValue(Server.Schema.ID.toString()).toString();
 
  318                 String rightID = right.getFieldValue(Server.Schema.ID.toString()).toString();
 
  319                 return leftID.compareTo(rightID);
 
              // A TreeSet keeps the first element added among those its comparator
              // treats as equal; later equal elements are ignored by addAll().
  327         Set<SolrDocument> solrDocumentsWithMatches = 
new TreeSet<>(
new SolrDocumentComparatorIgnoresChunkId());
 
  328         solrDocumentsWithMatches.addAll(resultList);
 
  329         return solrDocumentsWithMatches;
 
  /**
   * Builds a KeywordHit for one Solr document, using the first highlight
   * fragment of the TEXT field as the snippet when snippets are enabled.
   *
   * NOTE(review): the declaration and initial value of {@code snippet} are not
   * visible in this listing, so the snippet value used when no highlight is
   * available cannot be documented from here.
   *
   * @param solrDoc           document whose ID field identifies the hit
   * @param highlightResponse highlighting map from the Solr response, keyed by
   *                          document ID and then by field name
   * @param caseDb            not referenced in the visible lines of this method
   * @throws TskException declared by this method
   */
  332     private KeywordHit createKeywordtHit(SolrDocument solrDoc, Map<String, Map<String, List<String>>> highlightResponse, SleuthkitCase caseDb) 
throws TskException {
 
  337         final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
 
  339         if (KeywordSearchSettings.getShowSnippets()) {
 
                  // NOTE(review): highlightResponse.get(docId) is dereferenced without
                  // a null check, whereas querySnippet() checks the per-document map
                  // for null before using it - confirm an entry always exists here.
  340             List<String> snippetList = highlightResponse.get(docId).get(Server.Schema.TEXT.toString());
 
  342             if (snippetList != null) {
 
                      // Fragments come back HTML-escaped; unescape and trim the first one.
  343                 snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
 
  346         return new KeywordHit(docId, snippet);
 
  /**
   * Convenience overload that delegates to the five-argument querySnippet()
   * with a chunk ID of 0.
   *
   * @param query        forwarded unchanged
   * @param solrObjectId forwarded unchanged
   * @param isRegex      forwarded unchanged
   * @param group        forwarded unchanged
   * @return whatever the five-argument overload returns
   * @throws NoOpenCoreException declared by this method and by the overload
   */
  361     public static String querySnippet(String query, 
long solrObjectId, 
boolean isRegex, 
boolean group) 
throws NoOpenCoreException {
 
  362         return querySnippet(query, solrObjectId, 0, isRegex, group);
 
  /**
   * Queries Solr for a single highlight fragment of {@code query} within one
   * indexed document (or document chunk) and returns it unescaped and trimmed.
   *
   * NOTE(review): several lines are not visible in this listing: the branch
   * conditions choosing the highlight field, the query-string form and the
   * content ID form; the declarations of {@code queryStr} and
   * {@code contentIDStr}; the {@code try} opener; and the values returned when
   * no highlight is found or an exception is caught. Presumably the branches
   * depend on {@code isRegex}, {@code group} and {@code chunkID} - confirm.
   *
   * @param query        text to highlight
   * @param solrObjectId object ID used to build the Solr document ID filter
   * @param chunkID      chunk number combined with solrObjectId by
   *                     Server.getChunkIdString() in one of the branches
   * @param isRegex      see note above
   * @param group        see note above
   * @throws NoOpenCoreException declared by this method
   */
  379     public static String querySnippet(String query, 
long solrObjectId, 
int chunkID, 
boolean isRegex, 
boolean group) 
throws NoOpenCoreException {
 
  380         Server solrServer = KeywordSearch.getServer();
 
  382         String highlightField;
 
                  // Both constants currently name the same schema field.
  384             highlightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
 
  386             highlightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
 
  389         SolrQuery q = 
new SolrQuery();
 
                  // One branch builds a "<highlightField>:..." query string.
  394             StringBuilder sb = 
new StringBuilder();
 
  395             sb.append(highlightField).append(
":");
 
  404             queryStr = sb.toString();
 
                  // The other branch uses the query passed through quoteQuery().
  408             queryStr = KeywordSearchUtil.quoteQuery(query);
 
  411         q.setQuery(queryStr);
 
                  // Document ID is either the bare object ID or the chunk ID string.
  416             contentIDStr = Long.toString(solrObjectId);
 
  418             contentIDStr = Server.getChunkIdString(solrObjectId, chunkID);
 
              // Restrict the search to that one document via a filter query on the ID field.
  421         String idQuery = Server.Schema.ID.toString() + 
":" + KeywordSearchUtil.escapeLuceneQuery(contentIDStr);
 
  422         q.setShowDebugInfo(DEBUG); 
 
  423         q.addFilterQuery(idQuery);
 
  424         q.addHighlightField(highlightField);
 
  427         q.setHighlightSnippets(1);
 
  428         q.setHighlightFragsize(SNIPPET_LENGTH);
 
  431         q.setParam(
"hl.useFastVectorHighlighter", 
"on"); 
 
              // NOTE(review): pre and post tags are the same string - confirm intended.
  432         q.setParam(
"hl.tag.pre", 
"«"); 
 
  433         q.setParam(
"hl.tag.post", 
"«"); 
 
  434         q.setParam(
"hl.fragListBuilder", 
"simple"); 
 
  437         q.setParam(
"hl.fragCharSize", Integer.toString(queryStr.length())); 
 
  441         q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);  
 
  444             QueryResponse response = solrServer.query(q, METHOD.POST);
 
  445             Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
 
                  // Look up this document's highlights, then the requested field's fragments;
                  // either lookup may yield null and is checked before use.
  446             Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIDStr);
 
  447             if (responseHighlightID == null) {
 
  450             List<String> contentHighlights = responseHighlightID.get(highlightField);
 
  451             if (contentHighlights == null) {
 
                      // First fragment, HTML-unescaped and trimmed.
  455                 return EscapeUtil.unEscapeHtml(contentHighlights.get(0)).trim();
 
  457         } 
catch (NoOpenCoreException ex) {
 
  458             logger.log(Level.WARNING, 
"Error executing Lucene Solr Query: " + query, ex); 
 
  460         } 
catch (KeywordSearchModuleException ex) {
 
  461             logger.log(Level.WARNING, 
"Error executing Lucene Solr Query: " + query, ex); 
 
  /**
   * Accessor declared to return a KeywordList.
   *
   * NOTE(review): the body is not visible in this listing; presumably it returns
   * the {@code keywordList} field set in the constructor - confirm.
   */
  467     public KeywordList getKeywordList() {
 
  /**
   * Orders two Solr documents by the integer parsed from the leading part of
   * their ID field values.
   *
   * Each ID is truncated with substring(0, index) in some cases and then parsed
   * as an Integer, so two IDs with the same leading integer compare as equal.
   *
   * NOTE(review): the definitions of {@code idName} and {@code index}, and the
   * conditions guarding the substring calls, are not visible in this listing;
   * presumably {@code index} is the position of the chunk separator in the ID -
   * confirm.
   * NOTE(review): the Integer(String) constructor is deprecated since Java 9
   * and throws NumberFormatException when the text is not a valid integer;
   * Integer.parseInt with Integer.compare would avoid the boxing.
   */
  478         public int compare(SolrDocument left, SolrDocument right) {
 
  484             String leftID = left.getFieldValue(idName).toString();
 
  487                 leftID = leftID.substring(0, index);
 
  491             String rightID = right.getFieldValue(idName).toString();
 
  494                 rightID = rightID.substring(0, index);
 
  497             Integer leftInt = 
new Integer(leftID);
 
  498             Integer rightInt = 
new Integer(rightID);
 
  499             return leftInt.compareTo(rightInt);
 
static final char ID_CHUNK_SEP
 
int compare(SolrDocument left, SolrDocument right)