19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.util.ArrayList;
22 import java.util.Collection;
23 import java.util.Collections;
24 import java.util.Comparator;
25 import java.util.List;
28 import java.util.TreeSet;
29 import java.util.logging.Level;
31 import org.apache.solr.client.solrj.SolrQuery;
32 import org.apache.solr.client.solrj.SolrRequest.METHOD;
33 import org.apache.solr.client.solrj.response.QueryResponse;
34 import org.apache.solr.common.SolrDocument;
35 import org.apache.solr.common.SolrDocumentList;
51 class LuceneQuery
implements KeywordSearchQuery {
53 private static final Logger logger = Logger.getLogger(LuceneQuery.class.getName());
54 private final String keywordString;
55 private String keywordStringEscaped;
56 private boolean isEscaped;
57 private Keyword keywordQuery = null;
58 private KeywordList keywordList = null;
59 private final List<KeywordQueryFilter> filters =
new ArrayList<>();
60 private String field = null;
61 private static final int MAX_RESULTS = 20000;
62 static final int SNIPPET_LENGTH = 50;
64 static final String HIGHLIGHT_FIELD_LITERAL = Server.Schema.TEXT.toString();
65 static final String HIGHLIGHT_FIELD_REGEX = Server.Schema.TEXT.toString();
69 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
76 public LuceneQuery(KeywordList keywordList, Keyword keywordQuery) {
77 this.keywordList = keywordList;
78 this.keywordQuery = keywordQuery;
82 this.keywordString = keywordQuery.getQuery();
83 this.keywordStringEscaped = this.keywordString;
87 public void addFilter(KeywordQueryFilter filter) {
88 this.filters.add(filter);
92 public void setField(String field) {
97 public void setSubstringQuery() {
100 keywordStringEscaped = keywordStringEscaped +
"*";
104 public void escape() {
105 keywordStringEscaped = KeywordSearchUtil.escapeLuceneQuery(keywordString);
110 public boolean isEscaped() {
115 public boolean isLiteral() {
120 public String getEscapedQueryString() {
121 return this.keywordStringEscaped;
125 public String getQueryString() {
126 return this.keywordString;
130 public QueryResults performQuery() throws NoOpenCoreException {
131 QueryResults results =
new QueryResults(
this, keywordList);
133 boolean showSnippets = KeywordSearchSettings.getShowSnippets();
134 results.addResult(
new Keyword(keywordString,
true), performLuceneQuery(showSnippets));
140 public boolean validate() {
141 return keywordString != null && !keywordString.equals(
"");
145 public KeywordCachedArtifact writeSingleFileHitsToBlackBoard(String termHit, KeywordHit hit, String snippet, String listName) {
146 final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
148 Collection<BlackboardAttribute> attributes =
new ArrayList<>();
149 BlackboardArtifact bba;
150 KeywordCachedArtifact writeResult;
152 bba = hit.getContent().newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
153 writeResult =
new KeywordCachedArtifact(bba);
154 }
catch (Exception e) {
155 logger.log(Level.WARNING,
"Error adding bb artifact for keyword hit", e);
159 if (snippet != null) {
160 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, snippet));
162 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, termHit));
163 if ((listName != null) && (listName.equals(
"") ==
false)) {
164 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME.getTypeID(), MODULE_NAME, listName));
170 if (keywordQuery != null) {
171 BlackboardAttribute.ATTRIBUTE_TYPE selType = keywordQuery.getType();
172 if (selType != null) {
173 attributes.add(
new BlackboardAttribute(selType.getTypeID(), MODULE_NAME, termHit));
177 if (hit.isArtifactHit()) {
178 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT.getTypeID(), MODULE_NAME, hit.getArtifact().getArtifactID()));
182 bba.addAttributes(attributes);
183 writeResult.add(attributes);
185 }
catch (TskException e) {
186 logger.log(Level.WARNING,
"Error adding bb attributes to artifact", e);
199 private List<KeywordHit> performLuceneQuery(
boolean snippets)
throws NoOpenCoreException {
200 List<KeywordHit> matches =
new ArrayList<>();
201 boolean allMatchesFetched =
false;
202 final Server solrServer = KeywordSearch.getServer();
204 SolrQuery q = createAndConfigureSolrQuery(snippets);
207 for (
int start = 0; !allMatchesFetched; start = start + MAX_RESULTS) {
211 QueryResponse response = solrServer.query(q, METHOD.POST);
212 SolrDocumentList resultList = response.getResults();
215 Map<String, Map<String, List<String>>> highlightResponse = response.getHighlighting();
218 Set<SolrDocument> uniqueSolrDocumentsWithHits = filterOneHitPerDocument(resultList);
220 allMatchesFetched = start + MAX_RESULTS >= resultList.getNumFound();
222 SleuthkitCase sleuthkitCase;
224 sleuthkitCase = Case.getCurrentCase().getSleuthkitCase();
225 }
catch (IllegalStateException ex) {
230 for (SolrDocument resultDoc : uniqueSolrDocumentsWithHits) {
231 KeywordHit contentHit;
233 contentHit = createKeywordtHit(resultDoc, highlightResponse, sleuthkitCase);
234 }
catch (TskException ex) {
237 matches.add(contentHit);
240 }
catch (NoOpenCoreException ex) {
241 logger.log(Level.WARNING,
"Error executing Lucene Solr Query: " + keywordString, ex);
243 }
catch (KeywordSearchModuleException ex) {
244 logger.log(Level.WARNING,
"Error executing Lucene Solr Query: " + keywordString, ex);
257 private SolrQuery createAndConfigureSolrQuery(
boolean snippets) {
258 SolrQuery q =
new SolrQuery();
259 q.setShowDebugInfo(DEBUG);
261 final String groupedQuery = KeywordSearchUtil.quoteQuery(keywordStringEscaped);
262 String theQueryStr = groupedQuery;
265 StringBuilder sb =
new StringBuilder();
266 sb.append(field).append(
":").append(groupedQuery);
267 theQueryStr = sb.toString();
269 q.setQuery(theQueryStr);
270 q.setRows(MAX_RESULTS);
272 q.setFields(Server.Schema.ID.toString());
274 for (KeywordQueryFilter filter : filters) {
275 q.addFilterQuery(filter.toString());
279 q.addHighlightField(Server.Schema.TEXT.toString());
282 q.setHighlightSnippets(1);
283 q.setHighlightFragsize(SNIPPET_LENGTH);
286 q.setParam(
"hl.useFastVectorHighlighter",
"on");
287 q.setParam(
"hl.tag.pre",
"«");
288 q.setParam(
"hl.tag.post",
"«");
289 q.setParam(
"hl.fragListBuilder",
"simple");
292 q.setParam(
"hl.fragCharSize", Integer.toString(theQueryStr.length()));
296 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
309 private Set<SolrDocument> filterOneHitPerDocument(SolrDocumentList resultList) {
313 Collections.sort(resultList,
new Comparator<SolrDocument>() {
315 public int compare(SolrDocument left, SolrDocument right) {
317 String leftID = left.getFieldValue(Server.Schema.ID.toString()).toString();
318 String rightID = right.getFieldValue(Server.Schema.ID.toString()).toString();
319 return leftID.compareTo(rightID);
327 Set<SolrDocument> solrDocumentsWithMatches =
new TreeSet<>(
new SolrDocumentComparatorIgnoresChunkId());
328 solrDocumentsWithMatches.addAll(resultList);
329 return solrDocumentsWithMatches;
332 private KeywordHit createKeywordtHit(SolrDocument solrDoc, Map<String, Map<String, List<String>>> highlightResponse, SleuthkitCase caseDb)
throws TskException {
337 final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
339 if (KeywordSearchSettings.getShowSnippets()) {
340 List<String> snippetList = highlightResponse.get(docId).get(Server.Schema.TEXT.toString());
342 if (snippetList != null) {
343 snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
346 return new KeywordHit(docId, snippet);
361 public static String querySnippet(String query,
long solrObjectId,
boolean isRegex,
boolean group)
throws NoOpenCoreException {
362 return querySnippet(query, solrObjectId, 0, isRegex, group);
379 public static String querySnippet(String query,
long solrObjectId,
int chunkID,
boolean isRegex,
boolean group)
throws NoOpenCoreException {
380 Server solrServer = KeywordSearch.getServer();
382 String highlightField;
384 highlightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
386 highlightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
389 SolrQuery q =
new SolrQuery();
394 StringBuilder sb =
new StringBuilder();
395 sb.append(highlightField).append(
":");
404 queryStr = sb.toString();
408 queryStr = KeywordSearchUtil.quoteQuery(query);
411 q.setQuery(queryStr);
416 contentIDStr = Long.toString(solrObjectId);
418 contentIDStr = Server.getChunkIdString(solrObjectId, chunkID);
421 String idQuery = Server.Schema.ID.toString() +
":" + KeywordSearchUtil.escapeLuceneQuery(contentIDStr);
422 q.setShowDebugInfo(DEBUG);
423 q.addFilterQuery(idQuery);
424 q.addHighlightField(highlightField);
427 q.setHighlightSnippets(1);
428 q.setHighlightFragsize(SNIPPET_LENGTH);
431 q.setParam(
"hl.useFastVectorHighlighter",
"on");
432 q.setParam(
"hl.tag.pre",
"«");
433 q.setParam(
"hl.tag.post",
"«");
434 q.setParam(
"hl.fragListBuilder",
"simple");
437 q.setParam(
"hl.fragCharSize", Integer.toString(queryStr.length()));
441 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
444 QueryResponse response = solrServer.query(q, METHOD.POST);
445 Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
446 Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIDStr);
447 if (responseHighlightID == null) {
450 List<String> contentHighlights = responseHighlightID.get(highlightField);
451 if (contentHighlights == null) {
455 return EscapeUtil.unEscapeHtml(contentHighlights.get(0)).trim();
457 }
catch (NoOpenCoreException ex) {
458 logger.log(Level.WARNING,
"Error executing Lucene Solr Query: " + query, ex);
460 }
catch (KeywordSearchModuleException ex) {
461 logger.log(Level.WARNING,
"Error executing Lucene Solr Query: " + query, ex);
467 public KeywordList getKeywordList() {
478 public int compare(SolrDocument left, SolrDocument right) {
484 String leftID = left.getFieldValue(idName).toString();
487 leftID = leftID.substring(0, index);
491 String rightID = right.getFieldValue(idName).toString();
494 rightID = rightID.substring(0, index);
497 Integer leftInt =
new Integer(leftID);
498 Integer rightInt =
new Integer(rightID);
499 return leftInt.compareTo(rightInt);
static final char ID_CHUNK_SEP
int compare(SolrDocument left, SolrDocument right)