19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.util.ArrayList;
22 import java.util.Collection;
23 import java.util.Collections;
24 import java.util.Comparator;
25 import java.util.List;
28 import java.util.TreeSet;
29 import java.util.logging.Level;
31 import org.apache.solr.client.solrj.SolrQuery;
32 import org.apache.solr.client.solrj.SolrRequest.METHOD;
33 import org.apache.solr.client.solrj.response.QueryResponse;
34 import org.apache.solr.common.SolrDocument;
35 import org.apache.solr.common.SolrDocumentList;
36 import org.openide.util.NbBundle;
42 import org.
sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
44 import org.
sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
/**
 * Keyword search query that is executed against the Solr server through
 * SolrJ. Holds the keyword in raw and escaped form, optional filter queries,
 * an optional target field, and the shared highlighting settings.
 */
52 class LuceneQuery
implements KeywordSearchQuery {
54 private static final Logger logger = Logger.getLogger(LuceneQuery.class.getName());
// Raw keyword text, taken from the Keyword handed to the constructor.
55 private final String keywordString;
// Form of the keyword that is quoted and sent to Solr. Starts equal to
// keywordString; rewritten by escape() and extended by setSubstringQuery().
56 private String keywordStringEscaped;
57 private boolean isEscaped;
58 private Keyword keywordQuery = null;
59 private KeywordList keywordList = null;
// Filters added through addFilter(); each one becomes a Solr filter query.
60 private final List<KeywordQueryFilter> filters =
new ArrayList<>();
// Optional Solr field name that is prepended to the query as "field:".
61 private String field = null;
// Row limit set on the Solr query and the step used when paging through results.
62 private static final int MAX_RESULTS = 20000;
// Highlight fragment size passed to Solr for snippets.
63 static final int SNIPPET_LENGTH = 50;
// Both highlight fields currently resolve to the same schema field (TEXT).
65 static final String HIGHLIGHT_FIELD_LITERAL = Server.Schema.TEXT.toString();
66 static final String HIGHLIGHT_FIELD_REGEX = Server.Schema.TEXT.toString();
// True only for DEVELOPMENT builds; passed to SolrQuery.setShowDebugInfo().
70 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
/**
 * Creates a query for a single keyword.
 *
 * @param keywordList list that is stored and later passed to QueryResults
 * @param keywordQuery keyword to search for; must not be null because
 *                     getQuery() is called on it here
 */
77 public LuceneQuery(KeywordList keywordList, Keyword keywordQuery) {
78 this.keywordList = keywordList;
79 this.keywordQuery = keywordQuery;
83 this.keywordString = keywordQuery.getQuery();
// The escaped form starts out identical to the raw keyword.
84 this.keywordStringEscaped = this.keywordString;
/**
 * Registers a filter; every registered filter is later added to the Solr
 * query as a filter query.
 *
 * @param filter filter to append to this query's filter list
 */
88 public void addFilter(KeywordQueryFilter filter) {
89 this.filters.add(filter);
/**
 * Sets the Solr field to query.
 * NOTE(review): the method body is not visible in this excerpt; presumably
 * it assigns the argument to the {@code field} member - confirm.
 *
 * @param field Solr field name
 */
93 public void setField(String field) {
/**
 * Turns the query into a prefix-style match by appending the "*" wildcard
 * to the escaped keyword.
 * NOTE(review): some lines of this method are not visible in this excerpt.
 */
98 public void setSubstringQuery() {
101 keywordStringEscaped = keywordStringEscaped +
"*";
/**
 * Recomputes the escaped keyword from the raw keyword using
 * KeywordSearchUtil.escapeLuceneQuery(). Because it starts from the raw
 * keyword, anything previously appended to the escaped form is replaced.
 * NOTE(review): whether the isEscaped flag is updated here is not visible
 * in this excerpt.
 */
105 public void escape() {
106 keywordStringEscaped = KeywordSearchUtil.escapeLuceneQuery(keywordString);
/**
 * Reports whether this query has been escaped.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * returns the {@code isEscaped} field - confirm.
 */
111 public boolean isEscaped() {
/**
 * Reports whether this query is a literal (non-regex) query.
 * NOTE(review): the body, and therefore the returned value, is not visible
 * in this excerpt.
 */
116 public boolean isLiteral() {
/**
 * @return the keyword in the form that is sent to Solr (after any escape()
 *         or setSubstringQuery() call)
 */
121 public String getEscapedQueryString() {
122 return this.keywordStringEscaped;
/**
 * @return the raw keyword text exactly as supplied by the Keyword passed to
 *         the constructor
 */
126 public String getQueryString() {
127 return this.keywordString;
/**
 * Runs the query and wraps the hits in a QueryResults object keyed by a
 * Keyword built from the raw keyword string.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably {@code results} is returned - confirm.
 *
 * @throws NoOpenCoreException propagated from performLuceneQuery()
 */
131 public QueryResults performQuery() throws NoOpenCoreException {
132 QueryResults results =
new QueryResults(
this, keywordList);
// Whether to request snippets comes from the user's keyword search settings.
134 boolean showSnippets = KeywordSearchSettings.getShowSnippets();
135 results.addResult(
new Keyword(keywordString,
true), performLuceneQuery(showSnippets));
/**
 * @return true when the raw keyword is neither null nor the empty string;
 *         no other syntax checking is done here
 */
141 public boolean validate() {
142 return keywordString != null && !keywordString.equals(
"");
/**
 * Creates a TSK_KEYWORD_HIT blackboard artifact on the content of the given
 * hit and attaches the keyword attributes to it.
 * NOTE(review): the try openers, the return statements and the closing
 * braces of this method are not visible in this excerpt, so what is
 * returned on success and on failure cannot be confirmed from here.
 *
 * @param termHit  the matched term, stored as TSK_KEYWORD (and again under
 *                 the keyword's own attribute type when it has one)
 * @param hit      the hit whose content receives the new artifact
 * @param snippet  preview text, stored as TSK_KEYWORD_PREVIEW when not null
 * @param listName keyword list name, stored as TSK_SET_NAME when not null
 *                 and not empty
 */
146 public KeywordCachedArtifact writeSingleFileHitsToBlackBoard(String termHit, KeywordHit hit, String snippet, String listName) {
147 final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
149 Collection<BlackboardAttribute> attributes =
new ArrayList<>();
150 BlackboardArtifact bba;
151 KeywordCachedArtifact writeResult;
// Create the artifact first; any failure here is logged as a warning.
153 bba = hit.getContent().newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
154 writeResult =
new KeywordCachedArtifact(bba);
155 }
catch (Exception e) {
156 logger.log(Level.WARNING,
"Error adding bb artifact for keyword hit", e);
// The preview attribute is optional; it is added only when a snippet exists.
160 if (snippet != null) {
161 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
163 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, termHit));
// Record the owning list only when a non-empty list name was supplied.
164 if ((listName != null) && (listName.equals(
"") ==
false)) {
165 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
// If the keyword declares its own attribute type, store the term under that type too.
171 if (keywordQuery != null) {
172 BlackboardAttribute.ATTRIBUTE_TYPE selType = keywordQuery.getType();
173 if (selType != null) {
174 attributes.add(
new BlackboardAttribute(selType, MODULE_NAME, termHit));
// A hit found inside another artifact links back to that artifact's ID.
178 if (hit.isArtifactHit()) {
179 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, hit.getArtifact().getArtifactID()));
// The same attribute collection is written to the artifact and mirrored into the cached result.
183 bba.addAttributes(attributes);
184 writeResult.add(attributes);
186 }
catch (TskException e) {
187 logger.log(Level.WARNING,
"Error adding bb attributes to artifact", e);
/**
 * Executes the Solr query and converts the matching documents into
 * KeywordHit objects, at most one per document as decided by
 * filterOneHitPerDocument().
 * NOTE(review): the try openers, the bodies of two catch blocks, the return
 * statements and the closing braces are not visible in this excerpt.
 *
 * @param snippets forwarded to createAndConfigureSolrQuery()
 * @return the hits collected in {@code matches} (presumably; the return
 *         statement is not visible - confirm)
 * @throws NoOpenCoreException declared by this method; the visible lines do
 *         not show which call raises it
 */
202 private List<KeywordHit> performLuceneQuery(
boolean snippets)
throws NoOpenCoreException {
203 List<KeywordHit> matches =
new ArrayList<>();
204 boolean allMatchesFetched =
false;
205 final Server solrServer = KeywordSearch.getServer();
207 SolrQuery q = createAndConfigureSolrQuery(snippets);
208 QueryResponse response;
209 SolrDocumentList resultList;
210 Map<String, Map<String, List<String>>> highlightResponse;
211 Set<SolrDocument> uniqueSolrDocumentsWithHits;
// Send the query as a POST, then keep the document list and the highlighting map.
214 response = solrServer.query(q, METHOD.POST);
216 resultList = response.getResults();
// Highlighting map: the outer key is looked up by document ID and the inner key by field name.
219 highlightResponse = response.getHighlighting();
222 uniqueSolrDocumentsWithHits = filterOneHitPerDocument(resultList);
223 }
catch (KeywordSearchModuleException ex) {
224 logger.log(Level.SEVERE,
"Error executing Lucene Solr Query: " + keywordString, ex);
// NOTE(review): ex.getCause() is dereferenced without a null check; an
// exception created without a cause would raise a NullPointerException here - confirm.
225 MessageNotifyUtil.Notify.error(NbBundle.getMessage(Server.class,
"Server.query.exception.msg", keywordString), ex.getCause().getMessage());
// Advance in steps of MAX_RESULTS until the step would reach the total number of matches.
// NOTE(review): no visible line inside this loop re-runs the query or refreshes
// uniqueSolrDocumentsWithHits; if the hidden lines do not either, every pass
// adds the same documents to matches again - confirm.
230 for (
int start = 0; !allMatchesFetched; start = start + MAX_RESULTS) {
233 allMatchesFetched = start + MAX_RESULTS >= resultList.getNumFound();
235 SleuthkitCase sleuthkitCase;
// getCurrentCase() signals "no open case" with IllegalStateException; the handler body is not visible here.
237 sleuthkitCase = Case.getCurrentCase().getSleuthkitCase();
238 }
catch (IllegalStateException ex) {
243 for (SolrDocument resultDoc : uniqueSolrDocumentsWithHits) {
244 KeywordHit contentHit;
246 contentHit = createKeywordtHit(resultDoc, highlightResponse, sleuthkitCase);
247 }
catch (TskException ex) {
250 matches.add(contentHit);
/**
 * Builds the SolrQuery for this keyword: query string, row limit, returned
 * fields, filter queries and highlighting parameters.
 * NOTE(review): the conditions guarding the field prefix and the
 * highlighting setup, and the return statement, are not visible in this
 * excerpt; presumably {@code snippets} controls the highlighting part and
 * {@code q} is returned - confirm.
 *
 * @param snippets whether snippets were requested by the caller
 */
263 private SolrQuery createAndConfigureSolrQuery(
boolean snippets) {
264 SolrQuery q =
new SolrQuery();
265 q.setShowDebugInfo(DEBUG);
// Quote the escaped keyword so that it is sent to Solr as a single grouped term.
267 final String groupedQuery = KeywordSearchUtil.quoteQuery(keywordStringEscaped);
268 String theQueryStr = groupedQuery;
// Field-restricted form of the query: "<field>:<groupedQuery>".
271 StringBuilder sb =
new StringBuilder();
272 sb.append(field).append(
":").append(groupedQuery);
273 theQueryStr = sb.toString();
275 q.setQuery(theQueryStr);
276 q.setRows(MAX_RESULTS);
// Only the document ID is requested back from Solr.
278 q.setFields(Server.Schema.ID.toString());
280 for (KeywordQueryFilter filter : filters) {
281 q.addFilterQuery(filter.toString());
// Highlighting: the TEXT field, a single snippet of SNIPPET_LENGTH per document.
285 q.addHighlightField(Server.Schema.TEXT.toString());
288 q.setHighlightSnippets(1);
289 q.setHighlightFragsize(SNIPPET_LENGTH);
292 q.setParam(
"hl.useFastVectorHighlighter",
"on");
// NOTE(review): the opening and closing highlight tags are the same
// character; confirm this is intentional and that consumers of the snippet rely on it.
293 q.setParam(
"hl.tag.pre",
"«");
294 q.setParam(
"hl.tag.post",
"«");
295 q.setParam(
"hl.fragListBuilder",
"simple");
// The fragment character size is tied to the length of the final query string.
298 q.setParam(
"hl.fragCharSize", Integer.toString(theQueryStr.length()));
// Remove the cap on how many characters of a document the highlighter analyzes.
302 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
/**
 * Reduces the Solr result list to a set ordered by
 * SolrDocumentComparatorIgnoresChunkId. Documents that this comparator
 * treats as equal collapse to one entry, and the TreeSet keeps the first one
 * added.
 * Side effect: the supplied list is sorted in place.
 *
 * @param resultList documents returned by Solr; reordered by this call
 * @return the de-duplicated documents
 */
316 private Set<SolrDocument> filterOneHitPerDocument(SolrDocumentList resultList) {
// Sort by the ID string (plain lexicographic String comparison) so that the
// order in which documents reach the set is deterministic.
320 Collections.sort(resultList,
new Comparator<SolrDocument>() {
322 public int compare(SolrDocument left, SolrDocument right) {
324 String leftID = left.getFieldValue(Server.Schema.ID.toString()).toString();
325 String rightID = right.getFieldValue(Server.Schema.ID.toString()).toString();
326 return leftID.compareTo(rightID);
// TreeSet.addAll() skips any element that compares equal to one already present.
333 Set<SolrDocument> solrDocumentsWithMatches =
new TreeSet<>(
new SolrDocumentComparatorIgnoresChunkId());
334 solrDocumentsWithMatches.addAll(resultList);
335 return solrDocumentsWithMatches;
/**
 * Builds a KeywordHit for one Solr document, attaching the first highlight
 * snippet when snippets are enabled in the settings.
 * NOTE(review): the declaration of {@code snippet} is not visible in this
 * excerpt, and {@code caseDb} is not used in any visible line.
 *
 * @param solrDoc           document returned by Solr; its ID field is read
 * @param highlightResponse highlighting map, looked up by document ID and
 *                          then by field name
 * @param caseDb            case database handle supplied by the caller
 * @throws TskException declared by this method; no visible line shows what
 *         raises it
 */
338 private KeywordHit createKeywordtHit(SolrDocument solrDoc, Map<String, Map<String, List<String>>> highlightResponse, SleuthkitCase caseDb)
throws TskException {
343 final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
345 if (KeywordSearchSettings.getShowSnippets()) {
// NOTE(review): highlightResponse.get(docId) is dereferenced without a null
// check; a document with no highlighting entry would raise a NullPointerException - confirm.
346 List<String> snippetList = highlightResponse.get(docId).get(Server.Schema.TEXT.toString());
348 if (snippetList != null) {
// Only the first snippet is used; HTML entities are decoded and whitespace trimmed.
349 snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
352 return new KeywordHit(docId, snippet);
/**
 * Convenience overload that delegates to the five-argument querySnippet()
 * with a chunk ID of 0.
 *
 * @param query        keyword or expression to highlight
 * @param solrObjectId object ID of the document to search
 * @param isRegex      passed through unchanged
 * @param group        passed through unchanged
 * @return whatever the five-argument overload returns
 * @throws NoOpenCoreException propagated from the delegate
 */
369 public static String querySnippet(String query,
long solrObjectId,
boolean isRegex,
boolean group)
throws NoOpenCoreException {
370 return querySnippet(query, solrObjectId, 0, isRegex, group);
/**
 * Queries Solr for a highlight snippet of {@code query} inside one specific
 * document (or document chunk) and returns the first snippet, HTML-unescaped
 * and trimmed.
 * NOTE(review): the branch conditions (regex vs. literal, grouped vs.
 * ungrouped, chunk vs. whole document), the early returns taken when no
 * highlight is found, the try opener and the bodies after the log calls are
 * not visible in this excerpt.
 *
 * @param query        keyword or expression to highlight
 * @param solrObjectId object ID of the document
 * @param chunkID      chunk number; combined with the object ID through
 *                     Server.getChunkIdString() on one branch
 * @param isRegex      presumably selects the regex highlight field - confirm
 * @param group        presumably selects the quoted form of the query - confirm
 * @throws NoOpenCoreException declared by this method, although a catch
 *         block for the same type is visible below - confirm whether it is rethrown
 */
388 public static String querySnippet(String query,
long solrObjectId,
int chunkID,
boolean isRegex,
boolean group)
throws NoOpenCoreException {
389 Server solrServer = KeywordSearch.getServer();
// Both constants currently name the same schema field, so either branch yields the same value.
391 String highlightField;
393 highlightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
395 highlightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
398 SolrQuery q =
new SolrQuery();
// One branch builds "<highlightField>:..." by hand, another uses the quoted query as-is.
403 StringBuilder sb =
new StringBuilder();
404 sb.append(highlightField).append(
":");
413 queryStr = sb.toString();
417 queryStr = KeywordSearchUtil.quoteQuery(query);
420 q.setQuery(queryStr);
// The document ID is either the bare object ID or the object/chunk ID string.
425 contentIDStr = Long.toString(solrObjectId);
427 contentIDStr = Server.getChunkIdString(solrObjectId, chunkID);
// Restrict the search to that single document through a filter query on its escaped ID.
430 String idQuery = Server.Schema.ID.toString() +
":" + KeywordSearchUtil.escapeLuceneQuery(contentIDStr);
431 q.setShowDebugInfo(DEBUG);
432 q.addFilterQuery(idQuery);
433 q.addHighlightField(highlightField);
// Same highlighting setup as createAndConfigureSolrQuery(): one snippet of SNIPPET_LENGTH.
436 q.setHighlightSnippets(1);
437 q.setHighlightFragsize(SNIPPET_LENGTH);
440 q.setParam(
"hl.useFastVectorHighlighter",
"on");
// NOTE(review): the opening and closing highlight tags are the same character - confirm intentional.
441 q.setParam(
"hl.tag.pre",
"«");
442 q.setParam(
"hl.tag.post",
"«");
443 q.setParam(
"hl.fragListBuilder",
"simple");
446 q.setParam(
"hl.fragCharSize", Integer.toString(queryStr.length()));
450 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
453 QueryResponse response = solrServer.query(q, METHOD.POST);
// Drill into the highlighting map: document ID first, then the highlight field.
454 Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
455 Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIDStr);
456 if (responseHighlightID == null) {
459 List<String> contentHighlights = responseHighlightID.get(highlightField);
460 if (contentHighlights == null) {
// Only the first highlight is returned.
464 return EscapeUtil.unEscapeHtml(contentHighlights.get(0)).trim();
466 }
catch (NoOpenCoreException ex) {
467 logger.log(Level.WARNING,
"Error executing Lucene Solr Query: " + query, ex);
469 }
catch (KeywordSearchModuleException ex) {
470 logger.log(Level.WARNING,
"Error executing Lucene Solr Query: " + query, ex);
/**
 * Accessor for the keyword list associated with this query.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * returns the {@code keywordList} field - confirm.
 */
476 public KeywordList getKeywordList() {
/**
 * Orders two Solr documents by the numeric value of their ID field after a
 * trailing part of the ID string has been cut off.
 * NOTE(review): the definitions of {@code idName} and {@code index}, and the
 * conditions around the substring calls, are not visible in this excerpt;
 * presumably the part removed is the chunk suffix after a separator character - confirm.
 *
 * @return negative, zero or positive, as defined by Long.compareTo() on the
 *         parsed IDs
 */
487 public int compare(SolrDocument left, SolrDocument right) {
493 String leftID = left.getFieldValue(idName).toString();
// Keep only the leading part of the ID string.
496 leftID = leftID.substring(0, index);
500 String rightID = right.getFieldValue(idName).toString();
503 rightID = rightID.substring(0, index);
// NOTE(review): new Long(String) is deprecated in current JDKs and throws
// NumberFormatException when the remaining text is not a number - confirm IDs are always numeric.
506 Long leftLong =
new Long(leftID);
507 Long rightLong =
new Long(rightID);
508 return leftLong.compareTo(rightLong);
static final char ID_CHUNK_SEP
int compare(SolrDocument left, SolrDocument right)