19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.util.ArrayList;
22 import java.util.Collection;
23 import java.util.List;
25 import java.util.logging.Level;
26 import org.apache.commons.lang3.StringUtils;
27 import org.apache.commons.lang3.math.NumberUtils;
28 import org.apache.solr.client.solrj.SolrQuery;
29 import org.apache.solr.client.solrj.SolrRequest;
30 import org.apache.solr.client.solrj.SolrRequest.METHOD;
31 import org.apache.solr.client.solrj.response.QueryResponse;
32 import org.apache.solr.common.SolrDocument;
33 import org.apache.solr.common.SolrDocumentList;
34 import org.apache.solr.common.params.CursorMarkParams;
39 import org.
sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
41 import org.
sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
/**
 * KeywordSearchQuery implementation that runs a single keyword against the
 * Solr index obtained from KeywordSearch.getServer(), paging through results
 * with a cursor mark and collecting the matches as KeywordHit objects.
 */
51 class LuceneQuery
implements KeywordSearchQuery {
53 private static final Logger logger = Logger.getLogger(LuceneQuery.class.getName());
// Query string used when building the Solr query. Starts as the raw search
// term (constructor), is replaced by escape() and extended by setSubstringQuery().
54 private String keywordStringEscaped;
// NOTE(review): presumably tracks whether escape() has been applied; no
// assignment to this flag is visible in this chunk - confirm.
55 private boolean isEscaped;
// The keyword as supplied by the caller; never modified by this class.
56 private final Keyword originalKeyword;
57 private final KeywordList keywordList;
// Filters added through addFilter(); each one becomes a Solr filter query.
58 private final List<KeywordQueryFilter> filters =
new ArrayList<>();
// Solr field name that createAndConfigureSolrQuery() can prefix to the query
// as "field:query". The condition guarding that prefixing is not visible here.
59 private String field = null;
// Number of rows requested from Solr per cursor-mark round trip.
60 private static final int MAX_RESULTS_PER_CURSOR_MARK = 512;
// Fragment size handed to the Solr highlighter.
61 static final int SNIPPET_LENGTH = 50;
// Name of the field that is highlighted and that querySnippet() can prefix to its query.
62 static final String HIGHLIGHT_FIELD = Server.Schema.TEXT.toString();
// True only for development builds; enables Solr debug info on the queries built here.
64 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
/**
 * Creates a query for one keyword.
 *
 * @param keywordList list stored as-is in this query
 * @param keyword     keyword to search for; its search term seeds the
 *                    (not yet escaped) query string
 */
71 LuceneQuery(KeywordList keywordList, Keyword keyword) {
72 this.keywordList = keywordList;
73 this.originalKeyword = keyword;
// Despite the field name, the term is stored unescaped until escape() is called.
74 this.keywordStringEscaped = this.originalKeyword.getSearchTerm();
/**
 * Adds a filter to this query. The filters are later appended to the Solr
 * query as filter queries by createAndConfigureSolrQuery().
 *
 * @param filter filter to add
 */
78 public void addFilter(KeywordQueryFilter filter) {
79 this.filters.add(filter);
/**
 * NOTE(review): the body of this method is not visible in this chunk;
 * presumably it stores the given Solr field name in this.field - confirm.
 *
 * @param field Solr field name
 */
83 public void setField(String field) {
/**
 * Turns the query into a prefix-style wildcard query by appending "*" to the
 * current query string. Each call appends another "*".
 */
88 public void setSubstringQuery() {
91 keywordStringEscaped +=
"*";
/**
 * Replaces the query string with the Lucene-escaped form of the original
 * search term. Because it restarts from the original term, anything appended
 * earlier (for example the "*" from setSubstringQuery()) is discarded.
 */
95 public void escape() {
96 keywordStringEscaped = KeywordSearchUtil.escapeLuceneQuery(originalKeyword.getSearchTerm());
/**
 * NOTE(review): the body is not visible in this chunk; presumably returns
 * the isEscaped field - confirm.
 */
101 public boolean isEscaped() {
/**
 * Reports whether the original keyword's search term is a literal, as
 * answered by the keyword itself.
 *
 * @return the result of originalKeyword.searchTermIsLiteral()
 */
106 public boolean isLiteral() {
107 return originalKeyword.searchTermIsLiteral();
/**
 * Returns the current working query string. It is only actually escaped if
 * escape() has been called; otherwise it is the raw search term, possibly
 * with a trailing "*" from setSubstringQuery().
 *
 * @return the current value of keywordStringEscaped
 */
111 public String getEscapedQueryString() {
112 return this.keywordStringEscaped;
/**
 * Returns the unmodified search term of the original keyword.
 *
 * @return the original keyword's search term
 */
116 public String getQueryString() {
117 return this.originalKeyword.getSearchTerm();
/**
 * NOTE(review): the body is not visible in this chunk; presumably returns
 * the keywordList passed to the constructor - confirm.
 */
121 public KeywordList getKeywordList() {
/**
 * Runs the query against Solr and collects the keyword hits.
 *
 * Results are fetched page by page using Solr's cursor mark: each response
 * supplies the next cursor mark, and paging stops once the mark no longer
 * changes. For index schema versions 2.2 and above, language-specific
 * results are gathered alongside and merged into the hit list at the end.
 *
 * NOTE(review): several lines of this method (the try opening, else
 * branches, closing braces and the final return) are not visible in this
 * chunk; the comments below describe only what is shown.
 *
 * @return presumably the QueryResults built at the end - the return
 *         statement is not visible here
 * @throws KeywordSearchModuleException wraps any TskException raised while
 *                                      processing the result documents
 * @throws NoOpenCoreException          declared by this method; the call
 *                                      that raises it is not identifiable
 *                                      from this chunk
 */
126 public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
128 final Server solrServer = KeywordSearch.getServer();
// Schema version is compared numerically below (2.0 and 2.2 thresholds).
129 double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
131 SolrQuery solrQuery = createAndConfigureSolrQuery(KeywordSearchSettings.getShowSnippets());
// Strip double quotes from both ends of the term for the substring check further down.
133 final String strippedQueryString = StringUtils.strip(getQueryString(),
"\"");
135 String cursorMark = CursorMarkParams.CURSOR_MARK_START;
136 boolean allResultsProcessed =
false;
137 List<KeywordHit> matches =
new ArrayList<>();
138 LanguageSpecificContentQueryHelper.QueryResults languageSpecificQueryResults =
new LanguageSpecificContentQueryHelper.QueryResults();
// One iteration per page of results.
139 while (!allResultsProcessed) {
140 solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
141 QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
142 SolrDocumentList resultList = response.getResults();
144 Map<String, Map<String, List<String>>> highlightResponse = response.getHighlighting();
// Schema 2.2+: keep this page's highlighting for the language-specific merge after the loop.
146 if (2.2 <= indexSchemaVersion) {
147 languageSpecificQueryResults.highlighting.putAll(response.getHighlighting());
150 for (SolrDocument resultDoc : resultList) {
// Schema 2.2+: documents that carry a language value feed the language-specific results.
151 if (2.2 <= indexSchemaVersion) {
152 Object language = resultDoc.getFieldValue(Server.Schema.LANGUAGE.toString());
153 if (language != null) {
154 LanguageSpecificContentQueryHelper.updateQueryResults(languageSpecificQueryResults, resultDoc);
165 final String docId = resultDoc.getFieldValue(Server.Schema.ID.toString()).toString();
166 final Integer chunkSize = (Integer) resultDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
167 final Collection<Object> content = resultDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
// NOTE(review): the body of this language check is not visible in this chunk.
170 if (resultDoc.containsKey(Server.Schema.LANGUAGE.toString())) {
// Schema versions below 2.0: every returned document counts as a hit.
174 if (indexSchemaVersion < 2.0) {
176 matches.add(createKeywordtHit(highlightResponse, docId));
// Otherwise (the joining else is not visible here) each content string is checked:
// the hit counts when no chunk size is recorded (null or 0) or when the first
// case-insensitive occurrence of the term lies before index chunkSize.
179 for (Object content_obj : content) {
180 String content_str = (String) content_obj;
182 int firstOccurence = StringUtils.indexOfIgnoreCase(content_str, strippedQueryString);
184 if (chunkSize == null || chunkSize == 0 || (firstOccurence > -1 && firstOccurence < chunkSize)) {
185 matches.add(createKeywordtHit(highlightResponse, docId));
189 }
catch (TskException ex) {
190 throw new KeywordSearchModuleException(ex);
// An unchanged cursor mark means the last page has been consumed.
193 String nextCursorMark = response.getNextCursorMark();
194 if (cursorMark.equals(nextCursorMark)) {
195 allResultsProcessed =
true;
197 cursorMark = nextCursorMark;
// Schema 2.2+: fold the language-specific results into the hits; otherwise use the hits as collected.
200 List<KeywordHit> mergedMatches;
201 if (2.2 <= indexSchemaVersion) {
202 mergedMatches = LanguageSpecificContentQueryHelper.mergeKeywordHits(matches, originalKeyword, languageSpecificQueryResults);
204 mergedMatches = matches;
207 QueryResults results =
new QueryResults(
this);
// All hits are registered under a new Keyword built from the original term, list name and original term text.
209 results.addResult(
new Keyword(originalKeyword.getSearchTerm(),
true,
true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), mergedMatches);
/**
 * Checks that there is something to search for.
 *
 * @return true when the original search term is not null, not empty and not
 *         whitespace-only
 */
215 public boolean validate() {
216 return StringUtils.isNotBlank(originalKeyword.getSearchTerm());
/**
 * Instance convenience overload: delegates to the static
 * createKeywordHitArtifact, supplying this query's original keyword.
 *
 * @param content      content the hit was found in
 * @param foundKeyword keyword that was actually found
 * @param hit          the keyword hit
 * @param snippet      preview text, may be null
 * @param listName     keyword list name, may be blank
 * @param ingestJobId  ingest job id, passed through unchanged
 * @return whatever the static overload returns
 */
236 public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
237 return createKeywordHitArtifact(content, originalKeyword, foundKeyword, hit, snippet, listName, ingestJobId);
/**
 * Builds the attribute set for a keyword hit and creates a TSK_KEYWORD_HIT
 * analysis result on the given content with score SCORE_LIKELY_NOTABLE.
 *
 * NOTE(review): the try opening, the else between the two search-type
 * attributes, the remaining newAnalysisResult arguments and the return on
 * failure are not visible in this chunk. ingestJobId is not used in any
 * visible line.
 *
 * @param content      content to attach the analysis result to
 * @param originalKW   keyword as originally searched for, may be null
 * @param foundKeyword keyword that was actually found
 * @param hit          the keyword hit; supplies the optional associated
 *                     artifact id
 * @param snippet      preview text, may be null
 * @param listName     keyword list name, may be blank
 * @param ingestJobId  ingest job id
 * @return the created analysis result; a TskCoreException is logged at
 *         WARNING level rather than propagated
 */
240 public static BlackboardArtifact createKeywordHitArtifact(Content content, Keyword originalKW, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
241 final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
243 Collection<BlackboardAttribute> attributes =
new ArrayList<>();
// The preview attribute is only added when a snippet was supplied.
244 if (snippet != null) {
245 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
// The found term is stored lower-cased.
// NOTE(review): toLowerCase() without a Locale is locale-sensitive - confirm this is intended.
247 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm().toLowerCase()));
248 if (StringUtils.isNotBlank(listName)) {
249 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
252 if (originalKW != null) {
// If the original keyword names an attribute type, also record the found term (original case) under it.
253 BlackboardAttribute.ATTRIBUTE_TYPE selType = originalKW.getArtifactAttributeType();
254 if (selType != null) {
255 attributes.add(
new BlackboardAttribute(selType, MODULE_NAME, foundKeyword.getSearchTerm()));
// Search type is stored as the ordinal of the QueryType enum constant: LITERAL for
// whole-word terms, SUBSTRING otherwise (the joining else is not visible here).
// NOTE(review): persisting ordinal() ties stored values to enum declaration order.
258 if (originalKW.searchTermIsWholeWord()) {
259 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.LITERAL.ordinal()));
261 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
// Link back to the source artifact when the hit carries an artifact id.
265 hit.getArtifactID().ifPresent(artifactID
266 -> attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
270 return content.newAnalysisResult(
271 BlackboardArtifact.Type.TSK_KEYWORD_HIT, Score.SCORE_LIKELY_NOTABLE,
272 null, listName, null,
274 .getAnalysisResult();
275 }
catch (TskCoreException e) {
276 logger.log(Level.WARNING,
"Error adding bb artifact for keyword hit", e);
/**
 * Builds the SolrQuery for this keyword: query text, page size, sort order,
 * returned fields, filter queries and highlighting.
 *
 * NOTE(review): the conditions guarding the field prefix and the
 * highlighting call, the final else and the return statement are not
 * visible in this chunk.
 *
 * @param snippets whether snippets are wanted; presumably gates the
 *                 highlighting configuration at the end - the guarding line
 *                 is not visible here
 * @return presumably the configured query
 * @throws NoOpenCoreException          declared by this method
 * @throws KeywordSearchModuleException declared by this method
 */
289 private SolrQuery createAndConfigureSolrQuery(
boolean snippets)
throws NoOpenCoreException, KeywordSearchModuleException {
290 double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
292 SolrQuery q =
new SolrQuery();
293 q.setShowDebugInfo(DEBUG);
// Literal terms are passed through KeywordSearchUtil.quoteQuery; other terms are used as they are.
295 String queryStr = originalKeyword.searchTermIsLiteral()
296 ? KeywordSearchUtil.quoteQuery(keywordStringEscaped) : keywordStringEscaped;
// Field-restricted form "field:query" (the guarding condition is not visible here).
301 queryStr = field +
":" + queryStr;
302 q.setQuery(queryStr);
303 }
// Schema 2.2+ literal terms are expanded by the language-specific helper.
else if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) {
304 q.setQuery(LanguageSpecificContentQueryHelper.expandQueryString(queryStr));
306 q.setQuery(queryStr);
308 q.setRows(MAX_RESULTS_PER_CURSOR_MARK);
// Results are sorted ascending on the id field; performQuery pages through them with a cursor mark.
310 q.setSort(SolrQuery.SortClause.asc(Server.Schema.ID.toString()));
312 q.setFields(Server.Schema.ID.toString(),
313 Server.Schema.CHUNK_SIZE.toString(),
314 Server.Schema.CONTENT_STR.toString());
// Schema 2.2+ literal terms additionally return the language field and get the helper's term-frequency setup.
316 if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) {
317 q.addField(Server.Schema.LANGUAGE.toString());
318 LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywordStringEscaped);
321 for (KeywordQueryFilter filter : filters) {
322 q.addFilterQuery(filter.toString());
326 configurwQueryForHighlighting(q);
/**
 * Adds highlighting settings to the given query: which fields to highlight,
 * one snippet per document, fragment sizes and highlighter parameters.
 *
 * NOTE(review): the method name is misspelled ("configurw"); it is called
 * from createAndConfigureSolrQuery and querySnippet, so a rename must touch
 * all three places together.
 *
 * @param q query to configure; its query string must already be set,
 *          because its length is read for hl.fragCharSize
 * @throws NoOpenCoreException declared by this method
 */
338 private static void configurwQueryForHighlighting(SolrQuery q)
throws NoOpenCoreException {
339 double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
// Schema 2.2+: highlight every field reported by the language-specific helper.
340 if (2.2 <= indexSchemaVersion) {
341 for (Server.Schema field : LanguageSpecificContentQueryHelper.getQueryFields()) {
342 q.addHighlightField(field.toString());
// NOTE(review): presumably the else branch for older schemas - the else line is not visible here.
345 q.addHighlightField(HIGHLIGHT_FIELD);
348 q.setHighlightSnippets(1);
349 q.setHighlightFragsize(SNIPPET_LENGTH);
352 q.setParam(
"hl.useFastVectorHighlighter",
"on");
// NOTE(review): the opening and closing highlight tags are the same character;
// confirm whether consumers of the snippet rely on that before changing it.
353 q.setParam(
"hl.tag.pre",
"«");
354 q.setParam(
"hl.tag.post",
"«");
355 q.setParam(
"hl.fragListBuilder",
"simple");
// Fragment character size follows the length of the query string.
358 q.setParam(
"hl.fragCharSize", Integer.toString(q.getQuery().length()));
// Uses the Server constant so highlighting analysis is not capped at a fixed character count.
362 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
/**
 * Creates a KeywordHit for a Solr document, attaching the first highlight
 * snippet of the text field when snippets are enabled in the settings.
 *
 * NOTE(review): the declaration and default value of the local variable
 * snippet are not visible in this chunk. The method name is misspelled
 * ("Keywordt"); it is called from performQuery.
 *
 * @param highlightResponse highlighting section of the Solr response, keyed
 *                          by document id and then by field name
 * @param docId             Solr document id of the hit
 * @return a hit for docId carrying the snippet and the original search term
 * @throws TskException declared by this method
 */
365 private KeywordHit createKeywordtHit(Map<String, Map<String, List<String>>> highlightResponse, String docId)
throws TskException {
371 if (KeywordSearchSettings.getShowSnippets()) {
// NOTE(review): highlightResponse.get(docId) is dereferenced without a null check;
// a document with no highlighting entry would raise a NullPointerException - confirm it cannot occur.
372 List<String> snippetList = highlightResponse.get(docId).get(Server.Schema.TEXT.toString());
// A null list means there is no snippet for the text field of this document.
374 if (snippetList != null) {
375 snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
379 return new KeywordHit(docId, snippet, originalKeyword.getSearchTerm());
/**
 * Convenience overload of querySnippet that passes chunk id 0, which the
 * five-argument overload treats as "use the object id itself as the Solr
 * document id".
 *
 * @param query        query text to highlight
 * @param solrObjectId object id of the Solr document
 * @param isRegex      passed through unchanged
 * @param group        passed through unchanged
 * @return whatever the five-argument overload returns
 * @throws NoOpenCoreException declared by this method
 */
396 static String querySnippet(String query,
long solrObjectId,
boolean isRegex,
boolean group)
throws NoOpenCoreException {
397 return querySnippet(query, solrObjectId, 0, isRegex, group);
/**
 * Queries Solr for a single document (or chunk) and returns the first
 * highlight snippet for the given query, HTML-unescaped and trimmed.
 *
 * NOTE(review): large parts of this method are not visible in this chunk -
 * the declaration of queryStr, the branch on isRegex, the other half of the
 * group ternary, the try opening, the early returns for missing highlights
 * and everything after the catch blocks. The comments below describe only
 * what is shown.
 *
 * @param query        query text to highlight
 * @param solrObjectId object id of the Solr document
 * @param chunkID      chunk number; 0 selects the document id built from the
 *                     object id alone
 * @param isRegex      presumably selects between the two ways of building
 *                     the query string - the branching line is not visible
 * @param group        when true on the visible branch, the query is passed
 *                     through KeywordSearchUtil.quoteQuery
 * @return the first highlight for the document; the value returned when no
 *         highlight exists or a query error is logged is not visible here
 * @throws NoOpenCoreException declared by this method, although a catch
 *                             block for that exception is also visible
 */
415 static String querySnippet(String query,
long solrObjectId,
int chunkID,
boolean isRegex,
boolean group)
throws NoOpenCoreException {
416 SolrQuery q =
new SolrQuery();
417 q.setShowDebugInfo(DEBUG);
// One visible form restricts the query to the highlight field ...
421 queryStr = HIGHLIGHT_FIELD +
":"
422 + (group ? KeywordSearchUtil.quoteQuery(query)
// ... the other uses the quoted query without a field prefix.
429 queryStr = KeywordSearchUtil.quoteQuery(query);
431 q.setQuery(queryStr);
// Chunk 0 addresses the document by object id alone; other chunks use the server's chunk id string.
433 String contentIDStr = (chunkID == 0)
434 ? Long.toString(solrObjectId)
435 : Server.getChunkIdString(solrObjectId, chunkID);
// Restrict the search to that single document through a filter query on the id field.
436 String idQuery = Server.Schema.ID.toString() +
":" + KeywordSearchUtil.escapeLuceneQuery(contentIDStr);
437 q.addFilterQuery(idQuery);
439 configurwQueryForHighlighting(q);
441 Server solrServer = KeywordSearch.getServer();
444 QueryResponse response = solrServer.query(q, METHOD.POST);
// NOTE(review): responseHighlight itself is not null-checked in the visible lines.
445 Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
446 Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIDStr);
// No highlighting entry for this document (the handling is not visible here).
447 if (responseHighlightID == null) {
450 double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
451 List<String> contentHighlights;
// Schema 2.2+ takes the highlights from the language-specific helper; the other
// assignment reads the default highlight field (the joining else is not visible here).
452 if (2.2 <= indexSchemaVersion) {
453 contentHighlights = LanguageSpecificContentQueryHelper.getHighlights(responseHighlightID).orElse(null);
455 contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
// No highlights for this document (the handling is not visible here).
457 if (contentHighlights == null) {
461 return EscapeUtil.unEscapeHtml(contentHighlights.get(0)).trim();
463 }
// Both failure types are logged at SEVERE with the query, object id and chunk id.
catch (NoOpenCoreException ex) {
464 logger.log(Level.SEVERE,
"Error executing Lucene Solr Query: " + query +
". Solr doc id " + solrObjectId +
", chunkID " + chunkID, ex);
466 }
catch (KeywordSearchModuleException ex) {
467 logger.log(Level.SEVERE,
"Error executing Lucene Solr Query: " + query +
". Solr doc id " + solrObjectId +
", chunkID " + chunkID, ex);