19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.util.ArrayList;
22 import java.util.Collection;
23 import java.util.Comparator;
24 import java.util.List;
26 import java.util.logging.Level;
27 import org.apache.commons.lang3.StringUtils;
28 import org.apache.commons.lang3.math.NumberUtils;
29 import org.apache.solr.client.solrj.SolrQuery;
30 import org.apache.solr.client.solrj.SolrRequest;
31 import org.apache.solr.client.solrj.SolrRequest.METHOD;
32 import org.apache.solr.client.solrj.response.QueryResponse;
33 import org.apache.solr.common.SolrDocument;
34 import org.apache.solr.common.SolrDocumentList;
35 import org.apache.solr.common.params.CursorMarkParams;
50 class LuceneQuery
implements KeywordSearchQuery {
// Logger named after this class; the visible call sites log at WARNING level.
52 private static final Logger logger = Logger.getLogger(LuceneQuery.class.getName());
// Working form of the search term: seeded from the keyword's search term in the
// constructor, replaced by escape() and extended with "*" by setSubstringQuery().
53 private String keywordStringEscaped;
// NOTE(review): no assignment to this flag is visible in this view; presumably
// escape() sets it and isEscaped() returns it - confirm against the full file.
54 private boolean isEscaped;
// Keyword this query was built for; source of the raw search term and of the
// literal / whole-word flags read elsewhere in the class.
55 private final Keyword originalKeyword ;
// Keyword list handed to the constructor.
56 private final KeywordList keywordList ;
// Filters appended through addFilter(); each filter's toString() is added to the
// Solr query as a filter query in createAndConfigureSolrQuery().
57 private final List<KeywordQueryFilter> filters =
new ArrayList<>();
// Field name that createAndConfigureSolrQuery() prepends to the query string as
// "field:query"; defaults to null.
58 private String field = null;
// Number of rows requested from Solr for each cursor-mark page.
59 private static final int MAX_RESULTS_PER_CURSOR_MARK = 512;
// Fragment size passed to SolrQuery.setHighlightFragsize().
60 static final int SNIPPET_LENGTH = 50;
// Name of the Solr field that highlighting is requested for.
61 static final String HIGHLIGHT_FIELD = Server.Schema.TEXT.toString();
// True when the build type is DEVELOPMENT; passed to SolrQuery.setShowDebugInfo().
63 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
/**
 * Builds a query for a single keyword.
 *
 * The working query string starts out as the keyword's search term; no
 * escaping is applied here.
 *
 * @param keywordList keyword list stored with this query
 * @param keyword     keyword to search for; must not be null because its
 *                    search term is read immediately
 */
LuceneQuery(KeywordList keywordList, Keyword keyword) {
    originalKeyword = keyword;
    this.keywordList = keywordList;
    keywordStringEscaped = keyword.getSearchTerm();
}
77 public void addFilter(KeywordQueryFilter filter) {
78 this.filters.add(filter);
// Setter for the Solr field name used to qualify the query.
// NOTE(review): only the signature is visible in this view; the body
// (presumably an assignment to this.field) is elided - confirm against the full file.
82 public void setField(String field) {
87 public void setSubstringQuery() {
90 keywordStringEscaped +=
"*";
// Replaces the working query string with the Lucene-escaped form of the
// keyword's original search term. Because it restarts from the original term,
// a "*" previously appended by setSubstringQuery() is discarded.
// NOTE(review): at least one line after the assignment is not visible in this
// view; presumably it records the escaped state in isEscaped - confirm.
94 public void escape() {
95 keywordStringEscaped = KeywordSearchUtil.escapeLuceneQuery(originalKeyword.getSearchTerm());
// Accessor for the escaped state of this query.
// NOTE(review): only the signature is visible in this view; the body
// (presumably returning the isEscaped field) is elided - confirm against the full file.
100 public boolean isEscaped() {
105 public boolean isLiteral() {
106 return originalKeyword.searchTermIsLiteral();
110 public String getEscapedQueryString() {
111 return this.keywordStringEscaped;
115 public String getQueryString() {
116 return this.originalKeyword.getSearchTerm();
// Accessor for the keyword list associated with this query.
// NOTE(review): only the signature is visible in this view; the body
// (presumably returning the keywordList field) is elided - confirm against the full file.
120 public KeywordList getKeywordList() {
/**
 * Executes this keyword query against the Solr server and collects the hits.
 *
 * Results are fetched page by page using Solr cursor marks. Each returned
 * document that passes the checks below contributes a KeywordHit, and all hits
 * are registered in a QueryResults object under a single Keyword built from
 * the original keyword.
 *
 * NOTE(review): several structural lines of this method are not visible in
 * this view (the opening "try", any "else", closing braces and the final
 * return); the comments below describe only the statements that are shown.
 *
 * @return presumably the QueryResults populated at the end of the method; the
 *         return statement itself is not visible here
 * @throws KeywordSearchModuleException wraps a TskException caught while
 *                                      processing a result document
 * @throws NoOpenCoreException          declared by the signature; presumably
 *                                      propagated from the Solr server calls
 */
125 public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
127 final Server solrServer = KeywordSearch.getServer();
// Schema version of the index, parsed as a double so it can be compared with 2.0 below.
128 double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
// The show-snippets setting is read here and read again in createKeywordtHit().
130 SolrQuery solrQuery = createAndConfigureSolrQuery(KeywordSearchSettings.getShowSnippets());
// Raw search term with leading/trailing double quotes removed, used to locate
// the term inside the stored content strings.
132 final String strippedQueryString = StringUtils.strip(getQueryString(),
"\"");
// Cursor-mark paging state: start marker, loop flag and the accumulated hits.
134 String cursorMark = CursorMarkParams.CURSOR_MARK_START;
135 boolean allResultsProcessed =
false;
136 List<KeywordHit> matches =
new ArrayList<>();
137 while (!allResultsProcessed) {
// Request the page that starts at the current cursor mark.
138 solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
139 QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
140 SolrDocumentList resultList = response.getResults();
// Highlighting data for this page; createKeywordtHit() looks it up by document id
// and then by field name.
142 Map<String, Map<String, List<String>>> highlightResponse = response.getHighlighting();
144 for (SolrDocument resultDoc : resultList) {
152 final String docId = resultDoc.getFieldValue(Server.Schema.ID.toString()).toString();
// chunkSize may be null; that case is handled explicitly in the condition below.
153 final Integer chunkSize = (Integer) resultDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
154 final Collection<Object> content = resultDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
// Indexes with a schema version below 2.0: the hit is recorded without the
// position check applied below.
156 if (indexSchemaVersion < 2.0) {
158 matches.add(createKeywordtHit(highlightResponse, docId));
// Position check, performed once per stored content value.
// NOTE(review): content is iterated without a null check, and a hit is added for
// every content value that qualifies, so one docId may be recorded more than once.
161 for (Object content_obj : content) {
162 String content_str = (String) content_obj;
// Index of the first case-insensitive occurrence of the term, or -1 if absent.
164 int firstOccurence = StringUtils.indexOfIgnoreCase(content_str, strippedQueryString);
// Record the hit when the document has no chunk size (null or 0), or when the
// first occurrence starts before the chunk-size offset.
166 if (chunkSize == null || chunkSize == 0 || (firstOccurence > -1 && firstOccurence < chunkSize)) {
167 matches.add(createKeywordtHit(highlightResponse, docId));
171 }
catch (TskException ex) {
172 throw new KeywordSearchModuleException(ex);
// Paging ends when Solr hands back the same cursor mark that was sent.
175 String nextCursorMark = response.getNextCursorMark();
176 if (cursorMark.equals(nextCursorMark)) {
177 allResultsProcessed =
true;
179 cursorMark = nextCursorMark;
182 QueryResults results =
new QueryResults(
this);
// All hits are stored under one Keyword built from the original keyword. Both
// boolean constructor arguments are passed as true; their meaning is defined by
// the Keyword constructor, which is not shown here.
184 results.addResult(
new Keyword(originalKeyword.getSearchTerm(),
true,
true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), matches);
190 public boolean validate() {
191 return StringUtils.isNotBlank(originalKeyword.getSearchTerm());
/**
 * Records one keyword hit as a TSK_KEYWORD_HIT artifact on the hit's content.
 *
 * A new artifact is created and wrapped in a KeywordCachedArtifact, a list of
 * attributes describing the hit is assembled, and that list is added both to
 * the artifact and to the wrapper.
 *
 * NOTE(review): several structural lines of this method are not visible in
 * this view (both opening "try" lines, what follows each logged exception, any
 * "else", closing braces and the return statements); the comments below cover
 * only the statements that are shown.
 *
 * @param foundKeyword keyword whose search term is stored in the attributes
 * @param hit          hit supplying the content and, for artifact hits, the
 *                     associated artifact
 * @param snippet      preview text; no preview attribute is added when null
 * @param listName     keyword list name; no set-name attribute is added when blank
 * @return presumably the KeywordCachedArtifact built here; the return
 *         statements are not visible in this view
 */
195 public KeywordCachedArtifact writeSingleFileHitsToBlackBoard(Keyword foundKeyword, KeywordHit hit, String snippet, String listName) {
// Module name used as the source of every attribute created below.
196 final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
198 Collection<BlackboardAttribute> attributes =
new ArrayList<>();
199 BlackboardArtifact bba;
200 KeywordCachedArtifact writeResult;
// Create the keyword-hit artifact on the content the hit refers to.
202 bba = hit.getContent().newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
203 writeResult =
new KeywordCachedArtifact(bba);
204 }
catch (TskCoreException e) {
205 logger.log(Level.WARNING,
"Error adding bb artifact for keyword hit", e);
// Preview attribute, only when a snippet was supplied.
209 if (snippet != null) {
210 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
// The matched keyword's search term.
212 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm()));
// Keyword list (set) name, when one was given.
213 if (StringUtils.isNotBlank(listName)) {
214 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
// NOTE(review): originalKeyword is final and already dereferenced in the
// constructor, so this null check looks redundant - confirm.
217 if (originalKeyword != null) {
// Additional attribute of the keyword's own attribute type, when it has one.
218 BlackboardAttribute.ATTRIBUTE_TYPE selType = originalKeyword.getArtifactAttributeType();
219 if (selType != null) {
220 attributes.add(
new BlackboardAttribute(selType, MODULE_NAME, foundKeyword.getSearchTerm()));
// Search type is stored as the ordinal of KeywordSearch.QueryType, so reordering
// that enum would change the stored values. LITERAL is used for whole-word
// keywords; the SUBSTRING attribute below is presumably the "else" branch,
// which is not visible in this view.
223 if (originalKeyword.searchTermIsWholeWord()) {
224 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.LITERAL.ordinal()));
226 attributes.add(
new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
// For hits found in an artifact, link back to that artifact by id.
230 if (hit.isArtifactHit()) {
231 attributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, hit.getArtifact().getArtifactID()));
// Attach the attributes to the artifact and mirror them into the cached wrapper.
235 bba.addAttributes(attributes);
236 writeResult.add(attributes);
238 }
catch (TskCoreException e) {
239 logger.log(Level.WARNING,
"Error adding bb attributes to artifact", e);
/**
 * Creates the SolrQuery used by performQuery().
 *
 * The query string is the working keyword string, passed through
 * KeywordSearchUtil.quoteQuery() when the keyword's search term is a literal.
 * The query requests MAX_RESULTS_PER_CURSOR_MARK rows per page, sorts
 * ascending by document id, and returns only the id, chunk-size and
 * content-string fields. Every registered filter is added as a filter query.
 *
 * NOTE(review): some lines of this method are not visible in this view. The
 * "field:" prefix and the highlighting call are presumably guarded by
 * conditions (field being non-null, and the snippets flag, respectively), and
 * the method presumably ends by returning q - confirm against the full file.
 *
 * @param snippets not referenced by any visible statement; presumably controls
 *                 whether highlighting is configured - confirm
 * @return the configured query (the return statement is not visible here)
 */
252 private SolrQuery createAndConfigureSolrQuery(
boolean snippets) {
253 SolrQuery q =
new SolrQuery();
254 q.setShowDebugInfo(DEBUG);
// Literal search terms are wrapped by quoteQuery(); others are used as-is.
256 String queryStr = originalKeyword.searchTermIsLiteral()
257 ? KeywordSearchUtil.quoteQuery(keywordStringEscaped) : keywordStringEscaped;
// Qualify the query with the configured field name.
262 queryStr = field +
":" + queryStr;
264 q.setQuery(queryStr);
265 q.setRows(MAX_RESULTS_PER_CURSOR_MARK);
// performQuery() pages with cursor marks, which need a sort clause; presumably
// the id field is the index's unique key - confirm.
267 q.setSort(SolrQuery.SortClause.asc(Server.Schema.ID.toString()));
// Only the fields that performQuery() reads from each result document.
269 q.setFields(Server.Schema.ID.toString(),
270 Server.Schema.CHUNK_SIZE.toString(),
271 Server.Schema.CONTENT_STR.toString());
273 for (KeywordQueryFilter filter : filters) {
274 q.addFilterQuery(filter.toString());
278 configurwQueryForHighlighting(q);
290 private static void configurwQueryForHighlighting(SolrQuery q) {
291 q.addHighlightField(HIGHLIGHT_FIELD);
292 q.setHighlightSnippets(1);
293 q.setHighlightFragsize(SNIPPET_LENGTH);
296 q.setParam(
"hl.useFastVectorHighlighter",
"on");
297 q.setParam(
"hl.tag.pre",
"«");
298 q.setParam(
"hl.tag.post",
"«");
299 q.setParam(
"hl.fragListBuilder",
"simple");
302 q.setParam(
"hl.fragCharSize", Integer.toString(q.getQuery().length()));
306 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
309 private KeywordHit createKeywordtHit(Map<String, Map<String, List<String>>> highlightResponse, String docId)
throws TskException {
315 if (KeywordSearchSettings.getShowSnippets()) {
316 List<String> snippetList = highlightResponse.get(docId).get(Server.Schema.TEXT.toString());
318 if (snippetList != null) {
319 snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
323 return new KeywordHit(docId, snippet, originalKeyword.getSearchTerm());
340 static String querySnippet(String query,
long solrObjectId,
boolean isRegex,
boolean group)
throws NoOpenCoreException {
341 return querySnippet(query, solrObjectId, 0, isRegex, group);
/**
 * Fetches a highlighted snippet for a query from a single indexed document.
 *
 * The search is restricted with a filter query on the document id: the plain
 * object id when chunkID is 0, otherwise the id string produced by
 * Server.getChunkIdString(). The first highlight fragment returned for
 * HIGHLIGHT_FIELD is HTML-unescaped and trimmed.
 *
 * NOTE(review): parts of this method are not visible in this view: the
 * declaration of queryStr, the condition choosing between the two queryStr
 * assignments (presumably based on isRegex), the second half of the
 * conditional expression on group, the opening "try", what happens after each
 * null check, and what each catch block does after logging. The method may
 * also continue past the last visible line. Confirm against the full file.
 *
 * @param query        text to search for; also included in the log messages
 * @param solrObjectId object id of the document to search in
 * @param chunkID      0 to address the document by object id alone, otherwise
 *                     the chunk number combined with the object id
 * @param isRegex      not referenced by any visible statement; presumably
 *                     selects how queryStr is built - confirm
 * @param group        when true, the query is passed through quoteQuery() in
 *                     the field-qualified form of the query string
 * @return the unescaped, trimmed first highlight fragment on the visible
 *         success path; other return paths are not visible here
 * @throws NoOpenCoreException declared by the signature; the visible catch
 *                             block logs it, and whether it is rethrown is
 *                             not visible here
 */
359 static String querySnippet(String query,
long solrObjectId,
int chunkID,
boolean isRegex,
boolean group)
throws NoOpenCoreException {
360 SolrQuery q =
new SolrQuery();
361 q.setShowDebugInfo(DEBUG);
// Field-qualified form of the query string, searching HIGHLIGHT_FIELD.
365 queryStr = HIGHLIGHT_FIELD +
":"
366 + (group ? KeywordSearchUtil.quoteQuery(query)
// Alternative form: the quoted query without a field prefix.
373 queryStr = KeywordSearchUtil.quoteQuery(query);
375 q.setQuery(queryStr);
// Document id to search in: object id alone for chunk 0, otherwise object id plus chunk.
377 String contentIDStr = (chunkID == 0)
378 ? Long.toString(solrObjectId)
379 : Server.getChunkIdString(solrObjectId, chunkID);
// Restrict the search to that one document; the id is Lucene-escaped first.
380 String idQuery = Server.Schema.ID.toString() +
":" + KeywordSearchUtil.escapeLuceneQuery(contentIDStr);
381 q.addFilterQuery(idQuery);
383 configurwQueryForHighlighting(q);
385 Server solrServer = KeywordSearch.getServer();
388 QueryResponse response = solrServer.query(q, METHOD.POST);
// Highlighting is keyed by document id, then by field name.
389 Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
390 Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIDStr);
// No highlighting entry for this document.
391 if (responseHighlightID == null) {
394 List<String> contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
// No fragments for the highlight field.
395 if (contentHighlights == null) {
// NOTE(review): get(0) would throw on an empty list; only null is checked above.
399 return EscapeUtil.unEscapeHtml(contentHighlights.get(0)).trim();
401 }
catch (NoOpenCoreException ex) {
402 logger.log(Level.WARNING,
"Error executing Lucene Solr Query: " + query, ex);
404 }
catch (KeywordSearchModuleException ex) {
405 logger.log(Level.WARNING,
"Error executing Lucene Solr Query: " + query, ex);