19 package org.sleuthkit.autopsy.keywordsearch;
21 import org.apache.solr.client.solrj.SolrQuery;
22 import org.apache.solr.client.solrj.SolrRequest;
23 import org.apache.solr.client.solrj.response.QueryResponse;
24 import org.apache.solr.common.SolrDocument;
25 import org.apache.solr.common.SolrDocumentList;
30 import java.util.ArrayList;
31 import java.util.Collections;
32 import java.util.HashMap;
33 import java.util.List;
35 import java.util.Optional;
37 import java.util.stream.Collectors;
42 final class LanguageSpecificContentQueryHelper {
/**
 * Private constructor with an empty body. Every member visible in this class is static.
 */
private LanguageSpecificContentQueryHelper() {
    // intentionally empty
}
// Language-specific content fields; at present the single field CONTENT_JA.
private static final List<Server.Schema> LANGUAGE_SPECIFIC_CONTENT_FIELDS =
        Collections.singletonList(Server.Schema.CONTENT_JA);

// Populated once by the static initializer below: TEXT first, then the language-specific fields.
private static final List<Server.Schema> QUERY_FIELDS = new ArrayList<>();

// True when the build type reported by Version is DEVELOPMENT.
private static final boolean DEBUG = (Version.Type.DEVELOPMENT == Version.getBuildType());

static {
    QUERY_FIELDS.add(Server.Schema.TEXT);
    QUERY_FIELDS.addAll(LANGUAGE_SPECIFIC_CONTENT_FIELDS);
}
59 static class QueryResults {
60 List<SolrDocument> chunks =
new ArrayList<>();
61 Map< String, SolrDocument> miniChunks =
new HashMap<>();
63 Map<String, Map<String, List<String>>> highlighting =
new HashMap<>();
72 static String expandQueryString(
final String queryStr) {
73 List<String> fieldQueries =
new ArrayList<>();
74 fieldQueries.add(Server.Schema.TEXT.toString() +
":" + queryStr);
75 fieldQueries.addAll(LANGUAGE_SPECIFIC_CONTENT_FIELDS.stream().map(field -> field.toString() +
":" + queryStr).collect(Collectors.toList()));
76 return String.join(
" OR ", fieldQueries);
/**
 * Accessor declared to return a list of {@code Server.Schema} fields.
 *
 * NOTE(review): the method body is not visible here. Presumably it returns
 * QUERY_FIELDS (TEXT followed by the language-specific content fields) - confirm,
 * and confirm whether callers are expected to treat the returned list as read-only.
 */
79 static List<Server.Schema> getQueryFields() {
83 static void updateQueryResults(QueryResults results, SolrDocument document) {
84 String
id = (String) document.getFieldValue(Server.Schema.ID.toString());
85 if (MiniChunkHelper.isMiniChunkID(
id)) {
86 results.miniChunks.put(MiniChunkHelper.getBaseChunkID(
id), document);
88 results.chunks.add(document);
99 static Optional<List<String>> getHighlights(Map<String, List<String>> highlight) {
100 for (Server.Schema field : LANGUAGE_SPECIFIC_CONTENT_FIELDS) {
101 if (highlight.containsKey(field.toString())) {
102 return Optional.of(highlight.get(field.toString()));
105 return Optional.empty();
/**
 * Combines the given hits with the hits that findMatches(...) derives from
 * {@code queryResults}.
 *
 * For every entry of {@code matches} whose Solr document ID is also produced by
 * findMatches(...), the hit from findMatches(...) is placed into the merged list.
 * After the loop, all values held by the map are appended.
 *
 * NOTE(review): what happens to a match whose ID is not in the map, whether consumed
 * entries are removed from the map before the final addAll, and the return statement
 * are not visible here - confirm before relying on this description.
 *
 * @param matches         existing hits to merge into
 * @param originalKeyword keyword passed through to findMatches(...)
 * @param queryResults    query results passed through to findMatches(...)
 * @throws KeywordSearchModuleException declared; findMatches(...) declares it as well
 */
113 static List<KeywordHit> mergeKeywordHits(List<KeywordHit> matches, Keyword originalKeyword, QueryResults queryResults)
throws KeywordSearchModuleException {
// Index the hits from findMatches(...) by Solr document ID. Collectors.toMap without a
// merge function throws IllegalStateException if two hits share the same ID.
114 Map<String, KeywordHit> map = findMatches(originalKeyword, queryResults).stream().collect(Collectors.toMap(KeywordHit::getSolrDocumentId, x -> x));
115 List<KeywordHit> merged =
new ArrayList<>();
// Walk the incoming hits; one whose ID is in the map is represented by the mapped hit.
118 for (KeywordHit match : matches) {
119 String key = match.getSolrDocumentId();
120 if (map.containsKey(key)) {
121 merged.add(map.get(key));
// Append the values currently held by the map.
128 merged.addAll(map.values());
133 static void configureTermfreqQuery(SolrQuery query, String keyword)
throws KeywordSearchModuleException, NoOpenCoreException {
135 QueryTermHelper.Result queryParserResult = QueryTermHelper.parse(keyword, LANGUAGE_SPECIFIC_CONTENT_FIELDS);
136 query.addField(buildTermfreqQuery(keyword, queryParserResult));
139 static String buildTermfreqQuery(String keyword, QueryTermHelper.Result result) {
140 List<String> termfreqs =
new ArrayList<>();
141 for (Map.Entry<String, List<String>> e : result.fieldTermsMap.entrySet()) {
142 String field = e.getKey();
143 for (String term : e.getValue()) {
144 termfreqs.add(String.format(
"termfreq(\"%s\",\"%s\")", field, KeywordSearchUtil.escapeLuceneQuery(term)));
150 return String.format(
"termfreq:sum(%s)", String.join(
",", termfreqs));
/**
 * Queries Solr for the term-frequency value of one document.
 *
 * The query is restricted by a filter on the ID field to {@code contentID}, searches
 * for all given keywords across the expanded query fields, and requests the field
 * configured by configureTermfreqQuery(...). The TERMFREQ value of the first returned
 * document is converted to an int.
 *
 * NOTE(review): the statement executed when the result list is empty is not visible
 * here - confirm what is returned in that case.
 *
 * @param keywords  keywords to search for; {@code keywords.iterator().next()} is called
 *                  unconditionally, which throws NoSuchElementException for an empty set
 * @param contentID value matched against the ID field after Lucene escaping
 * @return the TERMFREQ field of the first result, via {@code Float.intValue()}
 * @throws KeywordSearchModuleException declared by this method
 * @throws NoOpenCoreException          declared by this method
 */
153 static int queryChunkTermfreq(Set<String> keywords, String contentID)
throws KeywordSearchModuleException, NoOpenCoreException {
154 SolrQuery q =
new SolrQuery();
155 q.setShowDebugInfo(DEBUG);
// Filter: ID field equals the escaped contentID.
157 final String filterQuery = Server.Schema.ID.toString() +
":" + KeywordSearchUtil.escapeLuceneQuery(contentID);
// Main query: each keyword is escaped, quoted, expanded over the query fields,
// and the per-keyword clauses are joined with a single space.
158 final String highlightQuery = keywords.stream()
159 .map(s -> LanguageSpecificContentQueryHelper.expandQueryString(
160 KeywordSearchUtil.quoteQuery(KeywordSearchUtil.escapeLuceneQuery(s))))
161 .collect(Collectors.joining(
" "));
163 q.addFilterQuery(filterQuery);
164 q.setQuery(highlightQuery);
// Only the first keyword yielded by the set's iterator feeds the termfreq field.
165 LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywords.iterator().next());
// The request is sent with the POST method.
167 QueryResponse response = KeywordSearch.getServer().query(q, SolrRequest.METHOD.POST);
168 SolrDocumentList results = response.getResults();
169 if (results.isEmpty()) {
// Read the TERMFREQ field of the first document; the cast requires a non-null Float.
173 SolrDocument document = results.get(0);
174 return ((Float) document.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue();
/**
 * Searches {@code s} for successive occurrences of {@code pattern}.
 *
 * Each iteration looks for {@code pattern} starting one position after the previous
 * value of {@code idx}; the loop runs while {@code idx} is below the length of
 * {@code s} and {@code found} does not exceed {@code n}.
 *
 * NOTE(review): the declarations and initial values of {@code idx} and {@code found},
 * how {@code found} is advanced, the handling of a -1 result from indexOf, and the
 * return statement are not visible here - confirm whether {@code n} is zero-based
 * and what is returned when there are too few occurrences.
 *
 * @param s       string to search in
 * @param pattern substring to look for
 * @param n       occurrence selector compared against {@code found}
 */
177 static int findNthIndexOf(String s, String pattern,
int n) {
180 int len = s.length();
181 while (idx < len && found <= n) {
// Resume the search just past the previous position.
182 idx = s.indexOf(pattern, idx + 1);
192 private static List<KeywordHit> findMatches(Keyword originalKeyword, QueryResults queryResults)
throws KeywordSearchModuleException {
193 List<KeywordHit> matches =
new ArrayList<>();
194 for (SolrDocument document : queryResults.chunks) {
195 String docId = (String) document.getFieldValue(Server.Schema.ID.toString());
198 int hitCountInChunk = ((Float) document.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue();
199 SolrDocument miniChunk = queryResults.miniChunks.get(docId);
200 if (miniChunk == null) {
202 matches.add(createKeywordHit(originalKeyword, queryResults.highlighting, docId));
204 int hitCountInMiniChunk = ((Float) miniChunk.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue();
205 if (hitCountInMiniChunk < hitCountInChunk) {
207 matches.add(createKeywordHit(originalKeyword, queryResults.highlighting, docId));
210 }
catch (TskException ex) {
211 throw new KeywordSearchModuleException(ex);
/**
 * Creates a KeywordHit for one document, optionally carrying a snippet.
 *
 * When KeywordSearchSettings.getShowSnippets() is true, the highlight entry for
 * {@code docId} is looked up, and if snippets are found the first one is HTML-unescaped,
 * trimmed and used as the snippet.
 *
 * NOTE(review): the declaration and initial value of {@code snippet} are not visible
 * here - confirm what a hit carries when snippets are disabled or none are found.
 *
 * @param originalKeyword   keyword whose search term is stored on the hit
 * @param highlightResponse map from document ID to (field name to snippets)
 * @param docId             Solr document ID of the hit
 * @return a new KeywordHit built from the document ID, the snippet and the search term
 * @throws TskException declared by this method; the visible code does not show where it is raised
 */
220 private static KeywordHit createKeywordHit(Keyword originalKeyword, Map<String, Map<String, List<String>>> highlightResponse, String docId)
throws TskException {
226 if (KeywordSearchSettings.getShowSnippets()) {
// NOTE(review): highlightResponse.get(docId) is null when there is no entry for docId -
// check that getHighlightFieldValue tolerates a null map.
227 List<String> snippetList = getHighlightFieldValue(highlightResponse.get(docId)).orElse(null);
// snippetList is null when no language-specific field had highlights.
229 if (snippetList != null) {
// Only the first snippet is used; get(0) throws if the list is empty.
230 snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
234 return new KeywordHit(docId, snippet, originalKeyword.getSearchTerm());
240 private static Optional<List<String>> getHighlightFieldValue(Map<String, List<String>> highlight) {
241 for (Server.Schema field : LANGUAGE_SPECIFIC_CONTENT_FIELDS) {
242 if (highlight.containsKey(field.toString())) {
243 return Optional.of(highlight.get(field.toString()));
246 return Optional.empty();