19 package org.sleuthkit.autopsy.keywordsearch;
21 import com.google.common.collect.Iterators;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.List;
29 import java.util.TreeMap;
30 import java.util.logging.Level;
31 import java.util.stream.Collectors;
32 import javax.annotation.concurrent.GuardedBy;
33 import org.apache.commons.lang.StringEscapeUtils;
34 import org.apache.commons.lang.StringUtils;
35 import org.apache.solr.client.solrj.SolrQuery;
36 import org.apache.solr.client.solrj.SolrRequest.METHOD;
37 import org.apache.solr.client.solrj.response.QueryResponse;
38 import org.apache.solr.common.SolrDocumentList;
39 import org.openide.util.NbBundle;
40 import org.openide.util.NbBundle.Messages;
53 class HighlightedText
implements IndexedText {
55 private static final Logger logger = Logger.getLogger(HighlightedText.class.getName());
57 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
59 private static final BlackboardAttribute.Type TSK_KEYWORD_SEARCH_TYPE =
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE);
60 private static final BlackboardAttribute.Type TSK_KEYWORD =
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD);
61 static private final BlackboardAttribute.Type TSK_ASSOCIATED_ARTIFACT =
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT);
63 private static final String HIGHLIGHT_PRE =
"<span style='background:yellow'>";
64 private static final String HIGHLIGHT_POST =
"</span>";
65 private static final String ANCHOR_PREFIX = HighlightedText.class.getName() +
"_";
67 final private Server solrServer = KeywordSearch.getServer();
69 private final long objectId;
73 private final Set<String> keywords =
new HashSet<>();
75 private int numberPages;
76 private Integer currentPage = 0;
79 private
boolean isPageInfoLoaded = false;
84 private final TreeMap<Integer, Integer> numberOfHitsPerPage = new TreeMap<>();
89 private final Set<Integer> pages = numberOfHitsPerPage.keySet();
93 private final HashMap<Integer, Integer> currentHitPerPage = new HashMap<>();
95 private QueryResults hits = null;
96 private BlackboardArtifact artifact;
97 private KeywordSearch.QueryType qt;
98 private
boolean isLiteral;
/**
 * Creates highlighted text for content whose keyword hits are already
 * known.
 *
 * @param objectId The object id of the content whose text is displayed.
 * @param hits     The query results from which pages and keywords are
 *                 derived when page info is first loaded.
 */
HighlightedText(long objectId, QueryResults hits) {
    this.objectId = objectId;
    // Must be retained: loadPageInfoFromHits() reads this field.
    this.hits = hits;
}
124 HighlightedText(BlackboardArtifact artifact)
throws TskCoreException {
125 this.artifact = artifact;
126 BlackboardAttribute attribute = artifact.getAttribute(TSK_ASSOCIATED_ARTIFACT);
127 if (attribute != null) {
128 this.objectId = attribute.getValueLong();
130 this.objectId = artifact.getObjectID();
139 @Messages({
"HighlightedText.query.exception.msg=Could not perform the query to get chunk info and get highlights:"})
140 synchronized private void loadPageInfo() throws TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
141 if (isPageInfoLoaded) {
145 this.numberPages = solrServer.queryNumFileChunks(this.objectId);
147 if (artifact != null) {
148 loadPageInfoFromArtifact();
149 }
else if (numberPages != 0) {
151 loadPageInfoFromHits();
154 this.numberPages = 1;
155 this.currentPage = 1;
156 numberOfHitsPerPage.put(1, 0);
158 currentHitPerPage.put(1, 0);
159 isPageInfoLoaded =
true;
169 synchronized private void loadPageInfoFromArtifact() throws TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
170 final String keyword = artifact.getAttribute(TSK_KEYWORD).getValueString();
171 this.keywords.add(keyword);
174 final BlackboardAttribute queryTypeAttribute = artifact.getAttribute(TSK_KEYWORD_SEARCH_TYPE);
175 qt = (queryTypeAttribute != null)
176 ? KeywordSearch.QueryType.values()[queryTypeAttribute.getValueInt()] : null;
178 isLiteral = qt != QueryType.REGEX;
182 Keyword keywordQuery =
new Keyword(keyword, isLiteral,
true);
183 KeywordSearchQuery chunksQuery =
new LuceneQuery(
new KeywordList(Arrays.asList(keywordQuery)), keywordQuery);
184 chunksQuery.escape();
185 chunksQuery.addFilter(
new KeywordQueryFilter(FilterType.CHUNK,
this.objectId));
187 hits = chunksQuery.performQuery();
188 loadPageInfoFromHits();
194 synchronized private void loadPageInfoFromHits() {
195 isLiteral = hits.getQuery().isLiteral();
197 for (Keyword k : hits.getKeywords()) {
198 for (KeywordHit hit : hits.getResults(k)) {
199 int chunkID = hit.getChunkId();
200 if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) {
201 numberOfHitsPerPage.put(chunkID, 0);
202 currentHitPerPage.put(chunkID, 0);
203 if (StringUtils.isNotBlank(hit.getHit())) {
204 this.keywords.add(hit.getHit());
211 this.currentPage = pages.stream().findFirst().orElse(1);
213 isPageInfoLoaded =
true;
224 static private String constructEscapedSolrQuery(String query) {
225 return LuceneQuery.HIGHLIGHT_FIELD +
":" +
"\"" + KeywordSearchUtil.escapeLuceneQuery(query) +
"\"";
228 private int getIndexOfCurrentPage() {
229 return Iterators.indexOf(pages.iterator(), this.currentPage::equals);
233 public int getNumberPages() {
235 return this.numberPages;
239 public int getCurrentPage() {
240 return this.currentPage;
244 public boolean hasNextPage() {
245 return getIndexOfCurrentPage() < pages.size() - 1;
249 public boolean hasPreviousPage() {
250 return getIndexOfCurrentPage() > 0;
254 public int nextPage() {
256 currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() + 1);
259 throw new IllegalStateException(
"No next page.");
264 public int previousPage() {
265 if (hasPreviousPage()) {
266 currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() - 1);
269 throw new IllegalStateException(
"No previous page.");
274 public boolean hasNextItem() {
275 if (!this.currentHitPerPage.containsKey(currentPage)) {
278 return this.currentHitPerPage.get(currentPage) < this.numberOfHitsPerPage.get(currentPage);
282 public boolean hasPreviousItem() {
283 if (!this.currentHitPerPage.containsKey(currentPage)) {
286 return this.currentHitPerPage.get(currentPage) > 1;
290 public int nextItem() {
291 if (!hasNextItem()) {
292 throw new IllegalStateException(
"No next item.");
294 int cur = currentHitPerPage.get(currentPage) + 1;
295 currentHitPerPage.put(currentPage, cur);
300 public int previousItem() {
301 if (!hasPreviousItem()) {
302 throw new IllegalStateException(
"No previous item.");
304 int cur = currentHitPerPage.get(currentPage) - 1;
305 currentHitPerPage.put(currentPage, cur);
310 public int currentItem() {
311 if (!this.currentHitPerPage.containsKey(currentPage)) {
314 return currentHitPerPage.get(currentPage);
318 public String getText() {
322 SolrQuery q =
new SolrQuery();
323 q.setShowDebugInfo(DEBUG);
325 String contentIdStr = Long.toString(this.objectId);
326 if (numberPages != 0) {
327 final String chunkID = Integer.toString(this.currentPage);
328 contentIdStr +=
"0".equals(chunkID) ?
"" :
"_" + chunkID;
330 final String filterQuery = Server.Schema.ID.toString() +
":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
332 final String highlightQuery = keywords.stream()
333 .map(HighlightedText::constructEscapedSolrQuery)
334 .collect(Collectors.joining(
" "));
336 q.setQuery(highlightQuery);
337 q.addField(Server.Schema.TEXT.toString());
338 q.addFilterQuery(filterQuery);
339 q.addHighlightField(LuceneQuery.HIGHLIGHT_FIELD);
340 q.setHighlightFragsize(0);
343 q.setParam(
"hl.useFastVectorHighlighter",
"on");
344 q.setParam(
"hl.tag.pre", HIGHLIGHT_PRE);
345 q.setParam(
"hl.tag.post", HIGHLIGHT_POST);
346 q.setParam(
"hl.fragListBuilder",
"single");
349 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
351 q.setQuery(filterQuery);
352 q.addField(Server.Schema.CONTENT_STR.toString());
355 QueryResponse response = solrServer.query(q, METHOD.POST);
360 if (response.getResults().size() > 1) {
361 logger.log(Level.WARNING,
"Unexpected number of results for Solr highlighting query: {0}", q);
363 String highlightedContent;
364 Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
365 String highlightField = isLiteral
366 ? LuceneQuery.HIGHLIGHT_FIELD
367 : Server.Schema.CONTENT_STR.toString();
368 if (responseHighlight == null) {
369 highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
371 Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
373 if (responseHighlightID == null) {
374 highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
376 List<String> contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
377 if (contentHighlights == null) {
378 highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
381 highlightedContent = contentHighlights.get(0).trim();
385 highlightedContent = insertAnchors(highlightedContent);
387 return "<html><pre>" + highlightedContent +
"</pre></html>";
388 }
catch (TskCoreException | KeywordSearchModuleException | NoOpenCoreException ex) {
389 logger.log(Level.SEVERE,
"Error getting highlighted text for " + objectId, ex);
390 return NbBundle.getMessage(this.getClass(),
"HighlightedMatchesSource.getMarkup.queryFailedMsg");
395 public String toString() {
396 return NbBundle.getMessage(this.getClass(),
"HighlightedMatchesSource.toString");
400 public boolean isSearchable() {
405 public String getAnchorPrefix() {
406 return ANCHOR_PREFIX;
410 public int getNumberHits() {
411 if (!this.numberOfHitsPerPage.containsKey(
this.currentPage)) {
414 return this.numberOfHitsPerPage.get(this.currentPage);
430 static String attemptManualHighlighting(SolrDocumentList solrDocumentList, String highlightField, Collection<String> keywords) {
431 if (solrDocumentList.isEmpty()) {
432 return NbBundle.getMessage(HighlightedText.class,
"HighlightedMatchesSource.getMarkup.noMatchMsg");
438 String text = solrDocumentList.get(0).getOrDefault(highlightField,
"").toString();
445 text = StringEscapeUtils.escapeHtml(text);
447 StringBuilder highlightedText =
new StringBuilder(
"");
450 for (String keyword : keywords) {
452 final String escapedKeyword = StringEscapeUtils.escapeHtml(keyword);
454 int hitOffset = StringUtils.indexOfIgnoreCase(text, escapedKeyword, textOffset);
455 while (hitOffset != -1) {
457 highlightedText.append(text.substring(textOffset, hitOffset));
459 highlightedText.append(HIGHLIGHT_PRE);
460 highlightedText.append(keyword);
461 highlightedText.append(HIGHLIGHT_POST);
464 textOffset = hitOffset + escapedKeyword.length();
466 hitOffset = StringUtils.indexOfIgnoreCase(text, escapedKeyword, textOffset);
469 highlightedText.append(text.substring(textOffset, text.length()));
471 if (highlightedText.length() == 0) {
472 return NbBundle.getMessage(HighlightedText.class,
"HighlightedMatchesSource.getMarkup.noMatchMsg");
475 text = highlightedText.toString();
476 highlightedText =
new StringBuilder(
"");
489 private String insertAnchors(String searchableContent) {
490 StringBuilder buf =
new StringBuilder(searchableContent);
491 final String searchToken = HIGHLIGHT_PRE;
492 final int indexSearchTokLen = searchToken.length();
493 final String insertPre =
"<a name='" + ANCHOR_PREFIX;
494 final String insertPost =
"'></a>";
496 int searchOffset = 0;
497 int index = buf.indexOf(searchToken, searchOffset);
499 String insertString = insertPre + Integer.toString(count + 1) + insertPost;
500 int insertStringLen = insertString.length();
501 buf.insert(index, insertString);
502 searchOffset = index + indexSearchTokLen + insertStringLen;
504 index = buf.indexOf(searchToken, searchOffset);
508 this.numberOfHitsPerPage.put(this.currentPage, count);
509 if (this.currentItem() == 0 && this.hasNextItem()) {
513 return buf.toString();