19 package org.sleuthkit.autopsy.keywordsearch;
21 import com.google.common.collect.Iterators;
22 import com.google.common.collect.Range;
23 import com.google.common.collect.RangeSet;
24 import com.google.common.collect.TreeRangeSet;
25 import java.util.Arrays;
26 import java.util.Collection;
27 import java.util.HashMap;
28 import java.util.HashSet;
29 import java.util.List;
32 import java.util.TreeMap;
33 import java.util.logging.Level;
34 import java.util.stream.Collectors;
35 import javax.annotation.concurrent.GuardedBy;
36 import org.apache.commons.lang.StringEscapeUtils;
37 import org.apache.commons.lang.StringUtils;
38 import org.apache.commons.lang3.math.NumberUtils;
39 import org.apache.solr.client.solrj.SolrQuery;
40 import org.apache.solr.client.solrj.SolrRequest.METHOD;
41 import org.apache.solr.client.solrj.response.QueryResponse;
42 import org.apache.solr.common.SolrDocumentList;
43 import org.openide.util.NbBundle;
44 import org.openide.util.NbBundle.Messages;
/**
 * IndexedText implementation that returns the text of one page (chunk) of a
 * Solr-indexed document as HTML, with keyword hits wrapped in highlight spans
 * and preceded by numbered anchors. It also tracks, per page, the number of
 * hits and the current hit so callers can step through pages and hits.
 */
56 class HighlightedText
implements IndexedText {
58 private static final Logger logger = Logger.getLogger(HighlightedText.class.getName());
// Solr debug info is requested on highlight queries only in development builds.
60 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
// Blackboard attribute types read from the artifact this instance may be built from.
62 private static final BlackboardAttribute.Type TSK_KEYWORD_SEARCH_TYPE =
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE);
63 private static final BlackboardAttribute.Type TSK_KEYWORD =
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD);
64 static private final BlackboardAttribute.Type TSK_ASSOCIATED_ARTIFACT =
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT);
65 static private final BlackboardAttribute.Type TSK_KEYWORD_REGEXP =
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP);
// Markup placed immediately before / after each highlighted hit.
67 private static final String HIGHLIGHT_PRE =
"<span style='background:yellow'>";
68 private static final String HIGHLIGHT_POST =
"</span>";
// Prefix of the anchor names that insertAnchors() emits; the hit number is appended.
69 private static final String ANCHOR_PREFIX = HighlightedText.class.getName() +
"_";
71 final private Server solrServer = KeywordSearch.getServer();
// Object id of the indexed document whose text is displayed.
73 private final long objectId;
// Keyword strings to highlight.
77 private final Set<String> keywords =
new HashSet<>();
// Number of pages (chunks); initially taken from solrServer.queryNumFileChunks().
79 private int numberPages;
// Page currently displayed; used as the key into the per-page maps below.
80 private Integer currentPage = 0;
// Set to true once page info has been loaded; loadPageInfo() checks it first.
83 private
boolean isPageInfoLoaded = false;
// Page -> number of hits on that page. Entries are created with 0 and updated
// by insertAnchors() when the page is rendered.
88 private final TreeMap<Integer, Integer> numberOfHitsPerPage = new TreeMap<>();
// Live key-set view of numberOfHitsPerPage (ascending page order); the page
// navigation methods iterate over it.
93 private final Set<Integer> pages = numberOfHitsPerPage.keySet();
// Page -> index of the current hit on that page; entries are created with 0.
97 private final HashMap<Integer, Integer> currentHitPerPage = new HashMap<>();
// Query results to highlight; assigned by loadPageInfoFromArtifact() when an artifact is used.
99 private QueryResults hits = null;
// Artifact this instance was created from, or null when it was not created from one.
100 private BlackboardArtifact artifact;
// Query type read from the artifact's TSK_KEYWORD_SEARCH_TYPE attribute; null if the attribute is absent.
101 private KeywordSearch.QueryType qt;
// Whether the query behind 'hits' is literal; getText() uses it to pick the Solr field to highlight.
102 private
boolean isLiteral;
/**
 * Creates an instance for the indexed document with the given object id.
 *
 * @param objectId id of the indexed document whose text will be shown
 * @param hits     query results to highlight
 */
// NOTE(review): only the objectId assignment is visible in this excerpt;
// presumably 'hits' is stored in the field of the same name on a line not
// shown here - confirm.
115 HighlightedText(
long objectId, QueryResults hits) {
116 this.objectId = objectId;
/**
 * Creates an instance from a blackboard artifact. The object id is the value
 * of the artifact's TSK_ASSOCIATED_ARTIFACT attribute when that attribute is
 * present; the other visible assignment uses the artifact's own object id
 * (the branch keyword between the two assignments is not shown in this
 * excerpt).
 *
 * @param artifact artifact to take the object id (and later the keyword) from
 * @throws TskCoreException declared by the constructor; raised by the
 *                          artifact attribute lookup
 */
128 HighlightedText(BlackboardArtifact artifact)
throws TskCoreException {
129 this.artifact = artifact;
130 BlackboardAttribute attribute = artifact.getAttribute(TSK_ASSOCIATED_ARTIFACT);
131 if (attribute != null) {
132 this.objectId = attribute.getValueLong();
134 this.objectId = artifact.getObjectID();
/**
 * Loads the page (chunk) information for this document once: asks Solr for
 * the chunk count, then delegates to loadPageInfoFromArtifact() when an
 * artifact is set, or to loadPageInfoFromHits() when the document has chunks.
 * The remaining visible statements register a single page 1 with zero hits
 * and mark the info as loaded.
 */
// NOTE(review): the body of the isPageInfoLoaded guard and the keyword that
// introduces the single-page fallback are on lines not shown here; presumably
// an early return and a final 'else' - confirm.
143 @Messages({
"HighlightedText.query.exception.msg=Could not perform the query to get chunk info and get highlights:"})
144 synchronized private void loadPageInfo() throws TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
145 if (isPageInfoLoaded) {
149 this.numberPages = solrServer.queryNumFileChunks(this.objectId);
151 if (artifact != null) {
152 loadPageInfoFromArtifact();
153 }
else if (numberPages != 0) {
155 loadPageInfoFromHits();
158 this.numberPages = 1;
159 this.currentPage = 1;
160 numberOfHitsPerPage.put(1, 0);
162 currentHitPerPage.put(1, 0);
163 isPageInfoLoaded =
true;
/**
 * Loads page information using the keyword stored on the artifact: adds the
 * TSK_KEYWORD value to the keyword set, reads the optional query type, builds
 * a Keyword, runs a chunk-filtered query for this object id and hands the
 * results to loadPageInfoFromHits().
 */
// NOTE(review): the TSK_KEYWORD and TSK_KEYWORD_REGEXP attributes are
// dereferenced without a null check, unlike TSK_KEYWORD_SEARCH_TYPE - confirm
// they are always present on the artifacts passed in.
// NOTE(review): the two 'keywordQuery =' assignments are presumably in
// alternative branches (the condition is not shown in this excerpt).
173 synchronized private void loadPageInfoFromArtifact() throws TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
174 final String keyword = artifact.getAttribute(TSK_KEYWORD).getValueString();
175 this.keywords.add(keyword);
178 final BlackboardAttribute queryTypeAttribute = artifact.getAttribute(TSK_KEYWORD_SEARCH_TYPE);
// The stored int is used as an index into QueryType.values(), so it depends on
// the enum's declaration order.
179 qt = (queryTypeAttribute != null)
180 ? KeywordSearch.QueryType.values()[queryTypeAttribute.getValueInt()] : null;
182 Keyword keywordQuery = null;
186 keywordQuery =
new Keyword(keyword,
true,
true);
189 String regexp = artifact.getAttribute(TSK_KEYWORD_REGEXP).getValueString();
190 keywordQuery =
new Keyword(regexp,
false,
false);
193 KeywordSearchQuery chunksQuery = KeywordSearchUtil.getQueryForKeyword(keywordQuery,
new KeywordList(Arrays.asList(keywordQuery)));
// Restrict the query to this object id via a CHUNK filter.
197 chunksQuery.addFilter(
new KeywordQueryFilter(FilterType.CHUNK,
this.objectId));
199 hits = chunksQuery.performQuery();
200 loadPageInfoFromHits();
/**
 * Populates the per-page maps from 'hits'. For every hit that belongs to this
 * object id and has a non-zero chunk id, the chunk is registered as a page
 * with zero hits and zero current hit. When an artifact is set, the visible
 * code registers the page only if the hit text contains one of the known
 * keywords; in the other visible path the hit text itself is added to the
 * keyword set when it is not blank. Finally the current page becomes the
 * first registered page (or 1 if there is none) and the info is marked as
 * loaded.
 */
// NOTE(review): the branch keyword separating the artifact and non-artifact
// paths is on a line not shown in this excerpt.
206 synchronized private void loadPageInfoFromHits() {
207 isLiteral = hits.getQuery().isLiteral();
209 for (Keyword k : hits.getKeywords()) {
210 for (KeywordHit hit : hits.getResults(k)) {
211 int chunkID = hit.getChunkId();
212 if (artifact != null) {
213 if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) {
214 String hit1 = hit.getHit();
215 if (keywords.stream().anyMatch(hit1::contains)) {
216 numberOfHitsPerPage.put(chunkID, 0);
217 currentHitPerPage.put(chunkID, 0);
222 if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) {
224 numberOfHitsPerPage.put(chunkID, 0);
225 currentHitPerPage.put(chunkID, 0);
227 if (StringUtils.isNotBlank(hit.getHit())) {
228 this.keywords.add(hit.getHit());
// 'pages' is the sorted key view, so findFirst() yields the lowest page number.
236 this.currentPage = pages.stream().findFirst().orElse(1);
238 isPageInfoLoaded =
true;
/**
 * Builds a Solr query clause of the form
 * {@code <LuceneQuery.HIGHLIGHT_FIELD>:"<escaped query>"}.
 *
 * @param query raw keyword text; escaped with
 *              KeywordSearchUtil.escapeLuceneQuery() before being quoted
 * @return the field-qualified, quoted query string
 */
249 static private String constructEscapedSolrQuery(String query) {
250 return LuceneQuery.HIGHLIGHT_FIELD +
":" +
"\"" + KeywordSearchUtil.escapeLuceneQuery(query) +
"\"";
/**
 * Returns the position of the current page within the sorted 'pages' view.
 *
 * @return zero-based index of currentPage, or -1 (per Guava's
 *         Iterators.indexOf) if it is not one of the pages
 */
253 private int getIndexOfCurrentPage() {
254 return Iterators.indexOf(pages.iterator(), this.currentPage::equals);
/**
 * @return the value of the numberPages field
 */
// NOTE(review): line 259 of the original file is not shown in this excerpt.
258 public int getNumberPages() {
260 return this.numberPages;
/**
 * @return the current page number (unboxed from the Integer field)
 */
264 public int getCurrentPage() {
265 return this.currentPage;
/**
 * @return true if the current page is not the last entry of 'pages'
 */
269 public boolean hasNextPage() {
270 return getIndexOfCurrentPage() < pages.size() - 1;
/**
 * @return true if the current page is not the first entry of 'pages'
 */
274 public boolean hasPreviousPage() {
275 return getIndexOfCurrentPage() > 0;
/**
 * Moves currentPage to the entry of 'pages' that follows it.
 *
 * @throws IllegalStateException with message "No next page."
 */
// NOTE(review): the guard condition and the return statement are on lines not
// shown in this excerpt; presumably this mirrors previousPage(), which guards
// on hasPreviousPage() - confirm.
279 public int nextPage() {
281 currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() + 1);
284 throw new IllegalStateException(
"No next page.");
/**
 * Moves currentPage to the entry of 'pages' that precedes it when
 * hasPreviousPage() is true.
 *
 * @throws IllegalStateException with message "No previous page."
 */
// NOTE(review): the return statement and the 'else' before the throw are on
// lines not shown in this excerpt.
289 public int previousPage() {
290 if (hasPreviousPage()) {
291 currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() - 1);
294 throw new IllegalStateException(
"No previous page.");
/**
 * @return true when the current hit index on the current page is lower than
 *         the number of hits recorded for that page
 */
// NOTE(review): the body of the guard for a page with no currentHitPerPage
// entry is on a line not shown in this excerpt.
299 public boolean hasNextItem() {
300 if (!this.currentHitPerPage.containsKey(currentPage)) {
303 return this.currentHitPerPage.get(currentPage) < this.numberOfHitsPerPage.get(currentPage);
/**
 * @return true when the current hit index on the current page is greater
 *         than 1
 */
// NOTE(review): the body of the guard for a page with no currentHitPerPage
// entry is on a line not shown in this excerpt.
307 public boolean hasPreviousItem() {
308 if (!this.currentHitPerPage.containsKey(currentPage)) {
311 return this.currentHitPerPage.get(currentPage) > 1;
/**
 * Advances the current hit index of the current page by one and stores it.
 *
 * @throws IllegalStateException with message "No next item." when
 *                               hasNextItem() is false
 */
// NOTE(review): the return statement is on a line not shown in this excerpt.
315 public int nextItem() {
316 if (!hasNextItem()) {
317 throw new IllegalStateException(
"No next item.");
319 int cur = currentHitPerPage.get(currentPage) + 1;
320 currentHitPerPage.put(currentPage, cur);
/**
 * Moves the current hit index of the current page back by one and stores it.
 *
 * @throws IllegalStateException with message "No previous item." when
 *                               hasPreviousItem() is false
 */
// NOTE(review): the return statement is on a line not shown in this excerpt.
325 public int previousItem() {
326 if (!hasPreviousItem()) {
327 throw new IllegalStateException(
"No previous item.");
329 int cur = currentHitPerPage.get(currentPage) - 1;
330 currentHitPerPage.put(currentPage, cur);
/**
 * @return the current hit index recorded for the current page
 */
// NOTE(review): the body of the guard for a page with no currentHitPerPage
// entry is on a line not shown in this excerpt.
335 public int currentItem() {
336 if (!this.currentHitPerPage.containsKey(currentPage)) {
339 return currentHitPerPage.get(currentPage);
/**
 * Fetches the current page from Solr and returns it as an HTML string of the
 * form {@code <html><pre>...</pre></html>} with hits highlighted and anchors
 * inserted. If Solr returns no highlighting for the document, the text is
 * highlighted locally via attemptManualHighlighting().
 *
 * @return the highlighted HTML, or the bundle message
 *         "HighlightedMatchesSource.getMarkup.queryFailedMsg" when a
 *         TskCoreException, KeywordSearchModuleException or
 *         NoOpenCoreException is caught
 */
// NOTE(review): several lines of this method are not shown in this excerpt -
// the declaration of 'chunkID', the opening 'try', and the conditions that
// choose between the highlight query setup and the plain
// 'q.setQuery(filterQuery)' setup, among others. The comments below describe
// only the visible statements.
343 public String getText() {
345 String highlightField =
"";
348 SolrQuery q =
new SolrQuery();
349 q.setShowDebugInfo(DEBUG);
// Solr document id: the object id, with "_<chunk>" appended unless the chunk is "0".
351 String contentIdStr = Long.toString(this.objectId);
352 if (numberPages != 0) {
353 chunkID = Integer.toString(this.currentPage);
354 contentIdStr +=
"0".equals(chunkID) ?
"" :
"_" + chunkID;
356 final String filterQuery = Server.Schema.ID.toString() +
":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
358 double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
// Literal queries and indexes with schema version below 2.0 use
// LuceneQuery.HIGHLIGHT_FIELD; otherwise Server.Schema.CONTENT_STR is used.
360 highlightField = (isLiteral || (indexSchemaVersion < 2.0))
361 ? LuceneQuery.HIGHLIGHT_FIELD
362 : Server.Schema.CONTENT_STR.toString();
// One quoted clause per keyword, joined with spaces.
365 final String highlightQuery = keywords.stream()
366 .map(HighlightedText::constructEscapedSolrQuery)
367 .collect(Collectors.joining(
" "));
369 q.setQuery(highlightQuery);
370 q.addField(highlightField);
371 q.addFilterQuery(filterQuery);
372 q.addHighlightField(highlightField);
// Fragment size 0 asks Solr not to split the field into fragments.
373 q.setHighlightFragsize(0);
376 q.setParam(
"hl.useFastVectorHighlighter",
"on");
377 q.setParam(
"hl.tag.pre", HIGHLIGHT_PRE);
378 q.setParam(
"hl.tag.post", HIGHLIGHT_POST);
379 q.setParam(
"hl.fragListBuilder",
"single");
382 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
// Alternative setup: query by document id only and fetch the field without
// Solr highlighting (presumably the other branch of an elided condition).
388 q.setQuery(filterQuery);
389 q.addField(highlightField);
392 QueryResponse response = solrServer.query(q, METHOD.POST);
397 if (response.getResults().size() > 1) {
398 logger.log(Level.WARNING,
"Unexpected number of results for Solr highlighting query: {0}", q);
400 String highlightedContent;
401 Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
// Fall back to local highlighting whenever any level of the Solr highlighting
// response is missing.
403 if (responseHighlight == null) {
404 highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
406 Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
408 if (responseHighlightID == null) {
409 highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
// NOTE(review): the lookup key is LuceneQuery.HIGHLIGHT_FIELD, whereas the
// query highlights 'highlightField', which may be CONTENT_STR - confirm this
// is intended (it leads to the manual-highlighting fallback in that case).
411 List<String> contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
412 if (contentHighlights == null) {
413 highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
416 highlightedContent = contentHighlights.get(0).trim();
// Adds numbered anchors and records the hit count for the current page.
420 highlightedContent = insertAnchors(highlightedContent);
422 return "<html><pre>" + highlightedContent +
"</pre></html>";
423 }
catch (TskCoreException | KeywordSearchModuleException | NoOpenCoreException ex) {
424 logger.log(Level.SEVERE,
"Error getting highlighted text for Solr doc id " + objectId +
", chunkID " + chunkID +
", highlight query: " + highlightField, ex);
425 return NbBundle.getMessage(this.getClass(),
"HighlightedMatchesSource.getMarkup.queryFailedMsg");
/**
 * @return the bundle message registered under
 *         "HighlightedMatchesSource.toString"
 */
430 public String toString() {
431 return NbBundle.getMessage(this.getClass(),
"HighlightedMatchesSource.toString");
/**
 * Reports whether this text source is searchable.
 */
// NOTE(review): the method body (and therefore the returned value) is on
// lines not shown in this excerpt.
435 public boolean isSearchable() {
/**
 * @return ANCHOR_PREFIX, the prefix of the anchor names that insertAnchors()
 *         places before each hit
 */
440 public String getAnchorPrefix() {
441 return ANCHOR_PREFIX;
/**
 * @return the number of hits recorded for the current page
 */
// NOTE(review): the body of the guard for a page with no numberOfHitsPerPage
// entry is on a line not shown in this excerpt.
445 public int getNumberHits() {
446 if (!this.numberOfHitsPerPage.containsKey(
this.currentPage)) {
449 return this.numberOfHitsPerPage.get(this.currentPage);
/**
 * Highlights keywords in the text of the first returned Solr document
 * without relying on Solr's highlighter. The field text is HTML-escaped,
 * every case-insensitive occurrence of every (HTML-escaped) keyword is
 * collected as a half-open range, and HIGHLIGHT_PRE / HIGHLIGHT_POST are
 * inserted around each resulting range.
 *
 * @param solrDocumentList documents returned by the Solr query; only the
 *                         first one is used
 * @param highlightField   name of the document field holding the text; an
 *                         empty string is used if the field is absent
 * @param keywords         keyword strings to highlight
 * @return the escaped text with highlight markup, or the bundle message
 *         "HighlightedMatchesSource.getMarkup.noMatchMsg" when the list is
 *         empty
 */
// NOTE(review): if a keyword is the empty string, searchOffset would equal
// hitOffset after the update and the scan would not advance - confirm that
// callers never pass an empty keyword.
466 static String attemptManualHighlighting(SolrDocumentList solrDocumentList, String highlightField, Collection<String> keywords) {
467 if (solrDocumentList.isEmpty()) {
468 return NbBundle.getMessage(HighlightedText.class,
"HighlightedMatchesSource.getMarkup.noMatchMsg");
474 String text = solrDocumentList.get(0).getOrDefault(highlightField,
"").toString();
// Escape first so that the offsets computed below refer to the final text.
481 text = StringEscapeUtils.escapeHtml(text);
// A range set coalesces overlapping ranges, so overlapping hits end up in a
// single highlight span.
483 TreeRangeSet<Integer> highlights = TreeRangeSet.create();
486 for (String keyword : keywords) {
// The keyword is escaped the same way as the text so the two can be compared.
488 final String escapedKeyword = StringEscapeUtils.escapeHtml(keyword);
489 int searchOffset = 0;
490 int hitOffset = StringUtils.indexOfIgnoreCase(text, escapedKeyword, searchOffset);
491 while (hitOffset != -1) {
// Resume searching immediately after this hit.
493 searchOffset = hitOffset + escapedKeyword.length();
496 highlights.add(Range.closedOpen(hitOffset, searchOffset));
499 hitOffset = StringUtils.indexOfIgnoreCase(text, escapedKeyword, searchOffset);
503 StringBuilder highlightedText =
new StringBuilder(text);
// Running total of inserted markup length; shifts the original offsets to
// their positions in the growing buffer.
504 int totalHighLightLengthInserted = 0;
506 for (Range<Integer> highlightRange : highlights.asRanges()) {
507 int hStart = highlightRange.lowerEndpoint();
508 int hEnd = highlightRange.upperEndpoint();
511 highlightedText.insert(hStart + totalHighLightLengthInserted, HIGHLIGHT_PRE);
512 totalHighLightLengthInserted += HIGHLIGHT_PRE.length();
513 highlightedText.insert(hEnd + totalHighLightLengthInserted, HIGHLIGHT_POST);
514 totalHighLightLengthInserted += HIGHLIGHT_POST.length();
517 return highlightedText.toString();
/**
 * Inserts an {@code <a name='ANCHOR_PREFIX + n'></a>} element in front of
 * each occurrence of HIGHLIGHT_PRE in the given content, numbering the
 * anchors from 1, and records the resulting count as the number of hits for
 * the current page.
 *
 * @param searchableContent highlighted HTML content
 * @return the content with the anchors inserted
 */
// NOTE(review): the declaration and increment of 'count', the loop header
// and the body of the final 'if' are on lines not shown in this excerpt; the
// description above assumes a loop over every occurrence of the token -
// confirm.
528 private String insertAnchors(String searchableContent) {
529 StringBuilder buf =
new StringBuilder(searchableContent);
530 final String searchToken = HIGHLIGHT_PRE;
531 final int indexSearchTokLen = searchToken.length();
532 final String insertPre =
"<a name='" + ANCHOR_PREFIX;
533 final String insertPost =
"'></a>";
535 int searchOffset = 0;
536 int index = buf.indexOf(searchToken, searchOffset);
538 String insertString = insertPre + Integer.toString(count + 1) + insertPost;
539 int insertStringLen = insertString.length();
540 buf.insert(index, insertString);
// Skip past the inserted anchor and the highlight tag that follows it.
541 searchOffset = index + indexSearchTokLen + insertStringLen;
543 index = buf.indexOf(searchToken, searchOffset);
547 this.numberOfHitsPerPage.put(this.currentPage, count);
548 if (this.currentItem() == 0 && this.hasNextItem()) {
552 return buf.toString();