19 package org.sleuthkit.autopsy.keywordsearch;
21 import com.google.common.collect.Iterators;
22 import com.google.common.collect.Range;
23 import com.google.common.collect.TreeRangeSet;
24 import java.util.Arrays;
25 import java.util.Collection;
26 import java.util.HashMap;
27 import java.util.HashSet;
28 import java.util.List;
31 import java.util.TreeMap;
32 import java.util.logging.Level;
33 import java.util.stream.Collectors;
34 import javax.annotation.concurrent.GuardedBy;
35 import org.apache.commons.lang.StringEscapeUtils;
36 import org.apache.commons.lang.StringUtils;
37 import org.apache.commons.lang3.math.NumberUtils;
38 import org.apache.solr.client.solrj.SolrQuery;
39 import org.apache.solr.client.solrj.SolrRequest.METHOD;
40 import org.apache.solr.client.solrj.response.QueryResponse;
41 import org.apache.solr.common.SolrDocumentList;
42 import org.openide.util.NbBundle;
/**
 * Indexed-text source that renders one Solr document (or one chunk/page of
 * it) as HTML with keyword hits wrapped in highlight spans. The fields below
 * hold the highlighting constants, the Solr handle, and the paging / per-page
 * hit bookkeeping used by the navigation methods.
 */
54 class HighlightedText
implements IndexedText {
56 private static final Logger logger = Logger.getLogger(HighlightedText.class.getName());
// True when the build type is DEVELOPMENT; passed to SolrQuery.setShowDebugInfo in getText().
58 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
// Blackboard attribute types looked up on the artifact this text is built from.
60 private static final BlackboardAttribute.Type TSK_KEYWORD_SEARCH_TYPE =
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE);
61 private static final BlackboardAttribute.Type TSK_KEYWORD =
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD);
62 static private final BlackboardAttribute.Type TSK_ASSOCIATED_ARTIFACT =
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT);
63 static private final BlackboardAttribute.Type TSK_KEYWORD_REGEXP =
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP);
// Opening/closing HTML placed around every hit, both by Solr (hl.tag.pre / hl.tag.post)
// and by attemptManualHighlighting().
65 private static final String HIGHLIGHT_PRE =
"<span style='background:yellow'>";
66 private static final String HIGHLIGHT_POST =
"</span>";
// Prefix of the name given to the anchor that insertAnchors() puts before each hit.
67 private static final String ANCHOR_PREFIX = HighlightedText.class.getName() +
"_";
// Solr server handle used for the chunk-count and highlighting queries.
69 final private Server solrServer = KeywordSearch.getServer();
// Object id used to build the Solr document id and the chunk filter.
71 private final long solrObjectId;
// Terms to highlight; filled from the artifact's keyword attribute and from hit text.
75 private final Set<String> keywords =
new HashSet<>();
// Chunk count reported by Solr for solrObjectId (loadPageInfo() may force it to 1).
77 private int numberPages;
// Page (chunk id) currently selected.
78 private Integer currentPage = 0;
// Set to true once page information has been loaded; checked at the top of loadPageInfo().
81 private
boolean isPageInfoLoaded = false;
// Page number -> number of hits on that page; a TreeMap, so keys iterate in ascending order.
86 private final TreeMap<Integer, Integer> numberOfHitsPerPage = new TreeMap<>();
// Key-set view of numberOfHitsPerPage: the pages that have an entry, in ascending order.
91 private final Set<Integer> pages = numberOfHitsPerPage.keySet();
// Page number -> index of the currently selected hit on that page.
95 private final HashMap<Integer, Integer> currentHitPerPage = new HashMap<>();
// Query results used to compute page information (see loadPageInfoFromHits()).
97 private QueryResults hits = null;
// Source artifact; non-null only when constructed from a BlackboardArtifact.
98 private BlackboardArtifact artifact;
// Query type decoded from the artifact's TSK_KEYWORD_SEARCH_TYPE attribute, or null if absent.
99 private KeywordSearch.QueryType qt;
// Copied from hits.getQuery().isLiteral() in loadPageInfoFromHits().
100 private
boolean isLiteral;
/**
 * Creates a highlighted-text source for an already-executed search.
 *
 * @param solrObjectId object id of the indexed content; stored in the
 *                     solrObjectId field.
 * @param hits         results of the search. NOTE(review): the statement
 *                     that stores this argument is not visible in this
 *                     listing; presumably it is assigned to the hits
 *                     field, confirm.
 */
113 HighlightedText(
long solrObjectId, QueryResults hits) {
114 this.solrObjectId = solrObjectId;
/**
 * Creates a highlighted-text source from a blackboard artifact.
 *
 * The Solr object id is taken from the artifact's TSK_ASSOCIATED_ARTIFACT
 * attribute when that attribute is present, and from the artifact's own
 * object id otherwise (the else line itself is not visible in this listing;
 * presumably line 132 is the fallback branch, confirm).
 *
 * @param artifact artifact to display; kept in the artifact field.
 *
 * @throws TskCoreException declared on the constructor; presumably raised
 *                          by the attribute lookup, confirm.
 */
126 HighlightedText(BlackboardArtifact artifact)
throws TskCoreException {
127 this.artifact = artifact;
128 BlackboardAttribute attribute = artifact.getAttribute(TSK_ASSOCIATED_ARTIFACT);
129 if (attribute != null) {
130 this.solrObjectId = attribute.getValueLong();
132 this.solrObjectId = artifact.getObjectID();
/**
 * Loads paging information for this text, doing the work at most once
 * (guarded by isPageInfoLoaded).
 *
 * Asks Solr for the number of chunks of solrObjectId, then delegates to
 * loadPageInfoFromArtifact() when an artifact is set, or to
 * loadPageInfoFromHits() when the content has chunks. The remaining visible
 * statements set up a single page numbered 1 with zero hits; presumably that
 * is the else branch for un-chunked content (the else line is not visible in
 * this listing, confirm).
 *
 * @throws TskCoreException             declared; see loadPageInfoFromArtifact().
 * @throws KeywordSearchModuleException declared; see loadPageInfoFromArtifact().
 * @throws NoOpenCoreException          declared; see loadPageInfoFromArtifact().
 */
141 synchronized private void loadPageInfo() throws TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
// Already loaded: nothing further should happen (the statement inside this guard is not visible here).
142 if (isPageInfoLoaded) {
146 this.numberPages = solrServer.queryNumFileChunks(this.solrObjectId);
148 if (artifact != null) {
149 loadPageInfoFromArtifact();
150 }
else if (numberPages != 0) {
152 loadPageInfoFromHits();
// Single-page setup: one page, numbered 1, with no hits counted yet.
155 this.numberPages = 1;
156 this.currentPage = 1;
157 numberOfHitsPerPage.put(1, 0);
158 currentHitPerPage.put(1, 0);
159 isPageInfoLoaded =
true;
/**
 * Rebuilds the search that produced the artifact, restricted to this
 * object's chunks, and loads page information from its results.
 *
 * Reads the keyword (and optionally the search type and regular expression)
 * from the artifact, builds a Keyword from them, queries with a CHUNK filter
 * on solrObjectId, stores the results in hits and then calls
 * loadPageInfoFromHits().
 *
 * @throws TskCoreException             declared; presumably from the artifact attribute lookups.
 * @throws KeywordSearchModuleException declared; presumably from performing the query.
 * @throws NoOpenCoreException          declared; presumably from performing the query.
 */
169 synchronized private void loadPageInfoFromArtifact() throws TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
// NOTE(review): the attribute is dereferenced without a null check, unlike the
// TSK_ASSOCIATED_ARTIFACT lookup in the constructor - confirm TSK_KEYWORD is always present.
170 final String keyword = artifact.getAttribute(TSK_KEYWORD).getValueString();
171 this.keywords.add(keyword);
174 final BlackboardAttribute queryTypeAttribute = artifact.getAttribute(TSK_KEYWORD_SEARCH_TYPE);
// The stored int is used as an index into QueryType.values(); qt is null when the attribute is absent.
// NOTE(review): an out-of-range stored value would throw ArrayIndexOutOfBoundsException here.
175 qt = (queryTypeAttribute != null)
176 ? KeywordSearch.QueryType.values()[queryTypeAttribute.getValueInt()] : null;
178 Keyword keywordQuery = null;
// Two ways of building the Keyword are visible below: from the keyword with both flags true,
// or from the TSK_KEYWORD_REGEXP value with both flags false. The condition that selects
// between them is not visible in this listing (presumably based on qt - confirm).
182 keywordQuery =
new Keyword(keyword,
true,
true);
185 String regexp = artifact.getAttribute(TSK_KEYWORD_REGEXP).getValueString();
186 keywordQuery =
new Keyword(regexp,
false,
false);
189 KeywordSearchQuery chunksQuery = KeywordSearchUtil.getQueryForKeyword(keywordQuery,
new KeywordList(Arrays.asList(keywordQuery)));
// Limit the search to the chunks of this object.
193 chunksQuery.addFilter(
new KeywordQueryFilter(FilterType.CHUNK,
this.solrObjectId));
195 hits = chunksQuery.performQuery();
196 loadPageInfoFromHits();
/**
 * Derives the set of pages and the highlight keywords from the hits field.
 *
 * Every hit that belongs to this object and has a non-zero chunk id
 * registers its chunk as a page, with the hit count and current-hit index
 * both initialised to 0. When an artifact is set, a hit only registers its
 * page if its text contains one of the known keywords. Non-blank hit text is
 * also added to keywords at line 230; which branch that belongs to is not
 * visible in this listing (presumably the no-artifact branch, confirm).
 * Finally the current page becomes the lowest registered page (or 1 when
 * none) and isPageInfoLoaded is set.
 */
202 synchronized private void loadPageInfoFromHits() {
203 isLiteral = hits.getQuery().isLiteral();
211 for (Keyword k : hits.getKeywords()) {
212 for (KeywordHit hit : hits.getResults(k)) {
213 int chunkID = hit.getChunkId();
214 if (artifact != null) {
215 if (chunkID != 0 && this.solrObjectId == hit.getSolrObjectId()) {
// NOTE(review): hit.getHit() is blank-checked at line 229; if it can be null,
// the method reference hit1::contains below would throw - confirm.
216 String hit1 = hit.getHit();
217 if (keywords.stream().anyMatch(hit1::contains)) {
218 numberOfHitsPerPage.put(chunkID, 0);
219 currentHitPerPage.put(chunkID, 0);
// Second registration path (presumably the branch taken when there is no artifact).
224 if (chunkID != 0 && this.solrObjectId == hit.getSolrObjectId()) {
226 numberOfHitsPerPage.put(chunkID, 0);
227 currentHitPerPage.put(chunkID, 0);
229 if (StringUtils.isNotBlank(hit.getHit())) {
230 this.keywords.add(hit.getHit());
// pages iterates in ascending order, so this selects the lowest page with a hit.
238 this.currentPage = pages.stream().findFirst().orElse(1);
240 isPageInfoLoaded =
true;
/**
 * Builds a Solr query clause of the form FIELD:"escaped text", where FIELD
 * is LuceneQuery.HIGHLIGHT_FIELD and the text is passed through
 * KeywordSearchUtil.escapeLuceneQuery before being wrapped in double quotes.
 *
 * @param query raw text to search for.
 *
 * @return the quoted, escaped clause for the highlight field.
 */
251 static private String constructEscapedSolrQuery(String query) {
252 return LuceneQuery.HIGHLIGHT_FIELD +
":" +
"\"" + KeywordSearchUtil.escapeLuceneQuery(query) +
"\"";
/**
 * Finds the position of currentPage within the ordered set of pages that
 * have an entry in numberOfHitsPerPage.
 *
 * @return the zero-based position as reported by Guava's Iterators.indexOf
 *         (which reports -1 when no element matches).
 */
255 private int getIndexOfCurrentPage() {
256 return Iterators.indexOf(pages.iterator(), this.currentPage::equals);
/**
 * @return the value of the numberPages field, as set by loadPageInfo().
 */
260 public int getNumberPages() {
262 return this.numberPages;
/**
 * @return the currently selected page number (the currentPage field, unboxed).
 */
266 public int getCurrentPage() {
267 return this.currentPage;
/**
 * @return true when the current page is not the last entry in the ordered
 *         set of pages.
 */
271 public boolean hasNextPage() {
272 return getIndexOfCurrentPage() < pages.size() - 1;
/**
 * @return true when the current page is found at a position after the first
 *         entry in the ordered set of pages.
 */
276 public boolean hasPreviousPage() {
277 return getIndexOfCurrentPage() > 0;
/**
 * Moves to the page that follows the current one in the ordered set of
 * pages.
 *
 * NOTE(review): the guard and the return statement are not visible in this
 * listing; presumably this mirrors previousPage() (guarded by hasNextPage(),
 * returning the new page), confirm.
 *
 * @throws IllegalStateException with message "No next page." (presumably
 *                               when there is no following page).
 */
281 public int nextPage() {
283 currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() + 1);
286 throw new IllegalStateException(
"No next page.");
/**
 * Moves to the page that precedes the current one in the ordered set of
 * pages, when hasPreviousPage() is true.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the new current page is returned, confirm.
 *
 * @throws IllegalStateException with message "No previous page."
 *                               (presumably when hasPreviousPage() is false).
 */
291 public int previousPage() {
292 if (hasPreviousPage()) {
293 currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() - 1);
296 throw new IllegalStateException(
"No previous page.");
/**
 * Reports whether the current page has a hit after the currently selected
 * one, by comparing the page's current-hit index with its hit count.
 *
 * NOTE(review): the result for a page with no current-hit entry is not
 * visible in this listing; presumably false, confirm.
 */
301 public boolean hasNextItem() {
302 if (!this.currentHitPerPage.containsKey(currentPage)) {
305 return this.currentHitPerPage.get(currentPage) < this.numberOfHitsPerPage.get(currentPage);
/**
 * Reports whether the current page has a hit before the currently selected
 * one, i.e. whether the page's current-hit index is greater than 1.
 *
 * NOTE(review): the result for a page with no current-hit entry is not
 * visible in this listing; presumably false, confirm.
 */
309 public boolean hasPreviousItem() {
310 if (!this.currentHitPerPage.containsKey(currentPage)) {
313 return this.currentHitPerPage.get(currentPage) > 1;
/**
 * Advances the current-hit index of the current page by one and stores it.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the new index is returned, confirm.
 *
 * @throws IllegalStateException with message "No next item." when
 *                               hasNextItem() is false.
 */
317 public int nextItem() {
318 if (!hasNextItem()) {
319 throw new IllegalStateException(
"No next item.");
321 int cur = currentHitPerPage.get(currentPage) + 1;
322 currentHitPerPage.put(currentPage, cur);
/**
 * Moves the current-hit index of the current page back by one and stores it.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the new index is returned, confirm.
 *
 * @throws IllegalStateException with message "No previous item." when
 *                               hasPreviousItem() is false.
 */
327 public int previousItem() {
328 if (!hasPreviousItem()) {
329 throw new IllegalStateException(
"No previous item.");
331 int cur = currentHitPerPage.get(currentPage) - 1;
332 currentHitPerPage.put(currentPage, cur);
/**
 * @return the current-hit index recorded for the current page.
 *         NOTE(review): the value returned for a page with no entry is not
 *         visible in this listing; presumably 0, confirm.
 */
337 public int currentItem() {
338 if (!this.currentHitPerPage.containsKey(currentPage)) {
341 return currentHitPerPage.get(currentPage);
/**
 * Fetches the text of the current page from Solr and returns it as HTML with
 * the keywords highlighted and a named anchor before each hit.
 *
 * The Solr document id is solrObjectId, with "_<page>" appended when the
 * content has chunks and the page is not 0. Highlighting comes from the Solr
 * response when available; whenever the highlighting map, the entry for this
 * document id, or the entry for the highlight field is missing, the method
 * falls back to attemptManualHighlighting(). The result is passed through
 * insertAnchors() and wrapped in html/pre tags.
 *
 * NOTE(review): several lines are not visible in this listing, including the
 * declaration of chunkID, the opening of the try block, and the condition
 * that chooses between the highlighting query (line 367) and the plain
 * filter query (line 386); confirm against the full file.
 *
 * @return the highlighted HTML, or the bundled "error getting text" message
 *         when one of the caught exceptions occurs.
 */
345 public String getText() {
347 String highlightField =
"";
350 SolrQuery q =
new SolrQuery();
351 q.setShowDebugInfo(DEBUG);
// Build the Solr document id for the current page.
353 String contentIdStr = Long.toString(this.solrObjectId);
354 if (numberPages != 0) {
355 chunkID = Integer.toString(this.currentPage);
// Page 0 adds no suffix; any other page adds "_<page>".
356 contentIdStr +=
"0".equals(chunkID) ?
"" :
"_" + chunkID;
358 final String filterQuery = Server.Schema.ID.toString() +
":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
360 highlightField = LuceneQuery.HIGHLIGHT_FIELD;
// One quoted clause per keyword, joined with spaces.
363 final String highlightQuery = keywords.stream()
364 .map(HighlightedText::constructEscapedSolrQuery)
365 .collect(Collectors.joining(
" "));
// Highlighting request: query on the keywords, restricted to this document by the filter query.
367 q.setQuery(highlightQuery);
368 q.addField(highlightField);
369 q.addFilterQuery(filterQuery);
370 q.addHighlightField(highlightField);
371 q.setHighlightFragsize(0);
// Highlighter configuration: our span tags, a single fragment, and the
// Server.HL_ANALYZE_CHARS_UNLIMITED setting for the analysed-character limit.
374 q.setParam(
"hl.useFastVectorHighlighter",
"on");
375 q.setParam(
"hl.tag.pre", HIGHLIGHT_PRE);
376 q.setParam(
"hl.tag.post", HIGHLIGHT_POST);
377 q.setParam(
"hl.fragListBuilder",
"single");
380 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
// Alternative request without highlighting: just fetch the document by id.
386 q.setQuery(filterQuery);
387 q.addField(highlightField);
390 QueryResponse response = solrServer.query(q, METHOD.POST);
// More than one returned document is logged as unexpected for a single-id query.
395 if (response.getResults().size() > 1) {
396 logger.log(Level.WARNING,
"Unexpected number of results for Solr highlighting query: {0}", q);
398 String highlightedContent;
399 Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
// Fall back to manual highlighting at each level where Solr's highlighting data is missing.
401 if (responseHighlight == null) {
402 highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
404 Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
406 if (responseHighlightID == null) {
407 highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
409 List<String> contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
410 if (contentHighlights == null) {
411 highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
// Solr supplied highlighting: use the first fragment, trimmed.
414 highlightedContent = contentHighlights.get(0).trim();
418 highlightedContent = insertAnchors(highlightedContent);
420 return "<html><pre>" + highlightedContent +
"</pre></html>";
421 }
catch (TskCoreException | KeywordSearchModuleException | NoOpenCoreException ex) {
// NOTE(review): the message labels the value "highlight query" but logs highlightField (the field name).
422 logger.log(Level.SEVERE,
"Error getting highlighted text for Solr doc id " + solrObjectId +
", chunkID " + chunkID +
", highlight query: " + highlightField, ex);
423 return Bundle.IndexedText_errorMessage_errorGettingText();
/**
 * @return the localized string registered under the bundle key
 *         "HighlightedMatchesSource.toString" for this class.
 */
428 public String toString() {
429 return NbBundle.getMessage(this.getClass(),
"HighlightedMatchesSource.toString");
/**
 * Per its name, reports whether this text source is searchable.
 * NOTE(review): the method body is not visible in this listing; confirm the
 * returned value against the full file.
 */
433 public boolean isSearchable() {
/**
 * @return ANCHOR_PREFIX, the prefix of the anchor names that insertAnchors()
 *         places before each highlighted hit.
 */
438 public String getAnchorPrefix() {
439 return ANCHOR_PREFIX;
/**
 * @return the number of hits recorded for the current page.
 *         NOTE(review): the value returned for a page with no entry is not
 *         visible in this listing; presumably 0, confirm.
 */
443 public int getNumberHits() {
444 if (!this.numberOfHitsPerPage.containsKey(
this.currentPage)) {
447 return this.numberOfHitsPerPage.get(this.currentPage);
/**
 * Highlights keywords in a document's text without Solr's highlighter.
 *
 * Takes the highlight field of the first document in the list (empty string
 * when the field is absent), HTML-escapes it, finds every case-insensitive
 * occurrence of each HTML-escaped keyword, collects the occurrences in a
 * range set, and wraps each resulting range in HIGHLIGHT_PRE /
 * HIGHLIGHT_POST.
 *
 * @param solrDocumentList documents returned by the query; only the first
 *                         one is used.
 * @param highlightField   name of the field holding the text.
 * @param keywords         terms to highlight.
 *
 * @return the escaped text with highlight tags inserted, or the bundled
 *         "error getting text" message when the list is empty.
 */
465 static String attemptManualHighlighting(SolrDocumentList solrDocumentList, String highlightField, Collection<String> keywords) {
466 if (solrDocumentList.isEmpty()) {
467 return Bundle.IndexedText_errorMessage_errorGettingText();
473 String text = solrDocumentList.get(0).getOrDefault(highlightField,
"").toString();
// Escape first so the document's own markup cannot be interpreted as HTML.
480 text = StringEscapeUtils.escapeHtml(text);
// Hit ranges over the escaped text, stored as half-open [start, end) intervals.
482 TreeRangeSet<Integer> highlights = TreeRangeSet.create();
485 for (String keyword : keywords) {
// The keyword is escaped the same way so it can match inside the escaped text.
// NOTE(review): as visible here, an empty keyword gives a zero-length match, so
// searchOffset would never advance and the loop below would not terminate -
// confirm callers never pass an empty keyword, or that a guard exists on a line not shown.
487 final String escapedKeyword = StringEscapeUtils.escapeHtml(keyword);
488 int searchOffset = 0;
489 int hitOffset = StringUtils.indexOfIgnoreCase(text, escapedKeyword, searchOffset);
490 while (hitOffset != -1) {
// Continue searching after the end of this occurrence.
492 searchOffset = hitOffset + escapedKeyword.length();
495 highlights.add(Range.closedOpen(hitOffset, searchOffset));
498 hitOffset = StringUtils.indexOfIgnoreCase(text, escapedKeyword, searchOffset);
502 StringBuilder highlightedText =
new StringBuilder(text);
// Running total of inserted tag characters, used to shift the original offsets.
503 int totalHighLightLengthInserted = 0;
505 for (Range<Integer> highlightRange : highlights.asRanges()) {
506 int hStart = highlightRange.lowerEndpoint();
507 int hEnd = highlightRange.upperEndpoint();
510 highlightedText.insert(hStart + totalHighLightLengthInserted, HIGHLIGHT_PRE);
511 totalHighLightLengthInserted += HIGHLIGHT_PRE.length();
512 highlightedText.insert(hEnd + totalHighLightLengthInserted, HIGHLIGHT_POST);
513 totalHighLightLengthInserted += HIGHLIGHT_POST.length();
516 return highlightedText.toString();
/**
 * Inserts a named anchor in front of every highlight opening tag in the
 * given content and records the number of hits for the current page.
 *
 * Each anchor has the form &lt;a name='ANCHOR_PREFIX + n'&gt;&lt;/a&gt;,
 * where n is count + 1 at the time of insertion.
 *
 * NOTE(review): the declaration and increment of count and the loop header
 * are not visible in this listing; presumably count starts at 0 and the
 * body repeats while index is non-negative, confirm. The statement inside
 * the final if is also not visible (presumably it advances to the first
 * hit).
 *
 * @param searchableContent highlighted HTML to add anchors to.
 *
 * @return the content with anchors inserted.
 */
527 private String insertAnchors(String searchableContent) {
528 StringBuilder buf =
new StringBuilder(searchableContent);
529 final String searchToken = HIGHLIGHT_PRE;
530 final int indexSearchTokLen = searchToken.length();
531 final String insertPre =
"<a name='" + ANCHOR_PREFIX;
532 final String insertPost =
"'></a>";
534 int searchOffset = 0;
535 int index = buf.indexOf(searchToken, searchOffset);
537 String insertString = insertPre + Integer.toString(count + 1) + insertPost;
538 int insertStringLen = insertString.length();
539 buf.insert(index, insertString);
// Resume after both the anchor just inserted and the highlight tag it precedes.
540 searchOffset = index + indexSearchTokLen + insertStringLen;
542 index = buf.indexOf(searchToken, searchOffset);
// Store the hit count for this page.
546 this.numberOfHitsPerPage.put(this.currentPage, count);
547 if (this.currentItem() == 0 && this.hasNextItem()) {
551 return buf.toString();