19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.util.ArrayList;
22 import java.util.HashMap;
23 import java.util.LinkedHashMap;
24 import java.util.List;
26 import java.util.TreeSet;
27 import java.util.logging.Level;
29 import org.openide.util.NbBundle;
31 import org.apache.solr.client.solrj.SolrQuery;
32 import org.apache.solr.client.solrj.SolrRequest.METHOD;
33 import org.apache.solr.client.solrj.response.QueryResponse;
42 class HighlightedText
implements IndexedText, TextMarkupLookup {
44 private static final Logger logger = Logger.getLogger(HighlightedText.class.getName());
45 private static final String HIGHLIGHT_PRE =
"<span style='background:yellow'>";
46 private static final String HIGHLIGHT_POST =
"</span>";
47 private static final String ANCHOR_PREFIX = HighlightedText.class.getName() +
"_";
49 private long objectId;
50 private String keywordHitQuery;
51 private Server solrServer;
52 private int numberPages;
53 private int currentPage;
54 private boolean isRegex =
false;
55 private boolean group =
true;
56 private boolean hasChunks =
false;
58 private LinkedHashMap<Integer, Integer> hitsPages;
60 private HashMap<Integer, Integer> pagesToHits;
61 private List<Integer> pages;
62 private QueryResults hits = null;
63 private String originalQuery = null;
64 private boolean isPageInfoLoaded =
false;
65 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
/**
 * Creates a highlighter for one piece of indexed content without any
 * pre-computed hits.
 *
 * @param objectId        id of the content whose text is highlighted
 * @param keywordHitQuery query used to locate and highlight the hits
 * @param isRegex         stored in the isRegex field and consulted when queries are built
 */
HighlightedText(long objectId, String keywordHitQuery, boolean isRegex) {
    this.objectId = objectId;
    this.keywordHitQuery = keywordHitQuery;
    this.isRegex = isRegex;

    // paging bookkeeping starts out empty
    this.pages = new ArrayList<>();
    this.hitsPages = new LinkedHashMap<>();
    this.pagesToHits = new HashMap<>();

    this.solrServer = KeywordSearch.getServer();
}
/**
 * Creates a highlighter that additionally remembers the original query text.
 *
 * @param objectId      id of the content whose text is highlighted
 * @param solrQuery     query used to locate and highlight the hits
 * @param isRegex       forwarded to the three-argument constructor
 * @param originalQuery original query text, stored in the originalQuery field
 */
HighlightedText(long objectId, String solrQuery, boolean isRegex, String originalQuery) {
    this(objectId, solrQuery, isRegex);
    this.originalQuery = originalQuery;
}
/**
 * Creates a highlighter for content whose query results are already known.
 *
 * @param objectId  id of the content whose text is highlighted
 * @param solrQuery query used to locate and highlight the hits
 * @param isRegex   forwarded to the three-argument constructor
 * @param hits      previously computed query results
 */
HighlightedText(long objectId, String solrQuery, boolean isRegex, QueryResults hits) {
    this(objectId, solrQuery, isRegex);
    // Keep the supplied results; without this assignment the argument was silently discarded.
    this.hits = hits;
}
/**
 * Creates a highlighter with an explicit grouping flag.
 *
 * @param objectId  id of the content whose text is highlighted
 * @param solrQuery query used to locate and highlight the hits
 * @param isRegex   forwarded to the delegated constructor
 * @param group     value for the group field (the field otherwise defaults to true)
 * @param hits      forwarded to the constructor taking QueryResults
 */
HighlightedText(long objectId, String solrQuery, boolean isRegex, boolean group, QueryResults hits) {
    this(objectId, solrQuery, isRegex, hits);
    // Record the caller's choice; previously the parameter was accepted but never stored.
    this.group = group;
}
103 private void loadPageInfo() {
104 if (isPageInfoLoaded) {
108 this.numberPages = solrServer.queryNumFileChunks(this.objectId);
109 }
catch (KeywordSearchModuleException ex) {
110 logger.log(Level.WARNING,
"Could not get number pages for content: " +
this.objectId);
112 }
catch (NoOpenCoreException ex) {
113 logger.log(Level.WARNING,
"Could not get number pages for content: " +
this.objectId);
117 if (this.numberPages == 0) {
132 String queryStr = KeywordSearchUtil.escapeLuceneQuery(this.keywordHitQuery);
135 queryStr = Server.Schema.CONTENT_WS +
":" +
"\"" + queryStr +
"\"";
138 Keyword keywordQuery =
new Keyword(queryStr, !isRegex);
139 List<Keyword> keywords =
new ArrayList<>();
140 keywords.add(keywordQuery);
141 KeywordSearchQuery chunksQuery =
new LuceneQuery(
new KeywordList(keywords), keywordQuery);
143 chunksQuery.addFilter(
new KeywordQueryFilter(FilterType.CHUNK,
this.objectId));
145 hits = chunksQuery.performQuery();
146 }
catch (NoOpenCoreException ex) {
147 logger.log(Level.INFO,
"Could not get chunk info and get highlights", ex);
153 TreeSet<Integer> pagesSorted =
new TreeSet<>();
154 for (Keyword k : hits.getKeywords()) {
155 for (KeywordHit hit : hits.getResults(k)) {
156 int chunkID = hit.getChunkId();
157 if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) {
158 pagesSorted.add(chunkID);
164 if (pagesSorted.isEmpty()) {
165 this.currentPage = 0;
167 this.currentPage = pagesSorted.first();
170 for (Integer page : pagesSorted) {
171 hitsPages.put(page, 0);
173 pagesToHits.put(page, 0);
178 this.numberPages = 1;
179 this.currentPage = 1;
182 pagesToHits.put(1, 0);
184 isPageInfoLoaded =
true;
/**
 * No-argument constructor; used by getDefault() to build the shared instance.
 * Leaves every instance field at its default value.
 */
private HighlightedText() {
}
192 return this.objectId;
196 public int getNumberPages() {
197 return this.numberPages;
203 public int getCurrentPage() {
204 return this.currentPage;
208 public boolean hasNextPage() {
209 final int numPages = pages.size();
210 int idx = pages.indexOf(this.currentPage);
211 return idx < numPages - 1;
216 public boolean hasPreviousPage() {
217 int idx = pages.indexOf(this.currentPage);
223 public int nextPage() {
224 if (!hasNextPage()) {
225 throw new IllegalStateException(
226 NbBundle.getMessage(
this.getClass(),
"HighlightedMatchesSource.nextPage.exception.msg"));
228 int idx = pages.indexOf(this.currentPage);
229 currentPage = pages.get(idx + 1);
234 public int previousPage() {
235 if (!hasPreviousPage()) {
236 throw new IllegalStateException(
237 NbBundle.getMessage(
this.getClass(),
"HighlightedMatchesSource.previousPage.exception.msg"));
239 int idx = pages.indexOf(this.currentPage);
240 currentPage = pages.get(idx - 1);
245 public boolean hasNextItem() {
246 if (!this.pagesToHits.containsKey(currentPage)) {
249 return this.pagesToHits.get(currentPage) < this.hitsPages.get(currentPage);
253 public boolean hasPreviousItem() {
254 if (!this.pagesToHits.containsKey(currentPage)) {
257 return this.pagesToHits.get(currentPage) > 1;
261 public int nextItem() {
262 if (!hasNextItem()) {
263 throw new IllegalStateException(
264 NbBundle.getMessage(
this.getClass(),
"HighlightedMatchesSource.nextItem.exception.msg"));
266 int cur = pagesToHits.get(currentPage) + 1;
267 pagesToHits.put(currentPage, cur);
272 public int previousItem() {
273 if (!hasPreviousItem()) {
274 throw new IllegalStateException(
275 NbBundle.getMessage(
this.getClass(),
"HighlightedMatchesSource.previousItem.exception.msg"));
277 int cur = pagesToHits.get(currentPage) - 1;
278 pagesToHits.put(currentPage, cur);
283 public int currentItem() {
284 if (!this.pagesToHits.containsKey(currentPage)) {
287 return pagesToHits.get(currentPage);
291 public LinkedHashMap<Integer, Integer> getHitsPages() {
292 return this.hitsPages;
296 public String getText() {
299 String highLightField = null;
301 String highlightQuery = keywordHitQuery;
304 highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
307 final String findSubstr = LuceneQuery.HIGHLIGHT_FIELD_REGEX +
":";
308 if (!highlightQuery.contains(findSubstr)) {
309 highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
312 highLightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
314 highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
317 SolrQuery q =
new SolrQuery();
318 q.setShowDebugInfo(DEBUG);
320 String queryStr = null;
323 StringBuilder sb =
new StringBuilder();
324 sb.append(highLightField).append(
":");
328 sb.append(highlightQuery);
332 queryStr = sb.toString();
336 queryStr = KeywordSearchUtil.quoteQuery(highlightQuery);
339 q.setQuery(queryStr);
341 String contentIdStr = Long.toString(this.objectId);
343 contentIdStr +=
"_" + Integer.toString(this.currentPage);
346 final String filterQuery = Server.Schema.ID.toString() +
":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
347 q.addFilterQuery(filterQuery);
348 q.addHighlightField(highLightField);
352 q.setHighlightFragsize(0);
355 q.setParam(
"hl.useFastVectorHighlighter",
"on");
356 q.setParam(
"hl.tag.pre", HIGHLIGHT_PRE);
357 q.setParam(
"hl.tag.post", HIGHLIGHT_POST);
358 q.setParam(
"hl.fragListBuilder",
"single");
361 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
364 QueryResponse response = solrServer.query(q, METHOD.POST);
365 Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
367 Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
368 if (responseHighlightID == null) {
369 return NbBundle.getMessage(this.getClass(),
"HighlightedMatchesSource.getMarkup.noMatchMsg");
372 List<String> contentHighlights = responseHighlightID.get(highLightField);
373 if (contentHighlights == null) {
374 return NbBundle.getMessage(this.getClass(),
"HighlightedMatchesSource.getMarkup.noMatchMsg");
377 String highlightedContent = contentHighlights.get(0).trim();
378 highlightedContent = insertAnchors(highlightedContent);
380 return "<html><pre>" + highlightedContent +
"</pre></html>";
382 }
catch (NoOpenCoreException | KeywordSearchModuleException ex) {
383 return NbBundle.getMessage(this.getClass(),
"HighlightedMatchesSource.getMarkup.queryFailedMsg");
388 public String toString() {
389 return NbBundle.getMessage(this.getClass(),
"HighlightedMatchesSource.toString");
// Reports whether this text source can be searched.
// NOTE(review): the method body is not present in this excerpt -- confirm the
// returned value against the full file before relying on it.
393 public boolean isSearchable() {
398 public String getAnchorPrefix() {
399 return ANCHOR_PREFIX;
403 public int getNumberHits() {
404 if (!this.hitsPages.containsKey(
this.currentPage)) {
407 return this.hitsPages.get(this.currentPage);
410 private String insertAnchors(String searchableContent) {
411 int searchOffset = 0;
414 StringBuilder buf =
new StringBuilder(searchableContent);
416 final String searchToken = HIGHLIGHT_PRE;
417 final int indexSearchTokLen = searchToken.length();
418 final String insertPre =
"<a name='" + ANCHOR_PREFIX;
419 final String insertPost =
"'></a>";
421 while ((index = buf.indexOf(searchToken, searchOffset)) >= 0) {
422 String insertString = insertPre + Integer.toString(count + 1) + insertPost;
423 int insertStringLen = insertString.length();
424 buf.insert(index, insertString);
425 searchOffset = index + indexSearchTokLen + insertStringLen;
430 this.hitsPages.put(this.currentPage, count);
431 if (this.currentItem() == 0 && this.hasNextItem()) {
435 return buf.toString();
438 private static TextMarkupLookup instance = null;
442 public static synchronized TextMarkupLookup getDefault() {
443 if (instance == null) {
444 instance =
new HighlightedText();
451 public TextMarkupLookup createInstance(
long objectId, String keywordHitQuery,
boolean isRegex, String originalQuery) {
452 return new HighlightedText(objectId, keywordHitQuery, isRegex, originalQuery);