19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.util.ArrayList;
22 import java.util.HashMap;
23 import java.util.LinkedHashMap;
24 import java.util.List;
26 import java.util.TreeSet;
27 import java.util.logging.Level;
29 import org.openide.util.NbBundle;
31 import org.apache.solr.client.solrj.SolrQuery;
32 import org.apache.solr.client.solrj.SolrRequest.METHOD;
33 import org.apache.solr.client.solrj.response.QueryResponse;
42 class HighlightedTextMarkup
implements TextMarkup, TextMarkupLookup {
44 private static final Logger logger = Logger.getLogger(HighlightedTextMarkup.class.getName());
45 private static final String HIGHLIGHT_PRE =
"<span style='background:yellow'>";
46 private static final String HIGHLIGHT_POST =
"</span>";
47 private static final String ANCHOR_PREFIX = HighlightedTextMarkup.class.
getName() +
"_";
49 private long objectId;
50 private String keywordHitQuery;
51 private Server solrServer;
52 private int numberPages;
53 private int currentPage;
54 private boolean isRegex =
false;
55 private boolean group =
true;
56 private boolean hasChunks =
false;
58 private LinkedHashMap<Integer, Integer> hitsPages;
60 private HashMap<Integer, Integer> pagesToHits;
61 private List<Integer> pages;
62 private QueryResults hits = null;
63 private String originalQuery = null;
64 private boolean isPageInfoLoaded =
false;
65 private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
/**
 * Creates a markup source for one piece of indexed content.
 *
 * @param objectId        id of the content whose text is highlighted
 * @param keywordHitQuery query whose matches are highlighted
 * @param isRegex         true when the query is a regular expression, false for a literal
 */
HighlightedTextMarkup(long objectId, String keywordHitQuery, boolean isRegex) {
    // Paging bookkeeping starts out empty.
    this.pages = new ArrayList<>();
    this.hitsPages = new LinkedHashMap<>();
    this.pagesToHits = new HashMap<>();

    this.solrServer = KeywordSearch.getServer();

    this.isRegex = isRegex;
    this.keywordHitQuery = keywordHitQuery;
    this.objectId = objectId;
}
/**
 * Creates a markup source and additionally records the original query text.
 *
 * @param objectId      id of the content whose text is highlighted
 * @param solrQuery     query whose matches are highlighted
 * @param isRegex       true when the query is a regular expression, false for a literal
 * @param originalQuery original query text to keep alongside {@code solrQuery}
 */
HighlightedTextMarkup(long objectId, String solrQuery, boolean isRegex, String originalQuery) {
    this(objectId, solrQuery, isRegex);

    this.originalQuery = originalQuery;
}
/**
 * Creates a markup source for content whose query results are already available.
 *
 * @param objectId  id of the content whose text is highlighted
 * @param solrQuery query whose matches are highlighted
 * @param isRegex   true when the query is a regular expression, false for a literal
 * @param hits      existing query results; when non-null they are used to find the pages
 *                  that contain hits
 */
HighlightedTextMarkup(long objectId, String solrQuery, boolean isRegex, QueryResults hits) {
    this(objectId, solrQuery, isRegex);
    // Keep the supplied results; without this assignment the parameter was silently ignored.
    this.hits = hits;
}
/**
 * Creates a markup source for content whose query results are already available, with an
 * explicit grouping flag.
 *
 * @param objectId  id of the content whose text is highlighted
 * @param solrQuery query whose matches are highlighted
 * @param isRegex   true when the query is a regular expression, false for a literal
 * @param group     grouping flag to use instead of the default ({@code true})
 * @param hits      existing query results; when non-null they are used to find the pages
 *                  that contain hits
 */
HighlightedTextMarkup(long objectId, String solrQuery, boolean isRegex, boolean group, QueryResults hits) {
    this(objectId, solrQuery, isRegex);
    // Store both values here so this constructor does not depend on what another overload stores.
    this.hits = hits;
    this.group = group;
}
102 private void loadPageInfo() {
103 if (isPageInfoLoaded) {
107 this.numberPages = solrServer.queryNumFileChunks(this.objectId);
108 }
catch (KeywordSearchModuleException ex) {
109 logger.log(Level.WARNING,
"Could not get number pages for content: " +
this.objectId);
111 }
catch (NoOpenCoreException ex) {
112 logger.log(Level.WARNING,
"Could not get number pages for content: " +
this.objectId);
116 if (this.numberPages == 0) {
130 String queryStr = KeywordSearchUtil.escapeLuceneQuery(this.keywordHitQuery);
133 queryStr = Server.Schema.CONTENT_WS +
":" +
"\"" + queryStr +
"\"";
136 Keyword keywordQuery =
new Keyword(queryStr, !isRegex);
137 List<Keyword> keywords =
new ArrayList<>();
138 keywords.add(keywordQuery);
139 KeywordSearchQuery chunksQuery =
new LuceneQuery(
new KeywordList(keywords), keywordQuery);
141 chunksQuery.addFilter(
new KeywordQueryFilter(FilterType.CHUNK,
this.objectId));
143 hits = chunksQuery.performQuery();
144 }
catch (NoOpenCoreException ex) {
145 logger.log(Level.INFO,
"Could not get chunk info and get highlights", ex);
151 TreeSet<Integer> pagesSorted =
new TreeSet<>();
152 for (Keyword k : hits.getKeywords()) {
153 for (KeywordHit hit : hits.getResults(k)) {
154 int chunkID = hit.getChunkId();
155 if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) {
156 pagesSorted.add(chunkID);
162 if (pagesSorted.isEmpty()) {
163 this.currentPage = 0;
165 this.currentPage = pagesSorted.first();
168 for (Integer page : pagesSorted) {
169 hitsPages.put(page, 0);
171 pagesToHits.put(page, 0);
176 this.numberPages = 1;
177 this.currentPage = 1;
180 pagesToHits.put(1, 0);
182 isPageInfoLoaded =
true;
/**
 * Creates an instance with no content, query or server attached. Used by
 * {@link #getDefault()} for the shared instance on which createInstance is invoked.
 */
private HighlightedTextMarkup() {
}

/**
 * Returns the id of the content this markup was created for.
 *
 * NOTE(review): this accessor's declaration line was missing; the name and package-private
 * visibility are reconstructed and should be confirmed against callers.
 *
 * @return the content's object id
 */
long getObjectId() {
    return this.objectId;
}
194 public int getNumberPages() {
195 return this.numberPages;
201 public int getCurrentPage() {
202 return this.currentPage;
206 public boolean hasNextPage() {
207 final int numPages = pages.size();
208 int idx = pages.indexOf(this.currentPage);
209 return idx < numPages - 1;
214 public boolean hasPreviousPage() {
215 int idx = pages.indexOf(this.currentPage);
221 public int nextPage() {
222 if (!hasNextPage()) {
223 throw new IllegalStateException(
224 NbBundle.getMessage(
this.getClass(),
"HighlightedMatchesSource.nextPage.exception.msg"));
226 int idx = pages.indexOf(this.currentPage);
227 currentPage = pages.get(idx + 1);
232 public int previousPage() {
233 if (!hasPreviousPage()) {
234 throw new IllegalStateException(
235 NbBundle.getMessage(
this.getClass(),
"HighlightedMatchesSource.previousPage.exception.msg"));
237 int idx = pages.indexOf(this.currentPage);
238 currentPage = pages.get(idx - 1);
243 public boolean hasNextItem() {
244 if (!this.pagesToHits.containsKey(currentPage)) {
247 return this.pagesToHits.get(currentPage) < this.hitsPages.get(currentPage);
251 public boolean hasPreviousItem() {
252 if (!this.pagesToHits.containsKey(currentPage)) {
255 return this.pagesToHits.get(currentPage) > 1;
259 public int nextItem() {
260 if (!hasNextItem()) {
261 throw new IllegalStateException(
262 NbBundle.getMessage(
this.getClass(),
"HighlightedMatchesSource.nextItem.exception.msg"));
264 int cur = pagesToHits.get(currentPage) + 1;
265 pagesToHits.put(currentPage, cur);
270 public int previousItem() {
271 if (!hasPreviousItem()) {
272 throw new IllegalStateException(
273 NbBundle.getMessage(
this.getClass(),
"HighlightedMatchesSource.previousItem.exception.msg"));
275 int cur = pagesToHits.get(currentPage) - 1;
276 pagesToHits.put(currentPage, cur);
281 public int currentItem() {
282 if (!this.pagesToHits.containsKey(currentPage)) {
285 return pagesToHits.get(currentPage);
289 public LinkedHashMap<Integer, Integer> getHitsPages() {
290 return this.hitsPages;
294 public String getMarkup() {
297 String highLightField = null;
299 String highlightQuery = keywordHitQuery;
302 highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
305 final String findSubstr = LuceneQuery.HIGHLIGHT_FIELD_REGEX +
":";
306 if (!highlightQuery.contains(findSubstr)) {
307 highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
310 highLightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
312 highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
315 SolrQuery q =
new SolrQuery();
316 q.setShowDebugInfo(DEBUG);
318 String queryStr = null;
321 StringBuilder sb =
new StringBuilder();
322 sb.append(highLightField).append(
":");
326 sb.append(highlightQuery);
330 queryStr = sb.toString();
334 queryStr = KeywordSearchUtil.quoteQuery(highlightQuery);
337 q.setQuery(queryStr);
339 String contentIdStr = Long.toString(this.objectId);
341 contentIdStr +=
"_" + Integer.toString(this.currentPage);
345 final String filterQuery = Server.Schema.ID.toString() +
":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
346 q.addFilterQuery(filterQuery);
347 q.addHighlightField(highLightField);
351 q.setHighlightFragsize(0);
354 q.setParam(
"hl.useFastVectorHighlighter",
"on");
355 q.setParam(
"hl.tag.pre", HIGHLIGHT_PRE);
356 q.setParam(
"hl.tag.post", HIGHLIGHT_POST);
357 q.setParam(
"hl.fragListBuilder",
"single");
360 q.setParam(
"hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
363 QueryResponse response = solrServer.query(q, METHOD.POST);
364 Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
366 Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
367 if (responseHighlightID == null) {
368 return NbBundle.getMessage(this.getClass(),
"HighlightedMatchesSource.getMarkup.noMatchMsg");
371 List<String> contentHighlights = responseHighlightID.get(highLightField);
372 if (contentHighlights == null) {
373 return NbBundle.getMessage(this.getClass(),
"HighlightedMatchesSource.getMarkup.noMatchMsg");
376 String highlightedContent = contentHighlights.get(0).trim();
377 highlightedContent = insertAnchors(highlightedContent);
380 return "<html><pre>" + highlightedContent +
"</pre></html>";
382 }
catch (NoOpenCoreException ex) {
383 logger.log(Level.WARNING,
"Couldn't query markup for page: " + currentPage, ex);
385 }
catch (KeywordSearchModuleException ex) {
386 logger.log(Level.WARNING,
"Could not query markup for page: " + currentPage, ex);
392 public String toString() {
393 return NbBundle.getMessage(this.getClass(),
"HighlightedMatchesSource.toString");
397 public boolean isSearchable() {
402 public String getAnchorPrefix() {
403 return ANCHOR_PREFIX;
407 public int getNumberHits() {
408 if (!this.hitsPages.containsKey(
this.currentPage)) {
411 return this.hitsPages.get(this.currentPage);
414 private String insertAnchors(String searchableContent) {
415 int searchOffset = 0;
418 StringBuilder buf =
new StringBuilder(searchableContent);
420 final String searchToken = HIGHLIGHT_PRE;
421 final int indexSearchTokLen = searchToken.length();
422 final String insertPre =
"<a name='" + ANCHOR_PREFIX;
423 final String insertPost =
"'></a>";
425 while ((index = buf.indexOf(searchToken, searchOffset)) >= 0) {
426 String insertString = insertPre + Integer.toString(count + 1) + insertPost;
427 int insertStringLen = insertString.length();
428 buf.insert(index, insertString);
429 searchOffset = index + indexSearchTokLen + insertStringLen;
434 this.hitsPages.put(this.currentPage, count);
435 if (this.currentItem() == 0 && this.hasNextItem()) {
439 return buf.toString();
442 private static TextMarkupLookup instance = null;
446 public static synchronized TextMarkupLookup getDefault() {
447 if (instance == null) {
448 instance =
new HighlightedTextMarkup();
455 public TextMarkupLookup createInstance(
long objectId, String keywordHitQuery,
boolean isRegex, String originalQuery) {
456 return new HighlightedTextMarkup(objectId, keywordHitQuery, isRegex, originalQuery);