Autopsy  4.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
HighlightedText.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2015 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import java.util.ArrayList;
22 import java.util.HashMap;
23 import java.util.LinkedHashMap;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.TreeSet;
27 import java.util.logging.Level;
28 
29 import org.openide.util.NbBundle;
31 import org.apache.solr.client.solrj.SolrQuery;
32 import org.apache.solr.client.solrj.SolrRequest.METHOD;
33 import org.apache.solr.client.solrj.response.QueryResponse;
37 
42 class HighlightedText implements IndexedText, TextMarkupLookup {
43 
44  private static final Logger logger = Logger.getLogger(HighlightedText.class.getName());
45  private static final String HIGHLIGHT_PRE = "<span style='background:yellow'>"; //NON-NLS
46  private static final String HIGHLIGHT_POST = "</span>"; //NON-NLS
47  private static final String ANCHOR_PREFIX = HighlightedText.class.getName() + "_";
48 
49  private long objectId;
50  private String keywordHitQuery;
51  private Server solrServer;
52  private int numberPages;
53  private int currentPage;
54  private boolean isRegex = false;
55  private boolean group = true;
56  private boolean hasChunks = false;
57  //stores all pages/chunks that have hits as key, and number of hits as a value, or 0 if yet unknown
58  private LinkedHashMap<Integer, Integer> hitsPages;
59  //stored page num -> current hit number mapping
60  private HashMap<Integer, Integer> pagesToHits;
61  private List<Integer> pages;
62  private QueryResults hits = null; //original hits that may get passed in
63  private String originalQuery = null; //or original query if hits are not available
64  private boolean isPageInfoLoaded = false;
65  private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
66 
67  HighlightedText(long objectId, String keywordHitQuery, boolean isRegex) {
68  this.objectId = objectId;
69  this.keywordHitQuery = keywordHitQuery;
70  this.isRegex = isRegex;
71  this.group = true;
72  this.hitsPages = new LinkedHashMap<>();
73  this.pages = new ArrayList<>();
74  this.pagesToHits = new HashMap<>();
75 
76  this.solrServer = KeywordSearch.getServer();
77  this.numberPages = 0;
78  this.currentPage = 0;
79  //hits are unknown
80 
81  }
82 
83  //when the results are not known and need to requery to get hits
84  HighlightedText(long objectId, String solrQuery, boolean isRegex, String originalQuery) {
85  this(objectId, solrQuery, isRegex);
86  this.originalQuery = originalQuery;
87  }
88 
89  HighlightedText(long objectId, String solrQuery, boolean isRegex, QueryResults hits) {
90  this(objectId, solrQuery, isRegex);
91  this.hits = hits;
92  }
93 
94  HighlightedText(long objectId, String solrQuery, boolean isRegex, boolean group, QueryResults hits) {
95  this(objectId, solrQuery, isRegex, hits);
96  this.group = group;
97  }
98 
103  private void loadPageInfo() {
104  if (isPageInfoLoaded) {
105  return;
106  }
107  try {
108  this.numberPages = solrServer.queryNumFileChunks(this.objectId);
109  } catch (KeywordSearchModuleException ex) {
110  logger.log(Level.WARNING, "Could not get number pages for content: " + this.objectId); //NON-NLS
111  return;
112  } catch (NoOpenCoreException ex) {
113  logger.log(Level.WARNING, "Could not get number pages for content: " + this.objectId); //NON-NLS
114  return;
115  }
116 
117  if (this.numberPages == 0) {
118  hasChunks = false;
119  } else {
120  hasChunks = true;
121  }
122 
123  //if has chunks, get pages with hits
124  if (hasChunks) {
125  //extract pages of interest, sorted
126 
127  /*
128  * If this is being called from the artifacts / dir tree, then we
129  * need to perform the search to get the highlights.
130  */
131  if (hits == null) {
132  String queryStr = KeywordSearchUtil.escapeLuceneQuery(this.keywordHitQuery);
133  if (isRegex) {
134  //use white-space sep. field to get exact matches only of regex query result
135  queryStr = Server.Schema.CONTENT_WS + ":" + "\"" + queryStr + "\"";
136  }
137 
138  Keyword keywordQuery = new Keyword(queryStr, !isRegex);
139  List<Keyword> keywords = new ArrayList<>();
140  keywords.add(keywordQuery);
141  KeywordSearchQuery chunksQuery = new LuceneQuery(new KeywordList(keywords), keywordQuery);
142 
143  chunksQuery.addFilter(new KeywordQueryFilter(FilterType.CHUNK, this.objectId));
144  try {
145  hits = chunksQuery.performQuery();
146  } catch (NoOpenCoreException ex) {
147  logger.log(Level.INFO, "Could not get chunk info and get highlights", ex); //NON-NLS
148  return;
149  }
150  }
151 
152  //organize the hits by page, filter as needed
153  TreeSet<Integer> pagesSorted = new TreeSet<>();
154  for (Keyword k : hits.getKeywords()) {
155  for (KeywordHit hit : hits.getResults(k)) {
156  int chunkID = hit.getChunkId();
157  if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) {
158  pagesSorted.add(chunkID);
159  }
160  }
161  }
162 
163  //set page to first page having highlights
164  if (pagesSorted.isEmpty()) {
165  this.currentPage = 0;
166  } else {
167  this.currentPage = pagesSorted.first();
168  }
169 
170  for (Integer page : pagesSorted) {
171  hitsPages.put(page, 0); //unknown number of matches in the page
172  pages.add(page);
173  pagesToHits.put(page, 0); //set current hit to 0th
174  }
175 
176  } else {
177  //no chunks
178  this.numberPages = 1;
179  this.currentPage = 1;
180  hitsPages.put(1, 0);
181  pages.add(1);
182  pagesToHits.put(1, 0);
183  }
184  isPageInfoLoaded = true;
185  }
186 
187  //constructor for dummy singleton factory instance for Lookup
188  private HighlightedText() {
189  }
190 
191  long getObjectId() {
192  return this.objectId;
193  }
194 
195  @Override
196  public int getNumberPages() {
197  return this.numberPages;
198  //return number of pages that have hits
199  //return this.hitsPages.keySet().size();
200  }
201 
202  @Override
203  public int getCurrentPage() {
204  return this.currentPage;
205  }
206 
207  @Override
208  public boolean hasNextPage() {
209  final int numPages = pages.size();
210  int idx = pages.indexOf(this.currentPage);
211  return idx < numPages - 1;
212 
213  }
214 
215  @Override
216  public boolean hasPreviousPage() {
217  int idx = pages.indexOf(this.currentPage);
218  return idx > 0;
219 
220  }
221 
222  @Override
223  public int nextPage() {
224  if (!hasNextPage()) {
225  throw new IllegalStateException(
226  NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.nextPage.exception.msg"));
227  }
228  int idx = pages.indexOf(this.currentPage);
229  currentPage = pages.get(idx + 1);
230  return currentPage;
231  }
232 
233  @Override
234  public int previousPage() {
235  if (!hasPreviousPage()) {
236  throw new IllegalStateException(
237  NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.previousPage.exception.msg"));
238  }
239  int idx = pages.indexOf(this.currentPage);
240  currentPage = pages.get(idx - 1);
241  return currentPage;
242  }
243 
244  @Override
245  public boolean hasNextItem() {
246  if (!this.pagesToHits.containsKey(currentPage)) {
247  return false;
248  }
249  return this.pagesToHits.get(currentPage) < this.hitsPages.get(currentPage);
250  }
251 
252  @Override
253  public boolean hasPreviousItem() {
254  if (!this.pagesToHits.containsKey(currentPage)) {
255  return false;
256  }
257  return this.pagesToHits.get(currentPage) > 1;
258  }
259 
260  @Override
261  public int nextItem() {
262  if (!hasNextItem()) {
263  throw new IllegalStateException(
264  NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.nextItem.exception.msg"));
265  }
266  int cur = pagesToHits.get(currentPage) + 1;
267  pagesToHits.put(currentPage, cur);
268  return cur;
269  }
270 
271  @Override
272  public int previousItem() {
273  if (!hasPreviousItem()) {
274  throw new IllegalStateException(
275  NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.previousItem.exception.msg"));
276  }
277  int cur = pagesToHits.get(currentPage) - 1;
278  pagesToHits.put(currentPage, cur);
279  return cur;
280  }
281 
282  @Override
283  public int currentItem() {
284  if (!this.pagesToHits.containsKey(currentPage)) {
285  return 0;
286  }
287  return pagesToHits.get(currentPage);
288  }
289 
290  @Override
291  public LinkedHashMap<Integer, Integer> getHitsPages() {
292  return this.hitsPages;
293  }
294 
295  @Override
296  public String getText() {
297  loadPageInfo(); //inits once
298 
299  String highLightField = null;
300 
301  String highlightQuery = keywordHitQuery;
302 
303  if (isRegex) {
304  highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
305  //escape special lucene chars if not already escaped (if not a compound query)
306  //TODO a better way to mark it a compound highlight query
307  final String findSubstr = LuceneQuery.HIGHLIGHT_FIELD_REGEX + ":";
308  if (!highlightQuery.contains(findSubstr)) {
309  highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
310  }
311  } else {
312  highLightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
313  //escape special lucene chars always for literal queries query
314  highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
315  }
316 
317  SolrQuery q = new SolrQuery();
318  q.setShowDebugInfo(DEBUG); //debug
319 
320  String queryStr = null;
321 
322  if (isRegex) {
323  StringBuilder sb = new StringBuilder();
324  sb.append(highLightField).append(":");
325  if (group) {
326  sb.append("\"");
327  }
328  sb.append(highlightQuery);
329  if (group) {
330  sb.append("\"");
331  }
332  queryStr = sb.toString();
333  } else {
334  //use default field, simplifies query
335  //always force grouping/quotes
336  queryStr = KeywordSearchUtil.quoteQuery(highlightQuery);
337  }
338 
339  q.setQuery(queryStr);
340 
341  String contentIdStr = Long.toString(this.objectId);
342  if (hasChunks) {
343  contentIdStr += "_" + Integer.toString(this.currentPage);
344  }
345 
346  final String filterQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
347  q.addFilterQuery(filterQuery);
348  q.addHighlightField(highLightField); //for exact highlighting, try content_ws field (with stored="true" in Solr schema)
349 
350  //q.setHighlightSimplePre(HIGHLIGHT_PRE); //original highlighter only
351  //q.setHighlightSimplePost(HIGHLIGHT_POST); //original highlighter only
352  q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH
353 
354  //tune the highlighter
355  q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
356  q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
357  q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
358  q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS
359 
360  //docs says makes sense for the original Highlighter only, but not really
361  q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
362 
363  try {
364  QueryResponse response = solrServer.query(q, METHOD.POST);
365  Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
366 
367  Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
368  if (responseHighlightID == null) {
369  return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
370 
371  }
372  List<String> contentHighlights = responseHighlightID.get(highLightField);
373  if (contentHighlights == null) {
374  return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
375  } else {
376  // extracted content (minus highlight tags) is HTML-escaped
377  String highlightedContent = contentHighlights.get(0).trim();
378  highlightedContent = insertAnchors(highlightedContent);
379 
380  return "<html><pre>" + highlightedContent + "</pre></html>"; //NON-NLS
381  }
382  } catch (NoOpenCoreException | KeywordSearchModuleException ex) {
383  return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.queryFailedMsg");
384  }
385  }
386 
387  @Override
388  public String toString() {
389  return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.toString");
390  }
391 
392  @Override
393  public boolean isSearchable() {
394  return true;
395  }
396 
397  @Override
398  public String getAnchorPrefix() {
399  return ANCHOR_PREFIX;
400  }
401 
402  @Override
403  public int getNumberHits() {
404  if (!this.hitsPages.containsKey(this.currentPage)) {
405  return 0;
406  }
407  return this.hitsPages.get(this.currentPage);
408  }
409 
410  private String insertAnchors(String searchableContent) {
411  int searchOffset = 0;
412  int index = -1;
413 
414  StringBuilder buf = new StringBuilder(searchableContent);
415 
416  final String searchToken = HIGHLIGHT_PRE;
417  final int indexSearchTokLen = searchToken.length();
418  final String insertPre = "<a name='" + ANCHOR_PREFIX; //NON-NLS
419  final String insertPost = "'></a>"; //NON-NLS
420  int count = 0;
421  while ((index = buf.indexOf(searchToken, searchOffset)) >= 0) {
422  String insertString = insertPre + Integer.toString(count + 1) + insertPost;
423  int insertStringLen = insertString.length();
424  buf.insert(index, insertString);
425  searchOffset = index + indexSearchTokLen + insertStringLen; //next offset past this anchor
426  ++count;
427  }
428 
429  //store total hits for this page, now that we know it
430  this.hitsPages.put(this.currentPage, count);
431  if (this.currentItem() == 0 && this.hasNextItem()) {
432  this.nextItem();
433  }
434 
435  return buf.toString();
436  }
437  //dummy instance for Lookup only
438  private static TextMarkupLookup instance = null;
439 
440  //getter of the singleton dummy instance solely for Lookup purpose
441  //this instance does not actually work with Solr
442  public static synchronized TextMarkupLookup getDefault() {
443  if (instance == null) {
444  instance = new HighlightedText();
445  }
446  return instance;
447  }
448 
449  @Override
450  // factory method to create an instance of this object
451  public TextMarkupLookup createInstance(long objectId, String keywordHitQuery, boolean isRegex, String originalQuery) {
452  return new HighlightedText(objectId, keywordHitQuery, isRegex, originalQuery);
453  }
454 }

Copyright © 2012-2015 Basis Technology. Generated on: Wed Apr 6 2016
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.