Autopsy  4.20.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
AccountsText.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2018 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import com.google.common.collect.Iterators;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.Optional;
27 import java.util.Set;
28 import java.util.TreeMap;
29 import java.util.logging.Level;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import javax.annotation.concurrent.GuardedBy;
33 import org.apache.commons.lang3.StringUtils;
34 import org.apache.solr.client.solrj.SolrQuery;
35 import org.apache.solr.client.solrj.SolrRequest.METHOD;
36 import org.apache.solr.client.solrj.response.QueryResponse;
37 import org.openide.util.NbBundle;
40 import org.sleuthkit.datamodel.BlackboardArtifact;
41 import org.sleuthkit.datamodel.BlackboardAttribute;
42 import org.sleuthkit.datamodel.TskCoreException;
43 
53 class AccountsText implements ExtractedText {
54 
55  private static final Logger logger = Logger.getLogger(AccountsText.class.getName());
56  private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
57 
58  private static final String CCN_REGEX = "(%?)(B?)([0-9][ \\-]*?){12,19}(\\^?)";
59 
60  private static final String HIGHLIGHT_PRE = "<span style='background:yellow'>"; //NON-NLS
61  private static final String ANCHOR_NAME_PREFIX = AccountsText.class.getName() + "_";
62 
63  private static final String INSERT_PREFIX = "<a name='" + ANCHOR_NAME_PREFIX; //NON-NLS
64  private static final String INSERT_POSTFIX = "'></a>$0"; //$0 will insert current regex match //NON-NLS
65  private static final Pattern ANCHOR_DETECTION_PATTERN = Pattern.compile(HIGHLIGHT_PRE);
66 
67  private static final BlackboardAttribute.Type TSK_KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
68  private static final BlackboardAttribute.Type TSK_CARD_NUMBER = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER);
69  private static final BlackboardAttribute.Type TSK_KEYWORD = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD);
70 
71  private static final String FIELD = Server.Schema.CONTENT_STR.toString();
72 
73  private final Server solrServer = KeywordSearch.getServer();
74 
75  private final long solrObjectId;
76  private final Collection<? extends BlackboardArtifact> artifacts;
77  private final Set<String> accountNumbers = new HashSet<>();
78  private final String title;
79 
80  @GuardedBy("this")
81  private boolean isPageInfoLoaded = false;
82  private int numberPagesForFile = 0;
83  private Integer currentPage = 0;
84 
88  private final TreeMap<Integer, Integer> numberOfHitsPerPage = new TreeMap<>();
89 
94  private final Set<Integer> pages = numberOfHitsPerPage.keySet();
95 
99  private final HashMap<Integer, Integer> currentHitPerPage = new HashMap<>();
100 
101  AccountsText(long objectID, BlackboardArtifact artifact) {
102  this(objectID, Arrays.asList(artifact));
103  }
104 
105  @NbBundle.Messages({
106  "AccountsText.creditCardNumber=Credit Card Number",
107  "AccountsText.creditCardNumbers=Credit Card Numbers"})
108  AccountsText(long objectID, Collection<? extends BlackboardArtifact> artifacts) {
109  this.solrObjectId = objectID;
110  this.artifacts = artifacts;
111  title = artifacts.size() == 1
112  ? Bundle.AccountsText_creditCardNumber()
113  : Bundle.AccountsText_creditCardNumbers();
114  }
115 
116  long getObjectId() {
117  return this.solrObjectId;
118  }
119 
120  @Override
121  public int getNumberPages() {
122  return this.numberPagesForFile;
123  }
124 
125  @Override
126  public int getCurrentPage() {
127  return this.currentPage;
128  }
129 
130  @Override
131  public boolean hasNextPage() {
132  return getIndexOfCurrentPage() < pages.size() - 1;
133 
134  }
135 
136  @Override
137  public boolean hasPreviousPage() {
138  return getIndexOfCurrentPage() > 0;
139  }
140 
141  @Override
142  @NbBundle.Messages("AccountsText.nextPage.exception.msg=No next page.")
143  public int nextPage() {
144  if (hasNextPage()) {
145  currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() + 1);
146  return currentPage;
147  } else {
148  throw new IllegalStateException(Bundle.AccountsText_nextPage_exception_msg());
149  }
150  }
151 
152  @Override
153  @NbBundle.Messages("AccountsText.previousPage.exception.msg=No previous page.")
154  public int previousPage() {
155  if (hasPreviousPage()) {
156  currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() - 1);
157  return currentPage;
158  } else {
159  throw new IllegalStateException(Bundle.AccountsText_previousPage_exception_msg());
160  }
161  }
162 
163  private int getIndexOfCurrentPage() {
164  return Iterators.indexOf(pages.iterator(), this.currentPage::equals);
165  }
166 
167  @Override
168  public boolean hasNextItem() {
169  if (this.currentHitPerPage.containsKey(currentPage)) {
170  return this.currentHitPerPage.get(currentPage) < this.numberOfHitsPerPage.get(currentPage);
171  } else {
172  return false;
173  }
174  }
175 
176  @Override
177  public boolean hasPreviousItem() {
178  if (this.currentHitPerPage.containsKey(currentPage)) {
179  return this.currentHitPerPage.get(currentPage) > 1;
180  } else {
181  return false;
182  }
183  }
184 
185  @Override
186  @NbBundle.Messages("AccountsText.nextItem.exception.msg=No next item.")
187  public int nextItem() {
188  if (hasNextItem()) {
189  return currentHitPerPage.merge(currentPage, 1, Integer::sum);
190  } else {
191  throw new IllegalStateException(Bundle.AccountsText_nextItem_exception_msg());
192  }
193  }
194 
195  @Override
196  @NbBundle.Messages("AccountsText.previousItem.exception.msg=No previous item.")
197  public int previousItem() {
198  if (hasPreviousItem()) {
199  return currentHitPerPage.merge(currentPage, -1, Integer::sum);
200  } else {
201  throw new IllegalStateException(Bundle.AccountsText_previousItem_exception_msg());
202  }
203  }
204 
205  @Override
206  public int currentItem() {
207  return currentHitPerPage.getOrDefault(currentPage, 0);
208  }
209 
214  synchronized private void loadPageInfo() throws IllegalStateException, TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
215  if (isPageInfoLoaded) {
216  return;
217  }
218 
219  this.numberPagesForFile = solrServer.queryNumFileChunks(this.solrObjectId);
220 
221  boolean needsQuery = false;
222 
223  for (BlackboardArtifact artifact : artifacts) {
224  if (solrObjectId != artifact.getObjectID()) {
225  throw new IllegalStateException("not all artifacts are from the same object!");
226  }
227 
228  //add both the canonical form and the form in the text as accountNumbers to highlight.
229  BlackboardAttribute attribute = artifact.getAttribute(TSK_KEYWORD);
230  this.accountNumbers.add(attribute.getValueString());
231  attribute = artifact.getAttribute(TSK_CARD_NUMBER);
232  this.accountNumbers.add(attribute.getValueString());
233 
234  //if the chunk id is present just use that.
235  Optional<Integer> chunkID =
236  Optional.ofNullable(artifact.getAttribute(TSK_KEYWORD_SEARCH_DOCUMENT_ID))
237  .map(BlackboardAttribute::getValueString)
238  .map(String::trim)
239  .map(kwsdocID -> StringUtils.substringAfterLast(kwsdocID, Server.CHUNK_ID_SEPARATOR))
240  .map(Integer::valueOf);
241  if (chunkID.isPresent()) {
242  numberOfHitsPerPage.put(chunkID.get(), 0);
243  currentHitPerPage.put(chunkID.get(), 0);
244  } else {
245  //otherwise we need to do a query to figure out the paging.
246  needsQuery = true;
247  // we can't break the for loop here because we need to accumulate all the accountNumbers
248  }
249  }
250 
251  if (needsQuery) {
252  // Run a query to figure out which chunks for the current object have hits.
253  Keyword queryKeyword = new Keyword(CCN_REGEX, false, false);
254  KeywordSearchQuery chunksQuery = KeywordSearchUtil.getQueryForKeyword(queryKeyword, new KeywordList(Arrays.asList(queryKeyword)));
255  chunksQuery.addFilter(new KeywordQueryFilter(KeywordQueryFilter.FilterType.CHUNK, this.solrObjectId));
256  //load the chunks/pages from the result of the query.
257  loadPageInfoFromHits(chunksQuery.performQuery());
258  }
259 
260  this.currentPage = pages.stream().findFirst().orElse(1);
261 
262  isPageInfoLoaded = true;
263  }
264 
270  synchronized private void loadPageInfoFromHits(QueryResults hits) {
271  //organize the hits by page, filter as needed
272  for (Keyword k : hits.getKeywords()) {
273  for (KeywordHit hit : hits.getResults(k)) {
274  int chunkID = hit.getChunkId();
275  if (chunkID != 0 && this.solrObjectId == hit.getSolrObjectId()) {
276  String hitString = hit.getHit();
277  if (accountNumbers.stream().anyMatch(hitString::contains)) {
278  numberOfHitsPerPage.put(chunkID, 0); //unknown number of matches in the page
279  currentHitPerPage.put(chunkID, 0); //set current hit to 0th
280  }
281  }
282  }
283  }
284  }
285 
286  @Override
287  public String getText() {
288  try {
289  loadPageInfo(); //inits once
290 
291  SolrQuery q = new SolrQuery();
292  q.setShowDebugInfo(DEBUG); //debug
293 
294  String contentIdStr = this.solrObjectId + Server.CHUNK_ID_SEPARATOR + this.currentPage;
295  final String filterQuery = Server.Schema.ID.toString() + ":" + contentIdStr;
296  //set the documentID filter
297  q.setQuery(filterQuery);
298  q.setFields(FIELD);
299 
300  QueryResponse queryResponse = solrServer.query(q, METHOD.POST);
301 
302  String highlightedText =
303  HighlightedText.attemptManualHighlighting(
304  queryResponse.getResults(),
305  Server.Schema.CONTENT_STR.toString(),
306  accountNumbers
307  ).trim();
308 
309  highlightedText = insertAnchors(highlightedText);
310 
311  // extracted content (minus highlight tags) is HTML-escaped
312  return "<html><pre>" + highlightedText + "</pre></html>"; //NON-NLS
313  } catch (Exception ex) {
314  logger.log(Level.SEVERE, "Error getting highlighted text for Solr doc id " + this.solrObjectId + ", chunkID " + this.currentPage, ex); //NON-NLS
315  return Bundle.ExtractedText_errorMessage_errorGettingText();
316  }
317  }
318 
327  private String insertAnchors(String searchableContent) {
328  /*
329  * use regex matcher to iterate over occurences of HIGHLIGHT_PRE, and
330  * prepend them with an anchor tag.
331  */
332  Matcher m = ANCHOR_DETECTION_PATTERN.matcher(searchableContent);
333  StringBuffer sb = new StringBuffer(searchableContent.length());
334  int count = 0;
335  while (m.find()) {
336  count++;
337  m.appendReplacement(sb, INSERT_PREFIX + count + INSERT_POSTFIX);
338  }
339  m.appendTail(sb);
340  //store total hits for this page, now that we know it
341  this.numberOfHitsPerPage.put(this.currentPage, count);
342  if (this.currentItem() == 0 && this.hasNextItem()) {
343  this.nextItem();
344  }
345  return sb.toString();
346  }
347 
348  @Override
349  public String toString() {
350  return title;
351  }
352 
353  @Override
354  public boolean isSearchable() {
355  return true;
356  }
357 
358  @Override
359  public String getAnchorPrefix() {
360  return ANCHOR_NAME_PREFIX;
361  }
362 
363  @Override
364  public int getNumberHits() {
365  return numberOfHitsPerPage.getOrDefault(currentPage, 0);
366  }
367 }
static Version.Type getBuildType()
Definition: Version.java:87
QueryResponse query(SolrQuery sq)
Definition: Server.java:1709
synchronized static Logger getLogger(String name)
Definition: Logger.java:124

Copyright © 2012-2022 Basis Technology. Generated on: Tue Aug 1 2023
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.