19 package org.sleuthkit.autopsy.keywordsearch;
21 import com.google.common.collect.Iterators;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.Optional;
28 import java.util.TreeMap;
29 import java.util.logging.Level;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import javax.annotation.concurrent.GuardedBy;
33 import org.apache.commons.lang3.StringUtils;
34 import org.apache.solr.client.solrj.SolrQuery;
35 import org.apache.solr.client.solrj.SolrRequest.METHOD;
36 import org.apache.solr.client.solrj.response.QueryResponse;
37 import org.openide.util.NbBundle;
53 class AccountsText
implements ExtractedText {
// Regular expression for a credit card number as it may appear in text: an optional '%',
// an optional 'B', 12 to 19 digits each optionally followed by spaces or dashes,
// and an optional trailing '^'.
58 private static final String CCN_REGEX =
"(%?)(B?)([0-9][ \\-]*?){12,19}(\\^?)";
// Opening HTML tag that marks a highlighted hit.
60 private static final String HIGHLIGHT_PRE =
"<span style='background:yellow'>";
// Prefix shared by every anchor name this class generates: the fully qualified class name plus '_'.
61 private static final String ANCHOR_NAME_PREFIX = AccountsText.class.getName() +
"_";
// Text placed before the anchor counter when building the regex replacement for an anchor.
63 private static final String INSERT_PREFIX =
"<a name='" + ANCHOR_NAME_PREFIX;
// Text placed after the anchor counter; "$0" is the regex replacement reference to the
// whole match, so the matched highlight tag is re-emitted right after the anchor.
64 private static final String INSERT_POSTFIX =
"'></a>$0";
// Compiled once; matches each occurrence of the highlight opening tag.
65 private static final Pattern ANCHOR_DETECTION_PATTERN = Pattern.compile(HIGHLIGHT_PRE);
// Attribute type handles used to read values from the artifacts.
67 private static final BlackboardAttribute.Type TSK_KEYWORD_SEARCH_DOCUMENT_ID =
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
68 private static final BlackboardAttribute.Type TSK_CARD_NUMBER =
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER);
69 private static final BlackboardAttribute.Type TSK_KEYWORD =
new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD);
// String form of the Server.Schema.CONTENT_STR schema entry.
71 private static final String FIELD =
Server.
Schema.CONTENT_STR.toString();
// Object id passed to the constructor; compared against artifact and hit object ids.
75 private final long solrObjectId;
// Artifacts whose account numbers this instance works with.
76 private final Collection<? extends BlackboardArtifact> artifacts;
// Account-number strings collected from the artifacts' attributes.
77 private final Set<String> accountNumbers =
new HashSet<>();
// Display title; singular or plural depending on how many artifacts were supplied.
78 private final String title;
// True once page information has been loaded.
81 private boolean isPageInfoLoaded =
false;
82 private int numberPagesForFile = 0;
// Key of the page currently selected; boxed so it can be used directly as a map key.
83 private Integer currentPage = 0;
// Page (chunk) id -> number of hits on that page. A TreeMap, so keys iterate in ascending order.
88 private final TreeMap<Integer, Integer> numberOfHitsPerPage =
new TreeMap<>();
// Live view of the keys of numberOfHitsPerPage: the ordered set of pages that have entries.
94 private final Set<Integer> pages = numberOfHitsPerPage.keySet();
// Page (chunk) id -> index of the hit currently selected on that page.
99 private final HashMap<Integer, Integer> currentHitPerPage =
new HashMap<>();
/**
 * Convenience constructor for a single artifact; wraps it in a list and
 * delegates to the collection-based constructor.
 *
 * @param objectID id of the object the artifact belongs to
 * @param artifact the single artifact to work with
 */
101 AccountsText(
long objectID, BlackboardArtifact artifact) {
102 this(objectID, Arrays.asList(artifact));
// Bundle messages for the singular and plural titles (the annotation opener is not visible in this excerpt).
106 "AccountsText.creditCardNumber=Credit Card Number",
107 "AccountsText.creditCardNumbers=Credit Card Numbers"})
/*
 * Stores the object id and the artifacts, and picks the title:
 * the singular message when exactly one artifact is given, the plural otherwise.
 */
108 AccountsText(
long objectID, Collection<? extends BlackboardArtifact> artifacts) {
109 this.solrObjectId = objectID;
110 this.artifacts = artifacts;
111 title = artifacts.size() == 1
112 ? Bundle.AccountsText_creditCardNumber()
113 : Bundle.AccountsText_creditCardNumbers();
117 return this.solrObjectId;
/**
 * @return the value of the numberPagesForFile field
 */
121 public int getNumberPages() {
122 return this.numberPagesForFile;
/**
 * @return the key of the currently selected page (the boxed field is auto-unboxed)
 */
126 public int getCurrentPage() {
127 return this.currentPage;
/**
 * @return true when the current page's position within pages is before the last position
 */
131 public boolean hasNextPage() {
132 return getIndexOfCurrentPage() < pages.size() - 1;
/**
 * @return true when the current page's position within pages is after the first position
 */
137 public boolean hasPreviousPage() {
138 return getIndexOfCurrentPage() > 0;
/**
 * Moves currentPage to the key that follows it in pages.
 * NOTE(review): the condition choosing between advancing and throwing is not
 * visible in this excerpt; previousPage() guards with hasPreviousPage(), so a
 * hasNextPage() guard is presumably used here - confirm.
 *
 * @throws IllegalStateException with the "No next page." bundle message
 */
142 @NbBundle.Messages(
"AccountsText.nextPage.exception.msg=No next page.")
143 public int nextPage() {
// Look up the page key one position after the current one.
145 currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() + 1);
148 throw new IllegalStateException(Bundle.AccountsText_nextPage_exception_msg());
/**
 * Moves currentPage to the key that precedes it in pages when hasPreviousPage() is true.
 * The return statement and the branch structure around the throw are not visible in this excerpt.
 *
 * @throws IllegalStateException with the "No previous page." bundle message
 */
153 @NbBundle.Messages(
"AccountsText.previousPage.exception.msg=No previous page.")
154 public int previousPage() {
155 if (hasPreviousPage()) {
// Look up the page key one position before the current one.
156 currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() - 1);
159 throw new IllegalStateException(Bundle.AccountsText_previousPage_exception_msg());
/**
 * Finds the position of currentPage within pages by iterating from the start.
 *
 * @return the zero-based position of the first key equal to currentPage
 */
163 private int getIndexOfCurrentPage() {
164 return Iterators.indexOf(pages.iterator(), this.currentPage::equals);
/**
 * When the current page has an entry in currentHitPerPage, reports whether the
 * selected hit index is below the number of hits recorded for that page.
 * The result for a page without an entry is not visible in this excerpt.
 */
168 public boolean hasNextItem() {
169 if (this.currentHitPerPage.containsKey(currentPage)) {
170 return this.currentHitPerPage.get(currentPage) < this.numberOfHitsPerPage.get(currentPage);
/**
 * When the current page has an entry in currentHitPerPage, reports whether the
 * selected hit index is greater than 1.
 * The result for a page without an entry is not visible in this excerpt.
 */
177 public boolean hasPreviousItem() {
178 if (this.currentHitPerPage.containsKey(currentPage)) {
179 return this.currentHitPerPage.get(currentPage) > 1;
/**
 * Increments the selected hit index for the current page and returns the new index.
 * NOTE(review): the condition choosing between incrementing and throwing is not
 * visible in this excerpt; previousItem() guards with hasPreviousItem(), so a
 * hasNextItem() guard is presumably used here - confirm.
 *
 * @throws IllegalStateException with the "No next item." bundle message
 */
186 @NbBundle.Messages(
"AccountsText.nextItem.exception.msg=No next item.")
187 public int nextItem() {
// merge() adds 1 to the stored index (or stores 1 if there is none) and returns the result.
189 return currentHitPerPage.merge(currentPage, 1, Integer::sum);
191 throw new IllegalStateException(Bundle.AccountsText_nextItem_exception_msg());
/**
 * Decrements the selected hit index for the current page when hasPreviousItem()
 * is true and returns the new index.
 * The branch structure around the throw is not visible in this excerpt.
 *
 * @throws IllegalStateException with the "No previous item." bundle message
 */
196 @NbBundle.Messages(
"AccountsText.previousItem.exception.msg=No previous item.")
197 public int previousItem() {
198 if (hasPreviousItem()) {
// merge() adds -1 to the stored index and returns the result.
199 return currentHitPerPage.merge(currentPage, -1, Integer::sum);
201 throw new IllegalStateException(Bundle.AccountsText_previousItem_exception_msg());
/**
 * @return the selected hit index for the current page, or 0 when the page has no entry
 */
206 public int currentItem() {
207 return currentHitPerPage.getOrDefault(currentPage, 0);
// Page-information loading logic; the enclosing method's signature is not visible in this excerpt.
// Presumably an early exit when the information is already loaded (the branch body is not shown).
215 if (isPageInfoLoaded) {
// Tracks whether a query is needed to discover the pages; where it is set to true is not visible here.
221 boolean needsQuery =
false;
223 for (BlackboardArtifact artifact : artifacts) {
// Every artifact must belong to the object this instance was created for.
224 if (solrObjectId != artifact.getObjectID()) {
225 throw new IllegalStateException(
"not all artifacts are from the same object!");
// Collect both the TSK_KEYWORD and the TSK_CARD_NUMBER values as account numbers.
// NOTE(review): the getAttribute() results are dereferenced without a null check -
// confirm both attributes are always present on these artifacts.
229 BlackboardAttribute attribute = artifact.getAttribute(TSK_KEYWORD);
230 this.accountNumbers.add(attribute.getValueString());
231 attribute = artifact.getAttribute(TSK_CARD_NUMBER);
232 this.accountNumbers.add(attribute.getValueString());
// Derive the chunk id from the TSK_KEYWORD_SEARCH_DOCUMENT_ID attribute when it is present
// (some intermediate mapping steps are not visible in this excerpt).
235 Optional<Integer> chunkID =
236 Optional.ofNullable(artifact.getAttribute(TSK_KEYWORD_SEARCH_DOCUMENT_ID))
237 .map(BlackboardAttribute::getValueString)
240 .map(Integer::valueOf);
// A known chunk id is registered as a page with zero hits and no hit selected yet.
241 if (chunkID.isPresent()) {
242 numberOfHitsPerPage.put(chunkID.get(), 0);
243 currentHitPerPage.put(chunkID.get(), 0);
// Build a keyword query restricted to this object's chunks (the creation of queryKeyword is not visible here).
254 KeywordSearchQuery chunksQuery = KeywordSearchUtil.getQueryForKeyword(queryKeyword,
new KeywordList(Arrays.asList(queryKeyword)));
255 chunksQuery.addFilter(
new KeywordQueryFilter(KeywordQueryFilter.FilterType.CHUNK,
this.solrObjectId));
// Register pages from the query results.
257 loadPageInfoFromHits(chunksQuery.performQuery());
// Start on the lowest registered page key, or 1 when no page was registered.
260 this.currentPage = pages.stream().findFirst().orElse(1);
262 isPageInfoLoaded =
true;
/**
 * Registers a page for every hit chunk that belongs to this object and whose hit
 * text contains one of the collected account numbers.
 *
 * @param hits query results to scan, keyword by keyword
 */
270 synchronized private void loadPageInfoFromHits(QueryResults hits) {
272 for (
Keyword k : hits.getKeywords()) {
273 for (KeywordHit hit : hits.getResults(k)) {
274 int chunkID = hit.getChunkId();
// Skip hits with chunk id 0 and hits that belong to a different object.
275 if (chunkID != 0 && this.solrObjectId == hit.getSolrObjectId()) {
276 String hitString = hit.getHit();
// Only hits containing one of the known account numbers count.
277 if (accountNumbers.stream().anyMatch(hitString::contains)) {
// put() replaces any value already stored for this chunk with 0.
278 numberOfHitsPerPage.put(chunkID, 0);
279 currentHitPerPage.put(chunkID, 0);
/**
 * Queries Solr for a document by id, highlights it, inserts anchors, and returns
 * the result wrapped in html/pre tags. Any exception is logged at SEVERE level and
 * the bundle's error message is returned instead.
 * Several statements (including the definitions of contentIdStr, solrServer, DEBUG
 * and logger, the try opener, and the remaining highlighting arguments) are not
 * visible in this excerpt.
 *
 * @return the highlighted HTML text, or an error message string on failure
 */
287 public String getText() {
291 SolrQuery q =
new SolrQuery();
292 q.setShowDebugInfo(DEBUG);
// Select the document by its id field.
295 final String filterQuery =
Server.
Schema.ID.toString() +
":" + contentIdStr;
297 q.setQuery(filterQuery);
// The query is sent using the POST method.
300 QueryResponse queryResponse = solrServer.
query(q, METHOD.POST);
302 String highlightedText =
303 HighlightedText.attemptManualHighlighting(
304 queryResponse.getResults(),
// Add a numbered anchor in front of each highlighted hit.
309 highlightedText = insertAnchors(highlightedText);
312 return "<html><pre>" + highlightedText +
"</pre></html>";
313 }
catch (Exception ex) {
314 logger.log(Level.SEVERE,
"Error getting highlighted text for Solr doc id " +
this.solrObjectId +
", chunkID " +
this.currentPage, ex);
315 return Bundle.ExtractedText_errorMessage_errorGettingText();
/**
 * Rewrites the given content so that an anchor built from INSERT_PREFIX, a counter,
 * and INSERT_POSTFIX is inserted at matches of ANCHOR_DETECTION_PATTERN, and records
 * the counter as the number of hits for the current page.
 * The loop that finds matches and maintains count, and the action taken when no hit
 * is selected yet, are not visible in this excerpt.
 *
 * @param searchableContent highlighted text to add anchors to
 * @return the contents of the output buffer as a string
 */
327 private String insertAnchors(String searchableContent) {
332 Matcher m = ANCHOR_DETECTION_PATTERN.matcher(searchableContent);
333 StringBuffer sb =
new StringBuffer(searchableContent.length());
// The replacement ends with "$0", so the matched text is kept after the new anchor.
337 m.appendReplacement(sb, INSERT_PREFIX + count + INSERT_POSTFIX);
// Remember how many hits this page has.
341 this.numberOfHitsPerPage.put(this.currentPage, count);
// True when no hit is selected yet on this page and at least one hit exists.
342 if (this.currentItem() == 0 && this.hasNextItem()) {
345 return sb.toString();
349 public String toString() {
354 public boolean isSearchable() {
/**
 * @return ANCHOR_NAME_PREFIX, the prefix used for the names of the inserted anchors
 */
359 public String getAnchorPrefix() {
360 return ANCHOR_NAME_PREFIX;
/**
 * @return the number of hits recorded for the current page, or 0 when the page has no entry
 */
364 public int getNumberHits() {
365 return numberOfHitsPerPage.getOrDefault(currentPage, 0);
static Version.Type getBuildType()
static synchronized Server getServer()
static final String CHUNK_ID_SEPARATOR
QueryResponse query(SolrQuery sq)
synchronized static Logger getLogger(String name)
int queryNumFileChunks(long fileID)