19 package org.sleuthkit.autopsy.recentactivity;
22 import java.io.IOException;
23 import java.io.UnsupportedEncodingException;
24 import java.net.URLDecoder;
25 import java.util.ArrayList;
26 import java.util.Collection;
27 import java.util.List;
28 import java.util.logging.Level;
29 import javax.xml.parsers.DocumentBuilder;
30 import javax.xml.parsers.DocumentBuilderFactory;
31 import javax.xml.parsers.ParserConfigurationException;
32 import org.openide.util.NbBundle;
47 import org.w3c.dom.Document;
48 import org.w3c.dom.NamedNodeMap;
49 import org.w3c.dom.NodeList;
50 import org.xml.sax.SAXException;
/**
 * Extract module that inspects the URL attributes of existing web bookmark
 * and web history artifacts and, when a configured search engine and a
 * non-empty query are recognised, adds a TSK_WEB_SEARCH_QUERY artifact
 * (see findSearchQueries()).
 */
61 class SearchEngineURLQueryAnalyzer
extends Extract {
63 private static final Logger logger = Logger.getLogger(SearchEngineURLQueryAnalyzer.class.getName());
// Name of the XML mappings resource; init() extracts it to the user config directory.
64 private static final String XMLFILE =
"SEUQAMappings.xml";
// Schema file name handed to XMLUtil.xmlIsValid when the mappings document is checked.
65 private static final String XSDFILE =
"SearchEngineSchema.xsd";
// Engine definitions built from the XML mappings. Static, so one array is shared by every instance.
66 private static SearchEngineURLQueryAnalyzer.SearchEngine[] engines;
// Both fields below are assigned in process() before findSearchQueries() runs.
68 private Content dataSource;
69 private IngestJobContext context;
// NOTE(review): the module name is looked up through ExtractIE.class, whereas the other
// bundle lookups in this class use this.getClass() - confirm this is intentional.
71 SearchEngineURLQueryAnalyzer() {
72 moduleName = NbBundle.getMessage(ExtractIE.class,
"SearchEngineURLQueryAnalyzer.moduleName.text");
// Plain-text form of the key. Elsewhere in this class getKey() is tested with url.contains(...).
80 private final String
key;
// Pairs a plain-text key with its regular-expression form.
83 KeyPair (String key, String keyRegExp) {
// Accessor for the regular-expression form; its body is not visible in this excerpt -
// presumably it returns the keyRegExp given to the constructor (TODO confirm).
93 String getKeyRegExp() {
// Describes one search engine: a display name, the substring that identifies its
// domain, and the key pairs used to locate the query inside a URL.
105 SearchEngine(String engineName, String domainSubstring, List<KeyPair> keyPairs) {
// Accessor used when building the per-engine totals report (see getTotals()).
116 String getEngineName() {
// Accessor used by getSearchEngineFromUrl() for its contains(...) test.
120 String getDomainSubstring() {
// Accessor used by extractSearchEngineQuery() to iterate the key pairs.
132 List<KeyPair> getKeys() {
// Appends "[ key :: regexp ], " for the current key pair to the running text.
// The enclosing loop header is not visible in this excerpt.
140 split = split +
"[ " + kp.getKey() +
" :: " + kp.getKeyRegExp() +
" ]" +
", ";
// Formats the localized description from the engine name, domain substring, count and key list.
142 return NbBundle.getMessage(this.getClass(),
"SearchEngineURLQueryAnalyzer.toString",
143 engineName, domainSubstring, count, split);
// Fragment of the routine that parses the XML mappings into the static 'engines' array.
// The method header and the origin of 'path' are not visible in this excerpt.
151 File f =
new File(path);
// NOTE(review): "Load successful" is logged before the file has been parsed below.
152 logger.log(Level.INFO,
"Load successful");
// NOTE(review): the factory is used with its default settings (no feature flags are set
// between newInstance() and newDocumentBuilder()); consider disabling DTDs / external
// entities if the mappings file can be user-edited.
153 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
154 DocumentBuilder db = dbf.newDocumentBuilder();
155 xmlinput = db.parse(f);
// A schema validation failure is only logged as a warning here.
157 if (!
XMLUtil.
xmlIsValid(xmlinput, SearchEngineURLQueryAnalyzer.class, XSDFILE)) {
158 logger.log(Level.WARNING,
"Error loading Search Engines: could not validate against [" + XSDFILE +
"], results may not be accurate.");
// Each failure type is rethrown as IngestModuleException carrying only the localized
// message text; the original exception is not attached as a cause.
161 }
catch (IOException e) {
162 throw new IngestModuleException(
"Was not able to load SEUQAMappings.xml: " + e.getLocalizedMessage());
163 }
catch (ParserConfigurationException pce) {
164 throw new IngestModuleException(
"Unable to build XML parser: " + pce.getLocalizedMessage());
165 }
catch (SAXException sxe) {
166 throw new IngestModuleException(
"Unable to parse XML file: " + sxe.getLocalizedMessage());
// One array slot per <SearchEngine> element in the document.
169 NodeList nlist = xmlinput.getElementsByTagName(
"SearchEngine");
170 SearchEngineURLQueryAnalyzer.SearchEngine[] listEngines =
new SearchEngineURLQueryAnalyzer.SearchEngine[nlist.getLength()];
171 for (
int i = 0; i < nlist.getLength(); i++) {
172 NamedNodeMap nnm = nlist.item(i).getAttributes();
// Reads the "engine" and "domainSubstring" attributes; a missing attribute would make
// getNamedItem return null and the chained getNodeValue() call fail.
174 String EngineName = nnm.getNamedItem(
"engine").getNodeValue();
175 String EnginedomainSubstring = nnm.getNamedItem(
"domainSubstring").getNodeValue();
176 List<KeyPair> keys =
new ArrayList<>();
// NOTE(review): this document-wide lookup does not depend on the loop variable and
// appears to sit inside the engine loop - it could be hoisted (confirm placement).
179 NodeList listSplits = xmlinput.getElementsByTagName(
"splitToken");
180 for (
int k = 0; k < listSplits.getLength(); k++) {
// Keep only the splitToken elements whose parent element's "engine" attribute names the current engine.
181 if (listSplits.item(k).getParentNode().getAttributes().getNamedItem(
"engine").getNodeValue().equals(EngineName)) {
182 keys.add(
new KeyPair(listSplits.item(k).getAttributes().getNamedItem(
"plainToken").getNodeValue(), listSplits.item(k).getAttributes().getNamedItem(
"regexToken").getNodeValue()));
186 SearchEngineURLQueryAnalyzer.SearchEngine Se =
new SearchEngineURLQueryAnalyzer.SearchEngine(EngineName, EnginedomainSubstring, keys);
// Publishes the parsed definitions through the shared static field.
190 engines = listEngines;
/**
 * Looks for a configured search engine whose domain substring occurs in the
 * given text.
 *
 * @param domain text to test; the visible caller passes the full TSK_URL value
 * @return not visible in this excerpt - presumably the matching engine, with
 *         null meaning "no match" since the caller null-checks the result
 *         (TODO confirm)
 */
201 private static SearchEngineURLQueryAnalyzer.SearchEngine getSearchEngineFromUrl(String domain) {
// 'engines' is only assigned once the XML mappings have been parsed.
202 if (engines == null) {
205 for (SearchEngine engine : engines) {
// Plain substring test, not a host-name comparison.
206 if (domain.contains(engine.getDomainSubstring())) {
/**
 * Pulls the search query out of a URL using the key pairs of the given
 * engine, then URL-decodes it as UTF-8.
 *
 * @param eng engine whose key pairs are tried against the URL
 * @param url URL text to examine
 * @return not visible in this excerpt - presumably the decoded query; the
 *         caller treats an empty string as "no query" (TODO confirm)
 */
221 private String extractSearchEngineQuery(SearchEngineURLQueryAnalyzer.SearchEngine eng, String url) {
224 for (KeyPair kp : eng.getKeys()) {
// The plain key is used for the cheap containment test; the regexp form is what getValue() receives.
225 if (url.contains(kp.getKey())) {
226 x = getValue(url, kp.getKeyRegExp());
231 String decoded = URLDecoder.decode(x,
"UTF-8");
// A decoding failure is logged at FINE level rather than propagated.
233 }
catch (UnsupportedEncodingException uee) {
234 logger.log(Level.FINE,
"Error during URL decoding ", uee);
/**
 * Splits the URL on the given key and takes the text after the last
 * occurrence, cut at the first "&amp;" if one is present.
 *
 * @param url       URL text to split
 * @param regExpKey regular-expression form of the query key
 * @return not visible in this excerpt - presumably 'value' (TODO confirm)
 */
248 private String getValue(String url, String regExpKey) {
256 String v = regExpKey;
// Replaces the two-character sequence backslash + '?' with a bare '?'.
258 if (regExpKey.contains(
"\\?")) {
259 v = regExpKey.replace(
"\\?",
"?");
// NOTE(review): String.split interprets 'v' as a regular expression, and after the
// replacement above a '?' in it is an unescaped metacharacter - confirm this is intended.
261 String[] sp = url.split(v);
// At least two pieces means the key occurred; only the final piece is used.
262 if (sp.length >= 2) {
263 if (sp[sp.length - 1].contains(
"&")) {
// Drop any parameters that follow the query value.
264 value = sp[sp.length - 1].split(
"&")[0];
266 value = sp[sp.length - 1];
/**
 * Walks the web bookmark and web history artifacts of the current case,
 * collects URL, program name, domain and access time from each artifact's
 * attributes, and adds a TSK_WEB_SEARCH_QUERY artifact when an engine and a
 * non-empty query were found. Fires a ModuleDataEvent at the end.
 */
272 private void findSearchQueries() {
// NOTE(review): no increment of totalQueries is visible in this excerpt, although it is logged at the end - confirm.
273 int totalQueries = 0;
// Selects artifacts whose type is TSK_WEB_BOOKMARK or TSK_WEB_HISTORY via a raw WHERE clause
// built from the numeric type ids.
276 Collection<BlackboardArtifact> listArtifacts = currentCase.getSleuthkitCase().getMatchingArtifacts(
"WHERE (`artifact_type_id` = '" + ARTIFACT_TYPE.TSK_WEB_BOOKMARK.getTypeID()
277 +
"' OR `artifact_type_id` = '" + ARTIFACT_TYPE.TSK_WEB_HISTORY.getTypeID() +
"') ");
278 logger.log(Level.INFO,
"Processing {0} blackboard artifacts.", listArtifacts.size());
280 for (BlackboardArtifact artifact : listArtifacts) {
// Cancellation is polled once per artifact; the action taken is not visible in this excerpt.
281 if (context.dataSourceIngestIsCancelled()) {
287 String searchEngineDomain =
"";
// -1 stays in place when the artifact has no TSK_DATETIME_ACCESSED attribute.
289 long last_accessed = -1;
291 long fileId = artifact.getObjectID();
// Checks whether the artifact's file belongs to the data source being processed;
// how the flag is used is not visible in this excerpt.
292 boolean isFromSource = tskCase.isFileFromSource(dataSource, fileId);
298 AbstractFile file = tskCase.getAbstractFileById(fileId);
303 SearchEngineURLQueryAnalyzer.SearchEngine se = null;
// Fetches every attribute of this artifact with a second raw query.
305 Collection<BlackboardAttribute> listAttributes = currentCase.getSleuthkitCase().getMatchingAttributes(
"Where `artifact_id` = " + artifact.getArtifactID());
// Dispatch on attribute type: URL -> engine and query, PROG_NAME -> browser,
// DOMAIN -> searchEngineDomain, DATETIME_ACCESSED -> last_accessed.
307 for (BlackboardAttribute attribute : listAttributes) {
308 if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_URL.getTypeID()) {
309 final String urlString = attribute.getValueString();
310 se = getSearchEngineFromUrl(urlString);
314 query = extractSearchEngineQuery(se, attribute.getValueString());
315 if (query.equals(
""))
318 }
else if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PROG_NAME.getTypeID()) {
319 browser = attribute.getValueString();
320 }
else if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN.getTypeID()) {
321 searchEngineDomain = attribute.getValueString();
322 }
else if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED.getTypeID()) {
323 last_accessed = attribute.getValueLong();
// Only artifacts with both a recognised engine and a non-empty query produce output.
327 if (se != null && !query.equals(
"")) {
// The new artifact carries domain, query text, program name and access time, each
// tagged with the localized parent module name.
328 Collection<BlackboardAttribute> bbattributes =
new ArrayList<>();
329 bbattributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN.getTypeID(),
330 NbBundle.getMessage(this.getClass(),
331 "SearchEngineURLQueryAnalyzer.parentModuleName"), searchEngineDomain));
332 bbattributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_TEXT.getTypeID(),
333 NbBundle.getMessage(this.getClass(),
334 "SearchEngineURLQueryAnalyzer.parentModuleName"), query));
335 bbattributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PROG_NAME.getTypeID(),
336 NbBundle.getMessage(this.getClass(),
337 "SearchEngineURLQueryAnalyzer.parentModuleName"), browser));
338 bbattributes.add(
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED.getTypeID(),
339 NbBundle.getMessage(this.getClass(),
340 "SearchEngineURLQueryAnalyzer.parentModuleName"), last_accessed));
341 this.addArtifact(ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY, file, bbattributes);
// Database errors are logged at SEVERE level here.
346 }
catch (TskCoreException e) {
347 logger.log(Level.SEVERE,
"Encountered error retrieving artifacts for search engine queries", e);
// A cancelled job is noted in the log.
349 if (context.dataSourceIngestIsCancelled()) {
350 logger.info(
"Operation terminated by user.");
// Announces that TSK_WEB_SEARCH_QUERY data may have been posted.
352 IngestServices.getInstance().fireModuleDataEvent(
new ModuleDataEvent(
353 NbBundle.getMessage(
this.getClass(),
"SearchEngineURLQueryAnalyzer.parentModuleName.noSpace"),
354 BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY));
355 logger.log(Level.INFO,
"Extracted {0} queries from the blackboard", totalQueries);
/**
 * Builds a report with one "name : total" line per configured engine.
 *
 * @return not visible in this excerpt - presumably the accumulated text,
 *         which process() writes to the log (TODO confirm)
 */
359 private String getTotals() {
// Guard for the case where the engine definitions were never loaded.
361 if (engines == null) {
364 for (SearchEngineURLQueryAnalyzer.SearchEngine se : engines) {
// Repeated string concatenation: one line per engine, terminated by a newline.
365 total += se.getEngineName() +
" : " + se.getTotal() +
"\n";
/**
 * Runs the analysis for one data source: stores the arguments in the
 * instance fields, scans for search queries, then logs per-engine totals.
 *
 * @param dataSource data source being ingested; kept for the file-from-source check
 * @param context    ingest job context; polled for cancellation during the scan
 */
371 public void process(Content dataSource, IngestJobContext context) {
372 this.dataSource = dataSource;
373 this.context = context;
374 this.findSearchQueries();
375 logger.log(Level.INFO,
"Search Engine stats: \n{0}", getTotals());
/**
 * Prepares the module by extracting the XML mappings resource to the user
 * configuration directory.
 *
 * @throws IngestModuleException if the resource cannot be extracted
 */
379 void init() throws IngestModuleException {
// The trailing 'true' flag is presumably "overwrite existing file" - TODO confirm against PlatformUtil.
381 PlatformUtil.extractResourceToUserConfigDir(SearchEngineURLQueryAnalyzer.class, XMLFILE,
true);
// The IOException is logged with its stack trace, but the rethrown exception
// carries only the localized message, not the cause.
382 }
catch (IOException e) {
383 String message = NbBundle
384 .getMessage(this.getClass(),
"SearchEngineURLQueryAnalyzer.init.exception.msg", XMLFILE);
385 logger.log(Level.SEVERE, message, e);
386 throw new IngestModuleException(message);
/**
 * Completion hook; the visible code only writes an informational log entry.
 */
394 public void complete() {
395 logger.info(
"Search Engine URL Query Analyzer has completed.");
final String domainSubstring
final List< KeyPair > keyPairs
static< T > boolean xmlIsValid(DOMSource xmlfile, Class< T > clazz, String schemaFile)