package org.sleuthkit.autopsy.recentactivity;

import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.logging.Level;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.coreutils.PlatformUtil;
import org.sleuthkit.autopsy.coreutils.XMLUtil;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.autopsy.ingest.IngestModule.IngestModuleException;
import org.sleuthkit.autopsy.ingest.IngestServices;
import org.sleuthkit.autopsy.ingest.ModuleDataEvent;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.TskCoreException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
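
/*
 * Overview: init() copies the bundled SEUQAMappings.xml into the user
 * configuration directory and the mappings are parsed into SearchEngine
 * entries (see the config-file loader below). process() then walks the
 * web-history and web-bookmark artifacts already on the blackboard, matches
 * each URL against a known engine, extracts the search term from the URL,
 * and posts it as a TSK_WEB_SEARCH_QUERY artifact.
 */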
 
@NbBundle.Messages({
    "cannotBuildXmlParser=Unable to build XML parser: ",
    "cannotLoadSEUQA=Unable to load Search Engine URL Query Analyzer settings file, SEUQAMappings.xml: ",
    "cannotParseXml=Unable to parse XML file: ",
    "# {0} - file name",
    "SearchEngineURLQueryAnalyzer.init.exception.msg=Unable to find {0}."
})
class SearchEngineURLQueryAnalyzer extends Extract {
 
    private static final Logger logger = Logger.getLogger(SearchEngineURLQueryAnalyzer.class.getName());
    private static final String XMLFILE = "SEUQAMappings.xml";
    private static final String XSDFILE = "SearchEngineSchema.xsd";
    private static SearchEngineURLQueryAnalyzer.SearchEngine[] engines;

    private Content dataSource;
    private IngestJobContext context;
 
    SearchEngineURLQueryAnalyzer() {
        moduleName = NbBundle.getMessage(ExtractIE.class, "SearchEngineURLQueryAnalyzer.moduleName.text");
    }
 
    // Plain-text and regular-expression forms of a search key token.
    private static class KeyPair {

        private final String key;
        private final String keyRegExp;

        KeyPair(String key, String keyRegExp) {
            this.key = key;
            this.keyRegExp = keyRegExp;
        }

        String getKey() {
            return key;
        }

        String getKeyRegExp() {
            return keyRegExp;
        }
    }
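
    /*
     * A SearchEngine pairs a human-readable engine name with the substring of
     * the domain that identifies it and the KeyPairs used to pull the query
     * term out of its URLs; 'count' tracks how many queries were extracted
     * for that engine.
     */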
 
    private static class SearchEngine {

        private final String engineName;
        private final String domainSubstring;
        private final List<KeyPair> keyPairs;
        private int count;

        SearchEngine(String engineName, String domainSubstring, List<KeyPair> keyPairs) {
            this.engineName = engineName;
            this.domainSubstring = domainSubstring;
            this.keyPairs = keyPairs;
        }

        void increment() {
            ++count;
        }

        String getEngineName() {
            return engineName;
        }

        String getDomainSubstring() {
            return domainSubstring;
        }

        int getTotal() {
            return count;
        }

        List<KeyPair> getKeys() {
            return this.keyPairs;
        }

        @Override
        public String toString() {
            String split = " ";
            for (KeyPair kp : keyPairs) {
                split = split + "[ " + kp.getKey() + " :: " + kp.getKeyRegExp() + " ]" + ", ";
            }
            return NbBundle.getMessage(this.getClass(), "SearchEngineURLQueryAnalyzer.toString",
                    engineName, domainSubstring, count, split);
        }
    }
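
    /*
     * The loader below reads SEUQAMappings.xml. The element and attribute
     * names ("SearchEngine", "engine", "domainSubstring", "splitToken",
     * "plainToken", "regexToken") come from the parsing code; the sample
     * values here are only illustrative:
     *
     *   <SearchEngine engine="Google" domainSubstring="google.">
     *       <splitToken plainToken="?q=" regexToken="\?q="/>
     *   </SearchEngine>
     */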
 
    private void loadConfigFile() throws IngestModuleException {
        Document xmlinput;
        try {
            // The mappings file was extracted to the user config directory by init().
            String path = PlatformUtil.getUserConfigDirectory() + File.separator + XMLFILE;
            File f = new File(path);
            logger.log(Level.INFO, "Load successful");
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();
            xmlinput = db.parse(f);

            // Validate against the packaged schema; a failure is logged but not fatal.
            if (!XMLUtil.xmlIsValid(xmlinput, SearchEngineURLQueryAnalyzer.class, XSDFILE)) {
                logger.log(Level.WARNING, "Error loading Search Engines: could not validate against [" + XSDFILE + "], results may not be accurate.");
            }
        } catch (IOException e) {
            throw new IngestModuleException(Bundle.cannotLoadSEUQA() + e.getLocalizedMessage(), e);
        } catch (ParserConfigurationException pce) {
            throw new IngestModuleException(Bundle.cannotBuildXmlParser() + pce.getLocalizedMessage(), pce);
        } catch (SAXException sxe) {
            throw new IngestModuleException(Bundle.cannotParseXml() + sxe.getLocalizedMessage(), sxe);
        }

        // Build one SearchEngine per <SearchEngine> element, collecting the
        // <splitToken> entries that belong to it.
        NodeList nlist = xmlinput.getElementsByTagName("SearchEngine");
        SearchEngineURLQueryAnalyzer.SearchEngine[] listEngines = new SearchEngineURLQueryAnalyzer.SearchEngine[nlist.getLength()];
        for (int i = 0; i < nlist.getLength(); i++) {
            NamedNodeMap nnm = nlist.item(i).getAttributes();

            String engineName = nnm.getNamedItem("engine").getNodeValue();
            String engineDomainSubstring = nnm.getNamedItem("domainSubstring").getNodeValue();
            List<KeyPair> keys = new ArrayList<>();

            NodeList listSplits = xmlinput.getElementsByTagName("splitToken");
            for (int k = 0; k < listSplits.getLength(); k++) {
                if (listSplits.item(k).getParentNode().getAttributes().getNamedItem("engine").getNodeValue().equals(engineName)) {
                    keys.add(new KeyPair(listSplits.item(k).getAttributes().getNamedItem("plainToken").getNodeValue(),
                            listSplits.item(k).getAttributes().getNamedItem("regexToken").getNodeValue()));
                }
            }

            listEngines[i] = new SearchEngineURLQueryAnalyzer.SearchEngine(engineName, engineDomainSubstring, keys);
        }
        engines = listEngines;
    }
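
    /*
     * Looks up the SearchEngine whose domain substring appears in the given
     * domain/URL string; returns null when the mappings have not been loaded
     * or no engine matches.
     */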
 
    private static SearchEngineURLQueryAnalyzer.SearchEngine getSearchEngineFromUrl(String domain) {
        if (engines == null) {
            return null;
        }
        for (SearchEngine engine : engines) {
            if (domain.contains(engine.getDomainSubstring())) {
                return engine;
            }
        }
        return null;
    }
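
    /*
     * Tries each of the engine's KeyPairs against the URL and returns the
     * URL-decoded query term, or an empty string if no key matches.
     */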
 
    private String extractSearchEngineQuery(SearchEngineURLQueryAnalyzer.SearchEngine eng, String url) {
        String x = "";

        for (KeyPair kp : eng.getKeys()) {
            if (url.contains(kp.getKey())) {
                x = getValue(url, kp.getKeyRegExp());
                break;
            }
        }
        try {
            // Query terms are percent-encoded in the URL; decode them for display.
            String decoded = URLDecoder.decode(x, "UTF-8");
            return decoded;
        } catch (UnsupportedEncodingException uee) {
            logger.log(Level.FINE, "Error during URL decoding ", uee);
            return x;
        }
    }
 
    private String getValue(String url, String regExpKey) {
        String value = "";
        String v = regExpKey;
        // An escaped '?' in the regex form of the key is replaced with a plain '?'
        // before the URL is split on it.
        if (regExpKey.contains("\\?")) {
            v = regExpKey.replace("\\?", "?");
        }
        String[] sp = url.split(v);
        if (sp.length >= 2) {
            // Keep only the value for this key, dropping any parameters that follow it.
            if (sp[sp.length - 1].contains("&")) {
                value = sp[sp.length - 1].split("&")[0];
            } else {
                value = sp[sp.length - 1];
            }
        }
        return value;
    }
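
    /*
     * Illustrative example (values assumed, not taken from the mappings file):
     * for a key of "q=" and the URL "https://www.example.com/search?q=autopsy&hl=en",
     * getValue() splits on "q=" and keeps "autopsy&hl=en", then trims at the
     * first "&", so extractSearchEngineQuery() returns "autopsy".
     */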
 
    /**
     * Walks every web-history and web-bookmark artifact in the case, extracts
     * the search term from any URL that matches a known engine, and posts a
     * TSK_WEB_SEARCH_QUERY artifact for it.
     */
    private void findSearchQueries() {
        int totalQueries = 0;
        try {
            Collection<BlackboardArtifact> listArtifacts = currentCase.getSleuthkitCase().getMatchingArtifacts(
                    "WHERE (blackboard_artifacts.artifact_type_id = '" + ARTIFACT_TYPE.TSK_WEB_BOOKMARK.getTypeID()
                    + "' OR blackboard_artifacts.artifact_type_id = '" + ARTIFACT_TYPE.TSK_WEB_HISTORY.getTypeID() + "') ");
            logger.log(Level.INFO, "Processing {0} blackboard artifacts.", listArtifacts.size());

            for (BlackboardArtifact artifact : listArtifacts) {
                if (context.dataSourceIngestIsCancelled()) {
                    break;
                }

                // Default attribute values for this artifact.
                String query = "";
                String browser = "";
                String searchEngineDomain = "";
                long last_accessed = -1;

                long fileId = artifact.getObjectID();
                boolean isFromSource = tskCase.isFileFromSource(dataSource, fileId);
                if (!isFromSource) {
                    continue; // Artifact belongs to a different data source.
                }

                AbstractFile file = tskCase.getAbstractFileById(fileId);
                if (file == null) {
                    continue;
                }

                SearchEngineURLQueryAnalyzer.SearchEngine se = null;
                Collection<BlackboardAttribute> listAttributes = currentCase.getSleuthkitCase().getMatchingAttributes(
                        "WHERE artifact_id = " + artifact.getArtifactID());

                for (BlackboardAttribute attribute : listAttributes) {
                    if (attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_URL.getTypeID()) {
                        final String urlString = attribute.getValueString();
                        se = getSearchEngineFromUrl(urlString);
                        if (se == null) {
                            break;
                        }

                        query = extractSearchEngineQuery(se, attribute.getValueString());
                        if (query.equals("")) {
                            break; // False positive: the URL was not a search query.
                        }
                    } else if (attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PROG_NAME.getTypeID()) {
                        browser = attribute.getValueString();
                    } else if (attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN.getTypeID()) {
                        searchEngineDomain = attribute.getValueString();
                    } else if (attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED.getTypeID()) {
                        last_accessed = attribute.getValueLong();
                    }
                }

                if (se != null && !query.equals("")) {
                    // If no access time was recorded, use 0 rather than -1.
                    if (last_accessed == -1) {
                        last_accessed = 0;
                    }
                    Collection<BlackboardAttribute> bbattributes = new ArrayList<>();
                    bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN,
                            NbBundle.getMessage(this.getClass(),
                                    "SearchEngineURLQueryAnalyzer.parentModuleName"), searchEngineDomain));
                    bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_TEXT,
                            NbBundle.getMessage(this.getClass(),
                                    "SearchEngineURLQueryAnalyzer.parentModuleName"), query));
                    bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PROG_NAME,
                            NbBundle.getMessage(this.getClass(),
                                    "SearchEngineURLQueryAnalyzer.parentModuleName"), browser));
                    bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED,
                            NbBundle.getMessage(this.getClass(),
                                    "SearchEngineURLQueryAnalyzer.parentModuleName"), last_accessed));
                    this.addArtifact(ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY, file, bbattributes);
                    se.increment();
                    ++totalQueries;
                }
            }
        } catch (TskCoreException e) {
            logger.log(Level.SEVERE, "Encountered error retrieving artifacts for search engine queries", e);
        } finally {
            if (context.dataSourceIngestIsCancelled()) {
                logger.info("Operation terminated by user.");
            }
            IngestServices.getInstance().fireModuleDataEvent(new ModuleDataEvent(
                    NbBundle.getMessage(this.getClass(), "SearchEngineURLQueryAnalyzer.parentModuleName.noSpace"),
                    BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY));
            logger.log(Level.INFO, "Extracted {0} queries from the blackboard", totalQueries);
        }
    }
 
    /**
     * Returns a printable per-engine summary of how many queries were extracted.
     */
    private String getTotals() {
        String total = "";
        if (engines == null) {
            return total;
        }
        for (SearchEngineURLQueryAnalyzer.SearchEngine se : engines) {
            total += se.getEngineName() + " : " + se.getTotal() + "\n";
        }
        return total;
    }
 
    public void process(Content dataSource, IngestJobContext context) {
        this.dataSource = dataSource;
        this.context = context;
        this.findSearchQueries();
        logger.log(Level.INFO, "Search Engine stats: \n{0}", getTotals());
    }
 
    void init() throws IngestModuleException {
        try {
            // Copy the bundled mappings file into the user config directory so it
            // can be found (and customized) at runtime.
            PlatformUtil.extractResourceToUserConfigDir(SearchEngineURLQueryAnalyzer.class, XMLFILE, true);
        } catch (IOException e) {
            String message = Bundle.SearchEngineURLQueryAnalyzer_init_exception_msg(XMLFILE);
            logger.log(Level.SEVERE, message, e);
            throw new IngestModuleException(message, e);
        }
        loadConfigFile();
    }

    public void complete() {
        logger.info("Search Engine URL Query Analyzer has completed.");
    }
}
 