19 package org.sleuthkit.autopsy.ingest;
 
   21 import java.util.ArrayList;
 
   22 import java.util.Collection;
 
   23 import java.util.Comparator;
 
   24 import java.util.HashSet;
 
   25 import java.util.Iterator;
 
   26 import java.util.List;
 
   28 import java.util.TreeSet;
 
   29 import java.util.concurrent.BlockingDeque;
 
   30 import java.util.concurrent.LinkedBlockingDeque;
 
   31 import java.util.concurrent.LinkedBlockingQueue;
 
   32 import java.util.logging.Level;
 
   33 import java.util.regex.Matcher;
 
   34 import java.util.regex.Pattern;
 
   46 final class IngestTasksScheduler {
 
   48     private static final Logger logger = Logger.getLogger(IngestTasksScheduler.class.getName());
 
   49     private static final int FAT_NTFS_FLAGS = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT12.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT16.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT32.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_NTFS.getValue();
 
   50     private static IngestTasksScheduler instance;
 
   58     private final LinkedBlockingQueue<DataSourceIngestTask> pendingDataSourceTasks;
 
   59     private final DataSourceIngestTaskQueue dataSourceTasksDispenser;
 
   84     private final TreeSet<FileIngestTask> rootDirectoryTasks;
 
   85     private final List<FileIngestTask> directoryTasks;
 
   86     private final BlockingDeque<FileIngestTask> pendingFileTasks;
 
   87     private final FileIngestTaskQueue fileTasksDispenser;
 
  100     private final Set<IngestTask> tasksInProgress;
 
  105     synchronized static IngestTasksScheduler getInstance() {
 
  106         if (IngestTasksScheduler.instance == null) {
 
  107             IngestTasksScheduler.instance = 
new IngestTasksScheduler();
 
  109         return IngestTasksScheduler.instance;
 
  115     private IngestTasksScheduler() {
 
  116         this.pendingDataSourceTasks = 
new LinkedBlockingQueue<>();
 
  117         this.dataSourceTasksDispenser = 
new DataSourceIngestTaskQueue();
 
  118         this.rootDirectoryTasks = 
new TreeSet<>(
new RootDirectoryTaskComparator());
 
  119         this.directoryTasks = 
new ArrayList<>();
 
  120         this.pendingFileTasks = 
new LinkedBlockingDeque<>();
 
  121         this.fileTasksDispenser = 
new FileIngestTaskQueue();
 
  122         this.tasksInProgress = 
new HashSet<>();
 
  131     IngestTaskQueue getDataSourceIngestTaskQueue() {
 
  132         return this.dataSourceTasksDispenser;
 
  141     IngestTaskQueue getFileIngestTaskQueue() {
 
  142         return this.fileTasksDispenser;
 
  154     synchronized void scheduleIngestTasks(DataSourceIngestJob job) {
 
  155         if (!job.isCancelled()) {
 
  161             this.scheduleDataSourceIngestTask(job);
 
  162             this.scheduleFileIngestTasks(job);
 
  171     synchronized void scheduleDataSourceIngestTask(DataSourceIngestJob job) {
 
  172         if (!job.isCancelled()) {
 
  173             DataSourceIngestTask task = 
new DataSourceIngestTask(job);
 
  174             this.tasksInProgress.add(task);
 
  176                 this.pendingDataSourceTasks.put(task);
 
  177             } 
catch (InterruptedException ex) {
 
  182                 this.tasksInProgress.remove(task);
 
  183                 Thread.currentThread().interrupt();
 
  193     synchronized void scheduleFileIngestTasks(DataSourceIngestJob job) {
 
  194         if (!job.isCancelled()) {
 
  197             List<AbstractFile> topLevelFiles = getTopLevelFiles(job.getDataSource());
 
  198             for (AbstractFile firstLevelFile : topLevelFiles) {
 
  199                 FileIngestTask task = 
new FileIngestTask(job, firstLevelFile);
 
  200                 if (IngestTasksScheduler.shouldEnqueueFileTask(task)) {
 
  201                     this.tasksInProgress.add(task);
 
  202                     this.rootDirectoryTasks.add(task);
 
  205             shuffleFileTaskQueues();
 
  215     synchronized void scheduleFileIngestTask(DataSourceIngestJob job, AbstractFile file) {
 
  216         if (!job.isCancelled()) {
 
  217             FileIngestTask task = 
new FileIngestTask(job, file);
 
  218             if (IngestTasksScheduler.shouldEnqueueFileTask(task)) {
 
  219                 this.tasksInProgress.add(task);
 
  220                 addToPendingFileTasksQueue(task);
 
  231     synchronized void notifyTaskCompleted(IngestTask task) {
 
  232         tasksInProgress.remove(task);
 
  243     synchronized boolean tasksForJobAreCompleted(DataSourceIngestJob job) {
 
  244         for (IngestTask task : tasksInProgress) {
 
  245             if (task.getIngestJob().getId() == job.getId()) {
 
  262     synchronized void cancelPendingTasksForIngestJob(DataSourceIngestJob job) {
 
  271         long jobId = job.getId();
 
  272         this.removeTasksForJob(this.rootDirectoryTasks, jobId);
 
  273         this.removeTasksForJob(this.directoryTasks, jobId);
 
  274         this.shuffleFileTaskQueues();
 
  286     private static List<AbstractFile> getTopLevelFiles(Content dataSource) {
 
  287         List<AbstractFile> topLevelFiles = 
new ArrayList<>();
 
  288         Collection<AbstractFile> rootObjects = dataSource.accept(
new GetRootDirectoryVisitor());
 
  289         if (rootObjects.isEmpty() && dataSource instanceof AbstractFile) {
 
  291             topLevelFiles.add((AbstractFile) dataSource);
 
  293             for (AbstractFile root : rootObjects) {
 
  294                 List<Content> children;
 
  296                     children = root.getChildren();
 
  297                     if (children.isEmpty()) {
 
  300                         topLevelFiles.add(root);
 
  304                         for (Content child : children) {
 
  305                             if (child instanceof AbstractFile) {
 
  306                                 topLevelFiles.add((AbstractFile) child);
 
  310                 } 
catch (TskCoreException ex) {
 
  311                     logger.log(Level.WARNING, 
"Could not get children of root to enqueue: " + root.getId() + 
": " + root.getName(), ex); 
 
  315         return topLevelFiles;
 
  323     synchronized private void shuffleFileTaskQueues() {
 
  330             if (!this.pendingFileTasks.isEmpty()) {
 
  334             if (this.directoryTasks.isEmpty()) {
 
  335                 if (this.rootDirectoryTasks.isEmpty()) {
 
  344                     this.directoryTasks.add(this.rootDirectoryTasks.pollFirst());
 
  350             FileIngestTask directoryTask = this.directoryTasks.remove(this.directoryTasks.size() - 1);
 
  351             if (shouldEnqueueFileTask(directoryTask)) {
 
  352                 addToPendingFileTasksQueue(directoryTask);
 
  354                 this.tasksInProgress.remove(directoryTask);
 
  359             final AbstractFile directory = directoryTask.getFile();
 
  361                 for (Content child : directory.getChildren()) {
 
  362                     if (child instanceof AbstractFile) {
 
  363                         AbstractFile file = (AbstractFile) child;
 
  364                         FileIngestTask childTask = 
new FileIngestTask(directoryTask.getIngestJob(), file);
 
  365                         if (file.hasChildren()) {
 
  371                             this.tasksInProgress.add(childTask);
 
  372                             this.directoryTasks.add(childTask);
 
  373                         } 
else if (shouldEnqueueFileTask(childTask)) {
 
  376                             this.tasksInProgress.add(childTask);
 
  377                             addToPendingFileTasksQueue(childTask);
 
  381             } 
catch (TskCoreException ex) {
 
  382                 String errorMessage = String.format(
"An error occurred getting the children of %s", directory.getName()); 
 
  383                 logger.log(Level.SEVERE, errorMessage, ex);
 
  397     private static boolean shouldEnqueueFileTask(
final FileIngestTask task) {
 
  398         final AbstractFile file = task.getFile();
 
  402         if (!task.getIngestJob().shouldProcessUnallocatedSpace()
 
  403                 && file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)) {
 
  409         String fileName = file.getName();
 
  410         if (fileName.equals(
".") || fileName.equals(
"..")) {
 
  420             TskData.TSK_FS_TYPE_ENUM fsType = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_UNSUPP;
 
  422                 FileSystem fs = f.getFileSystem();
 
  424                     fsType = fs.getFsType();
 
  426             } 
catch (TskCoreException ex) {
 
  427                 logger.log(Level.SEVERE, 
"Error querying file system for " + f, ex); 
 
  431             if ((fsType.getValue() & FAT_NTFS_FLAGS) == 0) {
 
  436             boolean isInRootDir = 
false;
 
  438                 AbstractFile parent = f.getParentDirectory();
 
  439                 isInRootDir = parent.isRoot();
 
  440             } 
catch (TskCoreException ex) {
 
  441                 logger.log(Level.WARNING, 
"Error querying parent directory for" + f.getName(), ex); 
 
  447             if (isInRootDir && f.getMetaAddr() < 32) {
 
  448                 String name = f.getName();
 
  449                 if (name.length() > 0 && name.charAt(0) == 
'$' && name.contains(
":")) {
 
  463     synchronized private void addToPendingFileTasksQueue(FileIngestTask task) {
 
  465             this.pendingFileTasks.putFirst(task);
 
  466         } 
catch (InterruptedException ex) {
 
  471             this.tasksInProgress.remove(task);
 
  472             Thread.currentThread().interrupt();
 
  484     synchronized private void removeTasksForJob(Collection<? extends IngestTask> taskQueue, 
long jobId) {
 
  485         Iterator<? extends IngestTask> iterator = taskQueue.iterator();
 
  486         while (iterator.hasNext()) {
 
  487             IngestTask task = iterator.next();
 
  488             if (task.getIngestJob().getId() == jobId) {
 
  489                 this.tasksInProgress.remove(task);
 
  503     private static int countTasksForJob(Collection<? extends IngestTask> queue, 
long jobId) {
 
  504         Iterator<? extends IngestTask> iterator = queue.iterator();
 
  506         while (iterator.hasNext()) {
 
  507             IngestTask task = (IngestTask) iterator.next();
 
  508             if (task.getIngestJob().getId() == jobId) {
 
  523     synchronized IngestJobTasksSnapshot getTasksSnapshotForJob(
long jobId) {
 
  524         return new IngestJobTasksSnapshot(jobId);
 
  534         public int compare(FileIngestTask q1, FileIngestTask q2) {
 
  538                 return (
int) (q2.getFile().getId() - q1.getFile().getId());
 
  540                 return p2.ordinal() - p1.ordinal();
 
  548                 LAST, LOW, MEDIUM, HIGH
 
  551             static final List<Pattern> LAST_PRI_PATHS = 
new ArrayList<>();
 
  553             static final List<Pattern> LOW_PRI_PATHS = 
new ArrayList<>();
 
  555             static final List<Pattern> MEDIUM_PRI_PATHS = 
new ArrayList<>();
 
  557             static final List<Pattern> HIGH_PRI_PATHS = 
new ArrayList<>();
 
  573                 LAST_PRI_PATHS.add(Pattern.compile(
"^pagefile", Pattern.CASE_INSENSITIVE));
 
  574                 LAST_PRI_PATHS.add(Pattern.compile(
"^hiberfil", Pattern.CASE_INSENSITIVE));
 
  577                 LOW_PRI_PATHS.add(Pattern.compile(
"^\\$OrphanFiles", Pattern.CASE_INSENSITIVE));
 
  578                 LOW_PRI_PATHS.add(Pattern.compile(
"^Windows", Pattern.CASE_INSENSITIVE));
 
  580                 MEDIUM_PRI_PATHS.add(Pattern.compile(
"^Program Files", Pattern.CASE_INSENSITIVE));
 
  582                 HIGH_PRI_PATHS.add(Pattern.compile(
"^Users", Pattern.CASE_INSENSITIVE));
 
  583                 HIGH_PRI_PATHS.add(Pattern.compile(
"^Documents and Settings", Pattern.CASE_INSENSITIVE));
 
  584                 HIGH_PRI_PATHS.add(Pattern.compile(
"^home", Pattern.CASE_INSENSITIVE));
 
  585                 HIGH_PRI_PATHS.add(Pattern.compile(
"^ProgramData", Pattern.CASE_INSENSITIVE));
 
  596                 if (!abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.FS)) {
 
  602                 final String path = abstractFile.getName();
 
  606                 for (Pattern p : HIGH_PRI_PATHS) {
 
  607                     Matcher m = p.matcher(path);
 
  612                 for (Pattern p : MEDIUM_PRI_PATHS) {
 
  613                     Matcher m = p.matcher(path);
 
  618                 for (Pattern p : LOW_PRI_PATHS) {
 
  619                     Matcher m = p.matcher(path);
 
  624                 for (Pattern p : LAST_PRI_PATHS) {
 
  625                     Matcher m = p.matcher(path);
 
  644             return IngestTasksScheduler.this.pendingDataSourceTasks.take();
 
  656             FileIngestTask task = IngestTasksScheduler.this.pendingFileTasks.takeFirst();
 
  657             shuffleFileTaskQueues();
 
  666     class IngestJobTasksSnapshot {
 
  668         private final long jobId;
 
  669         private final long rootQueueSize;
 
  670         private final long dirQueueSize;
 
  671         private final long fileQueueSize;
 
  672         private final long dsQueueSize;
 
  673         private final long runningListSize;
 
  680         IngestJobTasksSnapshot(
long jobId) {
 
  682             this.rootQueueSize = countTasksForJob(IngestTasksScheduler.this.rootDirectoryTasks, jobId);
 
  683             this.dirQueueSize = countTasksForJob(IngestTasksScheduler.this.directoryTasks, jobId);
 
  684             this.fileQueueSize = countTasksForJob(IngestTasksScheduler.this.pendingFileTasks, jobId);
 
  685             this.dsQueueSize = countTasksForJob(IngestTasksScheduler.this.pendingDataSourceTasks, jobId);
 
  686             this.runningListSize = countTasksForJob(IngestTasksScheduler.this.tasksInProgress, jobId);
 
  705         long getRootQueueSize() {
 
  706             return rootQueueSize;
 
  715         long getDirectoryTasksQueueSize() {
 
  719         long getFileQueueSize() {
 
  720             return fileQueueSize;
 
  723         long getDsQueueSize() {
 
  727         long getRunningListSize() {
 
  728             return runningListSize;
 
int compare(FileIngestTask q1, FileIngestTask q2)