Autopsy  4.4.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
IngestTasksScheduler.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2012-2017 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.ingest;
20 
21 import java.util.ArrayList;
22 import java.util.Collection;
23 import java.util.Comparator;
24 import java.util.HashSet;
25 import java.util.Iterator;
26 import java.util.List;
27 import java.util.Set;
28 import java.util.TreeSet;
29 import java.util.concurrent.BlockingDeque;
30 import java.util.concurrent.LinkedBlockingDeque;
31 import java.util.concurrent.LinkedBlockingQueue;
32 import java.util.logging.Level;
33 import java.util.regex.Matcher;
34 import java.util.regex.Pattern;
36 import org.sleuthkit.datamodel.AbstractFile;
37 import org.sleuthkit.datamodel.Content;
38 import org.sleuthkit.datamodel.FileSystem;
39 import org.sleuthkit.datamodel.TskCoreException;
40 import org.sleuthkit.datamodel.TskData;
41 
46 final class IngestTasksScheduler {
47 
48  private static final Logger logger = Logger.getLogger(IngestTasksScheduler.class.getName());
49  private static final int FAT_NTFS_FLAGS = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT12.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT16.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT32.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_NTFS.getValue();
50  private static IngestTasksScheduler instance;
51 
58  private final LinkedBlockingQueue<DataSourceIngestTask> pendingDataSourceTasks;
59  private final DataSourceIngestTaskQueue dataSourceTasksDispenser;
60 
84  private final TreeSet<FileIngestTask> rootDirectoryTasks;
85  private final List<FileIngestTask> directoryTasks;
86  private final BlockingDeque<FileIngestTask> pendingFileTasks;
87  private final FileIngestTaskQueue fileTasksDispenser;
88 
100  private final Set<IngestTask> tasksInProgress;
101 
105  synchronized static IngestTasksScheduler getInstance() {
106  if (IngestTasksScheduler.instance == null) {
107  IngestTasksScheduler.instance = new IngestTasksScheduler();
108  }
109  return IngestTasksScheduler.instance;
110  }
111 
115  private IngestTasksScheduler() {
116  this.pendingDataSourceTasks = new LinkedBlockingQueue<>();
117  this.dataSourceTasksDispenser = new DataSourceIngestTaskQueue();
118  this.rootDirectoryTasks = new TreeSet<>(new RootDirectoryTaskComparator());
119  this.directoryTasks = new ArrayList<>();
120  this.pendingFileTasks = new LinkedBlockingDeque<>();
121  this.fileTasksDispenser = new FileIngestTaskQueue();
122  this.tasksInProgress = new HashSet<>();
123  }
124 
131  IngestTaskQueue getDataSourceIngestTaskQueue() {
132  return this.dataSourceTasksDispenser;
133  }
134 
141  IngestTaskQueue getFileIngestTaskQueue() {
142  return this.fileTasksDispenser;
143  }
144 
154  synchronized void scheduleIngestTasks(DataSourceIngestJob job) {
155  if (!job.isCancelled()) {
156  // Scheduling of both a data source ingest task and file ingest tasks
157  // for a job must be an atomic operation. Otherwise, the data source
158  // task might be completed before the file tasks are scheduled,
159  // resulting in a potential false positive when another thread checks
160  // whether or not all the tasks for the job are completed.
161  this.scheduleDataSourceIngestTask(job);
162  this.scheduleFileIngestTasks(job);
163  }
164  }
165 
171  synchronized void scheduleDataSourceIngestTask(DataSourceIngestJob job) {
172  if (!job.isCancelled()) {
173  DataSourceIngestTask task = new DataSourceIngestTask(job);
174  this.tasksInProgress.add(task);
175  try {
176  this.pendingDataSourceTasks.put(task);
177  } catch (InterruptedException ex) {
182  this.tasksInProgress.remove(task);
183  Thread.currentThread().interrupt();
184  }
185  }
186  }
187 
193  synchronized void scheduleFileIngestTasks(DataSourceIngestJob job) {
194  if (!job.isCancelled()) {
195  // Get the top level files for the data source associated with this job
196  // and add them to the root directories priority queue.
197  List<AbstractFile> topLevelFiles = getTopLevelFiles(job.getDataSource());
198  for (AbstractFile firstLevelFile : topLevelFiles) {
199  FileIngestTask task = new FileIngestTask(job, firstLevelFile);
200  if (IngestTasksScheduler.shouldEnqueueFileTask(task)) {
201  this.tasksInProgress.add(task);
202  this.rootDirectoryTasks.add(task);
203  }
204  }
205  shuffleFileTaskQueues();
206  }
207  }
208 
215  synchronized void scheduleFileIngestTask(DataSourceIngestJob job, AbstractFile file) {
216  if (!job.isCancelled()) {
217  FileIngestTask task = new FileIngestTask(job, file);
218  if (IngestTasksScheduler.shouldEnqueueFileTask(task)) {
219  this.tasksInProgress.add(task);
220  addToPendingFileTasksQueue(task);
221  }
222  }
223  }
224 
231  synchronized void notifyTaskCompleted(IngestTask task) {
232  tasksInProgress.remove(task);
233  }
234 
243  synchronized boolean tasksForJobAreCompleted(DataSourceIngestJob job) {
244  for (IngestTask task : tasksInProgress) {
245  if (task.getIngestJob().getId() == job.getId()) {
246  return false;
247  }
248  }
249  return true;
250  }
251 
262  synchronized void cancelPendingTasksForIngestJob(DataSourceIngestJob job) {
271  long jobId = job.getId();
272  this.removeTasksForJob(this.rootDirectoryTasks, jobId);
273  this.removeTasksForJob(this.directoryTasks, jobId);
274  this.shuffleFileTaskQueues();
275  }
276 
286  private static List<AbstractFile> getTopLevelFiles(Content dataSource) {
287  List<AbstractFile> topLevelFiles = new ArrayList<>();
288  Collection<AbstractFile> rootObjects = dataSource.accept(new GetRootDirectoryVisitor());
289  if (rootObjects.isEmpty() && dataSource instanceof AbstractFile) {
290  // The data source is itself a file to be processed.
291  topLevelFiles.add((AbstractFile) dataSource);
292  } else {
293  for (AbstractFile root : rootObjects) {
294  List<Content> children;
295  try {
296  children = root.getChildren();
297  if (children.isEmpty()) {
298  // Add the root object itself, it could be an unallocated
299  // space file, or a child of a volume or an image.
300  topLevelFiles.add(root);
301  } else {
302  // The root object is a file system root directory, get
303  // the files within it.
304  for (Content child : children) {
305  if (child instanceof AbstractFile) {
306  topLevelFiles.add((AbstractFile) child);
307  }
308  }
309  }
310  } catch (TskCoreException ex) {
311  logger.log(Level.WARNING, "Could not get children of root to enqueue: " + root.getId() + ": " + root.getName(), ex); //NON-NLS
312  }
313  }
314  }
315  return topLevelFiles;
316  }
317 
323  synchronized private void shuffleFileTaskQueues() {
324  // This is synchronized because it is called both by synchronized
325  // methods of this ingest scheduler and an unsynchronized method of its
326  // file tasks "dispenser".
327  while (true) {
328  // Loop until either the pending file tasks queue is NOT empty
329  // or the upstream queues that feed into it ARE empty.
330  if (!this.pendingFileTasks.isEmpty()) {
331  // There are file tasks ready to be consumed, exit.
332  return;
333  }
334  if (this.directoryTasks.isEmpty()) {
335  if (this.rootDirectoryTasks.isEmpty()) {
336  // There are no root directory tasks to move into the
337  // directory queue, exit.
338  return;
339  } else {
340  // Move the next root directory task into the
341  // directories queue. Note that the task was already
342  // added to the tasks in progress list when the task was
343  // created in scheduleFileIngestTasks().
344  this.directoryTasks.add(this.rootDirectoryTasks.pollFirst());
345  }
346  }
347 
348  // Try to add the most recently added directory from the
349  // directory tasks queue to the pending file tasks queue.
350  FileIngestTask directoryTask = this.directoryTasks.remove(this.directoryTasks.size() - 1);
351  if (shouldEnqueueFileTask(directoryTask)) {
352  addToPendingFileTasksQueue(directoryTask);
353  } else {
354  this.tasksInProgress.remove(directoryTask);
355  }
356 
357  // If the directory contains subdirectories or files, try to
358  // enqueue tasks for them as well.
359  final AbstractFile directory = directoryTask.getFile();
360  try {
361  for (Content child : directory.getChildren()) {
362  if (child instanceof AbstractFile) {
363  AbstractFile file = (AbstractFile) child;
364  FileIngestTask childTask = new FileIngestTask(directoryTask.getIngestJob(), file);
365  if (file.hasChildren()) {
366  // Found a subdirectory, put the task in the
367  // pending directory tasks queue. Note the
368  // addition of the task to the tasks in progress
369  // list. This is necessary because this is the
370  // first appearance of this task in the queues.
371  this.tasksInProgress.add(childTask);
372  this.directoryTasks.add(childTask);
373  } else if (shouldEnqueueFileTask(childTask)) {
374  // Found a file, put the task directly into the
375  // pending file tasks queue.
376  this.tasksInProgress.add(childTask);
377  addToPendingFileTasksQueue(childTask);
378  }
379  }
380  }
381  } catch (TskCoreException ex) {
382  String errorMessage = String.format("An error occurred getting the children of %s", directory.getName()); //NON-NLS
383  logger.log(Level.SEVERE, errorMessage, ex);
384  }
385  }
386  }
387 
397  private static boolean shouldEnqueueFileTask(final FileIngestTask task) {
398  final AbstractFile file = task.getFile();
399 
400  // Skip the task if the file is actually the pseudo-file for the parent
401  // or current directory.
402  String fileName = file.getName();
403 
404  if (fileName.equals(".") || fileName.equals("..")) {
405  return false;
406  }
407 
413  if (file.isFile() && task.getIngestJob().getFileIngestFilter().fileIsMemberOf(file) == null) {
414  return false;
415  }
416 
417 // Skip the task if the file is one of a select group of special, large
418 // NTFS or FAT file system files.
419  if (file instanceof org.sleuthkit.datamodel.File) {
420  final org.sleuthkit.datamodel.File f = (org.sleuthkit.datamodel.File) file;
421 
422  // Get the type of the file system, if any, that owns the file.
423  TskData.TSK_FS_TYPE_ENUM fsType = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_UNSUPP;
424  try {
425  FileSystem fs = f.getFileSystem();
426  if (fs != null) {
427  fsType = fs.getFsType();
428  }
429  } catch (TskCoreException ex) {
430  logger.log(Level.SEVERE, "Error querying file system for " + f, ex); //NON-NLS
431  }
432 
433  // If the file system is not NTFS or FAT, don't skip the file.
434  if ((fsType.getValue() & FAT_NTFS_FLAGS) == 0) {
435  return true;
436  }
437 
438  // Find out whether the file is in a root directory.
439  boolean isInRootDir = false;
440  try {
441  AbstractFile parent = f.getParentDirectory();
442  isInRootDir = parent.isRoot();
443  } catch (TskCoreException ex) {
444  logger.log(Level.WARNING, "Error querying parent directory for" + f.getName(), ex); //NON-NLS
445  }
446 
447  // If the file is in the root directory of an NTFS or FAT file
448  // system, check its meta-address and check its name for the '$'
449  // character and a ':' character (not a default attribute).
450  if (isInRootDir && f.getMetaAddr() < 32) {
451  String name = f.getName();
452  if (name.length() > 0 && name.charAt(0) == '$' && name.contains(":")) {
453  return false;
454  }
455  }
456  }
457 
458  return true;
459  }
460 
466  synchronized private void addToPendingFileTasksQueue(FileIngestTask task) {
467  try {
468  this.pendingFileTasks.putFirst(task);
469  } catch (InterruptedException ex) {
474  this.tasksInProgress.remove(task);
475  Thread.currentThread().interrupt();
476  }
477  }
478 
487  synchronized private void removeTasksForJob(Collection<? extends IngestTask> taskQueue, long jobId) {
488  Iterator<? extends IngestTask> iterator = taskQueue.iterator();
489  while (iterator.hasNext()) {
490  IngestTask task = iterator.next();
491  if (task.getIngestJob().getId() == jobId) {
492  this.tasksInProgress.remove(task);
493  iterator.remove();
494  }
495  }
496  }
497 
506  private static int countTasksForJob(Collection<? extends IngestTask> queue, long jobId) {
507  Iterator<? extends IngestTask> iterator = queue.iterator();
508  int count = 0;
509  while (iterator.hasNext()) {
510  IngestTask task = (IngestTask) iterator.next();
511  if (task.getIngestJob().getId() == jobId) {
512  count++;
513  }
514  }
515  return count;
516  }
517 
526  synchronized IngestJobTasksSnapshot getTasksSnapshotForJob(long jobId) {
527  return new IngestJobTasksSnapshot(jobId);
528 
529  }
530 
535  private static class RootDirectoryTaskComparator implements Comparator<FileIngestTask> {
536 
537  @Override
538  public int compare(FileIngestTask q1, FileIngestTask q2) {
539  AbstractFilePriority.Priority p1 = AbstractFilePriority.getPriority(q1.getFile());
540  AbstractFilePriority.Priority p2 = AbstractFilePriority.getPriority(q2.getFile());
541  if (p1 == p2) {
542  return (int) (q2.getFile().getId() - q1.getFile().getId());
543  } else {
544  return p2.ordinal() - p1.ordinal();
545  }
546  }
547 
548  private static class AbstractFilePriority {
549 
550  enum Priority {
551 
552  LAST, LOW, MEDIUM, HIGH
553  }
554 
555  static final List<Pattern> LAST_PRI_PATHS = new ArrayList<>();
556 
557  static final List<Pattern> LOW_PRI_PATHS = new ArrayList<>();
558 
559  static final List<Pattern> MEDIUM_PRI_PATHS = new ArrayList<>();
560 
561  static final List<Pattern> HIGH_PRI_PATHS = new ArrayList<>();
562 
563  /*
564  * prioritize root directory folders based on the assumption that we
565  * are looking for user content. Other types of investigations may
566  * want different priorities.
567  */
568  static /*
569  * prioritize root directory folders based on the assumption that we
570  * are looking for user content. Other types of investigations may
571  * want different priorities.
572  */ {
573  // these files have no structure, so they go last
574  //unalloc files are handled as virtual files in getPriority()
575  //LAST_PRI_PATHS.schedule(Pattern.compile("^\\$Unalloc", Pattern.CASE_INSENSITIVE));
576  //LAST_PRI_PATHS.schedule(Pattern.compile("^\\Unalloc", Pattern.CASE_INSENSITIVE));
577  LAST_PRI_PATHS.add(Pattern.compile("^pagefile", Pattern.CASE_INSENSITIVE));
578  LAST_PRI_PATHS.add(Pattern.compile("^hiberfil", Pattern.CASE_INSENSITIVE));
579  // orphan files are often corrupt and windows does not typically have
580  // user content, so put them towards the bottom
581  LOW_PRI_PATHS.add(Pattern.compile("^\\$OrphanFiles", Pattern.CASE_INSENSITIVE));
582  LOW_PRI_PATHS.add(Pattern.compile("^Windows", Pattern.CASE_INSENSITIVE));
583  // all other files go into the medium category too
584  MEDIUM_PRI_PATHS.add(Pattern.compile("^Program Files", Pattern.CASE_INSENSITIVE));
585  // user content is top priority
586  HIGH_PRI_PATHS.add(Pattern.compile("^Users", Pattern.CASE_INSENSITIVE));
587  HIGH_PRI_PATHS.add(Pattern.compile("^Documents and Settings", Pattern.CASE_INSENSITIVE));
588  HIGH_PRI_PATHS.add(Pattern.compile("^home", Pattern.CASE_INSENSITIVE));
589  HIGH_PRI_PATHS.add(Pattern.compile("^ProgramData", Pattern.CASE_INSENSITIVE));
590  }
591 
599  static AbstractFilePriority.Priority getPriority(final AbstractFile abstractFile) {
600  if (!abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.FS)) {
601  //quickly filter out unstructured content
602  //non-fs virtual files and dirs, such as representing unalloc space
603  return AbstractFilePriority.Priority.LAST;
604  }
605  //determine the fs files priority by name
606  final String path = abstractFile.getName();
607  if (path == null) {
608  return AbstractFilePriority.Priority.MEDIUM;
609  }
610  for (Pattern p : HIGH_PRI_PATHS) {
611  Matcher m = p.matcher(path);
612  if (m.find()) {
613  return AbstractFilePriority.Priority.HIGH;
614  }
615  }
616  for (Pattern p : MEDIUM_PRI_PATHS) {
617  Matcher m = p.matcher(path);
618  if (m.find()) {
619  return AbstractFilePriority.Priority.MEDIUM;
620  }
621  }
622  for (Pattern p : LOW_PRI_PATHS) {
623  Matcher m = p.matcher(path);
624  if (m.find()) {
625  return AbstractFilePriority.Priority.LOW;
626  }
627  }
628  for (Pattern p : LAST_PRI_PATHS) {
629  Matcher m = p.matcher(path);
630  if (m.find()) {
631  return AbstractFilePriority.Priority.LAST;
632  }
633  }
634  //default is medium
635  return AbstractFilePriority.Priority.MEDIUM;
636  }
637  }
638  }
639 
644  private final class DataSourceIngestTaskQueue implements IngestTaskQueue {
645 
646  @Override
647  public IngestTask getNextTask() throws InterruptedException {
648  return IngestTasksScheduler.this.pendingDataSourceTasks.take();
649  }
650  }
651 
656  private final class FileIngestTaskQueue implements IngestTaskQueue {
657 
658  @Override
659  public IngestTask getNextTask() throws InterruptedException {
660  FileIngestTask task = IngestTasksScheduler.this.pendingFileTasks.takeFirst();
661  shuffleFileTaskQueues();
662  return task;
663  }
664 
665  }
666 
670  class IngestJobTasksSnapshot {
671 
672  private final long jobId;
673  private final long rootQueueSize;
674  private final long dirQueueSize;
675  private final long fileQueueSize;
676  private final long dsQueueSize;
677  private final long runningListSize;
678 
684  IngestJobTasksSnapshot(long jobId) {
685  this.jobId = jobId;
686  this.rootQueueSize = countTasksForJob(IngestTasksScheduler.this.rootDirectoryTasks, jobId);
687  this.dirQueueSize = countTasksForJob(IngestTasksScheduler.this.directoryTasks, jobId);
688  this.fileQueueSize = countTasksForJob(IngestTasksScheduler.this.pendingFileTasks, jobId);
689  this.dsQueueSize = countTasksForJob(IngestTasksScheduler.this.pendingDataSourceTasks, jobId);
690  this.runningListSize = countTasksForJob(IngestTasksScheduler.this.tasksInProgress, jobId);
691  }
692 
699  long getJobId() {
700  return jobId;
701  }
702 
709  long getRootQueueSize() {
710  return rootQueueSize;
711  }
712 
719  long getDirectoryTasksQueueSize() {
720  return dirQueueSize;
721  }
722 
723  long getFileQueueSize() {
724  return fileQueueSize;
725  }
726 
727  long getDsQueueSize() {
728  return dsQueueSize;
729  }
730 
731  long getRunningListSize() {
732  return runningListSize;
733  }
734 
735  }
736 
737 }

Copyright © 2012-2016 Basis Technology. Generated on: Fri Sep 29 2017
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.