Autopsy  4.19.1
Graphical digital forensics platform for The Sleuth Kit and other tools.
SevenZipExtractor.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2015-2021 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.modules.embeddedfileextractor;
20 
21 import java.io.File;
22 import java.io.FileOutputStream;
23 import java.io.IOException;
24 import java.nio.charset.Charset;
25 import java.nio.file.Path;
26 import java.nio.file.Paths;
27 import java.util.ArrayList;
28 import java.util.Arrays;
29 import java.util.Collection;
30 import java.util.Collections;
31 import java.util.Date;
32 import java.util.HashMap;
33 import java.util.List;
34 import java.util.Map;
35 import java.util.concurrent.ConcurrentHashMap;
36 import java.util.logging.Level;
37 import net.sf.sevenzipjbinding.ArchiveFormat;
38 import static net.sf.sevenzipjbinding.ArchiveFormat.RAR;
39 import net.sf.sevenzipjbinding.ExtractAskMode;
40 import net.sf.sevenzipjbinding.ExtractOperationResult;
41 import net.sf.sevenzipjbinding.IArchiveExtractCallback;
42 import net.sf.sevenzipjbinding.ICryptoGetTextPassword;
43 import net.sf.sevenzipjbinding.IInArchive;
44 import net.sf.sevenzipjbinding.ISequentialOutStream;
45 import net.sf.sevenzipjbinding.PropID;
46 import net.sf.sevenzipjbinding.SevenZip;
47 import net.sf.sevenzipjbinding.SevenZipException;
48 import net.sf.sevenzipjbinding.SevenZipNativeInitializationException;
49 import org.apache.tika.Tika;
50 import org.apache.tika.parser.txt.CharsetDetector;
51 import org.apache.tika.parser.txt.CharsetMatch;
52 import org.netbeans.api.progress.ProgressHandle;
53 import org.openide.util.NbBundle;
54 import org.openide.util.NbBundle.Messages;
69 import org.sleuthkit.datamodel.AbstractFile;
70 import org.sleuthkit.datamodel.Blackboard;
71 import org.sleuthkit.datamodel.BlackboardArtifact;
72 import static org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE.TSK_INTERESTING_FILE_HIT;
73 import org.sleuthkit.datamodel.BlackboardAttribute;
74 import static org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE.TSK_COMMENT;
75 import static org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DESCRIPTION;
76 import static org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME;
77 import org.sleuthkit.datamodel.Content;
78 import org.sleuthkit.datamodel.DerivedFile;
79 import org.sleuthkit.datamodel.EncodedFileOutputStream;
80 import org.sleuthkit.datamodel.ReadContentInputStream;
81 import org.sleuthkit.datamodel.Score;
82 import org.sleuthkit.datamodel.SleuthkitCase.CaseDbTransaction;
83 import org.sleuthkit.datamodel.TskCoreException;
84 import org.sleuthkit.datamodel.TskData;
85 
90 class SevenZipExtractor {
91 
92  private static final Logger logger = Logger.getLogger(SevenZipExtractor.class.getName());
93 
94  private static final String MODULE_NAME = EmbeddedFileExtractorModuleFactory.getModuleName(); // module name used as the source of posted artifacts, attributes and ingest messages
95 
96  //encryption type strings: recorded on the "encryption detected" result created by unpack()
97  private static final String ENCRYPTION_FILE_LEVEL = NbBundle.getMessage(EmbeddedFileExtractorIngestModule.class,
98  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.encryptionFileLevel"); // used when only some items were encrypted
99  private static final String ENCRYPTION_FULL = EncryptionDetectionModuleFactory.PASSWORD_PROTECT_MESSAGE; // used when every item was encrypted
100 
101  //zip bomb detection thresholds
102  private static final int MAX_DEPTH = 4; // unpack() flags the root archive once a nested archive reaches this depth
103  private static final int MAX_COMPRESSION_RATIO = 600; // size / packed size at or above this flags the root archive
104  private static final long MIN_COMPRESSION_RATIO_SIZE = 500 * 1000000L; // items smaller than this (bytes) skip the ratio check
105  private static final long MIN_FREE_DISK_SPACE = 1 * 1000 * 1000000L; //1GB: unpack() skips items that would leave less estimated free space than this
106 
107  private IngestServices services = IngestServices.getInstance();
108  private final IngestJobContext context;
109  private final FileTypeDetector fileTypeDetector;
110  private final FileTaskExecutor fileTaskExecutor;
111 
112  private String moduleDirRelative; // module output directory, relative form; used to build the local relative paths of extracted items
113  private String moduleDirAbsolute; // module output directory, absolute form; extracted items are written beneath it
114 
115  private Blackboard blackboard; // assigned from the current case at the start of each unpack() call
116 
117  // per-archive progress state, (re)assigned by unpack()
118  private ProgressHandle progress;
119  private int numItems;
120  private String currentArchiveName;
120 
125 
 // MIME types accepted by isSevenZipExtractionSupported(); toString() returns the MIME string
126  ZIP("application/zip"), //NON-NLS
127  SEVENZ("application/x-7z-compressed"), //NON-NLS
128  GZIP("application/gzip"), //NON-NLS
129  XGZIP("application/x-gzip"), //NON-NLS
130  XBZIP2("application/x-bzip2"), //NON-NLS
131  XTAR("application/x-tar"), //NON-NLS
132  XGTAR("application/x-gtar"), //NON-NLS
133  XRAR("application/x-rar-compressed"); //NON-NLS
134 
135  private final String mimeType; // MIME type string this constant stands for
136 
137  SupportedArchiveExtractionFormats(final String mimeType) {
138  this.mimeType = mimeType;
139  }
140 
141  // Returns the MIME type string rather than the constant name; callers compare it against detected MIME types.
142  @Override
143  public String toString() {
144  return this.mimeType;
145  }
146  // TODO Expand to support more formats after upgrading Tika
147  }
147 
168  SevenZipExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector, String moduleDirRelative, String moduleDirAbsolute, FileTaskExecutor fileTaskExecutor) throws SevenZipNativeInitializationException {
169  if (!SevenZip.isInitializedSuccessfully()) {
170  throw new SevenZipNativeInitializationException("SevenZip has not been previously initialized.");
171  }
172  this.context = context;
173  this.fileTypeDetector = fileTypeDetector;
174  this.moduleDirRelative = moduleDirRelative;
175  this.moduleDirAbsolute = moduleDirAbsolute;
176  this.fileTaskExecutor = fileTaskExecutor;
177  }
178 
187  boolean isSevenZipExtractionSupported(AbstractFile file) {
188  String fileMimeType = fileTypeDetector.getMIMEType(file);
189  for (SupportedArchiveExtractionFormats mimeType : SupportedArchiveExtractionFormats.values()) {
190  if (checkForIngestCancellation(file)) {
191  break;
192  }
193  if (mimeType.toString().equals(fileMimeType)) {
194  return true;
195  }
196  }
197  return false;
198  }
199 
200  boolean isSevenZipExtractionSupported(String mimeType) {
201  for (SupportedArchiveExtractionFormats supportedMimeType : SupportedArchiveExtractionFormats.values()) {
202  if (mimeType.contains(supportedMimeType.toString())) {
203  return true;
204  }
205  }
206  return false;
207  }
208 
219  private boolean checkForIngestCancellation(AbstractFile file) {
220  if (fileTaskExecutor != null && context != null && context.fileIngestIsCancelled()) {
221  logger.log(Level.INFO, "Ingest was cancelled. Results extracted from the following archive file may be incomplete. Name: {0}Object ID: {1}", new Object[]{file.getName(), file.getId()});
222  return true;
223  }
224  return false;
225  }
226 
250  private boolean isZipBombArchiveItemCheck(AbstractFile archiveFile, IInArchive inArchive, int inArchiveItemIndex, ConcurrentHashMap<Long, Archive> depthMap, String escapedFilePath) {
251  //If a file is corrupted as a result of reconstructing it from unallocated space, then
252  //7zip does a poor job estimating the original uncompressed file size.
253  //As a result, many corrupted files have wonky compression ratios and could flood the UI
254  //with false zip bomb notifications. The decision was made to skip compression ratio checks
255  //for unallocated zip files. Instead, we let the depth be an indicator of a zip bomb.
256  //Gzip archives compress a single file. They may have a sparse file,
257  //and that file could be much larger, however it won't be the exponential growth seen with more dangerous zip bombs.
258  //In addition a fair number of browser cache files will be gzip archives,
259  //and their file sizes are frequently retrieved incorrectly so ignoring gzip files is a reasonable decision.
260  if (archiveFile.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.UNALLOC) || archiveFile.getMIMEType().equalsIgnoreCase(SupportedArchiveExtractionFormats.XGZIP.toString())) {
261  return false;
262  }
263 
264  try {
265  final Long archiveItemSize = (Long) inArchive.getProperty(
266  inArchiveItemIndex, PropID.SIZE);
267 
268  //skip the check for small files
269  if (archiveItemSize == null || archiveItemSize < MIN_COMPRESSION_RATIO_SIZE) {
270  return false;
271  }
272 
273  final Long archiveItemPackedSize = (Long) inArchive.getProperty(
274  inArchiveItemIndex, PropID.PACKED_SIZE);
275 
276  if (archiveItemPackedSize == null || archiveItemPackedSize <= 0) {
277  logger.log(Level.WARNING, "Cannot getting compression ratio, cannot detect if zipbomb: {0}, item: {1}", //NON-NLS
278  new Object[]{archiveFile.getName(), (String) inArchive.getProperty(inArchiveItemIndex, PropID.PATH)}); //NON-NLS
279  return false;
280  }
281 
282  int cRatio = (int) (archiveItemSize / archiveItemPackedSize);
283 
284  if (cRatio >= MAX_COMPRESSION_RATIO) {
285  Archive rootArchive = depthMap.get(depthMap.get(archiveFile.getId()).getRootArchiveId());
286  String details = NbBundle.getMessage(SevenZipExtractor.class,
287  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.isZipBombCheck.warnDetails",
288  cRatio, FileUtil.escapeFileName(getArchiveFilePath(rootArchive.getArchiveFile())));
289 
290  flagRootArchiveAsZipBomb(rootArchive, archiveFile, details, escapedFilePath);
291  return true;
292  } else {
293  return false;
294  }
295 
296  } catch (SevenZipException ex) {
297  logger.log(Level.WARNING, "Error getting archive item size and cannot detect if zipbomb. ", ex); //NON-NLS
298  return false;
299  }
300  }
301 
313  private void flagRootArchiveAsZipBomb(Archive rootArchive, AbstractFile archiveFile, String details, String escapedFilePath) {
314  rootArchive.flagAsZipBomb();
315  logger.log(Level.INFO, details);
316 
317  String setName = "Possible Zip Bomb";
318  try {
319  Collection<BlackboardAttribute> attributes = Arrays.asList(
320  new BlackboardAttribute(
321  TSK_SET_NAME, MODULE_NAME,
322  setName),
323  new BlackboardAttribute(
324  TSK_DESCRIPTION, MODULE_NAME,
325  Bundle.SevenZipExtractor_zipBombArtifactCreation_text(archiveFile.getName())),
326  new BlackboardAttribute(
327  TSK_COMMENT, MODULE_NAME,
328  details));
329 
330  if (!blackboard.artifactExists(archiveFile, TSK_INTERESTING_FILE_HIT, attributes)) {
331 
332  BlackboardArtifact artifact = rootArchive.getArchiveFile().newAnalysisResult(
333  BlackboardArtifact.Type.TSK_INTERESTING_FILE_HIT, Score.SCORE_LIKELY_NOTABLE,
334  null, setName, null,
335  attributes)
336  .getAnalysisResult();
337 
338  try {
339  /*
340  * post the artifact which will index the artifact for
341  * keyword search, and fire an event to notify UI of this
342  * new artifact
343  */
344  blackboard.postArtifact(artifact, MODULE_NAME);
345 
346  String msg = NbBundle.getMessage(SevenZipExtractor.class,
347  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.isZipBombCheck.warnMsg", archiveFile.getName(), escapedFilePath);//NON-NLS
348 
349  services.postMessage(IngestMessage.createWarningMessage(MODULE_NAME, msg, details));
350 
351  } catch (Blackboard.BlackboardException ex) {
352  logger.log(Level.SEVERE, "Unable to index blackboard artifact " + artifact.getArtifactID(), ex); //NON-NLS
353  MessageNotifyUtil.Notify.error(
354  Bundle.SevenZipExtractor_indexError_message(), artifact.getDisplayName());
355  }
356  }
357  } catch (TskCoreException ex) {
358  logger.log(Level.SEVERE, "Error creating blackboard artifact for Zip Bomb Detection for file: " + escapedFilePath, ex); //NON-NLS
359  }
360  }
361 
370  private ArchiveFormat get7ZipOptions(AbstractFile archiveFile) {
371  // try to get the file type from the BB
372  String detectedFormat;
373  detectedFormat = archiveFile.getMIMEType();
374 
375  if (detectedFormat == null) {
376  logger.log(Level.WARNING, "Could not detect format for file: {0}", archiveFile); //NON-NLS
377 
378  // if we don't have attribute info then use file extension
379  String extension = archiveFile.getNameExtension();
380  if ("rar".equals(extension)) //NON-NLS
381  {
382  // for RAR files we need to open them explicitly as RAR. Otherwise, if there is a ZIP archive inside RAR archive
383  // it will be opened incorrectly when using 7zip's built-in auto-detect functionality
384  return RAR;
385  }
386 
387  // Otherwise open the archive using 7zip's built-in auto-detect functionality
388  return null;
389  } else if (detectedFormat.contains("application/x-rar-compressed")) //NON-NLS
390  {
391  // for RAR files we need to open them explicitly as RAR. Otherwise, if there is a ZIP archive inside RAR archive
392  // it will be opened incorrectly when using 7zip's built-in auto-detect functionality
393  return RAR;
394  }
395 
396  // Otherwise open the archive using 7zip's built-in auto-detect functionality
397  return null;
398  }
399 
410  private long getRootArchiveId(AbstractFile file) throws TskCoreException {
411  long id = file.getId();
412  Content parentContent = file.getParent();
413  while (parentContent != null) {
414  id = parentContent.getId();
415  parentContent = parentContent.getParent();
416  }
417  return id;
418  }
419 
439  private List<AbstractFile> getAlreadyExtractedFiles(AbstractFile archiveFile, String archiveFilePath) throws TskCoreException, InterruptedException, FileTaskExecutor.FileTaskFailedException {
440  /*
441  * TODO (Jira-7145): Is this logic correct?
442  */
443  List<AbstractFile> extractedFiles = new ArrayList<>();
444  File outputDirectory = new File(moduleDirAbsolute, EmbeddedFileExtractorIngestModule.getUniqueName(archiveFile));
445  if (archiveFile.hasChildren() && fileTaskExecutor.exists(outputDirectory)) {
446  Case currentCase = Case.getCurrentCase();
447  FileManager fileManager = currentCase.getServices().getFileManager();
448  extractedFiles.addAll(fileManager.findFilesByParentPath(getRootArchiveId(archiveFile), archiveFilePath));
449  }
450  return extractedFiles;
451  }
452 
460  private String getArchiveFilePath(AbstractFile archiveFile) {
461  return archiveFile.getParentPath() + archiveFile.getName();
462  }
463 
473  private boolean makeExtractedFilesDirectory(String uniqueArchiveFileName) {
474  boolean success = true;
475  Path rootDirectoryPath = Paths.get(moduleDirAbsolute, uniqueArchiveFileName);
476  File rootDirectory = rootDirectoryPath.toFile();
477  try {
478  if (!fileTaskExecutor.exists(rootDirectory)) {
479  success = fileTaskExecutor.mkdirs(rootDirectory);
480  }
481  } catch (SecurityException | FileTaskFailedException | InterruptedException ex) {
482  logger.log(Level.SEVERE, String.format("Error creating root extracted files directory %s", rootDirectory), ex); //NON-NLS
483  success = false;
484  }
485  return success;
486  }
487 
500  private String getPathInArchive(IInArchive archive, int inArchiveItemIndex, AbstractFile archiveFile) throws SevenZipException {
501  String pathInArchive = (String) archive.getProperty(inArchiveItemIndex, PropID.PATH);
502 
503  if (pathInArchive == null || pathInArchive.isEmpty()) {
504  //some formats (.tar.gz) may not be handled correctly -- file in archive has no name/path
505  //handle this for .tar.gz and tgz but assuming the child is tar,
506  //otherwise, unpack using itemNumber as name
507 
508  //TODO this should really be signature based, not extension based
509  String archName = archiveFile.getName();
510  int dotI = archName.lastIndexOf(".");
511  String useName = null;
512  if (dotI != -1) {
513  String base = archName.substring(0, dotI);
514  String ext = archName.substring(dotI);
515  int colonIndex = ext.lastIndexOf(":");
516  if (colonIndex != -1) {
517  // If alternate data stream is found, fix the name
518  // so Windows doesn't choke on the colon character.
519  ext = ext.substring(0, colonIndex);
520  }
521  switch (ext) {
522  case ".gz": //NON-NLS
523  useName = base;
524  break;
525  case ".tgz": //NON-NLS
526  useName = base + ".tar"; //NON-NLS
527  break;
528  case ".bz2": //NON-NLS
529  useName = base;
530  break;
531  }
532  }
533  if (useName == null) {
534  pathInArchive = "/" + archName + "/" + Integer.toString(inArchiveItemIndex);
535  } else {
536  pathInArchive = "/" + useName;
537  }
538  }
539  return pathInArchive;
540  }
541 
542  private byte[] getPathBytesInArchive(IInArchive archive, int inArchiveItemIndex, AbstractFile archiveFile) throws SevenZipException {
543  return (byte[]) archive.getProperty(inArchiveItemIndex, PropID.PATH_BYTES);
544  }
545 
546  /*
547  * Get the String that will represent the key for the hashmap which keeps
548  * track of existing files from an AbstractFile
549  */
550  private String getKeyAbstractFile(AbstractFile fileInDatabase) {
551  return fileInDatabase == null ? null : fileInDatabase.getParentPath() + fileInDatabase.getName();
552  }
553 
554  /*
555  * Get the String that will represent the key for the hashmap which keeps
556  * track of existing files from an unpacked node and the archiveFilePath
557  */
558  private String getKeyFromUnpackedNode(UnpackedTree.UnpackedNode node, String archiveFilePath) {
559  return node == null ? null : archiveFilePath + "/" + node.getFileName();
560  }
561 
569  void unpack(AbstractFile archiveFile, ConcurrentHashMap<Long, Archive> depthMap) {
570  unpack(archiveFile, depthMap, null);
571  }
572 
584  @Messages({"SevenZipExtractor.indexError.message=Failed to index encryption detected artifact for keyword search.",
585  "# {0} - rootArchive",
586  "SevenZipExtractor.zipBombArtifactCreation.text=Zip Bomb Detected {0}"})
587  boolean unpack(AbstractFile archiveFile, ConcurrentHashMap<Long, Archive> depthMap, String password) {
588  boolean unpackSuccessful = true; //initialized to true change to false if any files fail to extract and
589  boolean hasEncrypted = false;
590  boolean fullEncryption = true;
591  boolean progressStarted = false;
592  final String archiveFilePath = getArchiveFilePath(archiveFile);
593  final String escapedArchiveFilePath = FileUtil.escapeFileName(archiveFilePath);
594  HashMap<String, ZipFileStatusWrapper> statusMap = new HashMap<>();
595  List<AbstractFile> unpackedFiles = Collections.<AbstractFile>emptyList();
596 
597  currentArchiveName = archiveFile.getName();
598 
599  SevenZipContentReadStream stream = null;
600  progress = ProgressHandle.createHandle(Bundle.EmbeddedFileExtractorIngestModule_ArchiveExtractor_moduleName());
601  //recursion depth check for zip bomb
602  Archive parentAr;
603  try {
604  blackboard = Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboard();
605  } catch (NoCurrentCaseException ex) {
606  logger.log(Level.INFO, "Exception while getting open case.", ex); //NON-NLS
607  unpackSuccessful = false;
608  return unpackSuccessful;
609  }
610  if (checkForIngestCancellation(archiveFile)) {
611  return false;
612  }
613  try {
614  List<AbstractFile> existingFiles = getAlreadyExtractedFiles(archiveFile, archiveFilePath);
615  for (AbstractFile file : existingFiles) {
616  statusMap.put(getKeyAbstractFile(file), new ZipFileStatusWrapper(file, ZipFileStatus.EXISTS));
617  }
618  } catch (TskCoreException | FileTaskFailedException | InterruptedException ex) {
619  logger.log(Level.SEVERE, String.format("Error checking if %s has already been processed, skipping", escapedArchiveFilePath), ex); //NON-NLS
620  unpackSuccessful = false;
621  return unpackSuccessful;
622  }
623  if (checkForIngestCancellation(archiveFile)) {
624  return false;
625  }
626  parentAr = depthMap.get(archiveFile.getId());
627  if (parentAr == null) {
628  parentAr = new Archive(0, archiveFile.getId(), archiveFile);
629  depthMap.put(archiveFile.getId(), parentAr);
630  } else {
631  Archive rootArchive = depthMap.get(parentAr.getRootArchiveId());
632  if (rootArchive.isFlaggedAsZipBomb()) {
633  //skip this archive as the root archive has already been determined to contain a zip bomb
634  unpackSuccessful = false;
635  return unpackSuccessful;
636  } else if (parentAr.getDepth() == MAX_DEPTH) {
637  String details = NbBundle.getMessage(SevenZipExtractor.class,
638  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.warnDetails.zipBomb",
639  parentAr.getDepth(), FileUtil.escapeFileName(getArchiveFilePath(rootArchive.getArchiveFile())));
640  flagRootArchiveAsZipBomb(rootArchive, archiveFile, details, escapedArchiveFilePath);
641  unpackSuccessful = false;
642  return unpackSuccessful;
643  }
644  }
645  if (checkForIngestCancellation(archiveFile)) {
646  return false;
647  }
648  IInArchive inArchive = null;
649  try {
650  stream = new SevenZipContentReadStream(new ReadContentInputStream(archiveFile));
651  // for RAR files we need to open them explicitly as RAR. Otherwise, if there is a ZIP archive inside RAR archive
652  // it will be opened incorrectly when using 7zip's built-in auto-detect functionality.
653  // All other archive formats are still opened using 7zip built-in auto-detect functionality.
654  ArchiveFormat options = get7ZipOptions(archiveFile);
655  if (checkForIngestCancellation(archiveFile)) {
656  return false;
657  }
658  if (password == null) {
659  inArchive = SevenZip.openInArchive(options, stream);
660  } else {
661  inArchive = SevenZip.openInArchive(options, stream, password);
662  }
663  numItems = inArchive.getNumberOfItems();
664  progress.start(numItems);
665  progressStarted = true;
666  if (checkForIngestCancellation(archiveFile)) {
667  return false;
668  }
669  //setup the archive local root folder
670  final String uniqueArchiveFileName = FileUtil.escapeFileName(EmbeddedFileExtractorIngestModule.getUniqueName(archiveFile));
671  if (!makeExtractedFilesDirectory(uniqueArchiveFileName)) {
672  return false;
673  }
674 
675  //initialize tree hierarchy to keep track of unpacked file structure
676  SevenZipExtractor.UnpackedTree unpackedTree = new SevenZipExtractor.UnpackedTree(moduleDirRelative + "/" + uniqueArchiveFileName, archiveFile);
677 
678  long freeDiskSpace;
679  try {
680  freeDiskSpace = services.getFreeDiskSpace();
681  } catch (NullPointerException ex) {
682  //If ingest has not been run at least once getFreeDiskSpace() will throw a null pointer exception
683  //currently getFreeDiskSpace always returns DISK_FREE_SPACE_UNKNOWN
684  freeDiskSpace = IngestMonitor.DISK_FREE_SPACE_UNKNOWN;
685  }
686 
687  Map<Integer, InArchiveItemDetails> archiveDetailsMap = new HashMap<>();
688  for (int inArchiveItemIndex = 0; inArchiveItemIndex < numItems; inArchiveItemIndex++) {
689  if (checkForIngestCancellation(archiveFile)) {
690  return false;
691  }
692  progress.progress(String.format("%s: Analyzing archive metadata and creating local files (%d of %d)", currentArchiveName, inArchiveItemIndex + 1, numItems), 0);
693  if (isZipBombArchiveItemCheck(archiveFile, inArchive, inArchiveItemIndex, depthMap, escapedArchiveFilePath)) {
694  unpackSuccessful = false;
695  return unpackSuccessful;
696  }
697 
698  String pathInArchive = getPathInArchive(inArchive, inArchiveItemIndex, archiveFile);
699  byte[] pathBytesInArchive = getPathBytesInArchive(inArchive, inArchiveItemIndex, archiveFile);
700  UnpackedTree.UnpackedNode unpackedNode = unpackedTree.addNode(pathInArchive, pathBytesInArchive);
701  if (checkForIngestCancellation(archiveFile)) {
702  return false;
703  }
704  final boolean isEncrypted = (Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.ENCRYPTED);
705 
706  if (isEncrypted && password == null) {
707  logger.log(Level.WARNING, "Skipping encrypted file in archive: {0}", pathInArchive); //NON-NLS
708  hasEncrypted = true;
709  unpackSuccessful = false;
710  continue;
711  } else {
712  fullEncryption = false;
713  }
714 
715  // NOTE: item size may return null in case of certain
716  // archiving formats. Eg: BZ2
717  //check if unpacking this file will result in out of disk space
718  //this is additional to zip bomb prevention mechanism
719  Long archiveItemSize = (Long) inArchive.getProperty(
720  inArchiveItemIndex, PropID.SIZE);
721  if (freeDiskSpace != IngestMonitor.DISK_FREE_SPACE_UNKNOWN && archiveItemSize != null && archiveItemSize > 0) { //if free space is known and file is not empty.
722  String archiveItemPath = (String) inArchive.getProperty(
723  inArchiveItemIndex, PropID.PATH);
724  long newDiskSpace = freeDiskSpace - archiveItemSize;
725  if (newDiskSpace < MIN_FREE_DISK_SPACE) {
726  String msg = NbBundle.getMessage(SevenZipExtractor.class,
727  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.notEnoughDiskSpace.msg",
728  escapedArchiveFilePath, archiveItemPath);
729  String details = NbBundle.getMessage(SevenZipExtractor.class,
730  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.notEnoughDiskSpace.details");
731  services.postMessage(IngestMessage.createErrorMessage(MODULE_NAME, msg, details));
732  logger.log(Level.INFO, "Skipping archive item due to insufficient disk space: {0}, {1}", new String[]{escapedArchiveFilePath, archiveItemPath}); //NON-NLS
733  logger.log(Level.INFO, "Available disk space: {0}", new Object[]{freeDiskSpace}); //NON-NLS
734  unpackSuccessful = false;
735  continue; //skip this file
736  } else {
737  //update est. disk space during this archive, so we don't need to poll for every file extracted
738  freeDiskSpace = newDiskSpace;
739  }
740  }
741  if (checkForIngestCancellation(archiveFile)) {
742  return false;
743  }
744  final String uniqueExtractedName = FileUtil.escapeFileName(uniqueArchiveFileName + File.separator + (inArchiveItemIndex / 1000) + File.separator + inArchiveItemIndex);
745  final String localAbsPath = moduleDirAbsolute + File.separator + uniqueExtractedName;
746  final String localRelPath = moduleDirRelative + File.separator + uniqueExtractedName;
747 
748  //create local dirs and empty files before extracted
749  //cannot rely on files in top-bottom order
750  File localFile = new File(localAbsPath);
751  boolean localFileExists;
752  try {
753  if ((Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.IS_FOLDER)) {
754  localFileExists = findOrCreateDirectory(localFile);
755  } else {
756  localFileExists = findOrCreateEmptyFile(localFile);
757  }
758  } catch (FileTaskFailedException | InterruptedException ex) {
759  localFileExists = false;
760  logger.log(Level.SEVERE, String.format("Error fiding or creating %s", localFile.getAbsolutePath()), ex); //NON-NLS
761  }
762  if (checkForIngestCancellation(archiveFile)) {
763  return false;
764  }
765  // skip the rest of this loop if we couldn't create the file
766  //continue will skip details from being added to the map
767  if (!localFileExists) {
768  logger.log(Level.SEVERE, String.format("Skipping %s because it could not be created", localFile.getAbsolutePath())); //NON-NLS
769  continue;
770  }
771 
772  //Store archiveItemIndex with local paths and unpackedNode reference.
773  //Necessary for the extract call back to write the current archive
774  //file to the correct disk location and to correctly update it's
775  //corresponding unpackedNode
776  archiveDetailsMap.put(inArchiveItemIndex, new InArchiveItemDetails(
777  unpackedNode, localAbsPath, localRelPath));
778  }
779 
780  int[] extractionIndices = getExtractableFilesFromDetailsMap(archiveDetailsMap);
781  if (checkForIngestCancellation(archiveFile)) {
782  return false;
783  }
784  StandardIArchiveExtractCallback archiveCallBack
785  = new StandardIArchiveExtractCallback(
786  inArchive, archiveFile, progress,
787  archiveDetailsMap, password, freeDiskSpace);
788 
789  //According to the documentation, indices in sorted order are optimal
790  //for efficiency. Hence, the HashMap and linear processing of
791  //inArchiveItemIndex. False indicates non-test mode
792  inArchive.extract(extractionIndices, false, archiveCallBack);
793  if (checkForIngestCancellation(archiveFile)) {
794  return false;
795  }
796  unpackSuccessful &= archiveCallBack.wasSuccessful();
797 
798  archiveDetailsMap = null;
799 
800  // add them to the DB. We wait until the end so that we have the metadata on all of the
801  // intermediate nodes since the order is not guaranteed
802  try {
803  unpackedTree.updateOrAddFileToCaseRec(statusMap, archiveFilePath, parentAr, archiveFile, depthMap);
804  unpackedTree.commitCurrentTransaction();
805  } catch (TskCoreException | NoCurrentCaseException ex) {
806  logger.log(Level.SEVERE, "Error populating complete derived file hierarchy from the unpacked dir structure", ex); //NON-NLS
807  //TODO decide if anything to cleanup, for now bailing
808  unpackedTree.rollbackCurrentTransaction();
809  }
810 
811  if (checkForIngestCancellation(archiveFile)) {
812  return false;
813  }
814 
815  // Get the new files to be added to the case.
816  unpackedFiles = unpackedTree.getAllFileObjects();
817  } catch (SevenZipException | IllegalArgumentException ex) {
818  logger.log(Level.WARNING, "Error unpacking file: " + archiveFile, ex); //NON-NLS
819  //inbox message
820 
821  // print a message if the file is allocated
822  if (archiveFile.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.ALLOC)) {
823  String msg = NbBundle.getMessage(SevenZipExtractor.class,
824  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.errUnpacking.msg",
825  currentArchiveName);
826  String details = NbBundle.getMessage(SevenZipExtractor.class,
827  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.errUnpacking.details",
828  escapedArchiveFilePath, ex.getMessage());
829  services.postMessage(IngestMessage.createErrorMessage(MODULE_NAME, msg, details));
830  }
831  } finally {
832  if (inArchive != null) {
833  try {
834  inArchive.close();
835  } catch (SevenZipException e) {
836  logger.log(Level.SEVERE, "Error closing archive: " + archiveFile, e); //NON-NLS
837  }
838  }
839 
840  if (stream != null) {
841  try {
842  stream.close();
843  } catch (IOException ex) {
844  logger.log(Level.SEVERE, "Error closing stream after unpacking archive: " + archiveFile, ex); //NON-NLS
845  }
846  }
847 
848  //close progress bar
849  if (progressStarted) {
850  progress.finish();
851  }
852  }
853  if (checkForIngestCancellation(archiveFile)) {
854  return false;
855  }
856  //create artifact and send user message
857  if (hasEncrypted) {
858  String encryptionType = fullEncryption ? ENCRYPTION_FULL : ENCRYPTION_FILE_LEVEL;
859  try {
860  BlackboardArtifact artifact = archiveFile.newAnalysisResult(
861  new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED),
862  Score.SCORE_NOTABLE,
863  null, null, encryptionType,
864  Arrays.asList(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_COMMENT, MODULE_NAME, encryptionType)))
865  .getAnalysisResult();
866 
867  try {
868  /*
869  * post the artifact which will index the artifact for
870  * keyword search, and fire an event to notify UI of this
871  * new artifact
872  */
873  blackboard.postArtifact(artifact, MODULE_NAME);
874  } catch (Blackboard.BlackboardException ex) {
875  logger.log(Level.SEVERE, "Unable to post blackboard artifact " + artifact.getArtifactID(), ex); //NON-NLS
876  MessageNotifyUtil.Notify.error(
877  Bundle.SevenZipExtractor_indexError_message(), artifact.getDisplayName());
878  }
879 
880  } catch (TskCoreException ex) {
881  logger.log(Level.SEVERE, "Error creating blackboard artifact for encryption detected for file: " + escapedArchiveFilePath, ex); //NON-NLS
882  }
883 
884  String msg = NbBundle.getMessage(SevenZipExtractor.class,
885  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.encrFileDetected.msg");
886  String details = NbBundle.getMessage(SevenZipExtractor.class,
887  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.encrFileDetected.details",
888  currentArchiveName, MODULE_NAME);
889  services.postMessage(IngestMessage.createWarningMessage(MODULE_NAME, msg, details));
890  }
891 
892  // adding unpacked extracted derived files to the job after closing relevant resources.
893  if (!unpackedFiles.isEmpty()) {
894  //currently sending a single event for all new files
895  services.fireModuleContentEvent(new ModuleContentEvent(archiveFile));
896  if (context != null) {
897  context.addFilesToJob(unpackedFiles);
898  }
899  }
900 
901  return unpackSuccessful;
902  }
903 
911  private boolean findOrCreateDirectory(File directory) throws FileTaskFailedException, InterruptedException {
912  if (!fileTaskExecutor.exists(directory)) {
913  return fileTaskExecutor.mkdirs(directory);
914  } else {
915  return true;
916  }
917  }
918 
926  private boolean findOrCreateEmptyFile(File file) throws FileTaskFailedException, InterruptedException {
927  if (!fileTaskExecutor.exists(file)) {
928  fileTaskExecutor.mkdirs(file.getParentFile());
929  return fileTaskExecutor.createNewFile(file);
930  } else {
931  return true;
932  }
933  }
934 
935  private Charset detectFilenamesCharset(List<byte[]> byteDatas) {
936  Charset detectedCharset = null;
937  CharsetDetector charsetDetector = new CharsetDetector();
938  int byteSum = 0;
939  int fileNum = 0;
940  for (byte[] byteData : byteDatas) {
941  fileNum++;
942  byteSum += byteData.length;
943  // Only read ~1000 bytes of filenames in this directory
944  if (byteSum >= 1000) {
945  break;
946  }
947  }
948  byte[] allBytes = new byte[byteSum];
949  int start = 0;
950  for (int i = 0; i < fileNum; i++) {
951  byte[] byteData = byteDatas.get(i);
952  System.arraycopy(byteData, 0, allBytes, start, byteData.length);
953  start += byteData.length;
954  }
955  charsetDetector.setText(allBytes);
956  CharsetMatch cm = charsetDetector.detect();
957  if (cm.getConfidence() >= 90 && Charset.isSupported(cm.getName())) {
958  detectedCharset = Charset.forName(cm.getName());
959  }
960  return detectedCharset;
961  }
962 
967  private int[] getExtractableFilesFromDetailsMap(
968  Map<Integer, InArchiveItemDetails> archiveDetailsMap) {
969 
970  Integer[] wrappedExtractionIndices = archiveDetailsMap.keySet()
971  .toArray(new Integer[archiveDetailsMap.size()]);
972 
973  return Arrays.stream(wrappedExtractionIndices)
974  .mapToInt(Integer::intValue)
975  .toArray();
976 
977  }
978 
986  private final static class UnpackStream implements ISequentialOutStream {
987 
988  private EncodedFileOutputStream output;
989  private String localAbsPath;
990  private int bytesWritten;
991  private static final Tika tika = new Tika();
992  private String mimeType = "";
993 
994  UnpackStream(String localAbsPath) throws IOException {
995  this.output = new EncodedFileOutputStream(new FileOutputStream(localAbsPath), TskData.EncodingType.XOR1);
996  this.localAbsPath = localAbsPath;
997  this.bytesWritten = 0;
998  }
999 
1000  public void setNewOutputStream(String localAbsPath) throws IOException {
1001  this.output.close();
1002  this.output = new EncodedFileOutputStream(new FileOutputStream(localAbsPath), TskData.EncodingType.XOR1);
1003  this.localAbsPath = localAbsPath;
1004  this.bytesWritten = 0;
1005  this.mimeType = "";
1006  }
1007 
1008  public int getSize() {
1009  return bytesWritten;
1010  }
1011 
1012  @Override
1013  public int write(byte[] bytes) throws SevenZipException {
1014  try {
1015  // Detect MIME type now while the file is in memory
1016  if (bytesWritten == 0) {
1017  mimeType = tika.detect(bytes);
1018  }
1019  output.write(bytes);
1020  this.bytesWritten += bytes.length;
1021  } catch (IOException ex) {
1022  throw new SevenZipException(
1023  NbBundle.getMessage(SevenZipExtractor.class,
1024  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.UnpackStream.write.exception.msg",
1025  localAbsPath), ex);
1026  }
1027  return bytes.length;
1028  }
1029 
1030  public String getMIMEType() {
1031  return mimeType;
1032  }
1033 
1034  public void close() throws IOException {
1035  try (EncodedFileOutputStream out = output) {
1036  out.flush();
1037  }
1038  }
1039 
1040  }
1041 
1045  private static class InArchiveItemDetails {
1046 
1047  private final SevenZipExtractor.UnpackedTree.UnpackedNode unpackedNode;
1048  private final String localAbsPath;
1049  private final String localRelPath;
1050 
1052  SevenZipExtractor.UnpackedTree.UnpackedNode unpackedNode,
1053  String localAbsPath, String localRelPath) {
1054  this.unpackedNode = unpackedNode;
1055  this.localAbsPath = localAbsPath;
1056  this.localRelPath = localRelPath;
1057  }
1058 
1059  public SevenZipExtractor.UnpackedTree.UnpackedNode getUnpackedNode() {
1060  return unpackedNode;
1061  }
1062 
1063  public String getLocalAbsPath() {
1064  return localAbsPath;
1065  }
1066 
1067  public String getLocalRelPath() {
1068  return localRelPath;
1069  }
1070  }
1071 
1076  private static class StandardIArchiveExtractCallback
1077  implements IArchiveExtractCallback, ICryptoGetTextPassword {
1078 
1079  private final AbstractFile archiveFile;
1080  private final IInArchive inArchive;
1081  private UnpackStream unpackStream = null;
1082  private final Map<Integer, InArchiveItemDetails> archiveDetailsMap;
1083  private final ProgressHandle progressHandle;
1084 
1085  private int inArchiveItemIndex;
1086 
1087  private long createTimeInSeconds;
1088  private long modTimeInSeconds;
1089  private long accessTimeInSeconds;
1090 
1091  private boolean isFolder;
1092  private final String password;
1093 
1094  private boolean unpackSuccessful = true;
1095 
1096  StandardIArchiveExtractCallback(IInArchive inArchive,
1097  AbstractFile archiveFile, ProgressHandle progressHandle,
1098  Map<Integer, InArchiveItemDetails> archiveDetailsMap,
1099  String password, long freeDiskSpace) {
1100  this.inArchive = inArchive;
1101  this.progressHandle = progressHandle;
1102  this.archiveFile = archiveFile;
1103  this.archiveDetailsMap = archiveDetailsMap;
1104  this.password = password;
1105  }
1106 
1121  @Override
1122  public ISequentialOutStream getStream(int inArchiveItemIndex,
1123  ExtractAskMode mode) throws SevenZipException {
1124 
1125  this.inArchiveItemIndex = inArchiveItemIndex;
1126 
1127  isFolder = (Boolean) inArchive
1128  .getProperty(inArchiveItemIndex, PropID.IS_FOLDER);
1129  if (isFolder || mode != ExtractAskMode.EXTRACT) {
1130  return null;
1131  }
1132 
1133  final String localAbsPath = archiveDetailsMap.get(
1134  inArchiveItemIndex).getLocalAbsPath();
1135 
1136  //If the Unpackstream has been allocated, then set the Outputstream
1137  //to another file rather than creating a new unpack stream. The 7Zip
1138  //binding has a memory leak, so creating new unpack streams will not be
1139  //dereferenced. As a fix, we create one UnpackStream, and mutate its state,
1140  //so that there only exists one 8192 byte buffer in memory per archive.
1141  try {
1142  if (unpackStream != null) {
1143  unpackStream.setNewOutputStream(localAbsPath);
1144  } else {
1145  unpackStream = new UnpackStream(localAbsPath);
1146  }
1147  } catch (IOException ex) {
1148  logger.log(Level.WARNING, String.format("Error opening or setting new stream " //NON-NLS
1149  + "for archive file at %s", localAbsPath), ex.getMessage()); //NON-NLS
1150  return null;
1151  }
1152 
1153  return unpackStream;
1154  }
1155 
1164  @Override
1165  public void prepareOperation(ExtractAskMode mode) throws SevenZipException {
1166  final Date createTime = (Date) inArchive.getProperty(
1167  inArchiveItemIndex, PropID.CREATION_TIME);
1168  final Date accessTime = (Date) inArchive.getProperty(
1169  inArchiveItemIndex, PropID.LAST_ACCESS_TIME);
1170  final Date writeTime = (Date) inArchive.getProperty(
1171  inArchiveItemIndex, PropID.LAST_MODIFICATION_TIME);
1172 
1173  createTimeInSeconds = createTime == null ? 0L
1174  : createTime.getTime() / 1000;
1175  modTimeInSeconds = writeTime == null ? 0L
1176  : writeTime.getTime() / 1000;
1177  accessTimeInSeconds = accessTime == null ? 0L
1178  : accessTime.getTime() / 1000;
1179 
1180  progressHandle.progress(archiveFile.getName() + ": "
1181  + (String) inArchive.getProperty(inArchiveItemIndex, PropID.PATH),
1183 
1184  }
1185 
1194  @Override
1195  public void setOperationResult(ExtractOperationResult result) throws SevenZipException {
1196 
1197  final SevenZipExtractor.UnpackedTree.UnpackedNode unpackedNode
1198  = archiveDetailsMap.get(inArchiveItemIndex).getUnpackedNode();
1199  final String localRelPath = archiveDetailsMap.get(
1200  inArchiveItemIndex).getLocalRelPath();
1201  if (isFolder) {
1202  unpackedNode.addDerivedInfo(0,
1203  !(Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.IS_FOLDER),
1205  localRelPath);
1206  return;
1207  } else {
1208  unpackedNode.setMimeType(unpackStream.getMIMEType());
1209  }
1210 
1211  final String localAbsPath = archiveDetailsMap.get(
1212  inArchiveItemIndex).getLocalAbsPath();
1213  if (result != ExtractOperationResult.OK) {
1214  if (archiveFile.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.UNALLOC)) {
1215  logger.log(Level.WARNING, "Extraction of : {0} encountered error {1} (file is unallocated and may be corrupt)", //NON-NLS
1216  new Object[]{localAbsPath, result});
1217  } else {
1218  logger.log(Level.WARNING, "Extraction of : {0} encountered error {1}", //NON-NLS
1219  new Object[]{localAbsPath, result});
1220  }
1221  unpackSuccessful = false;
1222  }
1223 
1224  //record derived data in unode, to be traversed later after unpacking the archive
1225  unpackedNode.addDerivedInfo(unpackStream.getSize(),
1226  !(Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.IS_FOLDER),
1228 
1229  try {
1230  unpackStream.close();
1231  } catch (IOException e) {
1232  logger.log(Level.WARNING, "Error closing unpack stream for file: {0}", localAbsPath); //NON-NLS
1233  }
1234  }
1235 
1236  @Override
1237  public void setTotal(long value) throws SevenZipException {
1238  //Not necessary for extract, left intenionally blank
1239  }
1240 
1241  @Override
1242  public void setCompleted(long value) throws SevenZipException {
1243  //Not necessary for extract, left intenionally blank
1244  }
1245 
1253  @Override
1254  public String cryptoGetTextPassword() throws SevenZipException {
1255  return password;
1256  }
1257 
1258  public boolean wasSuccessful() {
1259  return unpackSuccessful;
1260  }
1261  }
1262 
1270  private class UnpackedTree {
1271 
1272  final UnpackedNode rootNode;
1273  private int nodesProcessed = 0;
1274 
1275  // It is significantly faster to add the DerivedFiles to the case on a transaction,
1276  // but we don't want to hold the transaction (and case write lock) for the entire
1277  // stage. Instead, we use the same transaction for MAX_TRANSACTION_SIZE database operations
1278  // and then commit that transaction and start a new one, giving at least a short window
1279  // for other processes.
1280  private CaseDbTransaction currentTransaction = null;
1281  private long transactionCounter = 0;
1282  private final static long MAX_TRANSACTION_SIZE = 1000;
1283 
/**
 * Builds a tree whose root node represents the archive file itself.
 *
 * @param localPathRoot Local relative path assigned to the root node.
 * @param archiveFile   The archive file; becomes the root node's file and
 *                      supplies the root node's name.
 */
UnpackedTree(String localPathRoot, AbstractFile archiveFile) {
    // Configure the root fully before publishing it through the final field.
    UnpackedNode root = new UnpackedNode();
    root.setFile(archiveFile);
    root.setFileName(archiveFile.getName());
    root.setLocalRelPath(localPathRoot);
    this.rootNode = root;
}
1296 
1306  UnpackedNode addNode(String filePath, byte[] filePathBytes) {
1307  String[] toks = filePath.split("[\\/\\\\]");
1308  List<String> tokens = new ArrayList<>();
1309  for (int i = 0; i < toks.length; ++i) {
1310  if (!toks[i].isEmpty()) {
1311  tokens.add(toks[i]);
1312  }
1313  }
1314 
1315  List<byte[]> byteTokens;
1316  if (filePathBytes == null) {
1317  return addNode(rootNode, tokens, null);
1318  } else {
1319  byteTokens = new ArrayList<>(tokens.size());
1320  int last = 0;
1321  for (int i = 0; i < filePathBytes.length; i++) {
1322  if (filePathBytes[i] == '/') {
1323  int len = i - last;
1324  if (len > 0) {
1325  byte[] arr = new byte[len];
1326  System.arraycopy(filePathBytes, last, arr, 0, len);
1327  byteTokens.add(arr);
1328  }
1329  last = i + 1;
1330  }
1331  }
1332  int len = filePathBytes.length - last;
1333  if (len > 0) {
1334  byte[] arr = new byte[len];
1335  System.arraycopy(filePathBytes, last, arr, 0, len);
1336  byteTokens.add(arr);
1337  }
1338 
1339  if (tokens.size() != byteTokens.size()) {
1340  String rootFileInfo = "(unknown)";
1341  if (rootNode.getFile() != null) {
1342  rootFileInfo = rootNode.getFile().getParentPath() + rootNode.getFile().getName()
1343  + "(ID: " + rootNode.getFile().getId() + ")";
1344  }
1345  logger.log(Level.WARNING, "Could not map path bytes to path string while extracting archive {0} (path string: \"{1}\", bytes: {2})",
1346  new Object[]{rootFileInfo, this.rootNode.getFile().getId(), filePath, bytesToString(filePathBytes)});
1347  return addNode(rootNode, tokens, null);
1348  }
1349  }
1350 
1351  return addNode(rootNode, tokens, byteTokens);
1352  }
1353 
1361  private String bytesToString(byte[] bytes) {
1362  StringBuilder result = new StringBuilder();
1363  for (byte b : bytes) {
1364  result.append(String.format("%02x", b));
1365  }
1366  return result.toString();
1367  }
1368 
1379  List<String> tokenPath, List<byte[]> tokenPathBytes) {
1380  // we found all of the tokens
1381  if (tokenPath.isEmpty()) {
1382  return parent;
1383  }
1384 
1385  // get the next name in the path and look it up
1386  String childName = tokenPath.remove(0);
1387  byte[] childNameBytes = null;
1388  if (tokenPathBytes != null) {
1389  childNameBytes = tokenPathBytes.remove(0);
1390  }
1391  UnpackedNode child = parent.getChild(childName);
1392  // create new node
1393  if (child == null) {
1394  child = new UnpackedNode(childName, parent);
1395  child.setFileNameBytes(childNameBytes);
1396  parent.addChild(child);
1397  }
1398 
1399  // go down one more level
1400  return addNode(child, tokenPath, tokenPathBytes);
1401  }
1402 
1409  List<AbstractFile> getRootFileObjects() {
1410  List<AbstractFile> ret = new ArrayList<>();
1411  rootNode.getChildren().forEach((child) -> {
1412  ret.add(child.getFile());
1413  });
1414  return ret;
1415  }
1416 
1423  List<AbstractFile> getAllFileObjects() {
1424  List<AbstractFile> ret = new ArrayList<>();
1425  rootNode.getChildren().forEach((child) -> {
1426  getAllFileObjectsRec(ret, child);
1427  });
1428  return ret;
1429  }
1430 
1431  private void getAllFileObjectsRec(List<AbstractFile> list, UnpackedNode parent) {
1432  list.add(parent.getFile());
1433  parent.getChildren().forEach((child) -> {
1434  getAllFileObjectsRec(list, child);
1435  });
1436  }
1437 
1442  void updateOrAddFileToCaseRec(HashMap<String, ZipFileStatusWrapper> statusMap, String archiveFilePath, Archive parentAr, AbstractFile archiveFile, ConcurrentHashMap<Long, Archive> depthMap) throws TskCoreException, NoCurrentCaseException {
1444  for (UnpackedNode child : rootNode.getChildren()) {
1445  updateOrAddFileToCaseRec(child, fileManager, statusMap, archiveFilePath, parentAr, archiveFile, depthMap);
1446  }
1447  }
1448 
1466  private void updateOrAddFileToCaseRec(UnpackedNode node, FileManager fileManager, HashMap<String, ZipFileStatusWrapper> statusMap, String archiveFilePath, Archive parentAr, AbstractFile archiveFile, ConcurrentHashMap<Long, Archive> depthMap) throws TskCoreException {
1467  DerivedFile df;
1468  progress.progress(String.format("%s: Adding/updating files in case database (%d of %d)", currentArchiveName, ++nodesProcessed, numItems));
1469  try {
1470  String nameInDatabase = getKeyFromUnpackedNode(node, archiveFilePath);
1471  ZipFileStatusWrapper existingFile = nameInDatabase == null ? null : statusMap.get(nameInDatabase);
1472  if (existingFile == null) {
1473  df = Case.getCurrentCaseThrows().getSleuthkitCase().addDerivedFile(node.getFileName(), node.getLocalRelPath(), node.getSize(),
1474  node.getCtime(), node.getCrtime(), node.getAtime(), node.getMtime(),
1475  node.isIsFile(), node.getParent().getFile(), "", MODULE_NAME,
1476  "", "", TskData.EncodingType.XOR1, getCurrentTransaction());
1477  statusMap.put(getKeyAbstractFile(df), new ZipFileStatusWrapper(df, ZipFileStatus.EXISTS));
1478  } else {
1479  String key = getKeyAbstractFile(existingFile.getFile());
1480  if (existingFile.getStatus() == ZipFileStatus.EXISTS && existingFile.getFile().getSize() < node.getSize()) {
1481  existingFile.setStatus(ZipFileStatus.UPDATE);
1482  statusMap.put(key, existingFile);
1483  }
1484  if (existingFile.getStatus() == ZipFileStatus.UPDATE) {
1485  //if the we are updating a file and its mime type was octet-stream we want to re-type it
1486  String mimeType = existingFile.getFile().getMIMEType().equalsIgnoreCase("application/octet-stream") ? null : existingFile.getFile().getMIMEType();
1487  df = Case.getCurrentCaseThrows().getSleuthkitCase().updateDerivedFile((DerivedFile) existingFile.getFile(), node.getLocalRelPath(), node.getSize(),
1488  node.getCtime(), node.getCrtime(), node.getAtime(), node.getMtime(),
1489  node.isIsFile(), mimeType, "", MODULE_NAME,
1490  "", "", TskData.EncodingType.XOR1, existingFile.getFile().getParent(), getCurrentTransaction());
1491  } else {
1492  //ALREADY CURRENT - SKIP
1493  statusMap.put(key, new ZipFileStatusWrapper(existingFile.getFile(), ZipFileStatus.SKIP));
1494  df = (DerivedFile) existingFile.getFile();
1495  }
1496  }
1497  node.setFile(df);
1498  } catch (TskCoreException | NoCurrentCaseException ex) {
1499  logger.log(Level.SEVERE, "Error adding a derived file to db:" + node.getFileName(), ex); //NON-NLS
1500  throw new TskCoreException(
1501  NbBundle.getMessage(SevenZipExtractor.class, "EmbeddedFileExtractorIngestModule.ArchiveExtractor.UnpackedTree.exception.msg",
1502  node.getFileName()), ex);
1503  }
1504 
1505  // Determine encoding of children
1506  if (node.getChildren().size() > 0) {
1507  String names = "";
1508  ArrayList<byte[]> byteDatas = new ArrayList<>();
1509  for (UnpackedNode child : node.getChildren()) {
1510  byte[] childBytes = child.getFileNameBytes();
1511  if (childBytes != null) {
1512  byteDatas.add(childBytes);
1513  }
1514  names += child.getFileName();
1515  }
1516  Charset detectedCharset = detectFilenamesCharset(byteDatas);
1517 
1518  // If a charset was detected, transcode filenames accordingly
1519  if (detectedCharset != null && detectedCharset.canEncode()) {
1520  for (UnpackedNode child : node.getChildren()) {
1521  byte[] childBytes = child.getFileNameBytes();
1522  if (childBytes != null) {
1523  String decodedName = new String(childBytes, detectedCharset);
1524  child.setFileName(decodedName);
1525  }
1526  }
1527  }
1528  }
1529 
1530  // Check for zip bombs
1531  if (isSevenZipExtractionSupported(node.getMimeType())) {
1532  Archive child = new Archive(parentAr.getDepth() + 1, parentAr.getRootArchiveId(), archiveFile);
1533  parentAr.addChild(child);
1534  depthMap.put(node.getFile().getId(), child);
1535  }
1536 
1537  //recurse adding the children if this file was incomplete the children presumably need to be added
1538  for (UnpackedNode child : node.getChildren()) {
1539  updateOrAddFileToCaseRec(child, fileManager, statusMap, getKeyFromUnpackedNode(node, archiveFilePath), parentAr, archiveFile, depthMap);
1540  }
1541  }
1542 
1553  private CaseDbTransaction getCurrentTransaction() throws TskCoreException {
1554 
1555  if (currentTransaction == null) {
1556  startTransaction();
1557  }
1558 
1559  if (transactionCounter > MAX_TRANSACTION_SIZE) {
1561  startTransaction();
1562  }
1563 
1564  transactionCounter++;
1565  return currentTransaction;
1566  }
1567 
1573  private void startTransaction() throws TskCoreException {
1574  try {
1575  currentTransaction = Case.getCurrentCaseThrows().getSleuthkitCase().beginTransaction();
1576  transactionCounter = 0;
1577  } catch (NoCurrentCaseException ex) {
1578  throw new TskCoreException("Case is closed");
1579  }
1580  }
1581 
1587  private void commitCurrentTransaction() throws TskCoreException {
1588  if (currentTransaction != null) {
1589  currentTransaction.commit();
1590  currentTransaction = null;
1591  }
1592  }
1593 
1598  if (currentTransaction != null) {
1599  try {
1600  currentTransaction.rollback();
1601  currentTransaction = null;
1602  } catch (TskCoreException ex) {
1603  // Ignored
1604  }
1605  }
1606  }
1607 
1611  private class UnpackedNode {
1612 
1613  private String fileName;
1614  private byte[] fileNameBytes;
1615  private AbstractFile file;
1616  private final List<UnpackedNode> children = new ArrayList<>();
1617  private String localRelPath = "";
1618  private long size;
1619  private long ctime, crtime, atime, mtime;
1620  private boolean isFile;
1621  private String mimeType = "";
1623 
1624  //root constructor
1625  UnpackedNode() {
1626  }
1627 
1628  //child node constructor
1629  UnpackedNode(String fileName, UnpackedNode parent) {
1630  this.fileName = fileName;
1631  this.parent = parent;
1632  this.localRelPath = parent.getLocalRelPath() + File.separator + fileName;
1633  }
1634 
1635  long getCtime() {
1636  return ctime;
1637  }
1638 
1639  long getCrtime() {
1640  return crtime;
1641  }
1642 
1643  long getAtime() {
1644  return atime;
1645  }
1646 
1647  long getMtime() {
1648  return mtime;
1649  }
1650 
1651  void setFileName(String fileName) {
1652  this.fileName = fileName;
1653  }
1654 
1660  void addChild(UnpackedNode child) {
1661  children.add(child);
1662  }
1663 
1670  List<UnpackedNode> getChildren() {
1671  return children;
1672  }
1673 
1679  UnpackedNode getParent() {
1680  return parent;
1681  }
1682 
1683  void addDerivedInfo(long size,
1684  boolean isFile,
1685  long ctime, long crtime, long atime, long mtime, String relLocalPath) {
1686  this.size = size;
1687  this.isFile = isFile;
1688  this.ctime = ctime;
1689  this.crtime = crtime;
1690  this.atime = atime;
1691  this.mtime = mtime;
1692  this.localRelPath = relLocalPath;
1693  }
1694 
1695  void setFile(AbstractFile file) {
1696  this.file = file;
1697  }
1698 
1699  void setMimeType(String mimeType) {
1700  this.mimeType = mimeType;
1701  }
1702 
1703  String getMimeType() {
1704  return mimeType;
1705  }
1706 
1714  UnpackedNode getChild(String childFileName) {
1715  UnpackedNode ret = null;
1716  for (UnpackedNode child : children) {
1717  if (child.getFileName().equals(childFileName)) {
1718  ret = child;
1719  break;
1720  }
1721  }
1722  return ret;
1723  }
1724 
1725  String getFileName() {
1726  return fileName;
1727  }
1728 
1729  AbstractFile getFile() {
1730  return file;
1731  }
1732 
1733  String getLocalRelPath() {
1734  return localRelPath;
1735  }
1736 
1743  void setLocalRelPath(String localRelativePath) {
1744  localRelPath = localRelativePath;
1745  }
1746 
1747  long getSize() {
1748  return size;
1749  }
1750 
1751  boolean isIsFile() {
1752  return isFile;
1753  }
1754 
1755  void setFileNameBytes(byte[] fileNameBytes) {
1756  if (fileNameBytes != null) {
1757  this.fileNameBytes = Arrays.copyOf(fileNameBytes, fileNameBytes.length);
1758  }
1759  }
1760 
1761  byte[] getFileNameBytes() {
1762  if (fileNameBytes == null) {
1763  return null;
1764  }
1765  return Arrays.copyOf(fileNameBytes, fileNameBytes.length);
1766  }
1767  }
1768  }
1769 
1774  static class Archive {
1775 
1776  //depth will be 0 for the root archive unpack was called on, and increase as unpack recurses down through archives contained within
1777  private final int depth;
1778  private final List<Archive> children;
1779  private final long rootArchiveId;
1780  private boolean flaggedAsZipBomb = false;
1781  private final AbstractFile archiveFile;
1782 
1795  Archive(int depth, long rootArchiveId, AbstractFile archiveFile) {
1796  this.children = new ArrayList<>();
1797  this.depth = depth;
1798  this.rootArchiveId = rootArchiveId;
1799  this.archiveFile = archiveFile;
1800  }
1801 
1808  void addChild(Archive child) {
1809  children.add(child);
1810  }
1811 
1816  synchronized void flagAsZipBomb() {
1817  flaggedAsZipBomb = true;
1818  }
1819 
1825  synchronized boolean isFlaggedAsZipBomb() {
1826  return flaggedAsZipBomb;
1827  }
1828 
1834  AbstractFile getArchiveFile() {
1835  return archiveFile;
1836  }
1837 
1843  long getRootArchiveId() {
1844  return rootArchiveId;
1845  }
1846 
1852  long getObjectId() {
1853  return archiveFile.getId();
1854  }
1855 
1863  int getDepth() {
1864  return depth;
1865  }
1866  }
1867 
1872  private final class ZipFileStatusWrapper {
1873 
1874  private final AbstractFile abstractFile;
1876 
1884  private ZipFileStatusWrapper(AbstractFile file, ZipFileStatus status) {
1885  abstractFile = file;
1886  zipStatus = status;
1887  }
1888 
1894  private AbstractFile getFile() {
1895  return abstractFile;
1896  }
1897 
1904  return zipStatus;
1905  }
1906 
1912  private void setStatus(ZipFileStatus status) {
1913  zipStatus = status;
1914  }
1915 
1916  }
1917 
1922  private enum ZipFileStatus {
1923  UPDATE, //Should be updated //NON-NLS
1924  SKIP, //File is current can be skipped //NON-NLS
1925  EXISTS //File exists but it is unknown if it is current //NON-NLS
1926  }
1927 }
UnpackedNode addNode(UnpackedNode parent, List< String > tokenPath, List< byte[]> tokenPathBytes)
void updateOrAddFileToCaseRec(UnpackedNode node, FileManager fileManager, HashMap< String, ZipFileStatusWrapper > statusMap, String archiveFilePath, Archive parentAr, AbstractFile archiveFile, ConcurrentHashMap< Long, Archive > depthMap)

Copyright © 2012-2021 Basis Technology. Generated on: Thu Sep 30 2021
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.