Autopsy  4.19.3
Graphical digital forensics platform for The Sleuth Kit and other tools.
SevenZipExtractor.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2015-2021 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.modules.embeddedfileextractor;
20 
21 import java.io.File;
22 import java.io.FileOutputStream;
23 import java.io.IOException;
24 import java.nio.charset.Charset;
25 import java.nio.file.Path;
26 import java.nio.file.Paths;
27 import java.util.ArrayList;
28 import java.util.Arrays;
29 import java.util.Collection;
30 import java.util.Collections;
31 import java.util.Date;
32 import java.util.HashMap;
33 import java.util.List;
34 import java.util.Map;
35 import java.util.concurrent.ConcurrentHashMap;
36 import java.util.logging.Level;
37 import net.sf.sevenzipjbinding.ArchiveFormat;
38 import static net.sf.sevenzipjbinding.ArchiveFormat.RAR;
39 import net.sf.sevenzipjbinding.ExtractAskMode;
40 import net.sf.sevenzipjbinding.ExtractOperationResult;
41 import net.sf.sevenzipjbinding.IArchiveExtractCallback;
42 import net.sf.sevenzipjbinding.ICryptoGetTextPassword;
43 import net.sf.sevenzipjbinding.IInArchive;
44 import net.sf.sevenzipjbinding.ISequentialOutStream;
45 import net.sf.sevenzipjbinding.PropID;
46 import net.sf.sevenzipjbinding.SevenZip;
47 import net.sf.sevenzipjbinding.SevenZipException;
48 import net.sf.sevenzipjbinding.SevenZipNativeInitializationException;
49 import org.apache.tika.Tika;
50 import org.apache.tika.parser.txt.CharsetDetector;
51 import org.apache.tika.parser.txt.CharsetMatch;
52 import org.netbeans.api.progress.ProgressHandle;
53 import org.openide.util.NbBundle;
54 import org.openide.util.NbBundle.Messages;
69 import org.sleuthkit.datamodel.AbstractFile;
70 import org.sleuthkit.datamodel.Blackboard;
71 import org.sleuthkit.datamodel.BlackboardArtifact;
72 import org.sleuthkit.datamodel.BlackboardAttribute;
73 import static org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE.TSK_COMMENT;
74 import static org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DESCRIPTION;
75 import static org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME;
76 import org.sleuthkit.datamodel.Content;
77 import org.sleuthkit.datamodel.DerivedFile;
78 import org.sleuthkit.datamodel.EncodedFileOutputStream;
79 import org.sleuthkit.datamodel.ReadContentInputStream;
80 import org.sleuthkit.datamodel.Score;
81 import org.sleuthkit.datamodel.SleuthkitCase.CaseDbTransaction;
82 import org.sleuthkit.datamodel.TskCoreException;
83 import org.sleuthkit.datamodel.TskData;
84 
89 class SevenZipExtractor {
90 
91  private static final Logger logger = Logger.getLogger(SevenZipExtractor.class.getName());
92 
 // Name passed as the source of the blackboard attributes, artifacts and ingest messages created by this class.
93  private static final String MODULE_NAME = EmbeddedFileExtractorModuleFactory.getModuleName();
94 
95  //encryption type strings
 // unpack() reports ENCRYPTION_FULL when every item was skipped as encrypted, ENCRYPTION_FILE_LEVEL otherwise.
96  private static final String ENCRYPTION_FILE_LEVEL = NbBundle.getMessage(EmbeddedFileExtractorIngestModule.class,
97  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.encryptionFileLevel");
98  private static final String ENCRYPTION_FULL = EncryptionDetectionModuleFactory.PASSWORD_PROTECT_MESSAGE;
99 
100  //zip bomb detection
 // Archive depth at which unpack() stops and flags the root archive as a possible zip bomb.
101  private static final int MAX_DEPTH = 4;
 // An item whose (size / packed size) reaches this value flags the root archive as a possible zip bomb.
102  private static final int MAX_COMPRESSION_RATIO = 600;
 // Items smaller than this many bytes (500 * 10^6) are exempt from the compression ratio check.
103  private static final long MIN_COMPRESSION_RATIO_SIZE = 500 * 1000000L;
 // An item is skipped when extracting it would drop the estimated free space below this many bytes.
104  private static final long MIN_FREE_DISK_SPACE = 1 * 1000 * 1000000L; //1GB
105 
106  private IngestServices services = IngestServices.getInstance();
 // Collaborators supplied through the constructor.
107  private final IngestJobContext context;
108  private final FileTypeDetector fileTypeDetector;
109  private final FileTaskExecutor fileTaskExecutor;
110 
 // Module output directory, in relative and absolute form; used to build the local paths of extracted items.
111  private String moduleDirRelative;
112  private String moduleDirAbsolute;
113 
 // Assigned from the current case at the start of every unpack() call.
114  private Blackboard blackboard;
115 
 // Per-archive state that unpack() reassigns on every call.
116  private ProgressHandle progress;
117  private int numItems;
118  private String currentArchiveName;
119 
124 
125  ZIP("application/zip"), //NON-NLS
126  SEVENZ("application/x-7z-compressed"), //NON-NLS
127  GZIP("application/gzip"), //NON-NLS
128  XGZIP("application/x-gzip"), //NON-NLS
129  XBZIP2("application/x-bzip2"), //NON-NLS
130  XTAR("application/x-tar"), //NON-NLS
131  XGTAR("application/x-gtar"),
132  XRAR("application/x-rar-compressed"); //NON-NLS
133 
134  private final String mimeType;
135 
136  SupportedArchiveExtractionFormats(final String mimeType) {
137  this.mimeType = mimeType;
138  }
139 
140  @Override
141  public String toString() {
142  return this.mimeType;
143  }
144  // TODO Expand to support more formats after upgrading Tika
145  }
146 
167  SevenZipExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector, String moduleDirRelative, String moduleDirAbsolute, FileTaskExecutor fileTaskExecutor) throws SevenZipNativeInitializationException {
168  if (!SevenZip.isInitializedSuccessfully()) {
169  throw new SevenZipNativeInitializationException("SevenZip has not been previously initialized.");
170  }
171  this.context = context;
172  this.fileTypeDetector = fileTypeDetector;
173  this.moduleDirRelative = moduleDirRelative;
174  this.moduleDirAbsolute = moduleDirAbsolute;
175  this.fileTaskExecutor = fileTaskExecutor;
176  }
177 
186  boolean isSevenZipExtractionSupported(AbstractFile file) {
187  String fileMimeType = fileTypeDetector.getMIMEType(file);
188  for (SupportedArchiveExtractionFormats mimeType : SupportedArchiveExtractionFormats.values()) {
189  if (checkForIngestCancellation(file)) {
190  break;
191  }
192  if (mimeType.toString().equals(fileMimeType)) {
193  return true;
194  }
195  }
196  return false;
197  }
198 
199  boolean isSevenZipExtractionSupported(String mimeType) {
200  for (SupportedArchiveExtractionFormats supportedMimeType : SupportedArchiveExtractionFormats.values()) {
201  if (mimeType.contains(supportedMimeType.toString())) {
202  return true;
203  }
204  }
205  return false;
206  }
207 
218  private boolean checkForIngestCancellation(AbstractFile file) {
219  if (fileTaskExecutor != null && context != null && context.fileIngestIsCancelled()) {
220  logger.log(Level.INFO, "Ingest was cancelled. Results extracted from the following archive file may be incomplete. Name: {0}Object ID: {1}", new Object[]{file.getName(), file.getId()});
221  return true;
222  }
223  return false;
224  }
225 
249  private boolean isZipBombArchiveItemCheck(AbstractFile archiveFile, IInArchive inArchive, int inArchiveItemIndex, ConcurrentHashMap<Long, Archive> depthMap, String escapedFilePath) {
250  //If a file is corrupted as a result of reconstructing it from unallocated space, then
251  //7zip does a poor job estimating the original uncompressed file size.
252  //As a result, many corrupted files have wonky compression ratios and could flood the UI
253  //with false zip bomb notifications. The decision was made to skip compression ratio checks
254  //for unallocated zip files. Instead, we let the depth be an indicator of a zip bomb.
255  //Gzip archives compress a single file. They may have a sparse file,
256  //and that file could be much larger, however it won't be the exponential growth seen with more dangerous zip bombs.
257  //In addition a fair number of browser cache files will be gzip archives,
258  //and their file sizes are frequently retrieved incorrectly so ignoring gzip files is a reasonable decision.
259  if (archiveFile.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.UNALLOC) || archiveFile.getMIMEType().equalsIgnoreCase(SupportedArchiveExtractionFormats.XGZIP.toString())) {
260  return false;
261  }
262 
263  try {
264  final Long archiveItemSize = (Long) inArchive.getProperty(
265  inArchiveItemIndex, PropID.SIZE);
266 
267  //skip the check for small files
268  if (archiveItemSize == null || archiveItemSize < MIN_COMPRESSION_RATIO_SIZE) {
269  return false;
270  }
271 
272  final Long archiveItemPackedSize = (Long) inArchive.getProperty(
273  inArchiveItemIndex, PropID.PACKED_SIZE);
274 
275  if (archiveItemPackedSize == null || archiveItemPackedSize <= 0) {
276  logger.log(Level.WARNING, "Cannot getting compression ratio, cannot detect if zipbomb: {0}, item: {1}", //NON-NLS
277  new Object[]{archiveFile.getName(), (String) inArchive.getProperty(inArchiveItemIndex, PropID.PATH)}); //NON-NLS
278  return false;
279  }
280 
281  int cRatio = (int) (archiveItemSize / archiveItemPackedSize);
282 
283  if (cRatio >= MAX_COMPRESSION_RATIO) {
284  Archive rootArchive = depthMap.get(depthMap.get(archiveFile.getId()).getRootArchiveId());
285  String details = NbBundle.getMessage(SevenZipExtractor.class,
286  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.isZipBombCheck.warnDetails",
287  cRatio, FileUtil.escapeFileName(getArchiveFilePath(rootArchive.getArchiveFile())));
288 
289  flagRootArchiveAsZipBomb(rootArchive, archiveFile, details, escapedFilePath);
290  return true;
291  } else {
292  return false;
293  }
294 
295  } catch (SevenZipException ex) {
296  logger.log(Level.WARNING, "Error getting archive item size and cannot detect if zipbomb. ", ex); //NON-NLS
297  return false;
298  }
299  }
300 
312  private void flagRootArchiveAsZipBomb(Archive rootArchive, AbstractFile archiveFile, String details, String escapedFilePath) {
313  rootArchive.flagAsZipBomb();
314  logger.log(Level.INFO, details);
315 
316  String setName = "Possible Zip Bomb";
317  try {
318  Collection<BlackboardAttribute> attributes = Arrays.asList(
319  new BlackboardAttribute(
320  TSK_SET_NAME, MODULE_NAME,
321  setName),
322  new BlackboardAttribute(
323  TSK_DESCRIPTION, MODULE_NAME,
324  Bundle.SevenZipExtractor_zipBombArtifactCreation_text(archiveFile.getName())),
325  new BlackboardAttribute(
326  TSK_COMMENT, MODULE_NAME,
327  details));
328 
329  if (!blackboard.artifactExists(archiveFile, BlackboardArtifact.Type.TSK_INTERESTING_ITEM, attributes)) {
330  BlackboardArtifact artifact = rootArchive.getArchiveFile().newAnalysisResult(
331  BlackboardArtifact.Type.TSK_INTERESTING_ITEM, Score.SCORE_LIKELY_NOTABLE,
332  null, setName, null,
333  attributes)
334  .getAnalysisResult();
335 
336  try {
337  /*
338  * post the artifact which will index the artifact for
339  * keyword search, and fire an event to notify UI of this
340  * new artifact
341  */
342  blackboard.postArtifact(artifact, MODULE_NAME, context.getJobId());
343 
344  String msg = NbBundle.getMessage(SevenZipExtractor.class,
345  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.isZipBombCheck.warnMsg", archiveFile.getName(), escapedFilePath);//NON-NLS
346 
347  services.postMessage(IngestMessage.createWarningMessage(MODULE_NAME, msg, details));
348 
349  } catch (Blackboard.BlackboardException ex) {
350  logger.log(Level.SEVERE, "Unable to index blackboard artifact " + artifact.getArtifactID(), ex); //NON-NLS
351  MessageNotifyUtil.Notify.error(
352  Bundle.SevenZipExtractor_indexError_message(), artifact.getDisplayName());
353  }
354  }
355  } catch (TskCoreException ex) {
356  logger.log(Level.SEVERE, "Error creating blackboard artifact for Zip Bomb Detection for file: " + escapedFilePath, ex); //NON-NLS
357  }
358  }
359 
368  private ArchiveFormat get7ZipOptions(AbstractFile archiveFile) {
369  // try to get the file type from the BB
370  String detectedFormat;
371  detectedFormat = archiveFile.getMIMEType();
372 
373  if (detectedFormat == null) {
374  logger.log(Level.WARNING, "Could not detect format for file: {0}", archiveFile); //NON-NLS
375 
376  // if we don't have attribute info then use file extension
377  String extension = archiveFile.getNameExtension();
378  if ("rar".equals(extension)) //NON-NLS
379  {
380  // for RAR files we need to open them explicitly as RAR. Otherwise, if there is a ZIP archive inside RAR archive
381  // it will be opened incorrectly when using 7zip's built-in auto-detect functionality
382  return RAR;
383  }
384 
385  // Otherwise open the archive using 7zip's built-in auto-detect functionality
386  return null;
387  } else if (detectedFormat.contains("application/x-rar-compressed")) //NON-NLS
388  {
389  // for RAR files we need to open them explicitly as RAR. Otherwise, if there is a ZIP archive inside RAR archive
390  // it will be opened incorrectly when using 7zip's built-in auto-detect functionality
391  return RAR;
392  }
393 
394  // Otherwise open the archive using 7zip's built-in auto-detect functionality
395  return null;
396  }
397 
408  private long getRootArchiveId(AbstractFile file) throws TskCoreException {
409  long id = file.getId();
410  Content parentContent = file.getParent();
411  while (parentContent != null) {
412  id = parentContent.getId();
413  parentContent = parentContent.getParent();
414  }
415  return id;
416  }
417 
437  private List<AbstractFile> getAlreadyExtractedFiles(AbstractFile archiveFile, String archiveFilePath) throws TskCoreException, InterruptedException, FileTaskExecutor.FileTaskFailedException {
438  /*
439  * TODO (Jira-7145): Is this logic correct?
440  */
441  List<AbstractFile> extractedFiles = new ArrayList<>();
442  File outputDirectory = new File(moduleDirAbsolute, EmbeddedFileExtractorIngestModule.getUniqueName(archiveFile));
443  if (archiveFile.hasChildren() && fileTaskExecutor.exists(outputDirectory)) {
444  Case currentCase = Case.getCurrentCase();
445  FileManager fileManager = currentCase.getServices().getFileManager();
446  extractedFiles.addAll(fileManager.findFilesByParentPath(getRootArchiveId(archiveFile), archiveFilePath));
447  }
448  return extractedFiles;
449  }
450 
458  private String getArchiveFilePath(AbstractFile archiveFile) {
459  return archiveFile.getParentPath() + archiveFile.getName();
460  }
461 
471  private boolean makeExtractedFilesDirectory(String uniqueArchiveFileName) {
472  boolean success = true;
473  Path rootDirectoryPath = Paths.get(moduleDirAbsolute, uniqueArchiveFileName);
474  File rootDirectory = rootDirectoryPath.toFile();
475  try {
476  if (!fileTaskExecutor.exists(rootDirectory)) {
477  success = fileTaskExecutor.mkdirs(rootDirectory);
478  }
479  } catch (SecurityException | FileTaskFailedException | InterruptedException ex) {
480  logger.log(Level.SEVERE, String.format("Error creating root extracted files directory %s", rootDirectory), ex); //NON-NLS
481  success = false;
482  }
483  return success;
484  }
485 
498  private String getPathInArchive(IInArchive archive, int inArchiveItemIndex, AbstractFile archiveFile) throws SevenZipException {
499  String pathInArchive = (String) archive.getProperty(inArchiveItemIndex, PropID.PATH);
500 
501  if (pathInArchive == null || pathInArchive.isEmpty()) {
502  //some formats (.tar.gz) may not be handled correctly -- file in archive has no name/path
503  //handle this for .tar.gz and tgz but assuming the child is tar,
504  //otherwise, unpack using itemNumber as name
505 
506  //TODO this should really be signature based, not extension based
507  String archName = archiveFile.getName();
508  int dotI = archName.lastIndexOf(".");
509  String useName = null;
510  if (dotI != -1) {
511  String base = archName.substring(0, dotI);
512  String ext = archName.substring(dotI);
513  int colonIndex = ext.lastIndexOf(":");
514  if (colonIndex != -1) {
515  // If alternate data stream is found, fix the name
516  // so Windows doesn't choke on the colon character.
517  ext = ext.substring(0, colonIndex);
518  }
519  switch (ext) {
520  case ".gz": //NON-NLS
521  useName = base;
522  break;
523  case ".tgz": //NON-NLS
524  useName = base + ".tar"; //NON-NLS
525  break;
526  case ".bz2": //NON-NLS
527  useName = base;
528  break;
529  }
530  }
531  if (useName == null) {
532  pathInArchive = "/" + archName + "/" + Integer.toString(inArchiveItemIndex);
533  } else {
534  pathInArchive = "/" + useName;
535  }
536  }
537  return pathInArchive;
538  }
539 
540  private byte[] getPathBytesInArchive(IInArchive archive, int inArchiveItemIndex, AbstractFile archiveFile) throws SevenZipException {
541  return (byte[]) archive.getProperty(inArchiveItemIndex, PropID.PATH_BYTES);
542  }
543 
544  /*
545  * Get the String that will represent the key for the hashmap which keeps
546  * track of existing files from an AbstractFile
547  */
548  private String getKeyAbstractFile(AbstractFile fileInDatabase) {
549  return fileInDatabase == null ? null : fileInDatabase.getParentPath() + fileInDatabase.getName();
550  }
551 
552  /*
553  * Get the String that will represent the key for the hashmap which keeps
554  * track of existing files from an unpacked node and the archiveFilePath
555  */
556  private String getKeyFromUnpackedNode(UnpackedTree.UnpackedNode node, String archiveFilePath) {
557  return node == null ? null : archiveFilePath + "/" + node.getFileName();
558  }
559 
567  void unpack(AbstractFile archiveFile, ConcurrentHashMap<Long, Archive> depthMap) {
568  unpack(archiveFile, depthMap, null);
569  }
570 
/**
 * Unpacks an archive: opens it with 7zip, creates a local placeholder for
 * every item, extracts the items, records the unpacked tree in the case and
 * hands the resulting files to the ingest job.
 *
 * Along the way it runs the zip bomb checks (archive depth and per-item
 * compression ratio), skips items that would leave too little free disk
 * space, and creates an encryption-detected analysis result when encrypted
 * items were skipped because no password was supplied.
 *
 * @param archiveFile The archive to unpack.
 * @param depthMap    Map from object ID to Archive record, used to track
 *                    archive depth and look up root archives.
 * @param password    Password used to open the archive, or null to open it
 *                    without one.
 *
 * @return True if nothing marked the unpack as failed. False when ingest is
 *         cancelled, no case is open, a zip bomb is detected, the output
 *         directory cannot be created, or any item is skipped or fails to
 *         extract.
 */
582  @Messages({"SevenZipExtractor.indexError.message=Failed to index encryption detected artifact for keyword search.",
583  "# {0} - rootArchive",
584  "SevenZipExtractor.zipBombArtifactCreation.text=Zip Bomb Detected {0}"})
585  boolean unpack(AbstractFile archiveFile, ConcurrentHashMap<Long, Archive> depthMap, String password) {
586  boolean unpackSuccessful = true; //initialized to true change to false if any files fail to extract and
587  boolean hasEncrypted = false;
588  boolean fullEncryption = true;
589  boolean progressStarted = false;
590  final String archiveFilePath = getArchiveFilePath(archiveFile);
591  final String escapedArchiveFilePath = FileUtil.escapeFileName(archiveFilePath);
592  HashMap<String, ZipFileStatusWrapper> statusMap = new HashMap<>();
593  List<AbstractFile> unpackedFiles = Collections.<AbstractFile>emptyList();
594 
595  currentArchiveName = archiveFile.getName();
596 
597  SevenZipContentReadStream stream = null;
598  progress = ProgressHandle.createHandle(Bundle.EmbeddedFileExtractorIngestModule_ArchiveExtractor_moduleName());
599  //recursion depth check for zip bomb
600  Archive parentAr;
601  try {
602  blackboard = Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboard();
603  } catch (NoCurrentCaseException ex) {
604  logger.log(Level.INFO, "Exception while getting open case.", ex); //NON-NLS
605  unpackSuccessful = false;
606  return unpackSuccessful;
607  }
608  if (checkForIngestCancellation(archiveFile)) {
609  return false;
610  }
 // Seed statusMap with the files already recorded for this archive, marked as EXISTS.
611  try {
612  List<AbstractFile> existingFiles = getAlreadyExtractedFiles(archiveFile, archiveFilePath);
613  for (AbstractFile file : existingFiles) {
614  statusMap.put(getKeyAbstractFile(file), new ZipFileStatusWrapper(file, ZipFileStatus.EXISTS));
615  }
616  } catch (TskCoreException | FileTaskFailedException | InterruptedException ex) {
617  logger.log(Level.SEVERE, String.format("Error checking if %s has already been processed, skipping", escapedArchiveFilePath), ex); //NON-NLS
618  unpackSuccessful = false;
619  return unpackSuccessful;
620  }
621  if (checkForIngestCancellation(archiveFile)) {
622  return false;
623  }
 // An archive with no entry in depthMap is registered at depth 0 with itself as root;
 // otherwise its root must not be flagged already and its depth must be below MAX_DEPTH.
624  parentAr = depthMap.get(archiveFile.getId());
625  if (parentAr == null) {
626  parentAr = new Archive(0, archiveFile.getId(), archiveFile);
627  depthMap.put(archiveFile.getId(), parentAr);
628  } else {
629  Archive rootArchive = depthMap.get(parentAr.getRootArchiveId());
630  if (rootArchive.isFlaggedAsZipBomb()) {
631  //skip this archive as the root archive has already been determined to contain a zip bomb
632  unpackSuccessful = false;
633  return unpackSuccessful;
634  } else if (parentAr.getDepth() == MAX_DEPTH) {
635  String details = NbBundle.getMessage(SevenZipExtractor.class,
636  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.warnDetails.zipBomb",
637  parentAr.getDepth(), FileUtil.escapeFileName(getArchiveFilePath(rootArchive.getArchiveFile())));
638  flagRootArchiveAsZipBomb(rootArchive, archiveFile, details, escapedArchiveFilePath);
639  unpackSuccessful = false;
640  return unpackSuccessful;
641  }
642  }
643  if (checkForIngestCancellation(archiveFile)) {
644  return false;
645  }
 // Every return inside this try still runs the finally block, which closes the archive and stream.
646  IInArchive inArchive = null;
647  try {
648  stream = new SevenZipContentReadStream(new ReadContentInputStream(archiveFile));
649  // for RAR files we need to open them explicitly as RAR. Otherwise, if there is a ZIP archive inside RAR archive
650  // it will be opened incorrectly when using 7zip's built-in auto-detect functionality.
651  // All other archive formats are still opened using 7zip built-in auto-detect functionality.
652  ArchiveFormat options = get7ZipOptions(archiveFile);
653  if (checkForIngestCancellation(archiveFile)) {
654  return false;
655  }
656  if (password == null) {
657  inArchive = SevenZip.openInArchive(options, stream);
658  } else {
659  inArchive = SevenZip.openInArchive(options, stream, password);
660  }
661  numItems = inArchive.getNumberOfItems();
662  progress.start(numItems);
663  progressStarted = true;
664  if (checkForIngestCancellation(archiveFile)) {
665  return false;
666  }
667  //setup the archive local root folder
668  final String uniqueArchiveFileName = FileUtil.escapeFileName(EmbeddedFileExtractorIngestModule.getUniqueName(archiveFile));
669  if (!makeExtractedFilesDirectory(uniqueArchiveFileName)) {
670  return false;
671  }
672 
673  //initialize tree hierarchy to keep track of unpacked file structure
674  SevenZipExtractor.UnpackedTree unpackedTree = new SevenZipExtractor.UnpackedTree(moduleDirRelative + "/" + uniqueArchiveFileName, archiveFile);
675 
676  long freeDiskSpace;
677  try {
678  freeDiskSpace = services.getFreeDiskSpace();
679  } catch (NullPointerException ex) {
680  //If ingest has not been run at least once getFreeDiskSpace() will throw a null pointer exception
681  //currently getFreeDiskSpace always returns DISK_FREE_SPACE_UNKNOWN
682  freeDiskSpace = IngestMonitor.DISK_FREE_SPACE_UNKNOWN;
683  }
684 
 // First pass over the items: read metadata, create local placeholders and record the
 // extraction details. The actual extraction happens in a single call after the loop.
685  Map<Integer, InArchiveItemDetails> archiveDetailsMap = new HashMap<>();
686  for (int inArchiveItemIndex = 0; inArchiveItemIndex < numItems; inArchiveItemIndex++) {
687  if (checkForIngestCancellation(archiveFile)) {
688  return false;
689  }
690  progress.progress(String.format("%s: Analyzing archive metadata and creating local files (%d of %d)", currentArchiveName, inArchiveItemIndex + 1, numItems), 0);
691  if (isZipBombArchiveItemCheck(archiveFile, inArchive, inArchiveItemIndex, depthMap, escapedArchiveFilePath)) {
692  unpackSuccessful = false;
693  return unpackSuccessful;
694  }
695 
696  String pathInArchive = getPathInArchive(inArchive, inArchiveItemIndex, archiveFile);
697  byte[] pathBytesInArchive = getPathBytesInArchive(inArchive, inArchiveItemIndex, archiveFile);
698  UnpackedTree.UnpackedNode unpackedNode = unpackedTree.addNode(pathInArchive, pathBytesInArchive);
699  if (checkForIngestCancellation(archiveFile)) {
700  return false;
701  }
702  final boolean isEncrypted = (Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.ENCRYPTED);
703 
 // fullEncryption stays true only if every item in the archive is skipped here.
704  if (isEncrypted && password == null) {
705  logger.log(Level.WARNING, "Skipping encrypted file in archive: {0}", pathInArchive); //NON-NLS
706  hasEncrypted = true;
707  unpackSuccessful = false;
708  continue;
709  } else {
710  fullEncryption = false;
711  }
712 
713  // NOTE: item size may return null in case of certain
714  // archiving formats. Eg: BZ2
715  //check if unpacking this file will result in out of disk space
716  //this is additional to zip bomb prevention mechanism
717  Long archiveItemSize = (Long) inArchive.getProperty(
718  inArchiveItemIndex, PropID.SIZE);
719  if (freeDiskSpace != IngestMonitor.DISK_FREE_SPACE_UNKNOWN && archiveItemSize != null && archiveItemSize > 0) { //if free space is known and file is not empty.
720  String archiveItemPath = (String) inArchive.getProperty(
721  inArchiveItemIndex, PropID.PATH);
722  long newDiskSpace = freeDiskSpace - archiveItemSize;
723  if (newDiskSpace < MIN_FREE_DISK_SPACE) {
724  String msg = NbBundle.getMessage(SevenZipExtractor.class,
725  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.notEnoughDiskSpace.msg",
726  escapedArchiveFilePath, archiveItemPath);
727  String details = NbBundle.getMessage(SevenZipExtractor.class,
728  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.notEnoughDiskSpace.details");
729  services.postMessage(IngestMessage.createErrorMessage(MODULE_NAME, msg, details));
730  logger.log(Level.INFO, "Skipping archive item due to insufficient disk space: {0}, {1}", new String[]{escapedArchiveFilePath, archiveItemPath}); //NON-NLS
731  logger.log(Level.INFO, "Available disk space: {0}", new Object[]{freeDiskSpace}); //NON-NLS
732  unpackSuccessful = false;
733  continue; //skip this file
734  } else {
735  //update est. disk space during this archive, so we don't need to poll for every file extracted
736  freeDiskSpace = newDiskSpace;
737  }
738  }
739  if (checkForIngestCancellation(archiveFile)) {
740  return false;
741  }
 // Local name is <archive dir>/<index / 1000>/<index>, i.e. items are grouped into subfolders by index.
742  final String uniqueExtractedName = FileUtil.escapeFileName(uniqueArchiveFileName + File.separator + (inArchiveItemIndex / 1000) + File.separator + inArchiveItemIndex);
743  final String localAbsPath = moduleDirAbsolute + File.separator + uniqueExtractedName;
744  final String localRelPath = moduleDirRelative + File.separator + uniqueExtractedName;
745 
746  //create local dirs and empty files before extracted
747  //cannot rely on files in top-bottom order
748  File localFile = new File(localAbsPath);
749  boolean localFileExists;
750  try {
751  if ((Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.IS_FOLDER)) {
752  localFileExists = findOrCreateDirectory(localFile);
753  } else {
754  localFileExists = findOrCreateEmptyFile(localFile);
755  }
756  } catch (FileTaskFailedException | InterruptedException ex) {
757  localFileExists = false;
758  logger.log(Level.SEVERE, String.format("Error fiding or creating %s", localFile.getAbsolutePath()), ex); //NON-NLS
759  }
760  if (checkForIngestCancellation(archiveFile)) {
761  return false;
762  }
763  // skip the rest of this loop if we couldn't create the file
764  //continue will skip details from being added to the map
765  if (!localFileExists) {
766  logger.log(Level.SEVERE, String.format("Skipping %s because it could not be created", localFile.getAbsolutePath())); //NON-NLS
767  continue;
768  }
769 
770  //Store archiveItemIndex with local paths and unpackedNode reference.
771  //Necessary for the extract call back to write the current archive
772  //file to the correct disk location and to correctly update it's
773  //corresponding unpackedNode
774  archiveDetailsMap.put(inArchiveItemIndex, new InArchiveItemDetails(
775  unpackedNode, localAbsPath, localRelPath));
776  }
777 
778  int[] extractionIndices = getExtractableFilesFromDetailsMap(archiveDetailsMap);
779  if (checkForIngestCancellation(archiveFile)) {
780  return false;
781  }
782  StandardIArchiveExtractCallback archiveCallBack
783  = new StandardIArchiveExtractCallback(
784  inArchive, archiveFile, progress,
785  archiveDetailsMap, password, freeDiskSpace);
786 
787  //According to the documentation, indices in sorted order are optimal
788  //for efficiency. Hence, the HashMap and linear processing of
789  //inArchiveItemIndex. False indicates non-test mode
790  inArchive.extract(extractionIndices, false, archiveCallBack);
791  if (checkForIngestCancellation(archiveFile)) {
792  return false;
793  }
794  unpackSuccessful &= archiveCallBack.wasSuccessful();
795 
796  archiveDetailsMap = null;
797 
798  // add them to the DB. We wait until the end so that we have the metadata on all of the
799  // intermediate nodes since the order is not guaranteed
800  try {
801  unpackedTree.updateOrAddFileToCaseRec(statusMap, archiveFilePath, parentAr, archiveFile, depthMap);
802  unpackedTree.commitCurrentTransaction();
803  } catch (TskCoreException | NoCurrentCaseException ex) {
804  logger.log(Level.SEVERE, "Error populating complete derived file hierarchy from the unpacked dir structure", ex); //NON-NLS
805  //TODO decide if anything to cleanup, for now bailing
806  unpackedTree.rollbackCurrentTransaction();
807  }
808 
809  if (checkForIngestCancellation(archiveFile)) {
810  return false;
811  }
812 
813  // Get the new files to be added to the case.
814  unpackedFiles = unpackedTree.getAllFileObjects();
815  } catch (SevenZipException | IllegalArgumentException ex) {
816  logger.log(Level.WARNING, "Error unpacking file: " + archiveFile, ex); //NON-NLS
817  //inbox message
818 
819  // print a message if the file is allocated
820  if (archiveFile.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.ALLOC)) {
821  String msg = NbBundle.getMessage(SevenZipExtractor.class,
822  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.errUnpacking.msg",
823  currentArchiveName);
824  String details = NbBundle.getMessage(SevenZipExtractor.class,
825  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.errUnpacking.details",
826  escapedArchiveFilePath, ex.getMessage());
827  services.postMessage(IngestMessage.createErrorMessage(MODULE_NAME, msg, details));
828  }
829  } finally {
830  if (inArchive != null) {
831  try {
832  inArchive.close();
833  } catch (SevenZipException e) {
834  logger.log(Level.SEVERE, "Error closing archive: " + archiveFile, e); //NON-NLS
835  }
836  }
837 
838  if (stream != null) {
839  try {
840  stream.close();
841  } catch (IOException ex) {
842  logger.log(Level.SEVERE, "Error closing stream after unpacking archive: " + archiveFile, ex); //NON-NLS
843  }
844  }
845 
846  //close progress bar
 // finish() is only called if start() was reached above.
847  if (progressStarted) {
848  progress.finish();
849  }
850  }
851  if (checkForIngestCancellation(archiveFile)) {
852  return false;
853  }
854  //create artifact and send user message
855  if (hasEncrypted) {
856  String encryptionType = fullEncryption ? ENCRYPTION_FULL : ENCRYPTION_FILE_LEVEL;
857  try {
858  BlackboardArtifact artifact = archiveFile.newAnalysisResult(
859  new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED),
860  Score.SCORE_NOTABLE,
861  null, null, encryptionType,
862  Arrays.asList(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_COMMENT, MODULE_NAME, encryptionType)))
863  .getAnalysisResult();
864 
865  try {
866  /*
867  * post the artifact which will index the artifact for
868  * keyword search, and fire an event to notify UI of this
869  * new artifact
870  */
871  blackboard.postArtifact(artifact, MODULE_NAME, context.getJobId());
872  } catch (Blackboard.BlackboardException ex) {
873  logger.log(Level.SEVERE, "Unable to post blackboard artifact " + artifact.getArtifactID(), ex); //NON-NLS
874  MessageNotifyUtil.Notify.error(
875  Bundle.SevenZipExtractor_indexError_message(), artifact.getDisplayName());
876  }
877 
878  } catch (TskCoreException ex) {
879  logger.log(Level.SEVERE, "Error creating blackboard artifact for encryption detected for file: " + escapedArchiveFilePath, ex); //NON-NLS
880  }
881 
882  String msg = NbBundle.getMessage(SevenZipExtractor.class,
883  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.encrFileDetected.msg");
884  String details = NbBundle.getMessage(SevenZipExtractor.class,
885  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.encrFileDetected.details",
886  currentArchiveName, MODULE_NAME);
887  services.postMessage(IngestMessage.createWarningMessage(MODULE_NAME, msg, details));
888  }
889 
890  // adding unpacked extracted derived files to the job after closing relevant resources.
891  if (!unpackedFiles.isEmpty()) {
892  //currently sending a single event for all new files
893  services.fireModuleContentEvent(new ModuleContentEvent(archiveFile));
894  if (context != null) {
895  context.addFilesToJob(unpackedFiles);
896  }
897  }
898 
899  return unpackSuccessful;
900  }
901 
909  private boolean findOrCreateDirectory(File directory) throws FileTaskFailedException, InterruptedException {
910  if (!fileTaskExecutor.exists(directory)) {
911  return fileTaskExecutor.mkdirs(directory);
912  } else {
913  return true;
914  }
915  }
916 
924  private boolean findOrCreateEmptyFile(File file) throws FileTaskFailedException, InterruptedException {
925  if (!fileTaskExecutor.exists(file)) {
926  fileTaskExecutor.mkdirs(file.getParentFile());
927  return fileTaskExecutor.createNewFile(file);
928  } else {
929  return true;
930  }
931  }
932 
933  private Charset detectFilenamesCharset(List<byte[]> byteDatas) {
934  Charset detectedCharset = null;
935  CharsetDetector charsetDetector = new CharsetDetector();
936  int byteSum = 0;
937  int fileNum = 0;
938  for (byte[] byteData : byteDatas) {
939  fileNum++;
940  byteSum += byteData.length;
941  // Only read ~1000 bytes of filenames in this directory
942  if (byteSum >= 1000) {
943  break;
944  }
945  }
946  byte[] allBytes = new byte[byteSum];
947  int start = 0;
948  for (int i = 0; i < fileNum; i++) {
949  byte[] byteData = byteDatas.get(i);
950  System.arraycopy(byteData, 0, allBytes, start, byteData.length);
951  start += byteData.length;
952  }
953  charsetDetector.setText(allBytes);
954  CharsetMatch cm = charsetDetector.detect();
955  if (cm != null && cm.getConfidence() >= 90 && Charset.isSupported(cm.getName())) {
956  detectedCharset = Charset.forName(cm.getName());
957  }
958  return detectedCharset;
959  }
960 
965  private int[] getExtractableFilesFromDetailsMap(
966  Map<Integer, InArchiveItemDetails> archiveDetailsMap) {
967 
968  Integer[] wrappedExtractionIndices = archiveDetailsMap.keySet()
969  .toArray(new Integer[archiveDetailsMap.size()]);
970 
971  return Arrays.stream(wrappedExtractionIndices)
972  .mapToInt(Integer::intValue)
973  .toArray();
974 
975  }
976 
984  private final static class UnpackStream implements ISequentialOutStream {
985 
986  private EncodedFileOutputStream output;
987  private String localAbsPath;
988  private int bytesWritten;
989  private static final Tika tika = new Tika();
990  private String mimeType = "";
991 
992  UnpackStream(String localAbsPath) throws IOException {
993  this.output = new EncodedFileOutputStream(new FileOutputStream(localAbsPath), TskData.EncodingType.XOR1);
994  this.localAbsPath = localAbsPath;
995  this.bytesWritten = 0;
996  }
997 
998  public void setNewOutputStream(String localAbsPath) throws IOException {
999  this.output.close();
1000  this.output = new EncodedFileOutputStream(new FileOutputStream(localAbsPath), TskData.EncodingType.XOR1);
1001  this.localAbsPath = localAbsPath;
1002  this.bytesWritten = 0;
1003  this.mimeType = "";
1004  }
1005 
1006  public int getSize() {
1007  return bytesWritten;
1008  }
1009 
1010  @Override
1011  public int write(byte[] bytes) throws SevenZipException {
1012  try {
1013  // Detect MIME type now while the file is in memory
1014  if (bytesWritten == 0) {
1015  mimeType = tika.detect(bytes);
1016  }
1017  output.write(bytes);
1018  this.bytesWritten += bytes.length;
1019  } catch (IOException ex) {
1020  throw new SevenZipException(
1021  NbBundle.getMessage(SevenZipExtractor.class,
1022  "EmbeddedFileExtractorIngestModule.ArchiveExtractor.UnpackStream.write.exception.msg",
1023  localAbsPath), ex);
1024  }
1025  return bytes.length;
1026  }
1027 
1028  public String getMIMEType() {
1029  return mimeType;
1030  }
1031 
1032  public void close() throws IOException {
1033  try (EncodedFileOutputStream out = output) {
1034  out.flush();
1035  }
1036  }
1037 
1038  }
1039 
1043  private static class InArchiveItemDetails {
1044 
1045  private final SevenZipExtractor.UnpackedTree.UnpackedNode unpackedNode;
1046  private final String localAbsPath;
1047  private final String localRelPath;
1048 
1050  SevenZipExtractor.UnpackedTree.UnpackedNode unpackedNode,
1051  String localAbsPath, String localRelPath) {
1052  this.unpackedNode = unpackedNode;
1053  this.localAbsPath = localAbsPath;
1054  this.localRelPath = localRelPath;
1055  }
1056 
1057  public SevenZipExtractor.UnpackedTree.UnpackedNode getUnpackedNode() {
1058  return unpackedNode;
1059  }
1060 
1061  public String getLocalAbsPath() {
1062  return localAbsPath;
1063  }
1064 
1065  public String getLocalRelPath() {
1066  return localRelPath;
1067  }
1068  }
1069 
1074  private static class StandardIArchiveExtractCallback
1075  implements IArchiveExtractCallback, ICryptoGetTextPassword {
1076 
1077  private final AbstractFile archiveFile;
1078  private final IInArchive inArchive;
1079  private UnpackStream unpackStream = null;
1080  private final Map<Integer, InArchiveItemDetails> archiveDetailsMap;
1081  private final ProgressHandle progressHandle;
1082 
1083  private int inArchiveItemIndex;
1084 
1085  private long createTimeInSeconds;
1086  private long modTimeInSeconds;
1087  private long accessTimeInSeconds;
1088 
1089  private boolean isFolder;
1090  private final String password;
1091 
1092  private boolean unpackSuccessful = true;
1093 
/**
 * Creates the extraction callback for one archive.
 *
 * @param inArchive         The opened archive being extracted.
 * @param archiveFile       The case file that holds the archive.
 * @param progressHandle    Progress handle updated while extracting.
 * @param archiveDetailsMap Details for each item, keyed by in-archive index.
 * @param password          Password returned from cryptoGetTextPassword().
 * @param freeDiskSpace     Accepted but not used by this constructor.
 */
StandardIArchiveExtractCallback(IInArchive inArchive,
        AbstractFile archiveFile, ProgressHandle progressHandle,
        Map<Integer, InArchiveItemDetails> archiveDetailsMap,
        String password, long freeDiskSpace) {
    this.archiveFile = archiveFile;
    this.inArchive = inArchive;
    this.archiveDetailsMap = archiveDetailsMap;
    this.progressHandle = progressHandle;
    this.password = password;
}
1104 
1119  @Override
1120  public ISequentialOutStream getStream(int inArchiveItemIndex,
1121  ExtractAskMode mode) throws SevenZipException {
1122 
1123  this.inArchiveItemIndex = inArchiveItemIndex;
1124 
1125  isFolder = (Boolean) inArchive
1126  .getProperty(inArchiveItemIndex, PropID.IS_FOLDER);
1127  if (isFolder || mode != ExtractAskMode.EXTRACT) {
1128  return null;
1129  }
1130 
1131  final String localAbsPath = archiveDetailsMap.get(
1132  inArchiveItemIndex).getLocalAbsPath();
1133 
1134  //If the Unpackstream has been allocated, then set the Outputstream
1135  //to another file rather than creating a new unpack stream. The 7Zip
1136  //binding has a memory leak, so creating new unpack streams will not be
1137  //dereferenced. As a fix, we create one UnpackStream, and mutate its state,
1138  //so that there only exists one 8192 byte buffer in memory per archive.
1139  try {
1140  if (unpackStream != null) {
1141  unpackStream.setNewOutputStream(localAbsPath);
1142  } else {
1143  unpackStream = new UnpackStream(localAbsPath);
1144  }
1145  } catch (IOException ex) {
1146  logger.log(Level.WARNING, String.format("Error opening or setting new stream " //NON-NLS
1147  + "for archive file at %s", localAbsPath), ex.getMessage()); //NON-NLS
1148  return null;
1149  }
1150 
1151  return unpackStream;
1152  }
1153 
1162  @Override
1163  public void prepareOperation(ExtractAskMode mode) throws SevenZipException {
1164  final Date createTime = (Date) inArchive.getProperty(
1165  inArchiveItemIndex, PropID.CREATION_TIME);
1166  final Date accessTime = (Date) inArchive.getProperty(
1167  inArchiveItemIndex, PropID.LAST_ACCESS_TIME);
1168  final Date writeTime = (Date) inArchive.getProperty(
1169  inArchiveItemIndex, PropID.LAST_MODIFICATION_TIME);
1170 
1171  createTimeInSeconds = createTime == null ? 0L
1172  : createTime.getTime() / 1000;
1173  modTimeInSeconds = writeTime == null ? 0L
1174  : writeTime.getTime() / 1000;
1175  accessTimeInSeconds = accessTime == null ? 0L
1176  : accessTime.getTime() / 1000;
1177 
1178  progressHandle.progress(archiveFile.getName() + ": "
1179  + (String) inArchive.getProperty(inArchiveItemIndex, PropID.PATH),
1181 
1182  }
1183 
1192  @Override
1193  public void setOperationResult(ExtractOperationResult result) throws SevenZipException {
1194 
1195  final SevenZipExtractor.UnpackedTree.UnpackedNode unpackedNode
1196  = archiveDetailsMap.get(inArchiveItemIndex).getUnpackedNode();
1197  final String localRelPath = archiveDetailsMap.get(
1198  inArchiveItemIndex).getLocalRelPath();
1199  if (isFolder) {
1200  unpackedNode.addDerivedInfo(0,
1201  !(Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.IS_FOLDER),
1203  localRelPath);
1204  return;
1205  } else {
1206  unpackedNode.setMimeType(unpackStream.getMIMEType());
1207  }
1208 
1209  final String localAbsPath = archiveDetailsMap.get(
1210  inArchiveItemIndex).getLocalAbsPath();
1211  if (result != ExtractOperationResult.OK) {
1212  if (archiveFile.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.UNALLOC)) {
1213  logger.log(Level.WARNING, "Extraction of : {0} encountered error {1} (file is unallocated and may be corrupt)", //NON-NLS
1214  new Object[]{localAbsPath, result});
1215  } else {
1216  logger.log(Level.WARNING, "Extraction of : {0} encountered error {1}", //NON-NLS
1217  new Object[]{localAbsPath, result});
1218  }
1219  unpackSuccessful = false;
1220  }
1221 
1222  //record derived data in unode, to be traversed later after unpacking the archive
1223  unpackedNode.addDerivedInfo(unpackStream.getSize(),
1224  !(Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.IS_FOLDER),
1226 
1227  try {
1228  unpackStream.close();
1229  } catch (IOException e) {
1230  logger.log(Level.WARNING, "Error closing unpack stream for file: {0}", localAbsPath); //NON-NLS
1231  }
1232  }
1233 
1234  @Override
1235  public void setTotal(long value) throws SevenZipException {
1236  //Not necessary for extract, left intenionally blank
1237  }
1238 
1239  @Override
1240  public void setCompleted(long value) throws SevenZipException {
1241  //Not necessary for extract, left intenionally blank
1242  }
1243 
1251  @Override
1252  public String cryptoGetTextPassword() throws SevenZipException {
1253  return password;
1254  }
1255 
1256  public boolean wasSuccessful() {
1257  return unpackSuccessful;
1258  }
1259  }
1260 
1268  private class UnpackedTree {
1269 
1270  final UnpackedNode rootNode;
1271  private int nodesProcessed = 0;
1272 
1273  // It is significantly faster to add the DerivedFiles to the case on a transaction,
1274  // but we don't want to hold the transaction (and case write lock) for the entire
1275  // stage. Instead, we use the same transaction for MAX_TRANSACTION_SIZE database operations
1276  // and then commit that transaction and start a new one, giving at least a short window
1277  // for other processes.
1278  private CaseDbTransaction currentTransaction = null;
1279  private long transactionCounter = 0;
1280  private final static long MAX_TRANSACTION_SIZE = 1000;
1281 
/**
 * Creates a tree whose root node stands for the archive file itself.
 *
 * @param localPathRoot Local relative path assigned to the root node.
 * @param archiveFile   The archive file; becomes the root node's file and
 *                      supplies the root node's file name.
 */
UnpackedTree(String localPathRoot, AbstractFile archiveFile) {
    UnpackedNode root = new UnpackedNode();
    root.setFile(archiveFile);
    root.setFileName(archiveFile.getName());
    root.setLocalRelPath(localPathRoot);
    this.rootNode = root;
}
1294 
1304  UnpackedNode addNode(String filePath, byte[] filePathBytes) {
1305  String[] toks = filePath.split("[\\/\\\\]");
1306  List<String> tokens = new ArrayList<>();
1307  for (int i = 0; i < toks.length; ++i) {
1308  if (!toks[i].isEmpty()) {
1309  tokens.add(toks[i]);
1310  }
1311  }
1312 
1313  List<byte[]> byteTokens;
1314  if (filePathBytes == null) {
1315  return addNode(rootNode, tokens, null);
1316  } else {
1317  byteTokens = new ArrayList<>(tokens.size());
1318  int last = 0;
1319  for (int i = 0; i < filePathBytes.length; i++) {
1320  if (filePathBytes[i] == '/') {
1321  int len = i - last;
1322  if (len > 0) {
1323  byte[] arr = new byte[len];
1324  System.arraycopy(filePathBytes, last, arr, 0, len);
1325  byteTokens.add(arr);
1326  }
1327  last = i + 1;
1328  }
1329  }
1330  int len = filePathBytes.length - last;
1331  if (len > 0) {
1332  byte[] arr = new byte[len];
1333  System.arraycopy(filePathBytes, last, arr, 0, len);
1334  byteTokens.add(arr);
1335  }
1336 
1337  if (tokens.size() != byteTokens.size()) {
1338  String rootFileInfo = "(unknown)";
1339  if (rootNode.getFile() != null) {
1340  rootFileInfo = rootNode.getFile().getParentPath() + rootNode.getFile().getName()
1341  + "(ID: " + rootNode.getFile().getId() + ")";
1342  }
1343  logger.log(Level.WARNING, "Could not map path bytes to path string while extracting archive {0} (path string: \"{1}\", bytes: {2})",
1344  new Object[]{rootFileInfo, this.rootNode.getFile().getId(), filePath, bytesToString(filePathBytes)});
1345  return addNode(rootNode, tokens, null);
1346  }
1347  }
1348 
1349  return addNode(rootNode, tokens, byteTokens);
1350  }
1351 
1359  private String bytesToString(byte[] bytes) {
1360  StringBuilder result = new StringBuilder();
1361  for (byte b : bytes) {
1362  result.append(String.format("%02x", b));
1363  }
1364  return result.toString();
1365  }
1366 
1377  List<String> tokenPath, List<byte[]> tokenPathBytes) {
1378  // we found all of the tokens
1379  if (tokenPath.isEmpty()) {
1380  return parent;
1381  }
1382 
1383  // get the next name in the path and look it up
1384  String childName = tokenPath.remove(0);
1385  byte[] childNameBytes = null;
1386  if (tokenPathBytes != null) {
1387  childNameBytes = tokenPathBytes.remove(0);
1388  }
1389  UnpackedNode child = parent.getChild(childName);
1390  // create new node
1391  if (child == null) {
1392  child = new UnpackedNode(childName, parent);
1393  child.setFileNameBytes(childNameBytes);
1394  parent.addChild(child);
1395  }
1396 
1397  // go down one more level
1398  return addNode(child, tokenPath, tokenPathBytes);
1399  }
1400 
1407  List<AbstractFile> getRootFileObjects() {
1408  List<AbstractFile> ret = new ArrayList<>();
1409  rootNode.getChildren().forEach((child) -> {
1410  ret.add(child.getFile());
1411  });
1412  return ret;
1413  }
1414 
1421  List<AbstractFile> getAllFileObjects() {
1422  List<AbstractFile> ret = new ArrayList<>();
1423  rootNode.getChildren().forEach((child) -> {
1424  getAllFileObjectsRec(ret, child);
1425  });
1426  return ret;
1427  }
1428 
1429  private void getAllFileObjectsRec(List<AbstractFile> list, UnpackedNode parent) {
1430  list.add(parent.getFile());
1431  parent.getChildren().forEach((child) -> {
1432  getAllFileObjectsRec(list, child);
1433  });
1434  }
1435 
1440  void updateOrAddFileToCaseRec(HashMap<String, ZipFileStatusWrapper> statusMap, String archiveFilePath, Archive parentAr, AbstractFile archiveFile, ConcurrentHashMap<Long, Archive> depthMap) throws TskCoreException, NoCurrentCaseException {
1442  for (UnpackedNode child : rootNode.getChildren()) {
1443  updateOrAddFileToCaseRec(child, fileManager, statusMap, archiveFilePath, parentAr, archiveFile, depthMap);
1444  }
1445  }
1446 
1464  private void updateOrAddFileToCaseRec(UnpackedNode node, FileManager fileManager, HashMap<String, ZipFileStatusWrapper> statusMap, String archiveFilePath, Archive parentAr, AbstractFile archiveFile, ConcurrentHashMap<Long, Archive> depthMap) throws TskCoreException {
1465  DerivedFile df;
1466  progress.progress(String.format("%s: Adding/updating files in case database (%d of %d)", currentArchiveName, ++nodesProcessed, numItems));
1467  try {
1468  String nameInDatabase = getKeyFromUnpackedNode(node, archiveFilePath);
1469  ZipFileStatusWrapper existingFile = nameInDatabase == null ? null : statusMap.get(nameInDatabase);
1470  if (existingFile == null) {
1471  df = Case.getCurrentCaseThrows().getSleuthkitCase().addDerivedFile(node.getFileName(), node.getLocalRelPath(), node.getSize(),
1472  node.getCtime(), node.getCrtime(), node.getAtime(), node.getMtime(),
1473  node.isIsFile(), node.getParent().getFile(), "", MODULE_NAME,
1474  "", "", TskData.EncodingType.XOR1, getCurrentTransaction());
1475  statusMap.put(getKeyAbstractFile(df), new ZipFileStatusWrapper(df, ZipFileStatus.EXISTS));
1476  } else {
1477  String key = getKeyAbstractFile(existingFile.getFile());
1478  if (existingFile.getStatus() == ZipFileStatus.EXISTS && existingFile.getFile().getSize() < node.getSize()) {
1479  existingFile.setStatus(ZipFileStatus.UPDATE);
1480  statusMap.put(key, existingFile);
1481  }
1482  if (existingFile.getStatus() == ZipFileStatus.UPDATE) {
1483  //if the we are updating a file and its mime type was octet-stream we want to re-type it
1484  String mimeType = existingFile.getFile().getMIMEType().equalsIgnoreCase("application/octet-stream") ? null : existingFile.getFile().getMIMEType();
1485  df = Case.getCurrentCaseThrows().getSleuthkitCase().updateDerivedFile((DerivedFile) existingFile.getFile(), node.getLocalRelPath(), node.getSize(),
1486  node.getCtime(), node.getCrtime(), node.getAtime(), node.getMtime(),
1487  node.isIsFile(), mimeType, "", MODULE_NAME,
1488  "", "", TskData.EncodingType.XOR1, existingFile.getFile().getParent(), getCurrentTransaction());
1489  } else {
1490  //ALREADY CURRENT - SKIP
1491  statusMap.put(key, new ZipFileStatusWrapper(existingFile.getFile(), ZipFileStatus.SKIP));
1492  df = (DerivedFile) existingFile.getFile();
1493  }
1494  }
1495  node.setFile(df);
1496  } catch (TskCoreException | NoCurrentCaseException ex) {
1497  logger.log(Level.SEVERE, "Error adding a derived file to db:" + node.getFileName(), ex); //NON-NLS
1498  throw new TskCoreException(
1499  NbBundle.getMessage(SevenZipExtractor.class, "EmbeddedFileExtractorIngestModule.ArchiveExtractor.UnpackedTree.exception.msg",
1500  node.getFileName()), ex);
1501  }
1502 
1503  // Determine encoding of children
1504  if (node.getChildren().size() > 0) {
1505  String names = "";
1506  ArrayList<byte[]> byteDatas = new ArrayList<>();
1507  for (UnpackedNode child : node.getChildren()) {
1508  byte[] childBytes = child.getFileNameBytes();
1509  if (childBytes != null) {
1510  byteDatas.add(childBytes);
1511  }
1512  names += child.getFileName();
1513  }
1514  Charset detectedCharset = detectFilenamesCharset(byteDatas);
1515 
1516  // If a charset was detected, transcode filenames accordingly
1517  if (detectedCharset != null && detectedCharset.canEncode()) {
1518  for (UnpackedNode child : node.getChildren()) {
1519  byte[] childBytes = child.getFileNameBytes();
1520  if (childBytes != null) {
1521  String decodedName = new String(childBytes, detectedCharset);
1522  child.setFileName(decodedName);
1523  }
1524  }
1525  }
1526  }
1527 
1528  // Check for zip bombs
1529  if (isSevenZipExtractionSupported(node.getMimeType())) {
1530  Archive child = new Archive(parentAr.getDepth() + 1, parentAr.getRootArchiveId(), archiveFile);
1531  parentAr.addChild(child);
1532  depthMap.put(node.getFile().getId(), child);
1533  }
1534 
1535  //recurse adding the children if this file was incomplete the children presumably need to be added
1536  for (UnpackedNode child : node.getChildren()) {
1537  updateOrAddFileToCaseRec(child, fileManager, statusMap, getKeyFromUnpackedNode(node, archiveFilePath), parentAr, archiveFile, depthMap);
1538  }
1539  }
1540 
1551  private CaseDbTransaction getCurrentTransaction() throws TskCoreException {
1552 
1553  if (currentTransaction == null) {
1554  startTransaction();
1555  }
1556 
1557  if (transactionCounter > MAX_TRANSACTION_SIZE) {
1559  startTransaction();
1560  }
1561 
1562  transactionCounter++;
1563  return currentTransaction;
1564  }
1565 
1571  private void startTransaction() throws TskCoreException {
1572  try {
1573  currentTransaction = Case.getCurrentCaseThrows().getSleuthkitCase().beginTransaction();
1574  transactionCounter = 0;
1575  } catch (NoCurrentCaseException ex) {
1576  throw new TskCoreException("Case is closed");
1577  }
1578  }
1579 
1585  private void commitCurrentTransaction() throws TskCoreException {
1586  if (currentTransaction != null) {
1587  currentTransaction.commit();
1588  currentTransaction = null;
1589  }
1590  }
1591 
1596  if (currentTransaction != null) {
1597  try {
1598  currentTransaction.rollback();
1599  currentTransaction = null;
1600  } catch (TskCoreException ex) {
1601  // Ignored
1602  }
1603  }
1604  }
1605 
1609  private class UnpackedNode {
1610 
1611  private String fileName;
1612  private byte[] fileNameBytes;
1613  private AbstractFile file;
1614  private final List<UnpackedNode> children = new ArrayList<>();
1615  private String localRelPath = "";
1616  private long size;
1617  private long ctime, crtime, atime, mtime;
1618  private boolean isFile;
1619  private String mimeType = "";
1621 
1622  //root constructor
1623  UnpackedNode() {
1624  }
1625 
1626  //child node constructor
1627  UnpackedNode(String fileName, UnpackedNode parent) {
1628  this.fileName = fileName;
1629  this.parent = parent;
1630  this.localRelPath = parent.getLocalRelPath() + File.separator + fileName;
1631  }
1632 
1633  long getCtime() {
1634  return ctime;
1635  }
1636 
1637  long getCrtime() {
1638  return crtime;
1639  }
1640 
1641  long getAtime() {
1642  return atime;
1643  }
1644 
1645  long getMtime() {
1646  return mtime;
1647  }
1648 
1649  void setFileName(String fileName) {
1650  this.fileName = fileName;
1651  }
1652 
1658  void addChild(UnpackedNode child) {
1659  children.add(child);
1660  }
1661 
1668  List<UnpackedNode> getChildren() {
1669  return children;
1670  }
1671 
1677  UnpackedNode getParent() {
1678  return parent;
1679  }
1680 
1681  void addDerivedInfo(long size,
1682  boolean isFile,
1683  long ctime, long crtime, long atime, long mtime, String relLocalPath) {
1684  this.size = size;
1685  this.isFile = isFile;
1686  this.ctime = ctime;
1687  this.crtime = crtime;
1688  this.atime = atime;
1689  this.mtime = mtime;
1690  this.localRelPath = relLocalPath;
1691  }
1692 
1693  void setFile(AbstractFile file) {
1694  this.file = file;
1695  }
1696 
1697  void setMimeType(String mimeType) {
1698  this.mimeType = mimeType;
1699  }
1700 
1701  String getMimeType() {
1702  return mimeType;
1703  }
1704 
1712  UnpackedNode getChild(String childFileName) {
1713  UnpackedNode ret = null;
1714  for (UnpackedNode child : children) {
1715  if (child.getFileName().equals(childFileName)) {
1716  ret = child;
1717  break;
1718  }
1719  }
1720  return ret;
1721  }
1722 
1723  String getFileName() {
1724  return fileName;
1725  }
1726 
1727  AbstractFile getFile() {
1728  return file;
1729  }
1730 
1731  String getLocalRelPath() {
1732  return localRelPath;
1733  }
1734 
1741  void setLocalRelPath(String localRelativePath) {
1742  localRelPath = localRelativePath;
1743  }
1744 
1745  long getSize() {
1746  return size;
1747  }
1748 
1749  boolean isIsFile() {
1750  return isFile;
1751  }
1752 
1753  void setFileNameBytes(byte[] fileNameBytes) {
1754  if (fileNameBytes != null) {
1755  this.fileNameBytes = Arrays.copyOf(fileNameBytes, fileNameBytes.length);
1756  }
1757  }
1758 
1759  byte[] getFileNameBytes() {
1760  if (fileNameBytes == null) {
1761  return null;
1762  }
1763  return Arrays.copyOf(fileNameBytes, fileNameBytes.length);
1764  }
1765  }
1766  }
1767 
1772  static class Archive {
1773 
1774  //depth will be 0 for the root archive unpack was called on, and increase as unpack recurses down through archives contained within
1775  private final int depth;
1776  private final List<Archive> children;
1777  private final long rootArchiveId;
1778  private boolean flaggedAsZipBomb = false;
1779  private final AbstractFile archiveFile;
1780 
1793  Archive(int depth, long rootArchiveId, AbstractFile archiveFile) {
1794  this.children = new ArrayList<>();
1795  this.depth = depth;
1796  this.rootArchiveId = rootArchiveId;
1797  this.archiveFile = archiveFile;
1798  }
1799 
1806  void addChild(Archive child) {
1807  children.add(child);
1808  }
1809 
1814  synchronized void flagAsZipBomb() {
1815  flaggedAsZipBomb = true;
1816  }
1817 
1823  synchronized boolean isFlaggedAsZipBomb() {
1824  return flaggedAsZipBomb;
1825  }
1826 
1832  AbstractFile getArchiveFile() {
1833  return archiveFile;
1834  }
1835 
1841  long getRootArchiveId() {
1842  return rootArchiveId;
1843  }
1844 
1850  long getObjectId() {
1851  return archiveFile.getId();
1852  }
1853 
1861  int getDepth() {
1862  return depth;
1863  }
1864  }
1865 
1870  private final class ZipFileStatusWrapper {
1871 
1872  private final AbstractFile abstractFile;
1874 
1882  private ZipFileStatusWrapper(AbstractFile file, ZipFileStatus status) {
1883  abstractFile = file;
1884  zipStatus = status;
1885  }
1886 
1892  private AbstractFile getFile() {
1893  return abstractFile;
1894  }
1895 
1902  return zipStatus;
1903  }
1904 
1910  private void setStatus(ZipFileStatus status) {
1911  zipStatus = status;
1912  }
1913 
1914  }
1915 
1920  private enum ZipFileStatus {
1921  UPDATE, //Should be updated //NON-NLS
1922  SKIP, //File is current can be skipped //NON-NLS
1923  EXISTS //File exists but it is unknown if it is current //NON-NLS
1924  }
1925 }
UnpackedNode addNode(UnpackedNode parent, List< String > tokenPath, List< byte[]> tokenPathBytes)
void updateOrAddFileToCaseRec(UnpackedNode node, FileManager fileManager, HashMap< String, ZipFileStatusWrapper > statusMap, String archiveFilePath, Archive parentAr, AbstractFile archiveFile, ConcurrentHashMap< Long, Archive > depthMap)

Copyright © 2012-2022 Basis Technology. Generated on: Tue Jun 27 2023
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.