19 package org.sleuthkit.autopsy.keywordsearch;
 
   21 import java.io.ByteArrayInputStream;
 
   22 import java.io.IOException;
 
   23 import java.io.InputStream;
 
   24 import java.io.Reader;
 
   25 import java.io.UnsupportedEncodingException;
 
   26 import java.util.HashMap;
 
   28 import java.util.logging.Level;
 
   29 import org.apache.solr.client.solrj.SolrServerException;
 
   30 import org.apache.solr.common.util.ContentStream;
 
   31 import org.apache.solr.common.SolrInputDocument;
 
   32 import org.openide.util.NbBundle;
 
   53     private static final Logger logger = Logger.getLogger(Ingester.class.getName());
 
   54     private volatile boolean uncommitedIngests = 
false;
 
   55     private final Server solrServer = KeywordSearch.getServer();
 
   56     private final GetContentFieldsV getContentFieldsV = 
new GetContentFieldsV();
 
   57     private static Ingester instance;
 
   61     private static final int MAX_DOC_CHUNK_SIZE = 1024 * 1024;
          // 1024 * 1024 bytes. Used further down to size the byte buffer that
          // ingest(ContentStream, Map, long) reads the stream content into.
 
   62     private static final String ENCODING = 
"UTF-8"; 
          // Charset name passed to the String constructor when that buffer is
          // decoded into the text added to the CONTENT field.
 
   67     public static synchronized Ingester getDefault() {
          // Creates the shared Ingester lazily, the first time it is requested.
          // The method is synchronized, so two concurrent callers cannot both
          // see a null instance and construct it twice.
          // NOTE(review): the return statement is not visible in this listing;
          // presumably the shared instance is returned - confirm.
 
   68         if (instance == null) {
 
   69             instance = 
new Ingester();
 
   75     @SuppressWarnings(
"FinalizeDeclaration")
 
   76     protected 
void finalize() throws Throwable {
          // Diagnostic only: the visible code just logs a warning. The
          // uncommitedIngests flag is set to true after a document has been
          // added to the Solr server and is reset to false elsewhere in this
          // class, so a true value here means documents were added and never
          // followed by a commit.
 
   80         if (uncommitedIngests) {
 
   81             logger.warning(
"Ingester was used to add files that it never committed."); 
 
   94     void ingest(AbstractFileStringContentStream afscs) 
throws IngesterException {
          // Sends the given string content stream to the index.
          // The field map is derived from the stream's source content.
 
   95         Map<String, String> params = getContentFields(afscs.getSourceContent());
 
          // Forwards to the three-argument ingest overload, passing the
          // stream itself, the field map and the source content's size.
   96         ingest(afscs, params, afscs.getSourceContent().getSize());
 
  111     void ingest(TextExtractor fe) 
throws IngesterException {
          // Submits a document for the extractor's source file.
          // The field map is built from fe.getSourceFile().
 
  112         Map<String, String> params = getContentFields(fe.getSourceFile());
 
          // Record how many chunks the extractor reports, stored as a string
          // under the NUM_CHUNKS schema field.
  114         params.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(fe.getNumChunks()));
 
          // The document is submitted with a NullContentStream and a size of 0,
          // i.e. only the field map is supplied by this call.
  116         ingest(
new NullContentStream(fe.getSourceFile()), params, 0);
 
  131     void ingest(AbstractFileChunk fec, ByteContentStream bcs, 
int size) 
throws IngesterException {
          // Submits one chunk of a file's content.
          // The field map comes from the byte stream's source content.
 
  132         AbstractContent sourceContent = bcs.getSourceContent();
 
  133         Map<String, String> params = getContentFields(sourceContent);
 
          // Use the chunk id string (built from the source content id and the
          // chunk number) as the document ID; put() replaces any ID already
          // present in the map.
  136         params.put(Server.Schema.ID.toString(),
 
  137                 Server.getChunkIdString(sourceContent.getId(), fec.getChunkNumber()));
 
          // Forward to the three-argument ingest overload; the int size is
          // widened to that overload's long parameter.
  139         ingest(bcs, params, size);
 
  155     void ingest(AbstractFile file, 
boolean ingestContent) 
throws IngesterException {
          // Submits a document for the given file, with or without its content.
          // In both visible calls the field map is getContentFields(file).
 
          // When content is not wanted, or the file is a directory, the file is
          // submitted with a NullContentStream and a size of 0.
  156         if (ingestContent == 
false || file.isDir()) {
 
  157             ingest(
new NullContentStream(file), getContentFields(file), 0);
 
          // NOTE(review): the original line between these two calls is not
          // visible in this listing; presumably the call below is the else
          // branch - confirm. It submits the file through an FscContentStream
          // together with the file's reported size.
  159             ingest(
new FscContentStream(file), getContentFields(file), file.getSize());
 
  170     private Map<String, String> getContentFields(AbstractContent fsc) {
          // Returns the field map for the given content by handing the shared
          // GetContentFieldsV visitor instance to the content's accept() method.
 
  171         return fsc.accept(getContentFieldsV);
 
  181             return new HashMap<>();
 
  185         public Map<String, String> 
visit(File f) {
 
  192         public Map<String, String> 
visit(DerivedFile df) {
 
  199         public Map<String, String> 
visit(Directory d) {
 
  206         public Map<String, String> 
visit(LayoutFile lf) {
 
  212         public Map<String, String> 
visit(LocalFile lf) {
 
  227             Map<String, String> params = 
new HashMap<>();
 
  228             params.put(
Server.
Schema.ID.toString(), Long.toString(af.getId()));
 
  230                 long dataSourceId = af.getDataSource().getId();
 
  231                 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(dataSourceId));
 
  232             } 
catch (TskCoreException ex) {
 
  233                 logger.log(Level.SEVERE, 
"Could not get data source id to properly index the file {0}", af.getId()); 
 
  234                 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
 
  237             params.put(
Server.
Schema.FILE_NAME.toString(), af.getName());
 
  258     void ingest(ContentStream cs, Map<String, String> fields, 
final long size) 
throws IngesterException {
 
  260         if (fields.get(
Server.
Schema.IMAGE_ID.toString()) == null) {
 
  262             String msg = NbBundle.getMessage(this.getClass(),
 
  263                     "Ingester.ingest.exception.unknownImgId.msg", cs.getName());
 
  264             logger.log(Level.SEVERE, msg);
 
  265             throw new IngesterException(msg);
 
  268         final byte[] docChunkContentBuf = 
new byte[MAX_DOC_CHUNK_SIZE];
 
  269         SolrInputDocument updateDoc = 
new SolrInputDocument();
 
  271         for (String key : fields.keySet()) {
 
  272             updateDoc.addField(key, fields.get(key));
 
  279             InputStream is = null;
 
  283                 read = is.read(docChunkContentBuf);
 
  284             } 
catch (IOException ex) {
 
  285                 throw new IngesterException(
 
  286                         NbBundle.getMessage(
this.getClass(), 
"Ingester.ingest.exception.cantReadStream.msg",
 
  292                     } 
catch (IOException ex) {
 
  293                         logger.log(Level.WARNING, 
"Could not close input stream after reading content, " + cs.getName(), ex); 
 
  301                     s = 
new String(docChunkContentBuf, 0, read, ENCODING);
 
  304                     for (
int i = 0; i < s.length(); i++) {
 
  305                         if (!TextUtil.isValidSolrUTF8(s.charAt(i))) {
 
  308                                 chars = s.toCharArray();
 
  315                         s = 
new String(chars);
 
  317                 } 
catch (UnsupportedEncodingException ex) {
 
  318                     logger.log(Level.SEVERE, 
"Unsupported encoding", ex); 
 
  320                 updateDoc.addField(Server.Schema.CONTENT.toString(), s);
 
  322                 updateDoc.addField(Server.Schema.CONTENT.toString(), 
"");
 
  326             updateDoc.addField(Server.Schema.CONTENT.toString(), 
"");
 
  331             solrServer.addDocument(updateDoc);
 
  332             uncommitedIngests = 
true;
 
  333         } 
catch (KeywordSearchModuleException ex) {
 
  334             throw new IngesterException(
 
  335                     NbBundle.getMessage(
this.getClass(), 
"Ingester.ingest.exception.err.msg", cs.getName()), ex);
 
  347     static int getTimeout(
long size) {
 
  348         if (size < 1024 * 1024L) 
 
  351         } 
else if (size < 10 * 1024 * 1024L) 
 
  354         } 
else if (size < 100 * 1024 * 1024L) 
 
  370             uncommitedIngests = 
false;
 
  371         } 
catch (NoOpenCoreException | SolrServerException ex) {
 
  372             logger.log(Level.WARNING, 
"Error commiting index", ex); 
 
  381         private AbstractFile 
f;
 
  394             return NbBundle.getMessage(this.getClass(), 
"Ingester.FscContentStream.getSrcInfo", f.getId());
 
  409             return new ReadContentInputStream(f);
 
  414             throw new UnsupportedOperationException(
 
  415                     NbBundle.getMessage(
this.getClass(), 
"Ingester.FscContentStream.getReader"));
 
  424         AbstractContent aContent;
 
  427             this.aContent = aContent;
 
  432             return aContent.getName();
 
  437             return NbBundle.getMessage(this.getClass(), 
"Ingester.NullContentStream.getSrcInfo.text", aContent.getId());
 
  452             return new ByteArrayInputStream(
new byte[0]);
 
  457             throw new UnsupportedOperationException(
 
  458                     NbBundle.getMessage(
this.getClass(), 
"Ingester.NullContentStream.getReader"));
 
  466     static class IngesterException 
extends Exception {
 
  468         private static final long serialVersionUID = 1L;
 
  470         IngesterException(String message, Throwable ex) {
 
  474         IngesterException(String message) {
 
Map< String, String > visit(Directory d)
Map< String, String > defaultVisit(Content cntnt)
Map< String, String > visit(DerivedFile df)
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map< String, String > visit(File f)
Map< String, String > getCommonFields(AbstractFile af)
Map< String, String > getCommonFileContentFields(Map< String, String > params, AbstractFile file)
Map< String, String > visit(LocalFile lf)
Map< String, String > visit(LayoutFile lf)