package org.sleuthkit.autopsy.keywordsearch;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.logging.Level;
import org.apache.solr.client.solrj.SolrRequest.METHOD;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.SolrInputDocument;
import org.openide.util.Exceptions;
import org.openide.util.NbBundle;
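// Some imports used below are elided in this excerpt; based on the types referenced,
// they presumably include java.util.Map, a Logger class, and the Sleuth Kit
// datamodel types (AbstractContent, AbstractFile, etc.).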
 
class Ingester {    // class declaration reconstructed from context; intervening lines are elided in this excerpt

    private static final Logger logger = Logger.getLogger(Ingester.class.getName());
    private volatile boolean uncommitedIngests = false;
    private final ExecutorService upRequestExecutor = Executors.newSingleThreadExecutor();
    private final Server solrServer = KeywordSearch.getServer();
    private final GetContentFieldsV getContentFieldsV = new GetContentFieldsV();
    private static Ingester instance;

    private static final int MAX_DOC_CHUNK_SIZE = 1024 * 1024;
    private static final String docContentEncoding = "UTF-8";
 
    public static synchronized Ingester getDefault() {
        if (instance == null) {
            instance = new Ingester();
        }
        return instance;
    }
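    // Typical use (illustrative sketch, not code from this file): callers obtain the
    // process-wide singleton, ingest one or more files, then commit to make them searchable:
    //
    //   Ingester ingester = Ingester.getDefault();
    //   ingester.ingest(someAbstractFile, true);  // index metadata and content
    //   ingester.commit();                        // flush uncommitted adds to the index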
 
    @SuppressWarnings("FinalizeDeclaration")
    protected void finalize() throws Throwable {
        super.finalize();   // call reconstructed; the lines between the header and the check are elided
        if (uncommitedIngests) {
            logger.warning("Ingester was used to add files that it never committed.");
        }
    }
 
    void ingest(AbstractFileStringContentStream afscs) throws IngesterException {
        Map<String, String> params = getContentFields(afscs.getSourceContent());
        ingest(afscs, params, afscs.getSourceContent().getSize());
    }
 
    void ingest(TextExtractor fe) throws IngesterException {
        Map<String, String> params = getContentFields(fe.getSourceFile());
        params.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(fe.getNumChunks()));
        ingest(new NullContentStream(fe.getSourceFile()), params, 0);
    }
 
    void ingest(AbstractFileChunk fec, ByteContentStream bcs, int size) throws IngesterException {
        AbstractContent sourceContent = bcs.getSourceContent();
        Map<String, String> params = getContentFields(sourceContent);
        params.put(Server.Schema.ID.toString(),
                Server.getChunkIdString(sourceContent.getId(), fec.getChunkId()));
        ingest(bcs, params, size);
    }
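    // Server.getChunkIdString() derives the chunk document's Solr ID from the source
    // object's ID plus the chunk number, so every chunk of a file gets a distinct,
    // predictable document ID (the exact format is not shown in this excerpt).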
 
    void ingest(AbstractFile file, boolean ingestContent) throws IngesterException {
        if (ingestContent == false || file.isDir()) {
            ingest(new NullContentStream(file), getContentFields(file), 0);
        } else {
            ingest(new FscContentStream(file), getContentFields(file), file.getSize());
        }
    }
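    // The two branches above select the content stream: directories and metadata-only
    // requests go through NullContentStream (empty content, size 0), while regular files
    // stream their bytes through FscContentStream with the file's real size.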
 
    private Map<String, String> getContentFields(AbstractContent fsc) {
        return fsc.accept(getContentFieldsV);
    }
 
    // --- Fragments of the GetContentFieldsV visitor (the ContentVisitor that builds the
    // --- indexed field map); most of the class body is elided in this excerpt.

    // from one of its visit methods (surrounding context not shown):
            return new HashMap<String, String>();

    // from getCommonFields(AbstractFile af) -- the data source lookup and its try/catch
    // structure are elided, only these statements survive in the excerpt:
            Map<String, String> params = new HashMap<String, String>();
            long dataSourceId = -1;
            params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSourceId));
            // on failure to resolve the data source:
            logger.log(Level.SEVERE, "Could not get data source id to properly index the file " + af.getId());
            params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
 
    void ingest(ContentStream cs, Map<String, String> fields, final long size) throws IngesterException {
        if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
            String msg = NbBundle.getMessage(this.getClass(),
                                             "Ingester.ingest.exception.unknownImgId.msg", cs.getName());
            logger.log(Level.SEVERE, msg);
            throw new IngesterException(msg);
        }

        final byte[] docChunkContentBuf = new byte[MAX_DOC_CHUNK_SIZE];
        SolrInputDocument updateDoc = new SolrInputDocument();

        for (String key : fields.keySet()) {
            updateDoc.addField(key, fields.get(key));
        }

        if (size > 0) {  // guard reconstructed; the exact condition is elided in this excerpt
            InputStream is = null;
            int read = 0;
            try {
                is = cs.getStream();
                read = is.read(docChunkContentBuf);
            } catch (IOException ex) {
                throw new IngesterException(
                        NbBundle.getMessage(this.getClass(), "Ingester.ingest.exception.cantReadStream.msg",
                                            cs.getName()));
            } finally {
                try {
                    if (is != null) {
                        is.close();
                    }
                } catch (IOException ex) {
                    logger.log(Level.WARNING, "Could not close input stream after reading content, " + cs.getName(), ex);
                }
            }

            if (read != 0) {
                String s = "";
                try {
                    s = new String(docChunkContentBuf, 0, read, docContentEncoding);
                } catch (UnsupportedEncodingException ex) {
                    Exceptions.printStackTrace(ex);
                }
                updateDoc.addField(Server.Schema.CONTENT.toString(), s);
            } else {
                // nothing could be read from the stream; index an empty content field
                updateDoc.addField(Server.Schema.CONTENT.toString(), "");
            }
        } else {
            // no content to stream (e.g. directories and metadata-only documents)
            updateDoc.addField(Server.Schema.CONTENT.toString(), "");
        }

        try {
            solrServer.addDocument(updateDoc);
            uncommitedIngests = true;
        } catch (KeywordSearchModuleException ex) {
            throw new IngesterException(
                    NbBundle.getMessage(this.getClass(), "Ingester.ingest.exception.err.msg", cs.getName()), ex);
        }
    }
 
    private void ingestExtract(ContentStream cs, Map<String, String> fields, final long size) throws IngesterException {
        final ContentStreamUpdateRequest up = new ContentStreamUpdateRequest("/update/extract");
        up.addContentStream(cs);
        setFields(up, fields);
        up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);

        final String contentType = cs.getContentType();
        if (contentType != null && !contentType.trim().equals("")) {
            up.setParam("stream.contentType", contentType);
        }

        // the commit itself is deferred; it is performed separately in commit()
        up.setParam("commit", "false");

        final Future<?> f = upRequestExecutor.submit(new UpRequestTask(up));

        try {
            f.get(getTimeout(size), TimeUnit.SECONDS);
        } catch (TimeoutException te) {
            logger.log(Level.WARNING, "Solr timeout encountered, trying to restart Solr");
            hardSolrRestart();  // recovery call reconstructed; the statements between the log and the throw are elided
            throw new IngesterException(
                    NbBundle.getMessage(this.getClass(), "Ingester.ingestExtract.exception.solrTimeout.msg",
                                        fields.get("id"), fields.get("file_name")));
        } catch (Exception e) {
            throw new IngesterException(
                    NbBundle.getMessage(this.getClass(), "Ingester.ingestExtract.exception.probPostToSolr.msg",
                                        fields.get("id"), fields.get("file_name")), e);
        }
        uncommitedIngests = true;
    }
 
    private void hardSolrRestart() {
        try {
            solrServer.closeCore();
        } catch (KeywordSearchModuleException ex) {
            logger.log(Level.WARNING, "Cannot close core", ex);
        }

        // (stopping the server is elided in this excerpt)

        try {
            solrServer.start();  // start call reconstructed from the catch clauses below
        } catch (KeywordSearchModuleException ex) {
            logger.log(Level.WARNING, "Cannot start", ex);
        } catch (SolrServerNoPortException ex) {
            logger.log(Level.WARNING, "Cannot start server with this port", ex);
        }

        try {
            solrServer.openCore();
        } catch (KeywordSearchModuleException ex) {
            logger.log(Level.WARNING, "Cannot open core", ex);
        }
    }
 
    static int getTimeout(long size) {
        if (size < 1024 * 1024L) {
            // < 1 MB: shortest timeout (return value elided in this excerpt)
        } else if (size < 10 * 1024 * 1024L) {
            // < 10 MB (return value elided)
        } else if (size < 100 * 1024 * 1024L) {
            // < 100 MB (return value elided)
        }
        // remaining branch and return values elided
    }
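    // getTimeout() scales how long ingestExtract() waits on the Solr request with the size
    // of the content being posted: for example, a 500 KB document falls in the first tier,
    // a 50 MB file in the third, and anything at or above 100 MB gets the longest timeout.
    // The concrete per-tier values (in seconds) are not shown in this excerpt.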
 
    // Worker submitted to upRequestExecutor by ingestExtract(); the class header,
    // constructor, and parts of run() are reconstructed from context -- only the
    // statements shown in the excerpt are verbatim.
    private class UpRequestTask implements Runnable {

        ContentStreamUpdateRequest up;

        UpRequestTask(ContentStreamUpdateRequest up) {
            this.up = up;
        }

        @Override
        public void run() {
            try {
                up.setMethod(METHOD.POST);
                solrServer.request(up);
            } catch (NoOpenCoreException ex) {  // exception type reconstructed; elided in the excerpt
                throw new RuntimeException(
                        NbBundle.getMessage(this.getClass(), "Ingester.UpReqestTask.run.exception.sorlNotAvail.msg"), ex);
            } catch (IllegalStateException ex) {
                throw new RuntimeException(
                        NbBundle.getMessage(this.getClass(), "Ingester.UpRequestTask.run.exception.probReadFile.msg"), ex);
            } catch (SolrServerException ex) {
                throw new RuntimeException(
                        NbBundle.getMessage(this.getClass(), "Ingester.UpRequestTask.run.exception.solrProb.msg"), ex);
            } catch (SolrException ex) {
                ErrorCode ec = ErrorCode.getErrorCode(ex.code());
                if (ec.equals(ErrorCode.SERVER_ERROR)) {
                    throw new RuntimeException(NbBundle.getMessage(this.getClass(),
                                                                   "Ingester.UpRequestTask.run.exception.probPostToSolr.msg"
                                                                   /* remaining message arguments elided */), ex);
                }
                // handling of other Solr error codes elided in this excerpt
            }
        }
    }
 
    // Commits everything added since the last commit; the method header and the
    // solrServer commit call are reconstructed -- only the lines shown in the excerpt
    // are verbatim.
    void commit() {
        try {
            solrServer.commit();
            uncommitedIngests = false;
        } catch (NoOpenCoreException ex) {
            logger.log(Level.WARNING, "Error committing index", ex);
        } catch (SolrServerException ex) {
            logger.log(Level.WARNING, "Error committing index", ex);
        }
    }
 
    private static void setFields(ContentStreamUpdateRequest up, Map<String, String> fields) {
        for (Entry<String, String> field : fields.entrySet()) {
            up.setParam("literal." + field.getKey(), field.getValue());
        }
    }
 
    // --- Fragments of the FscContentStream wrapper, which streams an AbstractFile's bytes
    // --- to Solr; most of the class is elided in this excerpt.

    // from its source-info method:
            return NbBundle.getMessage(this.getClass(), "Ingester.FscContentStream.getSrcInfo", f.getId());

    // from getReader():
            throw new UnsupportedOperationException(
                    NbBundle.getMessage(this.getClass(), "Ingester.FscContentStream.getReader"));

    // --- Fragments of the NullContentStream wrapper, the no-content stream used for
    // --- metadata-only documents; most of the class is elided in this excerpt.

    // from its constructor:
            this.aContent = aContent;

    // from its source-info method:
            return NbBundle.getMessage(this.getClass(), "Ingester.NullContentStream.getSrcInfo.text", aContent.getId());

    // from its stream accessor (returns no content):
            return new ByteArrayInputStream(new byte[0]);

    // from getReader():
            throw new UnsupportedOperationException(
                    NbBundle.getMessage(this.getClass(), "Ingester.NullContentStream.getReader"));

    // Exception signaling a problem with a specific ingest operation.
    static class IngesterException extends Exception {

        IngesterException(String message, Throwable ex) {
            super(message, ex);
        }

        IngesterException(String message) {
            super(message);
        }
    }
}
 
// Members of the GetContentFieldsV visitor referenced above (bodies not shown in this excerpt):
//
//   Map<String, String> visit(File f)
//   Map<String, String> visit(DerivedFile df)
//   Map<String, String> visit(Directory d)
//   Map<String, String> visit(LayoutFile lf)
//   Map<String, String> visit(LocalFile lf)
//   Map<String, String> defaultVisit(Content cntnt)
//   Map<String, String> getCommonFields(AbstractFile af)
//   Map<String, String> getCommonFileContentFields(Map<String, String> params, AbstractFile file)
//   static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
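// A minimal sketch of how such a visit method plausibly combines the helpers above
// (illustrative only; the real implementations are not shown in this excerpt):
//
//   Map<String, String> visit(File f) {
//       Map<String, String> params = getCommonFields(f);   // e.g. ID, IMAGE_ID
//       return getCommonFileContentFields(params, f);      // adds file-specific fields
//   }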