19 package org.sleuthkit.autopsy.keywordsearch;
21 import java.io.ByteArrayInputStream;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.io.Reader;
25 import java.io.UnsupportedEncodingException;
26 import java.util.HashMap;
28 import java.util.logging.Level;
29 import org.apache.solr.client.solrj.SolrServerException;
30 import org.apache.solr.common.util.ContentStream;
31 import org.apache.solr.common.SolrInputDocument;
32 import org.openide.util.NbBundle;
// Class-wide logger, named after Ingester.
53 private static final Logger logger = Logger.getLogger(Ingester.class.getName());
// Set to true after a document has been added to Solr and reset to false in
// the commit path; finalize() checks it to warn about work that was never
// committed. Declared volatile.
54 private volatile boolean uncommitedIngests =
false;
// Server handle obtained from KeywordSearch; documents are added through it.
55 private final Server solrServer = KeywordSearch.getServer();
// Visitor handed to AbstractContent.accept() to build the field map for a
// piece of content (see getContentFields).
56 private final GetContentFieldsV getContentFieldsV =
new GetContentFieldsV();
// Shared instance, created on first call to getDefault().
57 private static Ingester instance;
// Size of the byte buffer used when reading stream content (1024 * 1024).
61 private static final int MAX_DOC_CHUNK_SIZE = 1024 * 1024;
// Charset name used when decoding the read bytes into a String.
62 private static final String ENCODING =
"UTF-8";
/**
 * Creates the shared Ingester the first time it is requested. The method is
 * declared static synchronized, so the null check and the assignment run
 * under the class lock.
 *
 * NOTE(review): the return statement and closing braces are not visible in
 * this excerpt; presumably the method returns {@code instance} - confirm.
 */
67 public static synchronized Ingester getDefault() {
68 if (instance == null) {
69 instance =
new Ingester();
/**
 * Finalizer hook: logs a warning when this Ingester still has
 * uncommitedIngests set, i.e. documents were added but the flag was never
 * cleared by the commit path.
 *
 * NOTE(review): other statements of this method (for example a call to
 * super.finalize()) are not visible in this excerpt.
 */
75 @SuppressWarnings(
"FinalizeDeclaration")
76 protected
void finalize() throws Throwable {
80 if (uncommitedIngests) {
81 logger.warning(
"Ingester was used to add files that it never committed.");
/**
 * Ingests a string content stream: builds the field map from the stream's
 * source content and delegates to ingest(ContentStream, Map, long), passing
 * the source content's size.
 *
 * @param afscs stream to index; also supplies the source content
 * @throws IngesterException propagated from the delegated ingest call
 */
94 void ingest(AbstractFileStringContentStream afscs)
throws IngesterException {
95 Map<String, String> params = getContentFields(afscs.getSourceContent());
96 ingest(afscs, params, afscs.getSourceContent().getSize());
/**
 * Ingests a document for the extractor's source file that carries the file's
 * fields plus the NUM_CHUNKS field, but no content of its own: the stream
 * passed on is a NullContentStream and the size argument is 0.
 *
 * @param fe extractor supplying the source file and its chunk count
 * @throws IngesterException propagated from the delegated ingest call
 */
111 void ingest(TextExtractor fe)
throws IngesterException {
112 Map<String, String> params = getContentFields(fe.getSourceFile());
// Record how many chunks the extractor reports for this file.
114 params.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(fe.getNumChunks()));
116 ingest(
new NullContentStream(fe.getSourceFile()), params, 0);
/**
 * Ingests one chunk of a file. The field map is built from the byte stream's
 * source content, then the ID field is replaced with a chunk-specific id
 * before delegating to ingest(ContentStream, Map, long).
 *
 * @param fec  chunk descriptor; supplies the chunk number
 * @param bcs  stream holding the chunk's content; also supplies the source content
 * @param size passed through unchanged as the size argument
 * @throws IngesterException propagated from the delegated ingest call
 */
131 void ingest(AbstractFileChunk fec, ByteContentStream bcs,
int size)
throws IngesterException {
132 AbstractContent sourceContent = bcs.getSourceContent();
133 Map<String, String> params = getContentFields(sourceContent);
// Overwrite the ID put there by getContentFields with the id string that
// Server.getChunkIdString builds from the content id and chunk number.
136 params.put(Server.Schema.ID.toString(),
137 Server.getChunkIdString(sourceContent.getId(), fec.getChunkNumber()));
139 ingest(bcs, params, size);
/**
 * Ingests a file either with or without its content.
 *
 * When ingestContent is false or the file is a directory, the file's fields
 * are indexed with a NullContentStream and size 0. The second call indexes
 * the file through an FscContentStream with the file's size.
 *
 * NOTE(review): the line between the two calls is not visible in this
 * excerpt; presumably it is an else branch - confirm.
 *
 * @param file          file to index
 * @param ingestContent whether the file's content should be indexed
 * @throws IngesterException propagated from the delegated ingest call
 */
155 void ingest(AbstractFile file,
boolean ingestContent)
throws IngesterException {
156 if (ingestContent ==
false || file.isDir()) {
157 ingest(
new NullContentStream(file), getContentFields(file), 0);
159 ingest(
new FscContentStream(file), getContentFields(file), file.getSize());
/**
 * Builds the field map for a piece of content by dispatching the
 * getContentFieldsV visitor on it.
 *
 * @param fsc content to describe
 * @return whatever map the visitor returns for this content type
 */
170 private Map<String, String> getContentFields(AbstractContent fsc) {
171 return fsc.accept(getContentFieldsV);
// NOTE(review): the class header, the header of the method containing the
// next line, and the bodies of the visit(...) overloads are not visible in
// this excerpt. Only what the visible lines show is described here.

// Returns an empty field map (the enclosing method's header is not visible;
// presumably the visitor's default case - confirm).
181 return new HashMap<>();
// Visitor overloads, one per content type; each returns a field map.
185 public Map<String, String>
visit(File f) {
192 public Map<String, String>
visit(DerivedFile df) {
199 public Map<String, String>
visit(Directory d) {
206 public Map<String, String>
visit(LayoutFile lf) {
212 public Map<String, String>
visit(LocalFile lf) {
// The lines below build the fields shared by every file "af" (the header of
// the method that contains them is not visible in this excerpt).
227 Map<String, String> params =
new HashMap<>();
// ID field: the file's id as a decimal string.
228 params.put(
Server.
Schema.ID.toString(), Long.toString(af.getId()));
// IMAGE_ID field: id of the file's data source.
230 long dataSourceId = af.getDataSource().getId();
231 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(dataSourceId));
// If the data source lookup fails, log it and store -1 instead, so the
// IMAGE_ID field is still present in the map.
232 }
catch (TskCoreException ex) {
233 logger.log(Level.SEVERE,
"Could not get data source id to properly index the file {0}", af.getId());
234 params.put(
Server.
Schema.IMAGE_ID.toString(), Long.toString(-1));
// FILE_NAME field: the file's name.
237 params.put(
Server.
Schema.FILE_NAME.toString(), af.getName());
/**
 * Builds one Solr document from the supplied fields plus content read from
 * the stream into a MAX_DOC_CHUNK_SIZE-byte buffer, and adds it through
 * solrServer. On success uncommitedIngests is set to true.
 *
 * NOTE(review): this excerpt skips several lines of the method (opening of
 * the try blocks, the conditions selecting which CONTENT value is used, how
 * "size" is consulted). Comments below describe only the visible statements.
 *
 * @param cs     stream to read content from; its name appears in messages
 * @param fields field name/value pairs copied onto the document; must
 *               contain a non-null IMAGE_ID entry
 * @param size   declared final; its use is not visible in this excerpt
 * @throws IngesterException if IMAGE_ID is missing, if reading the stream
 *                           fails, or if adding the document fails
 */
258 void ingest(ContentStream cs, Map<String, String> fields,
final long size)
throws IngesterException {
// Refuse to index a document that has no IMAGE_ID field.
260 if (fields.get(
Server.
Schema.IMAGE_ID.toString()) == null) {
262 String msg = NbBundle.getMessage(this.getClass(),
263 "Ingester.ingest.exception.unknownImgId.msg", cs.getName());
264 logger.log(Level.SEVERE, msg);
265 throw new IngesterException(msg);
// Read buffer sized to MAX_DOC_CHUNK_SIZE bytes.
268 final byte[] docChunkContentBuf =
new byte[MAX_DOC_CHUNK_SIZE];
269 SolrInputDocument updateDoc =
new SolrInputDocument();
// Copy every supplied field onto the document.
271 for (String key : fields.keySet()) {
272 updateDoc.addField(key, fields.get(key));
279 InputStream is = null;
// NOTE(review): only a single read() call is visible here. InputStream.read
// may return fewer bytes than the buffer holds, or -1 at end of stream -
// confirm the surrounding (hidden) lines account for that.
283 read = is.read(docChunkContentBuf);
// A read failure aborts the ingest with an IngesterException.
284 }
catch (IOException ex) {
285 throw new IngesterException(
286 NbBundle.getMessage(
this.getClass(),
"Ingester.ingest.exception.cantReadStream.msg",
// A failure while closing the stream is logged at WARNING level.
292 }
catch (IOException ex) {
293 logger.log(Level.WARNING,
"Could not close input stream after reading content, " + cs.getName(), ex);
// Decode only the bytes that were actually read, using ENCODING.
301 s =
new String(docChunkContentBuf, 0, read, ENCODING);
// Scan the decoded text for characters TextUtil.isValidSolrUTF8 rejects. The
// text is copied to a char array and rebuilt from it; the statements that
// modify the array are not visible here (presumably they replace the
// rejected characters - confirm).
304 for (
int i = 0; i < s.length(); i++) {
305 if (!TextUtil.isValidSolrUTF8(s.charAt(i))) {
308 chars = s.toCharArray();
315 s =
new String(chars);
// An unsupported charset name is logged at SEVERE level.
317 }
catch (UnsupportedEncodingException ex) {
318 logger.log(Level.SEVERE,
"Unsupported encoding", ex);
// CONTENT is set to the decoded text on one path and to the empty string on
// two others; the conditions choosing between them are not visible here.
320 updateDoc.addField(Server.Schema.CONTENT.toString(), s);
322 updateDoc.addField(Server.Schema.CONTENT.toString(),
"");
326 updateDoc.addField(Server.Schema.CONTENT.toString(),
"");
// Hand the document to Solr and remember that a commit is now pending.
331 solrServer.addDocument(updateDoc);
332 uncommitedIngests =
true;
// Wrap the module exception, keeping it as the cause.
333 }
catch (KeywordSearchModuleException ex) {
334 throw new IngesterException(
335 NbBundle.getMessage(
this.getClass(),
"Ingester.ingest.exception.err.msg", cs.getName()), ex);
/**
 * Picks an int timeout from the given size by comparing it against three
 * ascending thresholds: 1024 * 1024, 10 * 1024 * 1024 and 100 * 1024 * 1024.
 *
 * NOTE(review): the value returned in each branch, the final fallback and the
 * unit of the result are not visible in this excerpt. "size" is presumably a
 * byte count - confirm against callers.
 *
 * @param size size used to select the timeout
 * @return the timeout chosen for that size range
 */
347 static int getTimeout(
long size) {
348 if (size < 1024 * 1024L)
351 }
else if (size < 10 * 1024 * 1024L)
354 }
else if (size < 100 * 1024 * 1024L)
// NOTE(review): the header of the enclosing method and the opening of this
// try block are not visible in this excerpt; judging by the log message below
// this is the tail of the index-commit path - confirm.
// Clear the pending-commit flag.
370 uncommitedIngests =
false;
// A failure with no open core, or a Solr server error, is logged at WARNING
// level.
371 }
catch (NoOpenCoreException | SolrServerException ex) {
372 logger.log(Level.WARNING,
"Error commiting index", ex);
// NOTE(review): the class header and the method headers of this content
// stream are not visible in this excerpt; the resource-bundle keys below name
// it FscContentStream. Each comment describes only the statement under it.

// The file whose content this stream exposes.
381 private AbstractFile
f;
// Source-info text: a localized message formatted with the file's id.
394 return NbBundle.getMessage(this.getClass(),
"Ingester.FscContentStream.getSrcInfo", f.getId());
// The content is served by a ReadContentInputStream over the file.
409 return new ReadContentInputStream(f);
// Unsupported operation (the bundle key suggests this is the reader accessor).
414 throw new UnsupportedOperationException(
415 NbBundle.getMessage(
this.getClass(),
"Ingester.FscContentStream.getReader"));
// NOTE(review): the class header and the method headers of this content
// stream are not visible in this excerpt; the resource-bundle keys below name
// it NullContentStream. Each comment describes only the statement under it.

// The content this stream stands in for.
424 AbstractContent aContent;
// Stores the content passed in (presumably in the constructor - confirm).
427 this.aContent = aContent;
// The stream's name is the content's name.
432 return aContent.getName();
// Source-info text: a localized message formatted with the content's id.
437 return NbBundle.getMessage(this.getClass(),
"Ingester.NullContentStream.getSrcInfo.text", aContent.getId());
// The stream is always empty: a zero-length byte array.
452 return new ByteArrayInputStream(
new byte[0]);
// Unsupported operation (the bundle key suggests this is the reader accessor).
457 throw new UnsupportedOperationException(
458 NbBundle.getMessage(
this.getClass(),
"Ingester.NullContentStream.getReader"));
/**
 * Checked exception thrown by the ingest methods when a document cannot be
 * indexed (missing IMAGE_ID, stream read failure, or failure adding the
 * document).
 *
 * NOTE(review): the constructor bodies are not visible in this excerpt;
 * presumably they forward to the matching Exception constructors - confirm.
 */
466 static class IngesterException
extends Exception {
468 private static final long serialVersionUID = 1L;
// Message plus underlying cause.
470 IngesterException(String message, Throwable ex) {
// Message only.
474 IngesterException(String message) {
Map< String, String > visit(Directory d)
Map< String, String > defaultVisit(Content cntnt)
Map< String, String > visit(DerivedFile df)
static String getStringTimeISO8601(long epochSeconds, TimeZone tzone)
Map< String, String > visit(File f)
Map< String, String > getCommonFields(AbstractFile af)
Map< String, String > getCommonFileContentFields(Map< String, String > params, AbstractFile file)
Map< String, String > visit(LocalFile lf)
Map< String, String > visit(LayoutFile lf)