19 package org.sleuthkit.autopsy.keywordsearch;
21 import org.apache.commons.lang3.math.NumberUtils;
22 import org.apache.solr.common.SolrInputDocument;
23 import org.openide.util.NbBundle;
27 import java.util.ArrayList;
28 import java.util.List;
30 import java.util.Optional;
35 class LanguageSpecificContentIndexingHelper {
37 private final LanguageDetector languageDetector =
new LanguageDetector();
39 Optional<Language> detectLanguageIfNeeded(String text)
throws NoOpenCoreException {
40 double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
41 if (2.2 <= indexSchemaVersion) {
42 return languageDetector.detect(text);
44 return Optional.empty();
48 void updateLanguageSpecificFields(Map<String, Object> fields, Chunker.Chunk chunk, Language language) {
49 List<String> values =
new ArrayList<>();
50 values.add(chunk.toString());
51 if (fields.containsKey(Server.Schema.FILE_NAME.toString())) {
52 values.add(Chunker.sanitize(fields.get(Server.Schema.FILE_NAME.toString()).toString()).toString());
56 fields.put(Server.Schema.CONTENT_JA.toString(), values);
57 fields.put(Server.Schema.LANGUAGE.toString(), Chunker.sanitize(language.getValue()).toString());
60 void indexMiniChunk(Chunker.Chunk chunk, String sourceName, Map<String, Object> fields, String baseChunkID, Language language)
61 throws Ingester.IngesterException {
63 SolrInputDocument updateDoc =
new SolrInputDocument();
64 for (String key : fields.keySet()) {
65 if (fields.get(key).getClass() == String.class) {
66 updateDoc.addField(key, Chunker.sanitize((String)fields.get(key)).toString());
68 updateDoc.addField(key, fields.get(key));
73 updateDoc.setField(Server.Schema.ID.toString(), Chunker.sanitize(MiniChunkHelper.getChunkIdString(baseChunkID)).toString());
76 updateDoc.addField(Server.Schema.CONTENT_JA.toString(), Chunker.sanitize(chunk.toString().substring(chunk.getBaseChunkLength())).toString());
77 updateDoc.addField(Server.Schema.LANGUAGE.toString(), Chunker.sanitize(language.getValue()).toString());
79 TimingMetric metric = HealthMonitor.getTimingMetric(
"Solr: Index chunk");
81 KeywordSearch.getServer().addDocument(updateDoc);
82 HealthMonitor.submitTimingMetric(metric);
84 }
catch (KeywordSearchModuleException | NoOpenCoreException ex) {
85 throw new Ingester.IngesterException(
86 NbBundle.getMessage(Ingester.class,
"Ingester.ingest.exception.err.msg", sourceName), ex);