Autopsy  4.14.0
Graphical digital forensics platform for The Sleuth Kit and other tools.
LanguageSpecificContentIndexingHelper.java
Go to the documentation of this file.
1 /*
2  * Autopsy Forensic Browser
3  *
4  * Copyright 2011-2019 Basis Technology Corp.
5  * Contact: carrier <at> sleuthkit <dot> org
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 package org.sleuthkit.autopsy.keywordsearch;
20 
21 import org.apache.commons.lang3.math.NumberUtils;
22 import org.apache.solr.common.SolrInputDocument;
23 import org.openide.util.NbBundle;
26 
27 import java.util.ArrayList;
28 import java.util.List;
29 import java.util.Map;
30 import java.util.Optional;
31 
35 class LanguageSpecificContentIndexingHelper {
36 
37  private final LanguageDetector languageDetector = new LanguageDetector();
38 
39  Optional<Language> detectLanguageIfNeeded(Chunker.Chunk chunk) throws NoOpenCoreException {
40  double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
41  if (2.2 <= indexSchemaVersion) {
42  return languageDetector.detect(chunk.toString());
43  } else {
44  return Optional.empty();
45  }
46  }
47 
48  void updateLanguageSpecificFields(Map<String, Object> fields, Chunker.Chunk chunk, Language language) {
49  List<String> values = new ArrayList<>();
50  values.add(chunk.toString());
51  if (fields.containsKey(Server.Schema.FILE_NAME.toString())) {
52  values.add(fields.get(Server.Schema.FILE_NAME.toString()).toString());
53  }
54 
55  // index the chunk to a language specific field
56  fields.put(Server.Schema.CONTENT_JA.toString(), values);
57  fields.put(Server.Schema.LANGUAGE.toString(), language.getValue());
58  }
59 
60  void indexMiniChunk(Chunker.Chunk chunk, String sourceName, Map<String, Object> fields, String baseChunkID, Language language)
61  throws Ingester.IngesterException {
62  //Make a SolrInputDocument out of the field map
63  SolrInputDocument updateDoc = new SolrInputDocument();
64  for (String key : fields.keySet()) {
65  updateDoc.addField(key, fields.get(key));
66  }
67 
68  try {
69  updateDoc.setField(Server.Schema.ID.toString(), MiniChunkHelper.getChunkIdString(baseChunkID));
70 
71  // index the chunk to a language specific field
72  updateDoc.addField(Server.Schema.CONTENT_JA.toString(), chunk.toString().substring(chunk.getBaseChunkLength()));
73  updateDoc.addField(Server.Schema.LANGUAGE.toString(), language.getValue());
74 
75  TimingMetric metric = HealthMonitor.getTimingMetric("Solr: Index chunk");
76 
77  KeywordSearch.getServer().addDocument(updateDoc);
78  HealthMonitor.submitTimingMetric(metric);
79 
80  } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
81  throw new Ingester.IngesterException(
82  NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
83  }
84  }
85 }

Copyright © 2012-2020 Basis Technology. Generated on: Wed Apr 8 2020
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.