api-docs/4.19.3/_language_specific_content_indexing_helper_8java_source.html

 /*

  * Autopsy Forensic Browser

  *

  * Copyright 2011-2021 Basis Technology Corp.

  * Contact: carrier <at> sleuthkit <dot> org

  *

  * Licensed under the Apache License, Version 2.0 (the "License");

  * you may not use this file except in compliance with the License.

  * You may obtain a copy of the License at

  *

  *     http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing, software

  * distributed under the License is distributed on an "AS IS" BASIS,

  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  * See the License for the specific language governing permissions and

  * limitations under the License.

  */

 package org.sleuthkit.autopsy.keywordsearch;


 import org.apache.commons.lang3.math.NumberUtils;

 import org.apache.solr.common.SolrInputDocument;

 import org.openide.util.NbBundle;

 import org.sleuthkit.autopsy.healthmonitor.HealthMonitor;

 import org.sleuthkit.autopsy.healthmonitor.TimingMetric;


 import java.util.ArrayList;

 import java.util.List;

 import java.util.Map;

 import java.util.Optional;


 /**
  * Helper that adds language-specific content to the field map of a chunk
  * being indexed and that indexes an additional "mini chunk" Solr document
  * for a chunk.
  */
 class LanguageSpecificContentIndexingHelper {

     /**
      * Lowest index schema version for which language detection is performed.
      */
     private static final double MIN_SCHEMA_VERSION_FOR_LANGUAGE_DETECTION = 2.2;

     private final LanguageDetector languageDetector = new LanguageDetector();

     /**
      * Runs language detection on the given text, but only when the schema
      * version of the current index is at least 2.2.
      *
      * @param text The text to run language detection on.
      *
      * @return The result of the language detector, or an empty Optional when
      *         the index schema version is below 2.2. A schema version string
      *         that cannot be parsed as a number is treated as 0.0 by
      *         NumberUtils.toDouble(), so detection is skipped in that case
      *         as well.
      *
      * @throws NoOpenCoreException Propagated from the underlying calls.
      */
     Optional<Language> detectLanguageIfNeeded(String text) throws NoOpenCoreException {
         double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
         if (indexSchemaVersion < MIN_SCHEMA_VERSION_FOR_LANGUAGE_DETECTION) {
             return Optional.empty();
         }
         return languageDetector.detect(text);
     }

     /**
      * Adds the language-specific content field and the language field to the
      * given field map. The content field receives the chunk text and, when a
      * file name is present in the map, the sanitized file name as well.
      *
      * @param fields   The field map to update in place.
      * @param chunk    The chunk whose text is added to the content field.
      * @param language The language whose value is stored in the language
      *                 field.
      */
     void updateLanguageSpecificFields(Map<String, Object> fields, Chunker.Chunk chunk, Language language) {
         List<String> values = new ArrayList<>();
         values.add(chunk.toString());

         // One lookup instead of containsKey() + get(); a missing file name
         // and a file name mapped to null are both skipped.
         Object fileName = fields.get(Server.Schema.FILE_NAME.toString());
         if (fileName != null) {
             values.add(Chunker.sanitize(fileName.toString()).toString());
         }

         // index the chunk to a language specific field
         fields.put(Server.Schema.CONTENT_JA.toString(), values);
         fields.put(Server.Schema.LANGUAGE.toString(), Chunker.sanitize(language.getValue()).toString());
     }

     /**
      * Builds a Solr document from the given field map, adds the mini chunk
      * ID, the language-specific content and the language to it, and sends it
      * to the keyword search server.
      *
      * @param chunk       The chunk; only the text after its first
      *                    getBaseChunkLength() characters is indexed here.
      * @param sourceName  Name of the source, used in the error message.
      * @param fields      Fields copied into the Solr document. String values
      *                    are sanitized; null values are skipped.
      * @param baseChunkID The chunk ID the mini chunk ID is derived from.
      * @param language    The language whose value is stored in the language
      *                    field.
      *
      * @throws Ingester.IngesterException If adding the document to the
      *                                    server fails.
      */
     void indexMiniChunk(Chunker.Chunk chunk, String sourceName, Map<String, Object> fields, String baseChunkID, Language language)
         throws Ingester.IngesterException {
         //Make a SolrInputDocument out of the field map
         SolrInputDocument updateDoc = new SolrInputDocument();
         for (Map.Entry<String, Object> field : fields.entrySet()) {
             Object value = field.getValue();
             if (value == null) {
                 // Nothing to index for this field; avoids a NullPointerException.
                 continue;
             }
             if (value instanceof String) {
                 updateDoc.addField(field.getKey(), Chunker.sanitize((String) value).toString());
             } else {
                 updateDoc.addField(field.getKey(), value);
             }
         }

         try {
             // setField (not addField) so any ID copied from the field map is replaced
             updateDoc.setField(Server.Schema.ID.toString(), Chunker.sanitize(MiniChunkHelper.getChunkIdString(baseChunkID)).toString());

             // index the chunk to a language specific field
             updateDoc.addField(Server.Schema.CONTENT_JA.toString(), Chunker.sanitize(chunk.toString().substring(chunk.getBaseChunkLength())).toString());
             updateDoc.addField(Server.Schema.LANGUAGE.toString(), Chunker.sanitize(language.getValue()).toString());

             // The metric is submitted only when addDocument() returns normally.
             TimingMetric metric = HealthMonitor.getTimingMetric("Solr: Index chunk");

             KeywordSearch.getServer().addDocument(updateDoc);
             HealthMonitor.submitTimingMetric(metric);

         } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
             throw new Ingester.IngesterException(
                 NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
         }
     }
 }

org.sleuthkit

org.sleuthkit.autopsy.healthmonitor
Definition: HealthMonitor.java:19

org

org.sleuthkit.autopsy.healthmonitor.TimingMetric
Definition: TimingMetric.java:24

org.sleuthkit.autopsy

org.sleuthkit.autopsy.healthmonitor.HealthMonitor
Definition: HealthMonitor.java:63