/*
 * Decompiled with CFR 0.152.
 */
package opennlp.uima.namefind;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import opennlp.maxent.GIS;
import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.namefind.NameSampleDataStream;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ObjectStreamUtils;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.model.BaseModel;
import opennlp.uima.util.CasConsumerUtil;
import opennlp.uima.util.ContainingConstraint;
import opennlp.uima.util.OpennlpUtil;
import opennlp.uima.util.SampleTraceStream;
import opennlp.uima.util.UimaUtil;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.FSMatchConstraint;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.collection.CasConsumer_ImplBase;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.util.ProcessTrace;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public final class NameFinderTrainer
extends CasConsumer_ImplBase {
    private static final String FEATURE_GENERATOR_DEFINITION_FILE_PARAMETER = "opennlp.uima.FeatureGeneratorFile";
    private static final String FEATURE_GENERATOR_RESOURCES_PARAMETER = "opennlp.uima.FeatureGeneratorResources";
    private Logger logger;
    private String modelPath;
    private byte[] featureGeneratorDefinition;
    private File featureGeneratorResourceDir;
    private String additionalTrainingDataFile;
    private String additionalTrainingDataEncoding;
    private File sampleTraceFile = null;
    private String sampleTraceFileEncoding = null;
    private Type sentenceType;
    private Type tokenType;
    private Type nameType;
    private String language;
    private List<NameSample> nameFinderSamples = new ArrayList<NameSample>();
    private TrainingParameters trainingParams;

    public void initialize() throws ResourceInitializationException {
        String sampleTraceFileName;
        super.initialize();
        this.logger = this.getUimaContext().getLogger();
        if (this.logger.isLoggable(Level.INFO)) {
            this.logger.log(Level.INFO, "Initializing the OpenNLP Name Trainer.");
        }
        this.modelPath = CasConsumerUtil.getRequiredStringParameter(this.getUimaContext(), UimaUtil.MODEL_PARAMETER);
        this.language = CasConsumerUtil.getRequiredStringParameter(this.getUimaContext(), "opennlp.uima.Language");
        this.trainingParams = OpennlpUtil.loadTrainingParams(CasConsumerUtil.getOptionalStringParameter(this.getUimaContext(), "opennlp.uima.TrainingParamsFile"), true);
        String featureGeneratorDefinitionFile = CasConsumerUtil.getOptionalStringParameter(this.getUimaContext(), FEATURE_GENERATOR_DEFINITION_FILE_PARAMETER);
        if (featureGeneratorDefinitionFile != null) {
            try {
                this.featureGeneratorDefinition = OpennlpUtil.loadBytes(new File(featureGeneratorDefinitionFile));
            }
            catch (IOException e) {
                throw new ResourceInitializationException((Throwable)e);
            }
            String featureGeneratorResourcesDirName = CasConsumerUtil.getOptionalStringParameter(this.getUimaContext(), FEATURE_GENERATOR_RESOURCES_PARAMETER);
            if (featureGeneratorResourcesDirName != null) {
                this.featureGeneratorResourceDir = new File(featureGeneratorResourcesDirName);
            }
        }
        this.additionalTrainingDataFile = CasConsumerUtil.getOptionalStringParameter(this.getUimaContext(), "opennlp.uima.AdditionalTrainingDataFile");
        if (this.additionalTrainingDataFile != null) {
            this.additionalTrainingDataEncoding = CasConsumerUtil.getRequiredStringParameter(this.getUimaContext(), "opennlp.uima.AdditionalTrainingDataEncoding");
        }
        if ((sampleTraceFileName = CasConsumerUtil.getOptionalStringParameter(this.getUimaContext(), "opennlp.uima.SampleTraceFile")) != null) {
            this.sampleTraceFile = new File(this.getUimaContextAdmin().getResourceManager().getDataPath() + File.separatorChar + sampleTraceFileName);
            this.sampleTraceFileEncoding = CasConsumerUtil.getRequiredStringParameter(this.getUimaContext(), "opennlp.uima.SampleTraceFileEncoding");
        }
    }

    public void typeSystemInit(TypeSystem typeSystem) throws ResourceInitializationException {
        String sentenceTypeName = CasConsumerUtil.getRequiredStringParameter(this.getUimaContext(), UimaUtil.SENTENCE_TYPE_PARAMETER);
        this.sentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
        String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(this.getUimaContext(), "opennlp.uima.TokenType");
        this.tokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
        String nameTypeName = CasConsumerUtil.getRequiredStringParameter(this.getUimaContext(), "opennlp.uima.NameType");
        this.nameType = CasConsumerUtil.getType(typeSystem, nameTypeName);
    }

    private static <T> List<T> iteratorToList(Iterator<T> it) {
        LinkedList<T> list = new LinkedList<T>();
        while (it.hasNext()) {
            list.add(it.next());
        }
        return list;
    }

    private static boolean isContaining(AnnotationFS annotation, AnnotationFS containtedAnnotation) {
        boolean isEndContaining;
        boolean isStartContaining;
        boolean bl = isStartContaining = annotation.getBegin() <= containtedAnnotation.getBegin();
        if (!isStartContaining) {
            return false;
        }
        boolean bl2 = isEndContaining = annotation.getEnd() >= containtedAnnotation.getEnd();
        return isEndContaining;
    }

    private static Span[] createNames(List<AnnotationFS> tokenList, List<AnnotationFS> entityAnnotations) {
        LinkedList<Span> nameList = new LinkedList<Span>();
        AnnotationFS currentEntity = null;
        int startIndex = -1;
        int index = 0;
        for (AnnotationFS token : tokenList) {
            for (AnnotationFS entity : entityAnnotations) {
                if (!NameFinderTrainer.isContaining(entity, token)) {
                    if (currentEntity != entity) continue;
                    nameList.add(new Span(startIndex, index));
                    startIndex = -1;
                    currentEntity = null;
                }
                if (currentEntity != null || !NameFinderTrainer.isContaining(entity, token)) continue;
                startIndex = index;
                currentEntity = entity;
            }
            ++index;
        }
        if (currentEntity != null) {
            Span name = new Span(startIndex, index);
            nameList.add(name);
        }
        return nameList.toArray(new Span[nameList.size()]);
    }

    public void processCas(CAS cas) {
        AnnotationIndex sentenceIndex = cas.getAnnotationIndex(this.sentenceType);
        boolean isClearAdaptiveData = true;
        for (AnnotationFS sentenceAnnotation : sentenceIndex) {
            ContainingConstraint sentenceContainingConstraint = new ContainingConstraint(sentenceAnnotation);
            AnnotationIndex tokenAnnotations = cas.getAnnotationIndex(this.tokenType);
            FSIterator containingTokens = cas.createFilteredIterator(tokenAnnotations.iterator(), (FSMatchConstraint)sentenceContainingConstraint);
            AnnotationIndex allNames = cas.getAnnotationIndex(this.nameType);
            FSIterator containingNames = cas.createFilteredIterator(allNames.iterator(), (FSMatchConstraint)sentenceContainingConstraint);
            List<AnnotationFS> tokenList = NameFinderTrainer.iteratorToList(containingTokens);
            Span[] names = NameFinderTrainer.createNames(tokenList, NameFinderTrainer.iteratorToList(containingNames));
            String[] tokenArray = new String[tokenList.size()];
            for (int i = 0; i < tokenArray.length; ++i) {
                tokenArray[i] = tokenList.get(i).getCoveredText();
            }
            NameSample trainingSentence = new NameSample(tokenArray, names, (String[][])null, isClearAdaptiveData);
            if (trainingSentence.getSentence().length != 0) {
                this.nameFinderSamples.add(trainingSentence);
                if (!isClearAdaptiveData) continue;
                isClearAdaptiveData = false;
                continue;
            }
            if (!this.logger.isLoggable(Level.INFO)) continue;
            this.logger.log(Level.INFO, "Sentence without tokens: " + sentenceAnnotation.getCoveredText());
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public void collectionProcessComplete(ProcessTrace trace) throws ResourceProcessException, IOException {
        TokenNameFinderModel nameModel;
        if (this.logger.isLoggable(Level.INFO)) {
            this.logger.log(Level.INFO, "Collected " + this.nameFinderSamples.size() + " name samples.");
        }
        GIS.PRINT_MESSAGES = false;
        Object samples = ObjectStreamUtils.createObjectStream(this.nameFinderSamples);
        InputStream additionalTrainingDataIn = null;
        Writer samplesOut = null;
        try {
            if (this.additionalTrainingDataFile != null) {
                if (this.logger.isLoggable(Level.INFO)) {
                    this.logger.log(Level.INFO, "Using additional training data file: " + this.additionalTrainingDataFile);
                }
                additionalTrainingDataIn = new FileInputStream(this.additionalTrainingDataFile);
                NameSampleDataStream additionalSamples = new NameSampleDataStream((ObjectStream)new PlainTextByLineStream((Reader)new InputStreamReader(additionalTrainingDataIn, this.additionalTrainingDataEncoding)));
                samples = ObjectStreamUtils.createObjectStream((ObjectStream[])new ObjectStream[]{samples, additionalSamples});
            }
            if (this.sampleTraceFile != null) {
                samplesOut = new OutputStreamWriter((OutputStream)new FileOutputStream(this.sampleTraceFile), this.sampleTraceFileEncoding);
                samples = new SampleTraceStream(samples, samplesOut);
            }
            Map resourceMap = this.featureGeneratorResourceDir != null ? TokenNameFinderTrainerTool.loadResources((File)this.featureGeneratorResourceDir) : Collections.emptyMap();
            nameModel = NameFinderME.train((String)this.language, null, (ObjectStream)samples, (TrainingParameters)this.trainingParams, (byte[])this.featureGeneratorDefinition, (Map)resourceMap);
        }
        finally {
            if (additionalTrainingDataIn != null) {
                additionalTrainingDataIn.close();
            }
            if (samplesOut != null) {
                samplesOut.close();
            }
        }
        this.nameFinderSamples = null;
        File modelFile = new File(this.getUimaContextAdmin().getResourceManager().getDataPath() + File.separatorChar + this.modelPath);
        OpennlpUtil.serialize((BaseModel)nameModel, modelFile);
        if (this.logger.isLoggable(Level.INFO)) {
            this.logger.log(Level.INFO, "Model was written to: " + modelFile.getAbsolutePath());
        }
    }

    public boolean isStateless() {
        return false;
    }

    public void destroy() {
        this.nameFinderSamples = null;
    }
}

