/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.classify.tui;

import cc.mallet.pipe.CountsToFeatureSequencePipe;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.iterator.CsvIterator;
import cc.mallet.types.Alphabet;
import cc.mallet.types.AlphabetFactory;
import cc.mallet.types.InstanceList;
import cc.mallet.util.CommandOption;
import cc.mallet.util.MalletLogger;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.logging.Logger;

/**
 * Command-line tool that reads one or more text files, one instance per line,
 * and writes them out as a single serialized {@link InstanceList}.
 *
 * <p>Each line is split with a regular expression into name, label and data
 * groups. The data is passed through a {@link CountsToFeatureSequencePipe}
 * whose data alphabet is loaded from a vocabulary file and frozen before any
 * instance is read.
 */
public class MultFileToSequences {
    protected static Logger logger = MalletLogger.getLogger(MultFileToSequences.class.getName());
    // Command-line options. They are registered against this class and populated by CommandOption.process(...) in main.
    static CommandOption.SpacedStrings inputFiles = new CommandOption.SpacedStrings(MultFileToSequences.class, "input", "FILE [FILE ...]", true, null, "The file(s) containing data, one instance per line", null);
    static CommandOption.File outputFile = new CommandOption.File(MultFileToSequences.class, "output", "FILE", true, new File("mallet.data"), "Write the instance list to this file", null);
    static CommandOption.File vocabularyFile = new CommandOption.File(MultFileToSequences.class, "vocabulary", "FILE", true, null, "Read newline-separated words from this file.", null);
    static CommandOption.String lineRegex = new CommandOption.String(MultFileToSequences.class, "line-regex", "REGEX", true, "^([^\\t]*)\\t([^\\t]*)\\t(.*)", "Regular expression containing regex-groups for label, name and data.", null);
    static CommandOption.Integer nameGroup = new CommandOption.Integer(MultFileToSequences.class, "name", "INTEGER", true, 1, "The index of the group containing the instance name.\n   Use 0 to indicate that this field is not used.", null);
    static CommandOption.Integer labelGroup = new CommandOption.Integer(MultFileToSequences.class, "label", "INTEGER", true, 2, "The index of the group containing the label string.\n   Use 0 to indicate that this field is not used.", null);
    static CommandOption.Integer dataGroup = new CommandOption.Integer(MultFileToSequences.class, "data", "INTEGER", true, 3, "The index of the group containing the data.", null);

    /**
     * Parses the command line, loads the vocabulary, imports every input file
     * through the pipe and saves the resulting instance list to the output file.
     *
     * @param args command-line arguments, handled by {@link CommandOption#process}
     * @throws IllegalArgumentException if {@code --vocabulary} or {@code --input} was not supplied
     * @throws Exception if reading an input file, loading the vocabulary or processing an instance fails
     */
    public static void main(String[] args) throws Exception {
        CommandOption.setSummary(MultFileToSequences.class, "Tool for importing text in id:count format as Mallet feature sequences.");
        CommandOption.process(MultFileToSequences.class, args);

        // Both options default to null; fail with a message that names the missing
        // option instead of a bare NullPointerException further down.
        if (vocabularyFile.value == null) {
            throw new IllegalArgumentException("You must specify a vocabulary file with --vocabulary FILE");
        }
        if (inputFiles.value == null) {
            throw new IllegalArgumentException("You must specify at least one input file with --input FILE [FILE ...]");
        }

        // Freeze the vocabulary before any instance is read.
        Alphabet alphabet = AlphabetFactory.loadFromFile(vocabularyFile.value);
        alphabet.stopGrowth();

        ArrayList<Pipe> pipes = new ArrayList<Pipe>();
        CountsToFeatureSequencePipe pipe = new CountsToFeatureSequencePipe();
        pipe.setDataAlphabet(alphabet);
        pipes.add(pipe);
        InstanceList instances = new InstanceList(new SerialPipes(pipes));

        for (String filename : inputFiles.value) {
            logger.info("Loading " + filename);
            // NOTE(review): FileReader decodes with the platform default charset; confirm
            // that is intended for the input files.
            Reader input = new FileReader(filename);
            try {
                CsvIterator reader = new CsvIterator(input, lineRegex.value, dataGroup.value, labelGroup.value, nameGroup.value);
                instances.addThruPipe(reader);
            } finally {
                // Release the file handle once this file's instances have been added,
                // or if adding them failed.
                input.close();
            }
        }
        instances.save(outputFile.value);
    }
}

