/*
 * Decompiled with CFR 0.152.
 */
package weka.classifiers.bayes;

import java.util.Enumeration;
import weka.classifiers.Classifier;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

/**
 * Complement class Naive Bayes classifier (Rennie, Shih, Teevan and Karger,
 * ICML 2003).
 *
 * <p>For every class {@code c} and every non-class attribute ("word")
 * {@code w} the model stores
 * {@code log((count of w outside c + s) / (count of all words outside c + s * (numAttributes - 1)))}
 * where {@code s} is the smoothing parameter. An instance is assigned to the
 * class whose weighted sum {@code sum_w value(w) * weight[c][w]} is smallest.
 *
 * <p>Options: {@code -N} normalizes the word weights of each class by the sum
 * of their absolute values; {@code -S <value>} sets the smoothing parameter
 * (default 1.0).
 *
 * <p>Field names are deliberately left as they are; NOTE(review): the class
 * declares a {@code serialVersionUID}, so renaming fields would presumably
 * break previously serialized models.
 */
public class ComplementNaiveBayes
        extends Classifier
        implements OptionHandler, WeightedInstancesHandler, TechnicalInformationHandler {

    static final long serialVersionUID = 7246302925903086397L;

    /** Log complement weights, indexed {@code [class][attribute]}; {@code null} until built. */
    private double[][] wordWeights;

    /** Additive smoothing value applied to every word count. */
    private double smoothingParameter = 1.0;

    /** Whether the weights of each class are divided by the sum of their absolute values. */
    private boolean m_normalizeWordWeights = false;

    /** Number of class values seen in the training data. */
    private int numClasses;

    /** Empty copy of the training data, kept for attribute and class names. */
    private Instances header;

    /**
     * Lists the command line options understood by this classifier.
     *
     * @return an enumeration of {@link Option} objects for {@code -N} and {@code -S}
     */
    @Override
    public Enumeration listOptions() {
        FastVector newVector = new FastVector(2);
        newVector.addElement(new Option("\tNormalize the word weights for each class\n", "N", 0, "-N"));
        newVector.addElement(new Option("\tSmoothing value to avoid zero WordGivenClass probabilities (default=1.0).\n", "S", 1, "-S"));
        return newVector.elements();
    }

    /**
     * Returns the current option settings.
     *
     * @return an array of length four: optionally {@code -N}, then {@code -S}
     *         and the smoothing value, padded with empty strings
     */
    @Override
    public String[] getOptions() {
        String[] options = new String[4];
        int current = 0;
        if (this.getNormalizeWordWeights()) {
            options[current++] = "-N";
        }
        options[current++] = "-S";
        options[current++] = Double.toString(this.smoothingParameter);
        // Unused trailing slots are filled with empty strings rather than left null.
        while (current < options.length) {
            options[current++] = "";
        }
        return options;
    }

    /**
     * Parses the given options. A missing {@code -S} resets the smoothing
     * parameter to 1.0.
     *
     * @param options the option strings to parse
     * @throws Exception if an option cannot be read or the smoothing value is
     *         not a parsable number
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        this.setNormalizeWordWeights(Utils.getFlag('N', options));
        String val = Utils.getOption('S', options);
        if (val.length() != 0) {
            this.setSmoothingParameter(Double.parseDouble(val));
        } else {
            this.setSmoothingParameter(1.0);
        }
    }

    /**
     * @return {@code true} if word weights are normalized per class
     */
    public boolean getNormalizeWordWeights() {
        return this.m_normalizeWordWeights;
    }

    /**
     * @param doNormalize whether to normalize the word weights of each class
     */
    public void setNormalizeWordWeights(boolean doNormalize) {
        this.m_normalizeWordWeights = doNormalize;
    }

    /**
     * @return the tip text for the normalizeWordWeights property
     */
    public String normalizeWordWeightsTipText() {
        return "Normalizes the word weights for each class.";
    }

    /**
     * @return the smoothing value added to every word count
     */
    public double getSmoothingParameter() {
        return this.smoothingParameter;
    }

    /**
     * @param val the smoothing value to add to every word count (stored as
     *        given; no range check is performed)
     */
    public void setSmoothingParameter(double val) {
        this.smoothingParameter = val;
    }

    /**
     * @return the tip text for the smoothingParameter property
     */
    public String smoothingParameterTipText() {
        return "Sets the smoothing parameter to avoid zero WordGivenClass probabilities (default=1.0).";
    }

    /**
     * @return a description of the classifier including its literature reference
     */
    public String globalInfo() {
        return "Class for building and using a Complement class Naive Bayes classifier.\n\nFor more information see, \n\n" + this.getTechnicalInformation().toString() + "\n\n" + "P.S.: TF, IDF and length normalization transforms, as " + "described in the paper, can be performed through " + "weka.filters.unsupervised.StringToWordVector.";
    }

    /**
     * @return the bibliographic record of the paper this classifier is based on
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(TechnicalInformation.Type.INPROCEEDINGS);
        result.setValue(TechnicalInformation.Field.AUTHOR, "Jason D. Rennie and Lawrence Shih and Jaime Teevan and David R. Karger");
        result.setValue(TechnicalInformation.Field.TITLE, "Tackling the Poor Assumptions of Naive Bayes Text Classifiers");
        result.setValue(TechnicalInformation.Field.BOOKTITLE, "ICML");
        result.setValue(TechnicalInformation.Field.YEAR, "2003");
        result.setValue(TechnicalInformation.Field.PAGES, "616-623");
        result.setValue(TechnicalInformation.Field.PUBLISHER, "AAAI Press");
        return result;
    }

    /**
     * Declares what data this classifier accepts: numeric attributes, missing
     * attribute values, a nominal class and missing class values.
     *
     * @return the capabilities of this classifier
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enable(Capabilities.Capability.NUMERIC_ATTRIBUTES);
        result.enable(Capabilities.Capability.MISSING_VALUES);
        result.enable(Capabilities.Capability.NOMINAL_CLASS);
        result.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        return result;
    }

    /**
     * Builds the complement word weights from the training data.
     *
     * <p>Instances without a class value are ignored, missing attribute values
     * are skipped, and every value is multiplied by the instance weight. The
     * model fields are only replaced once the whole build has succeeded.
     *
     * @param instances the training data
     * @throws Exception if the data fails the capabilities test or contains a
     *         negative (weighted) attribute value
     */
    @Override
    public void buildClassifier(Instances instances) throws Exception {
        this.getCapabilities().testWithFail(instances);

        // Work on a copy so that removing unlabeled rows does not touch the caller's data.
        instances = new Instances(instances);
        instances.deleteWithMissingClass();

        int classCount = instances.numClasses();
        int numAttributes = instances.numAttributes();
        // Ask the dataset directly; this does not require any instance to be present.
        int classIndex = instances.classIndex();

        double[][] occurrencesInClass = new double[classCount][numAttributes];
        double[] occurrencesOfWord = new double[numAttributes];
        double[] wordsPerClass = new double[classCount];
        double totalWordOccurrences = 0.0;

        for (int i = 0; i < instances.numInstances(); i++) {
            Instance instance = instances.instance(i);
            int docClass = (int) instance.value(classIndex);
            double instanceWeight = instance.weight();
            for (int a = 0; a < instance.numValues(); a++) {
                int attIndex = instance.index(a);
                // 'a' is a position in the stored-value list, not an attribute
                // index, so the sparse variant of the missing test is required.
                if (attIndex == classIndex || instance.isMissingSparse(a)) {
                    continue;
                }
                double numOccurrences = instance.valueSparse(a) * instanceWeight;
                if (numOccurrences < 0.0) {
                    throw new Exception("Numeric attribute values must all be greater or equal to zero.");
                }
                totalWordOccurrences += numOccurrences;
                wordsPerClass[docClass] += numOccurrences;
                occurrencesInClass[docClass][attIndex] += numOccurrences;
                occurrencesOfWord[attIndex] += numOccurrences;
            }
        }

        // One smoothing term per non-class attribute.
        double sumOfSmoothingParams = (double) (numAttributes - 1) * this.smoothingParameter;
        double[][] weights = new double[classCount][numAttributes];
        for (int c = 0; c < classCount; c++) {
            // Totals over all classes except c (the "complement" of c).
            double complementTotal = totalWordOccurrences - wordsPerClass[c];
            for (int w = 0; w < numAttributes; w++) {
                if (w == classIndex) {
                    continue;
                }
                double complementCount = occurrencesOfWord[w] - occurrencesInClass[c][w];
                weights[c][w] = Math.log((complementCount + this.smoothingParameter) / (complementTotal + sumOfSmoothingParams));
            }
        }

        if (this.m_normalizeWordWeights) {
            normalizeWeights(weights, classIndex);
        }

        this.numClasses = classCount;
        this.header = new Instances(instances, 0);
        this.wordWeights = weights;
    }

    /**
     * Divides the weights of every class by the sum of their absolute values,
     * leaving the class-attribute column untouched. A class whose weights are
     * all exactly zero is left as is, since dividing would turn them into NaN.
     */
    private static void normalizeWeights(double[][] weights, int classIndex) {
        for (int c = 0; c < weights.length; c++) {
            double[] row = weights[c];
            double sum = 0.0;
            for (int w = 0; w < row.length; w++) {
                if (w != classIndex) {
                    sum += Math.abs(row[w]);
                }
            }
            if (sum == 0.0) {
                continue;
            }
            for (int w = 0; w < row.length; w++) {
                if (w != classIndex) {
                    row[w] = row[w] / sum;
                }
            }
        }
    }

    /**
     * Classifies an instance as the class with the smallest weighted sum of
     * complement word weights. Missing attribute values are skipped so that
     * they cannot turn every class score into NaN. Ties go to the lowest
     * class index.
     *
     * @param instance the instance to classify
     * @return the index of the predicted class
     * @throws Exception if the classifier has not been built
     */
    @Override
    public double classifyInstance(Instance instance) throws Exception {
        if (this.wordWeights == null) {
            throw new Exception("Error. The classifier has not been built properly.");
        }
        int classIndex = instance.classIndex();
        int storedValues = instance.numValues();
        double[] valueForClass = new double[this.numClasses];
        for (int c = 0; c < this.numClasses; c++) {
            double[] weightsOfClass = this.wordWeights[c];
            double sumOfWordValues = 0.0;
            for (int w = 0; w < storedValues; w++) {
                int attIndex = instance.index(w);
                if (attIndex == classIndex || instance.isMissingSparse(w)) {
                    continue;
                }
                sumOfWordValues += instance.valueSparse(w) * weightsOfClass[attIndex];
            }
            valueForClass[c] = sumOfWordValues;
        }

        int minidx = 0;
        for (int i = 1; i < this.numClasses; i++) {
            if (valueForClass[i] < valueForClass[minidx]) {
                minidx = i;
            }
        }
        return minidx;
    }

    /**
     * Renders the weight table: one tab-separated column per class value and
     * one row per attribute (the class attribute included).
     *
     * @return the table, or a notice if the classifier has not been built
     */
    public String toString() {
        if (this.wordWeights == null) {
            return "The classifier hasn't been built yet.";
        }
        int numAttributes = this.header.numAttributes();
        StringBuilder result = new StringBuilder("The word weights for each class are: \n------------------------------------\n\t");
        for (int c = 0; c < this.numClasses; c++) {
            result.append(this.header.classAttribute().value(c)).append("\t");
        }
        result.append("\n");
        for (int w = 0; w < numAttributes; w++) {
            result.append(this.header.attribute(w).name()).append("\t");
            for (int c = 0; c < this.numClasses; c++) {
                result.append(Double.toString(this.wordWeights[c][w])).append("\t");
            }
            result.append("\n");
        }
        return result.toString();
    }

    /**
     * @return the revision string of this class
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 5516 $");
    }

    /**
     * Command line entry point; hands a new classifier and the arguments to
     * {@code runClassifier}.
     *
     * @param argv the command line options
     */
    public static void main(String[] argv) {
        ComplementNaiveBayes.runClassifier(new ComplementNaiveBayes(), argv);
    }
}

