/*
 * Decompiled with CFR 0.152.
 */
package weka.classifiers.meta;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.classifiers.Classifier;
import weka.classifiers.RandomizableIteratedSingleClassifierEnhancer;
import weka.classifiers.trees.J48;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.UnsupportedClassTypeException;
import weka.core.Utils;

/**
 * DECORATE meta-classifier.
 *
 * <p>Builds a committee of base classifiers. The first member is trained on the
 * training data alone; every further candidate is trained on the training data
 * plus a batch of artificial examples whose class labels are drawn with
 * probability inversely proportional to the current committee's prediction.
 * A candidate is kept only if the committee's error on the training data does
 * not increase.
 *
 * <p>Only nominal class attributes are supported (see {@link #getCapabilities()}).
 */
public class Decorate
extends RandomizableIteratedSingleClassifierEnhancer
implements TechnicalInformationHandler {
    /** Serialization version identifier. */
    static final long serialVersionUID = -6020193348750269931L;

    /** Committee members ({@link Classifier} instances); {@code null} until a model is built. */
    protected Vector m_Committee = null;

    /** Target number of committee members. */
    protected int m_DesiredSize = 15;

    /** Number of artificial examples per iteration, as a factor of the training set size. */
    protected double m_ArtSize = 1.0;

    /** Random source; re-created from the seed at the start of {@link #buildClassifier(Instances)}. */
    protected Random m_Random = new Random(0L);

    /**
     * Per-attribute statistics ({@code double[]}), indexed by attribute index.
     * Nominal: cumulative value probabilities (last value omitted).
     * Numeric: {@code {mean, standard deviation}}.
     * Any other attribute type: {@code null}.
     */
    protected Vector m_AttributeStats = null;

    /**
     * Creates a Decorate instance with J48 as the base classifier and 50 iterations.
     */
    public Decorate() {
        this.m_Classifier = new J48();
        this.m_NumIterations = 50;
    }

    /**
     * Returns the class name of the default base classifier.
     *
     * @return the fully qualified class name of J48
     */
    @Override
    protected String defaultClassifierString() {
        return "weka.classifiers.trees.J48";
    }

    /**
     * Lists the command-line options: the three options declared here (-E, -I, -R)
     * followed by the superclass options, minus one superclass entry.
     *
     * @return an enumeration of {@link Option} objects
     */
    @Override
    public Enumeration listOptions() {
        Vector<Option> newVector = new Vector<Option>(8);
        newVector.addElement(new Option("\tDesired size of ensemble.\n\t(default 15)", "E", 1, "-E"));
        newVector.addElement(new Option("\tNumber of iterations.\n\t(default 50)", "I", 1, "-I <num>"));
        newVector.addElement(new Option("\tFactor that determines number of artificial examples to generate.\n\tSpecified proportional to training set size.\n\t(default 1.0)", "R", 1, "-R"));
        Enumeration inherited = super.listOptions();
        while (inherited.hasMoreElements()) {
            newVector.addElement((Option)inherited.nextElement());
        }
        // Indices 0-2 are the options added above, so index 4 is the second option
        // contributed by the superclass.
        // NOTE(review): presumably this is the inherited -I entry, dropped because this
        // class lists its own -I with a different default - confirm against the
        // superclass's listOptions() ordering.
        newVector.remove(4);
        return newVector.elements();
    }

    /**
     * Parses the options handled by this class (-E desired ensemble size, -R artificial
     * data factor) and passes the remaining options to the superclass. An absent -E
     * resets the desired size to 15; an absent -R resets the factor to 1.0.
     *
     * <p>NOTE(review): -I is not parsed here but left to {@code super.setOptions}; verify
     * that the superclass's fallback for an absent -I matches the default of 50
     * advertised by {@link #listOptions()}.
     *
     * @param options the option array to parse
     * @throws Exception if an option value cannot be parsed or the superclass rejects an option
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        String desiredSize = Utils.getOption('E', options);
        this.setDesiredSize(desiredSize.length() != 0 ? Integer.parseInt(desiredSize) : 15);

        String artSize = Utils.getOption('R', options);
        this.setArtificialSize(artSize.length() != 0 ? Double.parseDouble(artSize) : 1.0);

        super.setOptions(options);
    }

    /**
     * Returns the current settings as an option array: -E and -R with their values,
     * followed by the superclass options.
     *
     * @return the option array
     */
    @Override
    public String[] getOptions() {
        String[] superOptions = super.getOptions();
        // Four slots for "-E <size> -R <factor>", then the inherited options.
        String[] options = new String[superOptions.length + 4];
        int current = 0;
        options[current++] = "-E";
        options[current++] = "" + this.getDesiredSize();
        options[current++] = "-R";
        options[current++] = "" + this.getArtificialSize();
        System.arraycopy(superOptions, 0, options, current, superOptions.length);
        return options;
    }

    /**
     * Returns the tip text for the desiredSize property.
     *
     * @return the tip text
     */
    public String desiredSizeTipText() {
        return "the desired number of member classifiers in the Decorate ensemble. Decorate may terminate before this size is reached (depending on the value of numIterations). Larger ensemble sizes usually lead to more accurate models, but increases training time and model complexity.";
    }

    /**
     * Returns the tip text for the numIterations property.
     *
     * @return the tip text
     */
    @Override
    public String numIterationsTipText() {
        return "the maximum number of Decorate iterations to run. Each iteration generates a classifier, but does not necessarily add it to the ensemble. Decorate stops when the desired ensemble size is reached. This parameter should be greater than equal to the desiredSize. If the desiredSize is not being reached it may help to increase this value.";
    }

    /**
     * Returns the tip text for the artificialSize property.
     *
     * @return the tip text
     */
    public String artificialSizeTipText() {
        return "determines the number of artificial examples to use during training. Specified as a proportion of the training data. Higher values can increase ensemble diversity.";
    }

    /**
     * Returns a description of this classifier, including its technical references.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "DECORATE is a meta-learner for building diverse ensembles of classifiers by using specially constructed artificial training examples. Comprehensive experiments have demonstrated that this technique is consistently more accurate than the base classifier, Bagging and Random Forests.Decorate also obtains higher accuracy than Boosting on small training sets, and achieves comparable performance on larger training sets. \n\nFor more details see: \n\n" + this.getTechnicalInformation().toString();
    }

    /**
     * Returns the bibliographic references for this algorithm: a conference paper
     * with one additional journal article attached.
     *
     * @return the technical information
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(TechnicalInformation.Type.INPROCEEDINGS);
        result.setValue(TechnicalInformation.Field.AUTHOR, "P. Melville and R. J. Mooney");
        result.setValue(TechnicalInformation.Field.TITLE, "Constructing Diverse Classifier Ensembles Using Artificial Training Examples");
        result.setValue(TechnicalInformation.Field.BOOKTITLE, "Eighteenth International Joint Conference on Artificial Intelligence");
        result.setValue(TechnicalInformation.Field.YEAR, "2003");
        result.setValue(TechnicalInformation.Field.PAGES, "505-510");

        TechnicalInformation additional = result.add(TechnicalInformation.Type.ARTICLE);
        additional.setValue(TechnicalInformation.Field.AUTHOR, "P. Melville and R. J. Mooney");
        additional.setValue(TechnicalInformation.Field.TITLE, "Creating Diversity in Ensembles Using Artificial Data");
        additional.setValue(TechnicalInformation.Field.JOURNAL, "Information Fusion: Special Issue on Diversity in Multiclassifier Systems");
        additional.setValue(TechnicalInformation.Field.YEAR, "2004");
        additional.setValue(TechnicalInformation.Field.NOTE, "submitted");
        return result;
    }

    /**
     * Returns the artificial-data factor.
     *
     * @return the factor applied to the training set size
     */
    public double getArtificialSize() {
        return this.m_ArtSize;
    }

    /**
     * Sets the artificial-data factor. The absolute value is used when training.
     *
     * @param newArtSize the factor applied to the training set size
     */
    public void setArtificialSize(double newArtSize) {
        this.m_ArtSize = newArtSize;
    }

    /**
     * Returns the desired ensemble size.
     *
     * @return the desired number of committee members
     */
    public int getDesiredSize() {
        return this.m_DesiredSize;
    }

    /**
     * Sets the desired ensemble size.
     *
     * @param newDesiredSize the desired number of committee members
     */
    public void setDesiredSize(int newDesiredSize) {
        this.m_DesiredSize = newDesiredSize;
    }

    /**
     * Returns the capabilities of this classifier: those of the superclass with the
     * class types restricted to nominal and the minimum number of instances set to
     * the desired ensemble size.
     *
     * @return the capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAllClasses();
        result.disableAllClassDependencies();
        result.enable(Capabilities.Capability.NOMINAL_CLASS);
        result.setMinimumNumberInstances(this.m_DesiredSize);
        return result;
    }

    /**
     * Builds the Decorate committee.
     *
     * <p>The configured base classifier is trained on the data and becomes the first
     * member. Then, until either the desired size or the iteration limit is reached,
     * a copy of the base classifier is trained on the data plus freshly generated,
     * inversely labelled artificial examples; the copy stays in the committee only if
     * the committee's training error does not exceed the previous error.
     *
     * @param data the training data (copied; instances with a missing class are dropped)
     * @throws Exception if no base classifier is set, the data fails the capability
     *         check, or training fails
     */
    @Override
    public void buildClassifier(Instances data) throws Exception {
        if (this.m_Classifier == null) {
            throw new Exception("A base classifier has not been specified!");
        }
        this.getCapabilities().testWithFail(data);

        // Work on a copy so the caller's data set is left untouched.
        data = new Instances(data);
        data.deleteWithMissingClass();

        // A seed of -1 selects a time-seeded generator.
        this.m_Random = this.m_Seed == -1 ? new Random() : new Random(this.m_Seed);

        int committeeSize = 1;
        int numTrials = 1;
        Instances divData = new Instances(data);

        // At least one artificial example is always generated.
        int artSize = (int)(Math.abs(this.m_ArtSize) * (double)divData.numInstances());
        if (artSize == 0) {
            artSize = 1;
        }

        this.computeStats(data);

        // The configured base classifier itself is the first committee member.
        this.m_Committee = new Vector();
        Classifier newClassifier = this.m_Classifier;
        newClassifier.buildClassifier(divData);
        this.m_Committee.add(newClassifier);
        double eComm = this.computeError(divData);
        if (this.m_Debug) {
            System.out.println("Initialize:\tClassifier " + committeeSize + " added to ensemble. Ensemble error = " + eComm);
        }

        while (committeeSize < this.m_DesiredSize && numTrials < this.m_NumIterations) {
            // Temporarily append inversely labelled artificial data to the training set.
            Instances artData = this.generateArtificialData(artSize, data);
            this.labelData(artData);
            this.addInstances(divData, artData);

            newClassifier = Classifier.makeCopies(this.m_Classifier, 1)[0];
            newClassifier.buildClassifier(divData);

            // Restore the training set before measuring the committee error.
            this.removeInstances(divData, artSize);

            // Tentatively add the candidate; drop it again if the error got worse.
            this.m_Committee.add(newClassifier);
            double currError = this.computeError(divData);
            if (currError <= eComm) {
                ++committeeSize;
                eComm = currError;
                if (this.m_Debug) {
                    System.out.println("Iteration: " + (1 + numTrials) + "\tClassifier " + committeeSize + " added to ensemble. Ensemble error = " + eComm);
                }
            } else {
                this.m_Committee.removeElementAt(this.m_Committee.size() - 1);
            }
            ++numTrials;
        }
    }

    /**
     * Computes the per-attribute statistics used to generate artificial data and
     * stores them in {@link #m_AttributeStats}, one entry per attribute index.
     *
     * <p>Nominal attributes get the cumulative distribution of their values (counts
     * incremented by one before normalising; the last value is omitted). Numeric
     * attributes get {@code {mean, standard deviation}}. For any other attribute type
     * a message is printed to stderr and a {@code null} placeholder is stored so that
     * later entries stay aligned with their attribute index.
     *
     * @param data the training data
     * @throws Exception if a nominal attribute has fewer than two values
     */
    protected void computeStats(Instances data) throws Exception {
        int numAttributes = data.numAttributes();
        this.m_AttributeStats = new Vector(numAttributes);
        for (int j = 0; j < numAttributes; j++) {
            if (data.attribute(j).isNominal()) {
                int[] nomCounts = data.attributeStats(j).nominalCounts;
                if (nomCounts.length < 2) {
                    throw new Exception("Nominal attribute has less than two distinct values!");
                }
                // Add one to every count so that no value has zero probability.
                double[] counts = new double[nomCounts.length];
                for (int i = 0; i < counts.length; i++) {
                    counts[i] = nomCounts[i] + 1;
                }
                Utils.normalize(counts);
                // Cumulative probabilities; the final entry is left out.
                double[] stats = new double[counts.length - 1];
                stats[0] = counts[0];
                for (int i = 1; i < stats.length; i++) {
                    stats[i] = stats[i - 1] + counts[i];
                }
                this.m_AttributeStats.add(j, stats);
            } else if (data.attribute(j).isNumeric()) {
                double[] stats = new double[]{data.meanOrMode(j), Math.sqrt(data.variance(j))};
                this.m_AttributeStats.add(j, stats);
            } else {
                System.err.println("Decorate can only handle numeric and nominal values.");
                // Keep the vector aligned with attribute indices: without a placeholder
                // the next add(j + 1, ...) would be past the end of the vector and throw.
                this.m_AttributeStats.add(j, null);
            }
        }
    }

    /**
     * Generates artificial instances from the statistics computed by
     * {@link #computeStats(Instances)}.
     *
     * <p>Nominal values are sampled from the stored cumulative distribution, numeric
     * values from a Gaussian with the stored mean and standard deviation. Attributes
     * of any other type keep the array default of 0.0 and a message is printed to
     * stderr. Every instance has weight 1.0.
     *
     * @param artSize the number of instances to generate
     * @param data the training data whose header describes the attributes
     * @return a new data set containing the artificial instances
     */
    protected Instances generateArtificialData(int artSize, Instances data) {
        int numAttributes = data.numAttributes();
        Instances artData = new Instances(data, artSize);
        for (int i = 0; i < artSize; i++) {
            double[] att = new double[numAttributes];
            for (int j = 0; j < numAttributes; j++) {
                if (data.attribute(j).isNominal()) {
                    double[] stats = (double[])this.m_AttributeStats.get(j);
                    att[j] = this.selectIndexProbabilistically(stats);
                } else if (data.attribute(j).isNumeric()) {
                    double[] stats = (double[])this.m_AttributeStats.get(j);
                    att[j] = this.m_Random.nextGaussian() * stats[1] + stats[0];
                } else {
                    System.err.println("Decorate can only handle numeric and nominal values.");
                }
            }
            artData.add(new Instance(1.0, att));
        }
        return artData;
    }

    /**
     * Assigns a class value to every instance, drawn by {@link #inverseLabel(double[])}
     * from the current committee's class distribution for that instance.
     *
     * @param artData the instances to label (modified in place)
     * @throws Exception if the committee's distribution cannot be computed
     */
    protected void labelData(Instances artData) throws Exception {
        for (int i = 0; i < artData.numInstances(); i++) {
            Instance curr = artData.instance(i);
            double[] probs = this.distributionForInstance(curr);
            curr.setClassValue(this.inverseLabel(probs));
        }
    }

    /**
     * Draws a class index with probability inversely proportional to the given
     * class probabilities.
     *
     * @param probs the class membership probabilities
     * @return a class index in the range {@code [0, probs.length - 1]}
     * @throws Exception declared for subclasses; not thrown by this implementation
     */
    protected int inverseLabel(double[] probs) throws Exception {
        double[] invProbs = new double[probs.length];
        for (int i = 0; i < probs.length; i++) {
            // A zero probability gets a very large finite weight instead of 1/0;
            // presumably the division by the class count keeps the sum from overflowing.
            invProbs[i] = probs[i] == 0.0 ? Double.MAX_VALUE / (double)probs.length : 1.0 / probs[i];
        }
        Utils.normalize(invProbs);

        double[] cdf = new double[invProbs.length];
        cdf[0] = invProbs[0];
        for (int i = 1; i < invProbs.length; i++) {
            cdf[i] = invProbs[i] + cdf[i - 1];
        }
        if (Double.isNaN(cdf[invProbs.length - 1])) {
            System.err.println("Cumulative class membership probability is NaN!");
        }

        // This CDF has one entry per class, so the "past the end" result that
        // selectIndexProbabilistically can return (when rounding leaves the last
        // entry below the random draw) would be an invalid class index; clamp it.
        int index = this.selectIndexProbabilistically(cdf);
        return Math.min(index, cdf.length - 1);
    }

    /**
     * Draws an index from a cumulative distribution: the first index whose cumulative
     * value is not smaller than a uniform random number, or {@code cdf.length} if the
     * random number exceeds every entry.
     *
     * @param cdf the cumulative probabilities
     * @return an index in the range {@code [0, cdf.length]}
     */
    protected int selectIndexProbabilistically(double[] cdf) {
        double rnd = this.m_Random.nextDouble();
        int index = 0;
        while (index < cdf.length && rnd > cdf[index]) {
            ++index;
        }
        return index;
    }

    /**
     * Deletes the last {@code numRemove} instances from the data set.
     *
     * @param data the data set to shrink (modified in place)
     * @param numRemove the number of trailing instances to delete
     */
    protected void removeInstances(Instances data, int numRemove) {
        int last = data.numInstances() - 1;
        for (int i = last; i > last - numRemove; i--) {
            data.delete(i);
        }
    }

    /**
     * Appends every instance of {@code newData} to {@code data}.
     *
     * @param data the data set to extend (modified in place)
     * @param newData the instances to append
     */
    protected void addInstances(Instances data, Instances newData) {
        for (int i = 0; i < newData.numInstances(); i++) {
            data.add(newData.instance(i));
        }
    }

    /**
     * Computes the fraction of instances whose class value differs from the
     * committee's prediction.
     *
     * @param data the instances to evaluate
     * @return the number of misclassified instances divided by the number of instances
     * @throws Exception if an instance cannot be classified
     */
    protected double computeError(Instances data) throws Exception {
        double error = 0.0;
        int numInstances = data.numInstances();
        for (int i = 0; i < numInstances; i++) {
            Instance curr = data.instance(i);
            if (curr.classValue() != (double)((int)this.classifyInstance(curr))) {
                error += 1.0;
            }
        }
        return error / (double)numInstances;
    }

    /**
     * Returns the committee's class distribution for an instance: the sum of the
     * members' distributions, normalised unless the sum is zero.
     *
     * @param instance the instance to classify
     * @return the class probabilities (all zero if every member returned zeros)
     * @throws Exception if the class attribute is numeric or a member fails
     */
    @Override
    public double[] distributionForInstance(Instance instance) throws Exception {
        if (instance.classAttribute().isNumeric()) {
            throw new UnsupportedClassTypeException("Decorate can't handle a numeric class!");
        }
        double[] sums = new double[instance.numClasses()];
        for (int i = 0; i < this.m_Committee.size(); i++) {
            Classifier curr = (Classifier)this.m_Committee.get(i);
            double[] newProbs = curr.distributionForInstance(instance);
            for (int j = 0; j < newProbs.length; j++) {
                sums[j] += newProbs[j];
            }
        }
        // Normalising an all-zero vector is skipped; the zeros are returned as they are.
        if (!Utils.eq(Utils.sum(sums), 0.0)) {
            Utils.normalize(sums);
        }
        return sums;
    }

    /**
     * Returns a textual description of the committee: each member's own description
     * followed by the number of members.
     *
     * @return the description, or a fixed message if no model has been built
     */
    public String toString() {
        if (this.m_Committee == null) {
            return "Decorate: No model built yet.";
        }
        StringBuilder text = new StringBuilder();
        text.append("Decorate base classifiers: \n\n");
        for (int i = 0; i < this.m_Committee.size(); i++) {
            text.append(((Classifier)this.m_Committee.get(i)).toString()).append("\n\n");
        }
        text.append("Number of classifier in the ensemble: " + this.m_Committee.size() + "\n");
        return text.toString();
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 8037 $");
    }

    /**
     * Command-line entry point.
     *
     * @param argv the command-line options
     */
    public static void main(String[] argv) {
        Decorate.runClassifier(new Decorate(), argv);
    }
}

