package weka.filters.unsupervised.instance;

import java.util.Arrays;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Vector;
import org.xmlpull.v1.XmlPullParser;
import weka.core.Attribute;
import weka.core.AttributeStats;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SingleIndex;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

/* loaded from: input_file:pmmlDevelopment/lib/weka.jar:weka/filters/unsupervised/instance/RemoveFrequentValues.class */
/**
 * Filter that keeps only those instances whose value for a chosen nominal
 * attribute belongs to a retained set of values. The retained set is built
 * from the value frequencies observed in the first batch (either the most
 * frequent or the least frequent values, see {@link #determineValues(Instances)}).
 *
 * <p>Valid options (as parsed by {@link #setOptions(String[])}):</p>
 * <ul>
 *   <li>{@code -C <num>} attribute index used for selection (default "last")</li>
 *   <li>{@code -N <num>} number of values to retain (default 2)</li>
 *   <li>{@code -L} retain the least frequent values instead of the most frequent</li>
 *   <li>{@code -H} rebuild the attribute header so it only lists retained values</li>
 *   <li>{@code -V} invert matching sense</li>
 * </ul>
 */
public class RemoveFrequentValues extends Filter implements OptionHandler, UnsupervisedFilter {
    /** For serialization. */
    static final long serialVersionUID = -2447432930070059511L;

    /**
     * Maps an old nominal value index to its index in the rebuilt header, or -1
     * when the value was dropped. Only populated by {@link #modifyHeader(Instances)}.
     */
    protected int[] m_NominalMapping;

    /** Index of the attribute the selection is based on. */
    private SingleIndex m_AttIndex = new SingleIndex("last");

    /** Number of values to retain. */
    protected int m_NumValues = 2;

    /** Whether the least frequent values are retained instead of the most frequent. */
    protected boolean m_LeastValues = false;

    /** Whether the matching sense is inverted. */
    protected boolean m_Invert = false;

    /** Whether the header of the selection attribute is rebuilt from the retained values. */
    protected boolean m_ModifyHeader = false;

    /** Labels of the retained values; {@code null} until they have been determined. */
    protected HashSet<String> m_Values = null;

    /**
     * Returns a description of this filter.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Determines which values (frequent or infrequent ones) of an (nominal) attribute are retained and filters the instances accordingly. In case of values with the same frequency, they are kept in the way they appear in the original instances object. E.g. if you have the values \"1,2,3,4\" with the frequencies \"10,5,5,3\" and you chose to keep the 2 most common values, the values \"1,2\" would be returned, since the value \"2\" comes before \"3\", even though they have the same frequency.";
    }

    /**
     * Lists the command-line options understood by this filter.
     *
     * @return an enumeration of {@link Option} objects
     */
    @Override // weka.core.OptionHandler
    public Enumeration listOptions() {
        Vector<Option> options = new Vector<Option>(5);
        options.addElement(new Option("\tChoose attribute to be used for selection.", "C", 1, "-C <num>"));
        options.addElement(new Option("\tNumber of values to retain for the sepcified attribute, \n\ti.e. the ones with the most instances (default 2).", "N", 1, "-N <num>"));
        options.addElement(new Option("\tInstead of values with the most instances the ones with the \n\tleast are retained.\n", "L", 0, "-L"));
        options.addElement(new Option("\tWhen selecting on nominal attributes, removes header\n\treferences to excluded values.", "H", 0, "-H"));
        options.addElement(new Option("\tInvert matching sense.", "V", 0, "-V"));
        return options.elements();
    }

    /**
     * Parses the given options. Missing {@code -C} falls back to "last" and
     * missing {@code -N} falls back to 2. If an input format is already set,
     * it is re-applied so that the new settings take effect.
     *
     * @param strArr the options to parse
     * @throws Exception if an option is invalid (e.g. {@code -N} is not an integer)
     *         or re-applying the input format fails
     */
    @Override // weka.core.OptionHandler
    public void setOptions(String[] strArr) throws Exception {
        String attributeIndex = Utils.getOption('C', strArr);
        setAttributeIndex(attributeIndex.length() != 0 ? attributeIndex : "last");

        String numValues = Utils.getOption('N', strArr);
        setNumValues(numValues.length() != 0 ? Integer.parseInt(numValues) : 2);

        setUseLeastValues(Utils.getFlag('L', strArr));
        setModifyHeader(Utils.getFlag('H', strArr));
        setInvertSelection(Utils.getFlag('V', strArr));

        if (getInputFormat() != null) {
            setInputFormat(getInputFormat());
        }
    }

    /**
     * Returns the current settings as an option array that can be passed back
     * to {@link #setOptions(String[])}. The array always has seven entries;
     * unused trailing slots are filled with empty strings.
     *
     * @return the current options
     */
    @Override // weka.core.OptionHandler
    public String[] getOptions() {
        String[] options = new String[7];
        int current = 0;

        options[current++] = "-C";
        options[current++] = "" + getAttributeIndex();
        options[current++] = "-N";
        options[current++] = "" + getNumValues();
        if (getUseLeastValues()) {
            // Must be "-L" (the flag setOptions() reads for this setting) and must
            // occupy the next free slot so that no null entry is left in the array.
            options[current++] = "-L";
        }
        if (getModifyHeader()) {
            options[current++] = "-H";
        }
        if (getInvertSelection()) {
            options[current++] = "-V";
        }
        while (current < options.length) {
            options[current++] = "";
        }
        return options;
    }

    /**
     * Returns the tip text for the attribute index property.
     *
     * @return the tip text
     */
    public String attributeIndexTipText() {
        return "Choose attribute to be used for selection (default last).";
    }

    /**
     * Returns the index specification of the selection attribute.
     *
     * @return the index as a string
     */
    public String getAttributeIndex() {
        return this.m_AttIndex.getSingleIndex();
    }

    /**
     * Sets the index specification of the selection attribute.
     *
     * @param str the index as a string
     */
    public void setAttributeIndex(String str) {
        this.m_AttIndex.setSingleIndex(str);
    }

    /**
     * Returns the tip text for the number-of-values property.
     *
     * @return the tip text
     */
    public String numValuesTipText() {
        return "The number of values to retain.";
    }

    /**
     * Returns the number of values to retain.
     *
     * @return the number of values
     */
    public int getNumValues() {
        return this.m_NumValues;
    }

    /**
     * Sets the number of values to retain.
     *
     * @param i the number of values
     */
    public void setNumValues(int i) {
        this.m_NumValues = i;
    }

    /**
     * Returns the tip text for the least-values property.
     *
     * @return the tip text
     */
    public String useLeastValuesTipText() {
        return "Retains values with least instance instead of most.";
    }

    /**
     * Returns whether the least frequent values are retained.
     *
     * @return true if the least frequent values are retained
     */
    public boolean getUseLeastValues() {
        return this.m_LeastValues;
    }

    /**
     * Sets whether the least frequent values are retained.
     *
     * @param z true to retain the least frequent values
     */
    public void setUseLeastValues(boolean z) {
        this.m_LeastValues = z;
    }

    /**
     * Returns the tip text for the modify-header property.
     *
     * @return the tip text
     */
    public String modifyHeaderTipText() {
        return "When selecting on nominal attributes, removes header references to excluded values.";
    }

    /**
     * Returns whether the header is rebuilt from the retained values.
     *
     * @return true if the header is modified
     */
    public boolean getModifyHeader() {
        return this.m_ModifyHeader;
    }

    /**
     * Sets whether the header is rebuilt from the retained values.
     *
     * @param z true to modify the header
     */
    public void setModifyHeader(boolean z) {
        this.m_ModifyHeader = z;
    }

    /**
     * Returns the tip text for the invert-selection property.
     *
     * @return the tip text
     */
    public String invertSelectionTipText() {
        return "Invert matching sense.";
    }

    /**
     * Returns whether the matching sense is inverted.
     *
     * @return true if inverted
     */
    public boolean getInvertSelection() {
        return this.m_Invert;
    }

    /**
     * Sets whether the matching sense is inverted.
     *
     * @param z true to invert
     */
    public void setInvertSelection(boolean z) {
        this.m_Invert = z;
    }

    /**
     * Tells whether the selection attribute of the current input format is nominal.
     *
     * @return false if no input format is set, otherwise whether the attribute is nominal
     */
    public boolean isNominal() {
        if (getInputFormat() == null) {
            return false;
        }
        return getInputFormat().attribute(this.m_AttIndex.getIndex()).isNominal();
    }

    /**
     * Determines the labels to retain and stores them in {@link #m_Values}.
     *
     * <p>The number of labels kept is {@code m_NumValues} (or, when the selection
     * is inverted, the number of labels minus {@code m_NumValues}), clamped to
     * the range 1..number of labels. The counts are sorted to find the frequency
     * range covering that many labels at the low end (least values) or high end
     * (most values). Labels are then collected in header order, so ties are
     * resolved in favour of labels that appear first.</p>
     *
     * <p>The set is left empty if {@code instances} is {@code null} or no
     * nominal counts are available for the attribute.</p>
     *
     * @param instances the data to derive the value frequencies from
     */
    public void determineValues(Instances instances) {
        this.m_Values = new HashSet<String>();
        // Guard before the first dereference; the set stays empty for null input.
        if (instances == null) {
            return;
        }
        this.m_AttIndex.setUpper(instances.numAttributes() - 1);
        int attIndex = this.m_AttIndex.getIndex();

        int[] counts = instances.attributeStats(attIndex).nominalCounts;
        // NOTE(review): nominalCounts is presumably unset for non-nominal attributes;
        // either way there is nothing to select from, so keep the set empty.
        if (counts == null || counts.length == 0) {
            return;
        }

        int numToKeep = this.m_Invert ? counts.length - this.m_NumValues : this.m_NumValues;
        numToKeep = Math.max(1, Math.min(numToKeep, counts.length));

        // Sort a copy so the per-label counts stay aligned with the label indices.
        int[] sorted = counts.clone();
        Arrays.sort(sorted);

        int minCount;
        int maxCount;
        if (this.m_LeastValues) {
            minCount = sorted[0];
            maxCount = sorted[numToKeep - 1];
        } else {
            minCount = sorted[sorted.length - numToKeep];
            maxCount = sorted[sorted.length - 1];
        }

        Attribute attribute = instances.attribute(attIndex);
        for (int i = 0; i < counts.length && this.m_Values.size() < numToKeep; i++) {
            if (counts[i] >= minCount && counts[i] <= maxCount) {
                this.m_Values.add(attribute.value(i));
            }
        }
    }

    /**
     * Builds an empty copy of the current input format in which the selection
     * attribute is replaced by a nominal attribute of the same name that lists
     * only the retained labels, and fills {@link #m_NominalMapping} accordingly.
     * The new labels are listed in the iteration order of {@link #m_Values}.
     *
     * @param instances not used; the header is always derived from the input format
     * @return the modified header
     */
    protected Instances modifyHeader(Instances instances) {
        int attIndex = this.m_AttIndex.getIndex();
        Instances header = new Instances(getInputFormat(), 0);
        Attribute oldAttribute = header.attribute(attIndex);

        // Old label indices of the retained values, in set iteration order.
        int[] selection = new int[this.m_Values.size()];
        int next = 0;
        for (String label : this.m_Values) {
            selection[next++] = oldAttribute.indexOfValue(label);
        }

        FastVector labels = new FastVector();
        for (int oldIndex : selection) {
            labels.addElement(oldAttribute.value(oldIndex));
        }
        header.deleteAttributeAt(attIndex);
        header.insertAttributeAt(new Attribute(oldAttribute.name(), labels), attIndex);

        // Old index -> new index; -1 marks labels that were removed.
        this.m_NominalMapping = new int[oldAttribute.numValues()];
        Arrays.fill(this.m_NominalMapping, -1);
        for (int newIndex = 0; newIndex < selection.length; newIndex++) {
            this.m_NominalMapping[selection[newIndex]] = newIndex;
        }
        return header;
    }

    /**
     * Returns the capabilities of this filter: all attribute and class types,
     * missing values, missing class values and data without a class.
     *
     * @return the capabilities
     */
    @Override // weka.filters.Filter, weka.core.CapabilitiesHandler
    public Capabilities getCapabilities() {
        Capabilities capabilities = super.getCapabilities();
        capabilities.disableAll();
        capabilities.enableAllAttributes();
        capabilities.enable(Capabilities.Capability.MISSING_VALUES);
        capabilities.enableAllClasses();
        capabilities.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        capabilities.enable(Capabilities.Capability.NO_CLASS);
        return capabilities;
    }

    /**
     * Sets the input format and discards any previously determined values.
     *
     * @param instances the input format
     * @return always false; the output format is only set once the first batch is finished
     * @throws UnsupportedAttributeTypeException if the selection attribute is not nominal
     * @throws Exception if the superclass rejects the input format
     */
    @Override // weka.filters.Filter
    public boolean setInputFormat(Instances instances) throws Exception {
        super.setInputFormat(instances);
        this.m_AttIndex.setUpper(instances.numAttributes() - 1);
        if (!isNominal()) {
            throw new UnsupportedAttributeTypeException("Can only handle nominal attributes.");
        }
        this.m_Values = null;
        return false;
    }

    /**
     * Sets the output format and pushes every instance held by the input format
     * whose selection value is in {@link #m_Values}. When the header is modified,
     * the value index of each pushed instance is remapped via
     * {@link #m_NominalMapping}. If no values have been determined yet, the
     * output format is reset to {@code null}.
     */
    protected void setOutputFormat() {
        if (this.m_Values == null) {
            setOutputFormat(null);
            return;
        }
        boolean modifyHeader = getModifyHeader();
        setOutputFormat(modifyHeader ? modifyHeader(getInputFormat()) : new Instances(getInputFormat(), 0));

        int attIndex = this.m_AttIndex.getIndex();
        Instances input = getInputFormat();
        for (int i = 0; i < input.numInstances(); i++) {
            Instance instance = input.instance(i);
            if (!this.m_Values.contains(instance.stringValue(attIndex))) {
                continue;
            }
            if (modifyHeader) {
                instance.setValue(attIndex, this.m_NominalMapping[(int) instance.value(attIndex)]);
            }
            push(instance);
        }
    }

    /**
     * Inputs an instance. During the first batch the instance is only buffered;
     * once the first batch is done, instances are pushed to the output as they are.
     *
     * @param instance the instance to process
     * @return true if the instance was pushed to the output, false if it was buffered
     * @throws IllegalStateException if no input format has been set
     */
    @Override // weka.filters.Filter
    public boolean input(Instance instance) {
        if (getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (this.m_NewBatch) {
            resetQueue();
            this.m_NewBatch = false;
        }
        if (isFirstBatchDone()) {
            push(instance);
            return true;
        }
        bufferInput(instance);
        return false;
    }

    /**
     * Signals the end of a batch. If the retained values have not been
     * determined yet, they are computed from the buffered input and the output
     * format is set, which also pushes the matching instances.
     *
     * @return true if there are instances pending output
     * @throws IllegalStateException if no input format has been set
     */
    @Override // weka.filters.Filter
    public boolean batchFinished() {
        if (getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (this.m_Values == null) {
            determineValues(getInputFormat());
            setOutputFormat();
        }
        flushInput();
        this.m_NewBatch = true;
        this.m_FirstBatchDone = true;
        return numPendingOutput() != 0;
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override // weka.filters.Filter, weka.core.RevisionHandler
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 5499 $");
    }

    /**
     * Runs the filter from the command line.
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        runFilter(new RemoveFrequentValues(), strArr);
    }
}
