package weka.core.converters;

import com.aliasi.util.Strings;
import com.sleepycat.asm.Opcodes;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.net.URL;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.Utils;

/* loaded from: input_file:lib/weka-3.7.9.jar:weka/core/converters/ArffLoader.class */
public class ArffLoader extends AbstractFileLoader implements BatchConverter, IncrementalConverter, URLSourcedLoader {
    /** Serialization version identifier for this loader. */
    static final long serialVersionUID = 2726929550544048587L;
    /** Primary file extension, taken from {@link Instances#FILE_EXTENSION}. */
    public static String FILE_EXTENSION = Instances.FILE_EXTENSION;
    /** Secondary extension: the primary extension followed by ".gz" (presumably gzip-compressed files). */
    public static String FILE_EXTENSION_COMPRESSED = FILE_EXTENSION + ".gz";
    /** Source URL as text; the value "http://" is treated by reset() as "no URL configured". */
    protected String m_URL = "http://";
    /** Reader over the current source; transient, and set to null by getNextInstance() once exhausted. */
    protected transient Reader m_sourceReader = null;
    /** Parser bound to the source reader; created by getStructure() and cleared by reset(). */
    protected transient ArffReader m_ArffReader = null;

    /* loaded from: input_file:lib/weka-3.7.9.jar:weka/core/converters/ArffLoader$ArffReader.class */
    public static class ArffReader implements RevisionHandler {
        /** Tokenizer over the ARFF text; configured by initTokenizer(). */
        protected StreamTokenizer m_Tokenizer;
        /** Scratch buffer for the values of a sparse instance, sized to the attribute count. */
        protected double[] m_ValueBuffer;
        /** Scratch buffer for the attribute indices of a sparse instance, sized to the attribute count. */
        protected int[] m_IndicesBuffer;
        /** Indices of all string attributes in the header; filled by initBuffers(). */
        protected List<Integer> m_stringAttIndices;
        /** Header (and, for the reading constructors, the data) being populated. */
        protected Instances m_Data;
        /** Line offset added to the tokenizer's own line number when positions are reported. */
        protected int m_Lines;
        /** Batch flag supplied to the constructors; together with m_retainStringValues it selects how string values are stored. */
        protected boolean m_batchMode;
        /** When false (and m_batchMode is false) string values are set on the attribute rather than appended to it. */
        protected boolean m_retainStringValues;

        public ArffReader(Reader reader) throws IOException {
            this.m_batchMode = true;
            this.m_retainStringValues = true;
            this.m_Tokenizer = new StreamTokenizer(reader);
            initTokenizer();
            readHeader(1000);
            initBuffers();
            while (true) {
                Instance readInstance = readInstance(this.m_Data);
                if (readInstance == null) {
                    compactify();
                    return;
                }
                this.m_Data.add(readInstance);
            }
        }

        /**
         * Reads only the header, delegating to the three-argument constructor with the batch flag set to true.
         *
         * @param reader source of the ARFF text
         * @param i initial capacity passed on to the header reader; must not be negative
         * @throws IOException if the header cannot be parsed
         */
        public ArffReader(Reader reader, int i) throws IOException {
            this(reader, i, true);
        }

        /**
         * Reads only the header; instances are fetched later through readInstance.
         *
         * @param reader source of the ARFF text
         * @param i initial capacity of the header's instance container; must not be negative
         * @param z batch flag; string values are retained exactly when this is true
         * @throws IOException if the header cannot be parsed
         * @throws IllegalArgumentException if {@code i} is negative
         */
        public ArffReader(Reader reader, int i, boolean z) throws IOException {
            // Both flags simply follow the batch argument.
            this.m_batchMode = z;
            this.m_retainStringValues = z;
            if (i < 0) {
                throw new IllegalArgumentException("Capacity has to be positive!");
            }
            this.m_Tokenizer = new StreamTokenizer(reader);
            initTokenizer();
            readHeader(i);
            initBuffers();
        }

        public ArffReader(Reader reader, Instances instances, int i) throws IOException {
            this(reader, instances, i, 100, true);
            while (true) {
                Instance readInstance = readInstance(this.m_Data);
                if (readInstance == null) {
                    compactify();
                    return;
                }
                this.m_Data.add(readInstance);
            }
        }

        /**
         * Prepares a reader for header-less data without loading any instances, delegating to
         * the five-argument constructor with the batch flag set to false.
         *
         * @param reader source of the instance lines
         * @param instances header the data must conform to
         * @param i line offset used when reporting positions
         * @param i2 capacity passed to the internal copy of the header
         * @throws IOException declared by the delegated constructor
         */
        public ArffReader(Reader reader, Instances instances, int i, int i2) throws IOException {
            this(reader, instances, i, i2, false);
        }

        /**
         * Prepares a reader for header-less data without loading any instances.
         *
         * @param reader source of the instance lines
         * @param instances header the data must conform to; an internal Instances object is built from it
         * @param i line offset used when reporting positions
         * @param i2 capacity passed to the internal Instances object
         * @param z batch flag; string values are retained exactly when this is true
         * @throws IOException declared for parity with the other constructors
         */
        public ArffReader(Reader reader, Instances instances, int i, int i2, boolean z) throws IOException {
            // Both flags simply follow the batch argument.
            this.m_batchMode = z;
            this.m_retainStringValues = z;
            this.m_Lines = i;
            this.m_Tokenizer = new StreamTokenizer(reader);
            initTokenizer();
            this.m_Data = new Instances(instances, i2);
            initBuffers();
        }

        /**
         * Allocates the sparse-instance scratch buffers (one slot per attribute) and records
         * the indices of all string attributes in the current header.
         */
        protected void initBuffers() {
            this.m_ValueBuffer = new double[this.m_Data.numAttributes()];
            this.m_IndicesBuffer = new int[this.m_Data.numAttributes()];
            this.m_stringAttIndices = new ArrayList<Integer>();
            if (!this.m_Data.checkForStringAttributes()) {
                // No string attributes: the index list stays empty.
                return;
            }
            for (int column = 0; column < this.m_Data.numAttributes(); column++) {
                boolean isStringColumn = this.m_Data.attribute(column).isString();
                if (isStringColumn) {
                    this.m_stringAttIndices.add(column);
                }
            }
        }

        /** Asks the loaded data set to compact itself; does nothing when no data set exists yet. */
        protected void compactify() {
            if (this.m_Data == null) {
                return;
            }
            this.m_Data.compactify();
        }

        /**
         * Always throws an {@link IOException} made of the given message, the text
         * ", read " and the tokenizer's description of the current token and line.
         *
         * <p>When a positive line offset is configured, the line number reported by the
         * tokenizer is replaced with {@code offset + tokenizerLine - 1}. Only the trailing
         * " line N" part of the tokenizer description is rewritten, so messages that
         * themselves contain the word "line" (for example "end of line expected") and tokens
         * whose text spans several lines are reported intact.
         *
         * @param str description of the problem
         * @throws IOException always
         */
        protected void errorMessage(String str) throws IOException {
            // StreamTokenizer.toString() has the form "Token[...], line N".
            String tokenInfo = this.m_Tokenizer.toString();
            if (this.m_Lines > 0) {
                int marker = tokenInfo.lastIndexOf(" line ");
                if (marker >= 0) {
                    int absoluteLine = (this.m_Lines + this.m_Tokenizer.lineno()) - 1;
                    tokenInfo = tokenInfo.substring(0, marker) + " line " + absoluteLine;
                }
            }
            throw new IOException(str + ", read " + tokenInfo);
        }

        /**
         * Returns the configured line offset plus the tokenizer's current line number.
         *
         * @return the combined line number
         */
        public int getLineNo() {
            int tokenizerLine = this.m_Tokenizer.lineno();
            return this.m_Lines + tokenizerLine;
        }

        /**
         * Advances to the first token that is not an end-of-line marker and normalises its
         * type: quoted strings are reported as words, and the bare word "?" is reported as
         * the character '?'.
         *
         * @throws IOException if the underlying reader fails
         */
        protected void getFirstToken() throws IOException {
            int type = this.m_Tokenizer.nextToken();
            while (type == StreamTokenizer.TT_EOL) {
                type = this.m_Tokenizer.nextToken();
            }
            if (type == '\'' || type == '"') {
                this.m_Tokenizer.ttype = StreamTokenizer.TT_WORD;
            } else if (type == StreamTokenizer.TT_WORD && "?".equals(this.m_Tokenizer.sval)) {
                this.m_Tokenizer.ttype = '?';
            }
        }

        /**
         * Reads the next token, which callers treat as a sparse-instance index, and reports
         * an error if the line or the file ends instead.
         *
         * @throws IOException on premature end of line or end of file
         */
        protected void getIndex() throws IOException {
            int type = this.m_Tokenizer.nextToken();
            if (type == StreamTokenizer.TT_EOL) {
                errorMessage("premature end of line");
            }
            if (this.m_Tokenizer.ttype == StreamTokenizer.TT_EOF) {
                errorMessage("premature end of file");
            }
        }

        /**
         * Consumes the token that must terminate the current line.
         *
         * @param z whether end of file is accepted in place of end of line
         * @throws IOException if anything other than the accepted terminator follows
         */
        protected void getLastToken(boolean z) throws IOException {
            if (this.m_Tokenizer.nextToken() == StreamTokenizer.TT_EOL) {
                return;
            }
            boolean endOfFileAccepted = this.m_Tokenizer.ttype == StreamTokenizer.TT_EOF && z;
            if (endOfFileAccepted) {
                return;
            }
            errorMessage("end of line expected");
        }

        /**
         * Tries to read an optional instance weight of the form <code>{number}</code> that
         * follows the values of an instance.
         *
         * <p>Returns {@code NaN} when the next token is end of line, end of file or anything
         * other than '{', and also when the text after '{' is not a parseable number (that
         * case is deliberately ignored quietly). A '{' that is not followed by any word
         * token at all (for example "{}" or a '{' at the end of the line) is reported as a
         * parse error instead of failing with a NullPointerException.
         *
         * @return the parsed weight, or {@code NaN} if no usable weight was found
         * @throws IOException if the weight is malformed or the reader fails
         */
        protected double getInstanceWeight() throws IOException {
            this.m_Tokenizer.nextToken();
            int type = this.m_Tokenizer.ttype;
            if (type == StreamTokenizer.TT_EOL || type == StreamTokenizer.TT_EOF) {
                return Double.NaN;
            }
            if (type != '{') {
                return Double.NaN;
            }

            this.m_Tokenizer.nextToken();
            String weightText = this.m_Tokenizer.sval;
            if (weightText == null) {
                // sval is null for every non-word token; Double.parseDouble(null) would throw NPE.
                errorMessage("Problem reading instance weight");
                return Double.NaN;
            }

            double weight;
            try {
                weight = Double.parseDouble(weightText);
            } catch (NumberFormatException ignored) {
                // Unparseable weight text is ignored on purpose; the caller falls back to 1.0.
                return Double.NaN;
            }

            this.m_Tokenizer.nextToken();
            if (this.m_Tokenizer.ttype != '}') {
                errorMessage("Problem reading instance weight");
            }
            return weight;
        }

        /**
         * Reads the next token within the current line and normalises its type: quoted
         * strings are reported as words, and the bare word "?" is reported as the
         * character '?'.
         *
         * @throws IOException on premature end of line or end of file
         */
        protected void getNextToken() throws IOException {
            int type = this.m_Tokenizer.nextToken();
            if (type == StreamTokenizer.TT_EOL) {
                errorMessage("premature end of line");
            }
            if (this.m_Tokenizer.ttype == StreamTokenizer.TT_EOF) {
                errorMessage("premature end of file");
                return;
            }
            boolean quoted = this.m_Tokenizer.ttype == '\'' || this.m_Tokenizer.ttype == '"';
            if (quoted) {
                this.m_Tokenizer.ttype = StreamTokenizer.TT_WORD;
            } else if (this.m_Tokenizer.ttype == StreamTokenizer.TT_WORD && "?".equals(this.m_Tokenizer.sval)) {
                this.m_Tokenizer.ttype = '?';
            }
        }

        /**
         * Configures the tokenizer for ARFF syntax. The character classes are written as
         * character literals so that their meaning is visible and no unrelated library
         * constant is needed for the brace characters.
         */
        protected void initTokenizer() {
            this.m_Tokenizer.resetSyntax();
            // Control characters and space separate tokens.
            this.m_Tokenizer.whitespaceChars(0, ' ');
            // Everything from '!' up to code point 255 can be part of a word ...
            this.m_Tokenizer.wordChars(' ' + 1, '\u00FF');
            // ... except the comma, which separates values.
            this.m_Tokenizer.whitespaceChars(',', ',');
            this.m_Tokenizer.commentChar('%');
            this.m_Tokenizer.quoteChar('"');
            this.m_Tokenizer.quoteChar('\'');
            // Braces are returned as single-character tokens.
            this.m_Tokenizer.ordinaryChar('{');
            this.m_Tokenizer.ordinaryChar('}');
            this.m_Tokenizer.eolIsSignificant(true);
        }

        /**
         * Reads the next instance, delegating to the two-argument overload with the flag set to true.
         *
         * @param instances header the instance must conform to
         * @return the instance read, or null when the end of the input has been reached
         * @throws IOException if the instance cannot be parsed
         */
        public Instance readInstance(Instances instances) throws IOException {
            return readInstance(instances, true);
        }

        /**
         * Reads the next instance by delegating to getInstance.
         *
         * @param instances header the instance must conform to
         * @param z whether an optional instance weight is looked for after the values
         * @return the instance read, or null when the end of the input has been reached
         * @throws IOException if the instance cannot be parsed
         */
        public Instance readInstance(Instances instances, boolean z) throws IOException {
            return getInstance(instances, z);
        }

        /**
         * Adopts the given header as the current data set and reads one instance, choosing
         * the sparse parser when the line starts with '{' and the dense parser otherwise.
         *
         * @param instances header the instance must conform to
         * @param z whether an optional instance weight is looked for after the values
         * @return the instance read, or null at end of file
         * @throws IOException if the header has no attributes or the instance is malformed
         */
        protected Instance getInstance(Instances instances, boolean z) throws IOException {
            this.m_Data = instances;
            if (this.m_Data.numAttributes() == 0) {
                errorMessage("no header information available");
            }
            getFirstToken();
            int firstType = this.m_Tokenizer.ttype;
            if (firstType == StreamTokenizer.TT_EOF) {
                return null;
            }
            if (firstType == '{') {
                return getInstanceSparse(z);
            }
            return getInstanceFull(z);
        }

        /**
         * Parses one sparse instance, i.e. a brace-enclosed list of "index value" pairs; the
         * opening '{' has already been consumed by the caller.
         *
         * <p>Indices must be strictly increasing and within the attribute range. Each value
         * is converted according to the type of the attribute it belongs to. When neither
         * batch mode nor string retention is active, every string attribute is first reset
         * to the dummy string value, and string values read here are stored with value 1.
         *
         * @param z whether an optional instance weight is looked for after the closing '}'
         * @return the parsed instance, attached to the current data set
         * @throws IOException if the instance is malformed; failures while parsing a
         *         relational value keep the original exception as the cause
         */
        protected Instance getInstanceSparse(boolean z) throws IOException {
            int count = 0;
            int previousIndex = -1;
            final boolean discardStrings = !this.m_batchMode && !this.m_retainStringValues;

            if (discardStrings && this.m_stringAttIndices != null) {
                for (int stringIndex : this.m_stringAttIndices) {
                    this.m_Data.attribute(stringIndex).setStringValue(Attribute.DUMMY_STRING_VAL);
                }
            }

            while (true) {
                getIndex();
                if (this.m_Tokenizer.ttype == '}') {
                    break;
                }

                // Attribute index: a number, strictly greater than the previous one, in range.
                try {
                    this.m_IndicesBuffer[count] = Integer.parseInt(this.m_Tokenizer.sval);
                } catch (NumberFormatException e) {
                    errorMessage("index number expected");
                }
                if (this.m_IndicesBuffer[count] <= previousIndex) {
                    errorMessage("indices have to be ordered");
                }
                if (this.m_IndicesBuffer[count] < 0 || this.m_IndicesBuffer[count] >= this.m_Data.numAttributes()) {
                    errorMessage("index out of bounds");
                }
                previousIndex = this.m_IndicesBuffer[count];

                // Value belonging to that index.
                getNextToken();
                if (this.m_Tokenizer.ttype == '?') {
                    this.m_ValueBuffer[count] = Utils.missingValue();
                } else {
                    if (this.m_Tokenizer.ttype != StreamTokenizer.TT_WORD) {
                        errorMessage("not a valid value");
                    }
                    Attribute attribute = this.m_Data.attribute(this.m_IndicesBuffer[count]);
                    String token = this.m_Tokenizer.sval;
                    switch (attribute.type()) {
                        case Attribute.NUMERIC:
                            try {
                                this.m_ValueBuffer[count] = Double.parseDouble(token);
                            } catch (NumberFormatException e) {
                                errorMessage("number expected");
                            }
                            break;
                        case Attribute.NOMINAL: {
                            int labelIndex = attribute.indexOfValue(token);
                            if (labelIndex == -1) {
                                errorMessage("nominal value not declared in header");
                            }
                            this.m_ValueBuffer[count] = labelIndex;
                            break;
                        }
                        case Attribute.STRING:
                            if (discardStrings) {
                                // Slot 0 holds the dummy value, so the real string lands in slot 1.
                                this.m_ValueBuffer[count] = 1.0d;
                                attribute.setStringValue(Attribute.DUMMY_STRING_VAL);
                                attribute.addStringValue(token);
                            } else {
                                this.m_ValueBuffer[count] = attribute.addStringValue(token);
                            }
                            break;
                        case Attribute.DATE:
                            try {
                                this.m_ValueBuffer[count] = attribute.parseDate(token);
                            } catch (ParseException e) {
                                errorMessage("unparseable date: " + token);
                            }
                            break;
                        case Attribute.RELATIONAL:
                            try {
                                // The value is itself ARFF data, parsed against the sub-relation's header.
                                ArffReader nested = new ArffReader(new StringReader(token), attribute.relation(), 0);
                                this.m_ValueBuffer[count] = attribute.addRelation(nested.getData());
                            } catch (Exception e) {
                                throw new IOException(e.toString() + " of line " + getLineNo(), e);
                            }
                            break;
                        default:
                            errorMessage("unknown attribute type in column " + this.m_IndicesBuffer[count]);
                            break;
                    }
                }
                count++;
            }

            // Optional weight; NaN means "none found", in which case the default of 1.0 applies.
            double weight = 1.0d;
            if (z) {
                weight = getInstanceWeight();
                if (Double.isNaN(weight)) {
                    weight = 1.0d;
                } else {
                    getLastToken(true);
                }
            }

            double[] values = new double[count];
            int[] indices = new int[count];
            System.arraycopy(this.m_ValueBuffer, 0, values, 0, count);
            System.arraycopy(this.m_IndicesBuffer, 0, indices, 0, count);
            SparseInstance sparseInstance = new SparseInstance(weight, values, indices, this.m_Data.numAttributes());
            sparseInstance.setDataset(this.m_Data);
            return sparseInstance;
        }

        /**
         * Parses one dense instance, i.e. one value per attribute in header order; the first
         * token of the line has already been read by the caller.
         *
         * <p>Each value is converted according to the type of its attribute. When neither
         * batch mode nor string retention is active, a string value replaces the attribute's
         * stored string and is recorded with value 0.
         *
         * @param z whether an optional instance weight is looked for after the last value
         * @return the parsed instance, attached to the current data set
         * @throws IOException if the instance is malformed; failures while parsing a
         *         relational value keep the original exception as the cause
         */
        protected Instance getInstanceFull(boolean z) throws IOException {
            double[] values = new double[this.m_Data.numAttributes()];
            for (int column = 0; column < this.m_Data.numAttributes(); column++) {
                if (column > 0) {
                    getNextToken();
                }
                if (this.m_Tokenizer.ttype == '?') {
                    values[column] = Utils.missingValue();
                    continue;
                }
                if (this.m_Tokenizer.ttype != StreamTokenizer.TT_WORD) {
                    errorMessage("not a valid value");
                }
                Attribute attribute = this.m_Data.attribute(column);
                String token = this.m_Tokenizer.sval;
                switch (attribute.type()) {
                    case Attribute.NUMERIC:
                        try {
                            values[column] = Double.parseDouble(token);
                        } catch (NumberFormatException e) {
                            errorMessage("number expected");
                        }
                        break;
                    case Attribute.NOMINAL: {
                        int labelIndex = attribute.indexOfValue(token);
                        if (labelIndex == -1) {
                            errorMessage("nominal value not declared in header");
                        }
                        values[column] = labelIndex;
                        break;
                    }
                    case Attribute.STRING:
                        if (!this.m_batchMode && !this.m_retainStringValues) {
                            values[column] = 0.0d;
                            attribute.setStringValue(token);
                        } else {
                            values[column] = attribute.addStringValue(token);
                        }
                        break;
                    case Attribute.DATE:
                        try {
                            values[column] = attribute.parseDate(token);
                        } catch (ParseException e) {
                            errorMessage("unparseable date: " + token);
                        }
                        break;
                    case Attribute.RELATIONAL:
                        try {
                            // The value is itself ARFF data, parsed against the sub-relation's header.
                            ArffReader nested = new ArffReader(new StringReader(token), attribute.relation(), 0);
                            values[column] = attribute.addRelation(nested.getData());
                        } catch (Exception e) {
                            throw new IOException(e.toString() + " of line " + getLineNo(), e);
                        }
                        break;
                    default:
                        errorMessage("unknown attribute type in column " + column);
                        break;
                }
            }

            // Optional weight; NaN means "none found", in which case the default of 1.0 applies.
            double weight = 1.0d;
            if (z) {
                weight = getInstanceWeight();
                if (Double.isNaN(weight)) {
                    weight = 1.0d;
                } else {
                    getLastToken(true);
                }
            }
            DenseInstance denseInstance = new DenseInstance(weight, values);
            denseInstance.setDataset(this.m_Data);
            return denseInstance;
        }

        /**
         * Parses the ARFF header: the relation declaration, every attribute declaration and
         * the data keyword, then creates the (still empty) data set from them. Also resets
         * the line offset to 0.
         *
         * @param i initial capacity of the data set that is created
         * @throws IOException if the header is incomplete or malformed
         */
        protected void readHeader(int i) throws IOException {
            this.m_Lines = 0;
            String relationName = Strings.EMPTY_STRING;

            // Relation declaration.
            getFirstToken();
            if (this.m_Tokenizer.ttype == StreamTokenizer.TT_EOF) {
                errorMessage("premature end of file");
            }
            if (!Instances.ARFF_RELATION.equalsIgnoreCase(this.m_Tokenizer.sval)) {
                errorMessage("keyword @relation expected");
            } else {
                getNextToken();
                relationName = this.m_Tokenizer.sval;
                getLastToken(false);
            }

            // Attribute declarations; parseAttribute leaves the tokenizer on the next keyword.
            ArrayList<Attribute> attributes = new ArrayList<Attribute>();
            getFirstToken();
            if (this.m_Tokenizer.ttype == StreamTokenizer.TT_EOF) {
                errorMessage("premature end of file");
            }
            while (Attribute.ARFF_ATTRIBUTE.equalsIgnoreCase(this.m_Tokenizer.sval)) {
                attributes = parseAttribute(attributes);
            }

            // The data keyword must follow, and at least one attribute must exist.
            boolean dataKeywordFound = Instances.ARFF_DATA.equalsIgnoreCase(this.m_Tokenizer.sval);
            if (!dataKeywordFound) {
                errorMessage("keyword @data expected");
            }
            if (attributes.size() == 0) {
                errorMessage("no attributes declared");
            }
            this.m_Data = new Instances(relationName, attributes, i);
        }

        /**
         * Parses one attribute declaration (the attribute keyword itself has already been
         * read) and appends the resulting attribute to the given list. On return the
         * tokenizer is positioned on the first token of the following line.
         *
         * <p>Handled forms: an enumeration in braces, the numeric type names, "string",
         * "date" with an optional format, and "relational", whose nested attribute
         * declarations are parsed recursively up to the terminating end marker followed by
         * the attribute name.
         *
         * <p>An enumeration that is cut off by the end of the file is reported as an error;
         * it is not read indefinitely.
         *
         * @param arrayList attributes parsed so far; the new attribute is appended to it
         * @return the same list, with the new attribute added
         * @throws IOException if the declaration is malformed or the input ends prematurely
         */
        protected ArrayList<Attribute> parseAttribute(ArrayList<Attribute> arrayList) throws IOException {
            getNextToken();
            String attributeName = this.m_Tokenizer.sval;
            getNextToken();

            if (this.m_Tokenizer.ttype != StreamTokenizer.TT_WORD) {
                // Anything that is not a word must be the start of an enumeration.
                ArrayList<String> labels = new ArrayList<String>();
                this.m_Tokenizer.pushBack();
                if (this.m_Tokenizer.nextToken() != '{') {
                    errorMessage("{ expected at beginning of enumeration");
                }
                while (this.m_Tokenizer.nextToken() != '}') {
                    int type = this.m_Tokenizer.ttype;
                    // End of file repeats forever on the tokenizer, so it must end the loop too.
                    if (type == StreamTokenizer.TT_EOL || type == StreamTokenizer.TT_EOF) {
                        errorMessage("} expected at end of enumeration");
                    } else {
                        labels.add(this.m_Tokenizer.sval);
                    }
                }
                arrayList.add(new Attribute(attributeName, labels, arrayList.size()));
            } else {
                String typeName = this.m_Tokenizer.sval;
                if (typeName.equalsIgnoreCase(Attribute.ARFF_ATTRIBUTE_REAL)
                        || typeName.equalsIgnoreCase(Attribute.ARFF_ATTRIBUTE_INTEGER)
                        || typeName.equalsIgnoreCase("numeric")) {
                    arrayList.add(new Attribute(attributeName, arrayList.size()));
                    readTillEOL();
                } else if (typeName.equalsIgnoreCase("string")) {
                    // The cast selects the constructor overload that takes a list of values.
                    arrayList.add(new Attribute(attributeName, (ArrayList<String>) null, arrayList.size()));
                    readTillEOL();
                } else if (typeName.equalsIgnoreCase("date")) {
                    String dateFormat = null;
                    if (this.m_Tokenizer.nextToken() != StreamTokenizer.TT_EOL) {
                        int type = this.m_Tokenizer.ttype;
                        if (type != StreamTokenizer.TT_WORD && type != '\'' && type != '"') {
                            errorMessage("not a valid date format");
                        }
                        dateFormat = this.m_Tokenizer.sval;
                        readTillEOL();
                    } else {
                        // No format given: leave the end-of-line token for getLastToken below.
                        this.m_Tokenizer.pushBack();
                    }
                    arrayList.add(new Attribute(attributeName, dateFormat, arrayList.size()));
                } else if (typeName.equalsIgnoreCase("relational")) {
                    readTillEOL();
                    ArrayList<Attribute> nestedAttributes = new ArrayList<Attribute>();
                    getFirstToken();
                    if (this.m_Tokenizer.ttype == StreamTokenizer.TT_EOF) {
                        errorMessage("premature end of file");
                    }
                    while (Attribute.ARFF_ATTRIBUTE.equalsIgnoreCase(this.m_Tokenizer.sval)) {
                        nestedAttributes = parseAttribute(nestedAttributes);
                    }
                    if (!Attribute.ARFF_END_SUBRELATION.equalsIgnoreCase(this.m_Tokenizer.sval)) {
                        errorMessage("declaration of subrelation " + attributeName + " must be terminated by @end " + attributeName);
                    }
                    // The end marker must be followed by the name of the relational attribute.
                    getNextToken();
                    if (!attributeName.equalsIgnoreCase(this.m_Tokenizer.sval)) {
                        errorMessage("declaration of subrelation " + attributeName + " must be terminated by @end " + attributeName);
                    }
                    Instances nestedHeader = new Instances(attributeName, nestedAttributes, 0);
                    arrayList.add(new Attribute(attributeName, nestedHeader, arrayList.size()));
                } else {
                    errorMessage("no valid attribute type or invalid enumeration");
                }
            }

            getLastToken(false);
            getFirstToken();
            if (this.m_Tokenizer.ttype == StreamTokenizer.TT_EOF) {
                errorMessage("premature end of file");
            }
            return arrayList;
        }

        /**
         * Skips the remaining tokens of the current line and pushes the terminating token
         * back, so that the next read sees it again.
         *
         * <p>The scan also stops at end of file. The tokenizer keeps returning end of file
         * once the input is exhausted, so a last line without a trailing line break would
         * otherwise never terminate this loop.
         *
         * @throws IOException if the underlying reader fails
         */
        protected void readTillEOL() throws IOException {
            int type;
            do {
                type = this.m_Tokenizer.nextToken();
            } while (type != StreamTokenizer.TT_EOL && type != StreamTokenizer.TT_EOF);
            this.m_Tokenizer.pushBack();
        }

        /**
         * Returns a new Instances object built from the current data set with capacity 0.
         *
         * @return the header-only copy
         */
        public Instances getStructure() {
            Instances headerOnly = new Instances(this.m_Data, 0);
            return headerOnly;
        }

        /**
         * Returns the data set held by this reader (the same object, not a copy).
         *
         * @return the current data set
         */
        public Instances getData() {
            return this.m_Data;
        }

        /**
         * Returns the revision string extracted from the embedded revision keyword.
         *
         * @return the revision
         */
        @Override // weka.core.RevisionHandler
        public String getRevision() {
            return RevisionUtils.extract("$Revision: 9515 $");
        }
    }

    /**
     * Returns a short description of this loader.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Reads a source that is in arff (attribute relation file format) format. ";
    }

    /**
     * Returns the primary file extension, {@link #FILE_EXTENSION}.
     *
     * @return the extension
     */
    @Override // weka.core.converters.FileSourcedConverter
    public String getFileExtension() {
        return FILE_EXTENSION;
    }

    /**
     * Returns both supported extensions: the primary one and the ".gz" variant.
     *
     * @return a new two-element array of extensions
     */
    @Override // weka.core.converters.FileSourcedConverter
    public String[] getFileExtensions() {
        return new String[]{FILE_EXTENSION, FILE_EXTENSION_COMPRESSED};
    }

    /**
     * Returns a short description of the file type.
     *
     * @return the description text
     */
    @Override // weka.core.converters.FileSourcedConverter
    public String getFileDescription() {
        return "Arff data files";
    }

    /**
     * Clears the cached structure and parser, sets the retrieval mode to 0, and re-opens
     * the configured source: the file if one is set and is not a directory, otherwise the
     * URL unless it is null or still the "http://" placeholder.
     *
     * @throws IOException if the source cannot be re-opened
     */
    @Override // weka.core.converters.AbstractFileLoader, weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public void reset() throws IOException {
        this.m_structure = null;
        this.m_ArffReader = null;
        setRetrieval(0);

        boolean fileConfigured = this.m_File != null && !new File(this.m_File).isDirectory();
        if (fileConfigured) {
            setFile(new File(this.m_File));
            return;
        }

        boolean urlConfigured = this.m_URL != null && !this.m_URL.equals("http://");
        if (urlConfigured) {
            setURL(this.m_URL);
        }
    }

    /**
     * Uses the given URL as the source: clears the cached structure, sets the retrieval
     * mode to 0 and opens the URL's stream as the new input.
     *
     * @param url location of the ARFF data
     * @throws IOException if the URL cannot be opened
     */
    public void setSource(URL url) throws IOException {
        this.m_structure = null;
        setRetrieval(0);
        InputStream urlStream = url.openStream();
        setSource(urlStream);
        // The stream-based setSource overwrites both fields, so they are set afterwards.
        this.m_URL = url.toString();
        this.m_File = null;
    }

    /**
     * Returns a File built from the currently stored file path.
     * NOTE(review): the path is set to null by setSource(URL); new File(null) throws
     * NullPointerException, so callers presumably must not call this for URL sources. Confirm.
     *
     * @return the file for the stored path
     */
    @Override // weka.core.converters.AbstractFileLoader, weka.core.converters.FileSourcedConverter
    public File retrieveFile() {
        return new File(this.m_File);
    }

    /**
     * Remembers the path of the given file and makes the file the current source.
     *
     * @param file the file to load from
     * @throws IOException if the file cannot be used as a source
     */
    @Override // weka.core.converters.AbstractFileLoader, weka.core.converters.FileSourcedConverter
    public void setFile(File file) throws IOException {
        this.m_File = file.getPath();
        setSource(file);
    }

    /**
     * Remembers the given URL text and makes that URL the current source.
     *
     * @param str textual form of the URL
     * @throws IOException if the text is not a valid URL or the URL cannot be opened
     */
    @Override // weka.core.converters.URLSourcedLoader
    public void setURL(String str) throws IOException {
        this.m_URL = str;
        setSource(new URL(str));
    }

    /**
     * Returns the stored URL text; this is "http://" when no URL has been set.
     *
     * @return the URL text
     */
    @Override // weka.core.converters.URLSourcedLoader
    public String retrieveURL() {
        return this.m_URL;
    }

    /**
     * Uses the given stream as the source. The file path is reset to the absolute path of
     * the working directory and the URL to its "http://" placeholder; the stream is wrapped
     * in a buffered reader that decodes with the platform default charset.
     *
     * @param inputStream stream delivering the ARFF text
     * @throws IOException declared by the overridden method
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public void setSource(InputStream inputStream) throws IOException {
        File workingDirectory = new File(System.getProperty("user.dir"));
        this.m_File = workingDirectory.getAbsolutePath();
        this.m_URL = "http://";
        Reader decoded = new InputStreamReader(inputStream);
        this.m_sourceReader = new BufferedReader(decoded);
    }

    /**
     * Returns the header of the source as a new Instances object with capacity 0. On the
     * first call the header is parsed from the source reader and cached, and the parser is
     * kept for subsequent instance reads.
     *
     * @return a header-only copy of the cached structure
     * @throws IOException if no source has been set, or if the header cannot be parsed (the
     *         underlying exception is attached as the cause)
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public Instances getStructure() throws IOException {
        if (this.m_structure == null) {
            if (this.m_sourceReader == null) {
                throw new IOException("No source has been specified");
            }
            try {
                // Retrieval mode 1 is the mode getDataSet() switches to (batch loading).
                boolean batch = getRetrieval() == 1;
                this.m_ArffReader = new ArffReader(this.m_sourceReader, 1, batch);
                this.m_structure = this.m_ArffReader.getStructure();
            } catch (Exception e) {
                // Keep the original exception as the cause so its stack trace is not lost.
                throw new IOException("Unable to determine structure as arff (Reason: " + e.toString() + ").", e);
            }
        }
        return new Instances(this.m_structure, 0);
    }

    /**
     * Loads every instance from the source in one go and closes the source reader.
     *
     * <p>The reader is closed whether loading succeeds or fails, so a parse error part-way
     * through the data no longer leaves the underlying stream open.
     *
     * @return all instances of the source
     * @throws IOException if no source has been set, if incremental loading was already
     *         started on this loader, or if the data cannot be parsed
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public Instances getDataSet() throws IOException {
        if (this.m_sourceReader == null) {
            throw new IOException("No source has been specified");
        }
        if (getRetrieval() == 2) {
            throw new IOException("Cannot mix getting Instances in both incremental and batch modes");
        }
        setRetrieval(1);

        // try-with-resources closes the reader on every exit path and keeps the primary exception.
        try (Reader source = this.m_sourceReader) {
            if (this.m_structure == null) {
                getStructure();
            }
            Instances loaded = new Instances(this.m_structure, 0);
            Instance next;
            while ((next = this.m_ArffReader.readInstance(this.m_structure)) != null) {
                loaded.add(next);
            }
            return loaded;
        }
    }

    /**
     * Reads the next instance in incremental mode. The given header becomes the cached
     * structure. When the source is exhausted the reader is closed and discarded; a failure
     * while closing is only printed.
     *
     * @param instances header the instance must conform to
     * @return the next instance, or null if there is no source or no further instance
     * @throws IOException if batch loading was already started on this loader, or if the
     *         instance cannot be parsed
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public Instance getNextInstance(Instances instances) throws IOException {
        this.m_structure = instances;
        if (getRetrieval() == 1) {
            throw new IOException("Cannot mix getting Instances in both incremental and batch modes");
        }
        setRetrieval(2);

        if (this.m_sourceReader == null) {
            return null;
        }
        Instance next = this.m_ArffReader.readInstance(this.m_structure);
        if (next == null) {
            // End of input: release the reader; closing is best effort.
            try {
                this.m_sourceReader.close();
                this.m_sourceReader = null;
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return next;
    }

    /**
     * Returns the revision string extracted from the embedded revision keyword.
     *
     * @return the revision
     */
    @Override // weka.core.RevisionHandler
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 9515 $");
    }

    /**
     * Command-line entry point: hands a new loader and the arguments to runFileLoader.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        runFileLoader(new ArffLoader(), strArr);
    }
}
