package com.aliasi.chunk;

import com.aliasi.symbol.SymbolTable;
import com.aliasi.tag.StringTagging;
import com.aliasi.tag.TagLattice;
import com.aliasi.tag.Tagging;
import com.aliasi.tokenizer.Tokenizer;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.BoundedPriorityQueue;
import com.aliasi.util.Iterators;
import com.aliasi.util.Math;
import com.aliasi.util.Scored;
import com.aliasi.util.ScoredObject;
import com.aliasi.util.Strings;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

/* loaded from: input_file:lib/lingpipe-4.1.0.jar:com/aliasi/chunk/BioTagChunkCodec.class */
public class BioTagChunkCodec extends AbstractTagChunkCodec implements Serializable {
    /** Serialization version identifier for this class. */
    static final long serialVersionUID = -4597052413756614276L;
    /** Prefix that, followed by a chunk type, forms the tag of the first token of a chunk. */
    private final String mBeginTagPrefix;
    /** Prefix that, followed by a chunk type, forms the tag of every non-initial token of a chunk. */
    private final String mInTagPrefix;
    /** Tag given to tokens that lie outside every chunk. */
    private final String mOutTag;
    /** Out tag value ({@code "O"}) used when no explicit out tag is supplied to a constructor. */
    public static final String OUT_TAG = "O";
    /** Begin-tag prefix used when no explicit prefix is supplied to a constructor. */
    public static final String BEGIN_TAG_PREFIX = "B_";
    /** In-tag prefix used when no explicit prefix is supplied to a constructor. */
    public static final String IN_TAG_PREFIX = "I_";
    /** Character length of the default {@code "B_"} and {@code "I_"} prefixes. */
    static final int PREFIX_LENGTH = 2;

    /* loaded from: input_file:lib/lingpipe-4.1.0.jar:com/aliasi/chunk/BioTagChunkCodec$NBestIterator.class */
    static class NBestIterator extends Iterators.Buffered<Chunk> {
        /** Prefix of begin tags; combined with a chunk type to look up the begin-tag id. */
        private final String mBeginTagPrefix;
        /** Prefix of in tags; also used to discover the chunk types present in the lattice. */
        private final String mInTagPrefix;
        /** The out tag, looked up once to obtain {@link #mOutTagId}. */
        private final String mOutTag;
        /** Lattice supplying forward, backward, transition and normalizer scores. */
        private final TagLattice<String> mLattice;
        /** Character offset of the start of each token, indexed by token position. */
        private final int[] mTokenStarts;
        /** Character offset of the end of each token, indexed by token position. */
        private final int[] mTokenEnds;
        /** Completed chunks waiting to be returned, ordered by score. */
        private final BoundedPriorityQueue<Chunk> mChunkQueue;
        /** Partial chunk hypotheses that can still be extended one token to the left. */
        private final BoundedPriorityQueue<NBestState> mStateQueue;
        /** Upper bound on the number of chunks this iterator will return. */
        private final int mMaxResults;
        /** Chunk types found in the lattice; the index into this array is the chunk id. */
        private final String[] mChunkTypes;
        /** Symbol-table id of the begin tag for each chunk id (parallel to {@link #mChunkTypes}). */
        private final int[] mBeginTagIds;
        /** Symbol-table id of the in tag for each chunk id (parallel to {@link #mChunkTypes}). */
        private final int[] mInTagIds;
        /** Symbol-table id of the out tag. */
        private final int mOutTagId;
        /** Number of chunks returned so far. */
        private int mNumResults = 0;

        /* JADX INFO: Access modifiers changed from: package-private */
        /* loaded from: input_file:lib/lingpipe-4.1.0.jar:com/aliasi/chunk/BioTagChunkCodec$NBestIterator$NBestState.class */
        /**
         * A partial chunk hypothesis held on the state queue.
         *
         * <p>The hypothesis covers tokens {@code mPos..mEndPos} and belongs to the
         * chunk type with index {@code mChunkId}. Extending it produces a chunk
         * that starts one token earlier, at {@code mPos - 1}.
         */
        public static class NBestState implements Scored {
            private final double mScore;
            private final int mPos;
            private final int mEndPos;
            private int mChunkId;

            /**
             * Creates a state.
             *
             * @param d  score of the hypothesis
             * @param i  leftmost token position covered so far
             * @param i2 rightmost (final) token position of the chunk
             * @param i3 index of the chunk type
             */
            public NBestState(double d, int i, int i2, int i3) {
                mScore = d;
                mPos = i;
                mEndPos = i2;
                mChunkId = i3;
            }

            /** Returns the score supplied at construction. */
            @Override
            public double score() {
                return mScore;
            }

            /** Returns a debugging rendering of all four fields. */
            @Override
            public String toString() {
                StringBuilder sb = new StringBuilder();
                sb.append("score=").append(mScore);
                sb.append(" pos=").append(mPos);
                sb.append(" end=").append(mEndPos);
                sb.append(" id=").append(mChunkId);
                return sb.toString();
            }
        }

        /**
         * Builds the iterator and seeds both queues.
         *
         * <p>Chunk types are discovered from the lattice tags that start with the
         * in-tag prefix. For every type, each token position contributes a
         * single-token chunk to the chunk queue, and each position after the
         * first contributes a state from which longer chunks are grown leftward.
         *
         * @param tagLattice lattice to read scores and tags from
         * @param iArr       token start offsets, indexed by token position
         * @param iArr2      token end offsets, indexed by token position
         * @param i          maximum number of results; also the capacity of both queues
         * @param str        begin-tag prefix
         * @param str2       in-tag prefix
         * @param str3       out tag
         */
        public NBestIterator(TagLattice<String> tagLattice, int[] iArr, int[] iArr2, int i, String str, String str2, String str3) {
            mBeginTagPrefix = str;
            mInTagPrefix = str2;
            mOutTag = str3;
            mLattice = tagLattice;
            mTokenStarts = iArr;
            mTokenEnds = iArr2;
            mMaxResults = i;

            // Collect the distinct chunk types by stripping the in-prefix from in-tags.
            Set<String> typeSet = new HashSet<>();
            SymbolTable symbols = tagLattice.tagSymbolTable();
            for (int tagId = 0; tagId < tagLattice.numTags(); tagId++) {
                String tag = tagLattice.tag(tagId);
                if (tag.startsWith(mInTagPrefix)) {
                    typeSet.add(tag.substring(mInTagPrefix.length()));
                }
            }
            mChunkTypes = typeSet.toArray(Strings.EMPTY_STRING_ARRAY);

            // Resolve the begin/in tag ids for every type, plus the out tag id.
            mBeginTagIds = new int[mChunkTypes.length];
            mInTagIds = new int[mChunkTypes.length];
            for (int typeId = 0; typeId < mChunkTypes.length; typeId++) {
                String type = mChunkTypes[typeId];
                mBeginTagIds[typeId] = symbols.symbolToID(mBeginTagPrefix + type);
                mInTagIds[typeId] = symbols.symbolToID(mInTagPrefix + type);
            }
            mOutTagId = symbols.symbolToID(mOutTag);

            mStateQueue = new BoundedPriorityQueue<>(ScoredObject.comparator(), i);
            mChunkQueue = new BoundedPriorityQueue<>(ScoredObject.comparator(), i);

            // Scratch buffer reused by every nonContLogSumExp call below.
            double[] scratch = new double[tagLattice.numTags() - 1];
            for (int typeId = 0; typeId < mChunkTypes.length; typeId++) {
                int lastTok = tagLattice.numTokens() - 1;
                if (lastTok < 0) {
                    continue; // no tokens, nothing to seed
                }
                String type = mChunkTypes[typeId];
                int inId = mInTagIds[typeId];
                int beginId = mBeginTagIds[typeId];

                // Single-token chunk on the final token.
                mChunkQueue.offer(ChunkFactory.createChunk(
                        mTokenStarts[lastTok], mTokenEnds[lastTok], type,
                        tagLattice.logProbability(lastTok, beginId)));

                // State ending on the final token, scored by the backward value of its in-tag.
                if (lastTok > 0) {
                    mStateQueue.offer(new NBestState(tagLattice.logBackward(lastTok, inId), lastTok, lastTok, typeId));
                }

                // Single-token chunks on every non-final token.
                for (int tok = 0; tok < lastTok; tok++) {
                    double score = nonContLogSumExp(typeId, tok, beginId, tagLattice, scratch)
                            + tagLattice.logForward(tok, beginId)
                            - tagLattice.logZ();
                    mChunkQueue.offer(ChunkFactory.createChunk(mTokenStarts[tok], mTokenEnds[tok], type, score));
                }

                // States ending on every interior token (neither first nor last).
                for (int tok = 1; tok < lastTok; tok++) {
                    double score = nonContLogSumExp(typeId, tok, inId, tagLattice, scratch);
                    mStateQueue.offer(new NBestState(score, tok, tok, typeId));
                }
            }
        }

        /**
         * Computes the log of the summed exponentiated scores of every way to
         * leave tag {@code i3} at token {@code i2} without continuing a chunk of
         * type {@code i} at token {@code i2 + 1}.
         *
         * <p>The successors considered are the out tag, the begin tag of every
         * chunk type, and the in tag of every chunk type other than {@code i}.
         * Each term is the backward score of the successor at {@code i2 + 1}
         * plus the transition score from {@code i3} into it.
         *
         * @param i          index of the chunk type whose in-tag continuation is excluded
         * @param i2         token position of the source tag; must not be the last token
         * @param i3         id of the source tag at {@code i2}
         * @param tagLattice lattice supplying backward and transition scores
         * @param dArr       scratch buffer passed whole to the log-sum-exp; it needs
         *                   at least {@code 2 * (number of chunk types)} slots
         * @return the log-sum-exp over the non-continuing successors
         */
        double nonContLogSumExp(int i, int i2, int i3, TagLattice<String> tagLattice, double[] dArr) {
            int nextTok = i2 + 1;
            int filled = 0;
            dArr[filled++] = tagLattice.logBackward(nextTok, mOutTagId) + tagLattice.logTransition(i2, i3, mOutTagId);
            for (int typeId = 0; typeId < mBeginTagIds.length; typeId++) {
                int beginId = mBeginTagIds[typeId];
                dArr[filled++] = tagLattice.logBackward(nextTok, beginId) + tagLattice.logTransition(i2, i3, beginId);
                if (typeId == i) {
                    continue; // the in-tag of the same type would continue the chunk
                }
                int inId = mInTagIds[typeId];
                // Store first, then advance: advancing first would skip a slot,
                // leave a stale value in the sum, and could run past the buffer.
                dArr[filled++] = tagLattice.logBackward(nextTok, inId) + tagLattice.logTransition(i2, i3, inId);
            }
            return Math.logSumOfExponentials(dArr);
        }

        /* JADX WARN: Can't rename method to resolve collision */
        /**
         * Produces the next chunk, or {@code null} once the result limit is
         * reached or no chunk remains on the chunk queue.
         *
         * @return the next chunk taken from the chunk queue, or {@code null}
         */
        @Override // com.aliasi.util.Iterators.Buffered
        public Chunk bufferNext() {
            if (mNumResults < mMaxResults) {
                // Expand states until the head of the chunk queue is safe to emit.
                search();
                Chunk best = mChunkQueue.poll();
                if (best != null) {
                    mNumResults++;
                    return best;
                }
            }
            return null;
        }

        /**
         * Expands states from the state queue until it is empty or the best
         * queued chunk scores at least as high as the best queued state.
         */
        void search() {
            while (!mStateQueue.isEmpty()) {
                boolean chunkIsBest = !mChunkQueue.isEmpty()
                        && mChunkQueue.peek().score() >= mStateQueue.peek().score();
                if (chunkIsBest) {
                    break;
                }
                extend(mStateQueue.poll());
            }
        }

        /**
         * Grows a state one token to the left.
         *
         * <p>A chunk beginning at token {@code mPos - 1} and ending at
         * {@code mEndPos} is always queued. If a token remains further left, a
         * new state is also queued that treats {@code mPos - 1} as another
         * in-tag token.
         *
         * @param nBestState the state to extend; its position must be at least 1
         */
        void extend(NBestState nBestState) {
            int typeId = nBestState.mChunkId;
            int beginId = mBeginTagIds[typeId];
            int inId = mInTagIds[typeId];
            int prevTok = nBestState.mPos - 1;

            // Close the chunk here: begin tag at prevTok, then the in-tag run the state covers.
            double chunkScore = nBestState.score()
                    + mLattice.logForward(prevTok, beginId)
                    + mLattice.logTransition(prevTok, beginId, inId)
                    - mLattice.logZ();
            mChunkQueue.offer(ChunkFactory.createChunk(
                    mTokenStarts[prevTok], mTokenEnds[nBestState.mEndPos], mChunkTypes[typeId], chunkScore));

            // Or keep growing: prevTok carries the in-tag as well.
            if (nBestState.mPos > 1) {
                double stateScore = nBestState.score() + mLattice.logTransition(prevTok, inId, inId);
                mStateQueue.offer(new NBestState(stateScore, prevTok, nBestState.mEndPos, typeId));
            }
        }
    }

    /* loaded from: input_file:lib/lingpipe-4.1.0.jar:com/aliasi/chunk/BioTagChunkCodec$Serializer.class */
    /**
     * Serialization proxy that writes and restores the state of a
     * {@link BioTagChunkCodec}.
     */
    static class Serializer extends AbstractExternalizable {
        static final long serialVersionUID = -2473387657606045149L;
        private final BioTagChunkCodec mCodec;

        /** No-argument constructor; leaves the codec reference {@code null}. */
        public Serializer() {
            this(null);
        }

        /**
         * Creates a proxy for the given codec.
         *
         * @param bioTagChunkCodec codec whose state will be written
         */
        public Serializer(BioTagChunkCodec bioTagChunkCodec) {
            mCodec = bioTagChunkCodec;
        }

        /**
         * Writes, in order: the consistency flag, a {@code Boolean.TRUE}
         * marker, the tokenizer factory (or {@code Boolean.FALSE} when the
         * factory is {@code null}), and the begin prefix, in prefix and out tag.
         *
         * @param objectOutput destination stream
         * @throws IOException if the underlying stream fails
         */
        @Override // com.aliasi.util.AbstractExternalizable, java.io.Externalizable
        public void writeExternal(ObjectOutput objectOutput) throws IOException {
            BioTagChunkCodec codec = mCodec;
            objectOutput.writeBoolean(codec.mEnforceConsistency);
            objectOutput.writeObject(Boolean.TRUE);
            Object factoryOrMarker = codec.mTokenizerFactory;
            if (factoryOrMarker == null) {
                factoryOrMarker = Boolean.FALSE;
            }
            objectOutput.writeObject(factoryOrMarker);
            objectOutput.writeUTF(codec.mBeginTagPrefix);
            objectOutput.writeUTF(codec.mInTagPrefix);
            objectOutput.writeUTF(codec.mOutTag);
        }

        /**
         * Reconstructs a codec from the stream.
         *
         * <p>If the first object read is not {@code Boolean.TRUE}, it is itself
         * treated as the tokenizer factory (or {@code Boolean.FALSE} for none)
         * and a codec with default tags is returned. This is presumably an
         * older stream layout without custom tags; confirm before relying on it.
         *
         * @param objectInput source stream
         * @return the reconstructed codec
         * @throws IOException            if the underlying stream fails
         * @throws ClassNotFoundException if a serialized class cannot be resolved
         */
        @Override // com.aliasi.util.AbstractExternalizable
        public Object read(ObjectInput objectInput) throws IOException, ClassNotFoundException {
            boolean enforce = objectInput.readBoolean();
            Object first = objectInput.readObject();
            if (!Boolean.TRUE.equals(first)) {
                TokenizerFactory factory = Boolean.FALSE.equals(first) ? null : (TokenizerFactory) first;
                return new BioTagChunkCodec(factory, enforce);
            }
            Object second = objectInput.readObject();
            TokenizerFactory factory = Boolean.FALSE.equals(second) ? null : (TokenizerFactory) second;
            String beginPrefix = objectInput.readUTF();
            String inPrefix = objectInput.readUTF();
            String outTag = objectInput.readUTF();
            return new BioTagChunkCodec(factory, enforce, beginPrefix, inPrefix, outTag);
        }
    }

    /**
     * Creates a codec with no tokenizer factory and consistency enforcement
     * switched off, using the default tags.
     */
    public BioTagChunkCodec() {
        this(null, false);
    }

    /**
     * Creates a codec with the default begin prefix, in prefix and out tag.
     *
     * @param tokenizerFactory tokenizer factory, or {@code null}
     * @param z                whether consistency is enforced on conversion
     */
    public BioTagChunkCodec(TokenizerFactory tokenizerFactory, boolean z) {
        this(tokenizerFactory, z, BEGIN_TAG_PREFIX, IN_TAG_PREFIX, OUT_TAG);
    }

    /**
     * Creates a codec with explicit tags.
     *
     * @param tokenizerFactory tokenizer factory, or {@code null}
     * @param z                whether consistency is enforced on conversion
     * @param str              begin-tag prefix
     * @param str2             in-tag prefix
     * @param str3             out tag
     */
    public BioTagChunkCodec(TokenizerFactory tokenizerFactory, boolean z, String str, String str2, String str3) {
        super(tokenizerFactory, z);
        mBeginTagPrefix = str;
        mInTagPrefix = str2;
        mOutTag = str3;
    }

    /**
     * Reports whether this codec enforces consistency.
     *
     * @return the value of the consistency flag
     */
    @Override // com.aliasi.chunk.AbstractTagChunkCodec
    public boolean enforceConsistency() {
        return this.mEnforceConsistency;
    }

    /**
     * Builds the complete tag set for the given chunk types: the out tag plus
     * one begin tag and one in tag per type.
     *
     * @param set the chunk types
     * @return a new set containing every tag this codec can produce for those types
     */
    @Override // com.aliasi.chunk.TagChunkCodec
    public Set<String> tagSet(Set<String> set) {
        Set<String> tags = new HashSet<>();
        tags.add(mOutTag);
        for (String chunkType : set) {
            tags.add(mBeginTagPrefix + chunkType);
            tags.add(mInTagPrefix + chunkType);
        }
        return tags;
    }

    /**
     * Tests whether the tags could occur consecutively somewhere inside a
     * legal tagging.
     *
     * <p>An empty sequence is legal. A single tag is checked on its own.
     * Longer sequences are legal when every adjacent pair is legal.
     *
     * @param strArr the tags, in order
     * @return {@code true} if the sequence is a legal sub-sequence
     */
    @Override // com.aliasi.chunk.TagChunkCodec
    public boolean legalTagSubSequence(String... strArr) {
        final int count = strArr.length;
        if (count == 1) {
            return legalTagSingle(strArr[0]);
        }
        for (int next = 1; next < count; next++) {
            if (!legalTagPair(strArr[next - 1], strArr[next])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests whether the tags form a complete legal tagging: a legal
     * sub-sequence that does not open with an in-tag.
     *
     * @param strArr the tags, in order
     * @return {@code true} if the sequence is a legal complete tagging
     */
    @Override // com.aliasi.chunk.TagChunkCodec
    public boolean legalTags(String... strArr) {
        if (!legalTagSubSequence(strArr)) {
            return false;
        }
        if (strArr.length == 0) {
            return true;
        }
        return !strArr[0].startsWith(mInTagPrefix);
    }

    /**
     * Decodes a tagging into a chunking over the same characters.
     *
     * <p>Out-tagged tokens are skipped. A begin tag opens a chunk whose type is
     * the tag with the begin prefix removed. The chunk then absorbs every
     * immediately following token tagged with the in-tag of that type.
     *
     * <p>The type is taken after the configured begin prefix, whatever its
     * length, so codecs built with custom prefixes decode correctly.
     *
     * @param stringTagging the tagging to decode
     * @return the chunking encoded by the tagging
     * @throws IllegalArgumentException if a tag that is neither the out tag nor
     *     a begin tag is found where a chunk must start, or if consistency is
     *     enforced and the tagging is not decodable
     */
    @Override // com.aliasi.chunk.TagChunkCodec
    public Chunking toChunking(StringTagging stringTagging) {
        enforceConsistency(stringTagging);
        ChunkingImpl chunkingImpl = new ChunkingImpl(stringTagging.characters());
        final int beginPrefixLength = mBeginTagPrefix.length();
        int i = 0;
        while (i < stringTagging.size()) {
            String tag = stringTagging.tag(i);
            if (mOutTag.equals(tag)) {
                i++;
                continue;
            }
            if (!tag.startsWith(mBeginTagPrefix)) {
                if (i == 0) {
                    throw new IllegalArgumentException("First tag must be out or begin. Found tagging.tag(0)=" + stringTagging.tag(0));
                }
                throw new IllegalArgumentException("Illegal tag sequence. tagging.tag(" + (i - 1) + ")=" + stringTagging.tag(i - 1) + " tagging.tag(" + i + ")=" + stringTagging.tag(i));
            }
            String type = tag.substring(beginPrefixLength);
            int chunkStart = stringTagging.tokenStart(i);
            String inTag = mInTagPrefix + type;
            // Absorb the run of matching in-tags; i ends on the last token of the chunk.
            while (i + 1 < stringTagging.size() && inTag.equals(stringTagging.tag(i + 1))) {
                i++;
            }
            chunkingImpl.add(ChunkFactory.createChunk(chunkStart, stringTagging.tokenEnd(i), type));
            i++;
        }
        return chunkingImpl;
    }

    /**
     * Encodes a chunking as a tagging that also carries the token character
     * offsets and the underlying character sequence.
     *
     * @param chunking the chunking to encode
     * @return the tagging with tokens, tags and token start/end offsets
     * @throws UnsupportedOperationException if this codec has no tokenizer factory
     * @throws IllegalArgumentException if consistency is enforced and the
     *     chunking is not encodable
     */
    @Override // com.aliasi.chunk.TagChunkCodec
    public StringTagging toStringTagging(Chunking chunking) {
        if (mTokenizerFactory == null) {
            throw new UnsupportedOperationException("Tokenizer factory must be non-null to convert chunking to tagging.");
        }
        enforceConsistency(chunking);
        List<String> tokens = new ArrayList<>();
        List<String> tags = new ArrayList<>();
        List<Integer> tokenStarts = new ArrayList<>();
        List<Integer> tokenEnds = new ArrayList<>();
        toTagging(chunking, tokens, tags, tokenStarts, tokenEnds);
        return new StringTagging(tokens, tags, chunking.charSequence(), tokenStarts, tokenEnds);
    }

    /**
     * Encodes a chunking as a plain tagging of tokens and tags, without
     * character offsets.
     *
     * @param chunking the chunking to encode
     * @return the tagging of tokens and their tags
     * @throws UnsupportedOperationException if this codec has no tokenizer factory
     * @throws IllegalArgumentException if consistency is enforced and the
     *     chunking is not encodable
     */
    @Override // com.aliasi.chunk.TagChunkCodec
    public Tagging<String> toTagging(Chunking chunking) {
        if (mTokenizerFactory == null) {
            throw new UnsupportedOperationException("Tokenizer factory must be non-null to convert chunking to tagging.");
        }
        enforceConsistency(chunking);
        List<String> tokens = new ArrayList<>();
        List<String> tags = new ArrayList<>();
        // Null offset lists tell the helper not to record token positions.
        toTagging(chunking, tokens, tags, null, null);
        return new Tagging<>(tokens, tags);
    }

    /**
     * Returns an iterator over the chunks derived from the lattice, limited to
     * the given number of results.
     *
     * <p>The arguments are validated first: the result limit must be
     * non-negative, both offset arrays must have one entry per lattice token,
     * starts and ends must each be non-decreasing, and no token may end before
     * it starts. A lattice with no tags yields an empty iterator.
     *
     * @param tagLattice lattice to extract chunks from
     * @param iArr       token start offsets
     * @param iArr2      token end offsets
     * @param i          maximum number of chunks to return
     * @return an iterator over at most {@code i} chunks
     * @throws IllegalArgumentException if any of the checks above fails
     */
    @Override // com.aliasi.chunk.TagChunkCodec
    public Iterator<Chunk> nBestChunks(TagLattice<String> tagLattice, int[] iArr, int[] iArr2, int i) {
        if (i < 0) {
            throw new IllegalArgumentException("Require non-negative number of results.");
        }
        if (iArr.length != tagLattice.numTokens()) {
            String msg = "Token starts must line up with num tokens. Found tokenStarts.length=" + iArr.length
                    + " lattice.numTokens()=" + tagLattice.numTokens();
            throw new IllegalArgumentException(msg);
        }
        if (iArr2.length != tagLattice.numTokens()) {
            String msg = "Token ends must line up with num tokens. Found tokenEnds.length=" + iArr2.length
                    + " lattice.numTokens()=" + tagLattice.numTokens();
            throw new IllegalArgumentException(msg);
        }
        for (int tok = 1; tok < iArr.length; tok++) {
            int prev = tok - 1;
            if (iArr[prev] > iArr[tok]) {
                String msg = "Token starts must be in order. Found tokenStarts[" + prev + "]=" + iArr[prev]
                        + " tokenStarts[" + tok + "]=" + iArr[tok];
                throw new IllegalArgumentException(msg);
            }
            if (iArr2[prev] > iArr2[tok]) {
                String msg = "Token ends must be in order. Found tokenEnds[" + prev + "]=" + iArr2[prev]
                        + " tokenEnds[" + tok + "]=" + iArr2[tok];
                throw new IllegalArgumentException(msg);
            }
        }
        if (tagLattice.numTags() == 0) {
            return Iterators.empty();
        }
        for (int tok = 0; tok < iArr.length; tok++) {
            if (iArr[tok] > iArr2[tok]) {
                String msg = "Token ends must not precede starts. Found tokenStarts[" + tok + "]=" + iArr[tok]
                        + " tokenEnds[" + tok + "]=" + iArr2[tok];
                throw new IllegalArgumentException(msg);
            }
        }
        return new NBestIterator(tagLattice, iArr, iArr2, i, mBeginTagPrefix, mInTagPrefix, mOutTag);
    }

    /**
     * Returns the fixed string {@code "BioTagChunkCodec"}; the configured
     * tags are not included.
     */
    public String toString() {
        return "BioTagChunkCodec";
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * When consistency is enforced, rejects a tagging that is not decodable.
     *
     * @param stringTagging the tagging to check
     * @throws IllegalArgumentException if enforcement is on and the tagging is
     *     not decodable; the message is the one produced by the decodability check
     */
    @Override // com.aliasi.chunk.AbstractTagChunkCodec
    public void enforceConsistency(StringTagging stringTagging) {
        if (!mEnforceConsistency) {
            return;
        }
        StringBuilder reason = new StringBuilder();
        if (isDecodable(stringTagging, reason)) {
            return;
        }
        throw new IllegalArgumentException(reason.toString());
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * When consistency is enforced, rejects a chunking that is not encodable.
     *
     * @param chunking the chunking to check
     * @throws IllegalArgumentException if enforcement is on and the chunking is
     *     not encodable; the message is the one produced by the encodability check
     */
    @Override // com.aliasi.chunk.AbstractTagChunkCodec
    public void enforceConsistency(Chunking chunking) {
        if (!mEnforceConsistency) {
            return;
        }
        StringBuilder reason = new StringBuilder();
        if (isEncodable(chunking, reason)) {
            return;
        }
        throw new IllegalArgumentException(reason.toString());
    }

    /**
     * Tests whether a tag has one of the three shapes this codec produces:
     * the out tag, a begin-prefixed tag, or an in-prefixed tag.
     *
     * @param str the tag to test
     * @return {@code true} if the tag has a recognised shape
     */
    boolean legalTagSingle(String str) {
        if (mOutTag.equals(str)) {
            return true;
        }
        if (str.startsWith(mBeginTagPrefix)) {
            return true;
        }
        return str.startsWith(mInTagPrefix);
    }

    /**
     * Tests whether {@code str2} may directly follow {@code str}.
     *
     * <p>Both tags must be individually legal. An out tag or a begin tag may
     * follow anything. An in-tag of type {@code T} may only follow the begin
     * tag or the in-tag of the same type {@code T}.
     *
     * <p>The previous tag is compared for equality rather than by suffix. A
     * suffix test would accept a different type that merely ends in
     * {@code T} (for example {@code B_XPER} before {@code I_PER}), or an out
     * tag that equals {@code T}.
     *
     * @param str  the earlier tag
     * @param str2 the later tag
     * @return {@code true} if the pair is a legal transition
     */
    boolean legalTagPair(String str, String str2) {
        if (!legalTagSingle(str) || !legalTagSingle(str2)) {
            return false;
        }
        if (!str2.startsWith(mInTagPrefix)) {
            return true;
        }
        String type = str2.substring(mInTagPrefix.length());
        return str.equals(mBeginTagPrefix + type) || str.equals(mInTagPrefix + type);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Tokenizes the chunking's text and appends tokens, tags and (optionally)
     * token offsets to the supplied lists.
     *
     * <p>Chunks are visited in text order. The text between the previous chunk
     * and the current one is tagged as out, then the chunk itself is tagged
     * begin/in, and finally the text after the last chunk is tagged as out.
     *
     * @param chunking the chunking to encode
     * @param list     receives the tokens
     * @param list2    receives the tags, parallel to {@code list}
     * @param list3    receives token start offsets, or {@code null} to skip offsets
     * @param list4    receives token end offsets; used only when {@code list3} is non-null
     */
    public void toTagging(Chunking chunking, List<String> list, List<String> list2, List<Integer> list3, List<Integer> list4) {
        char[] text = Strings.toCharArray(chunking.charSequence());
        Set<Chunk> chunkSet = chunking.chunkSet();
        Chunk[] ordered = chunkSet.toArray(new Chunk[chunkSet.size()]);
        Arrays.sort(ordered, Chunk.TEXT_ORDER_COMPARATOR);

        int cursor = 0; // end offset of the text already tagged
        for (int k = 0; k < ordered.length; k++) {
            Chunk current = ordered[k];
            String chunkType = current.type();
            int chunkStart = current.start();
            int chunkEnd = current.end();
            outBioTag(text, cursor, chunkStart, list, list2, list3, list4);
            chunkBioTag(text, chunkType, chunkStart, chunkEnd, list, list2, list3, list4);
            cursor = chunkEnd;
        }
        outBioTag(text, cursor, text.length, list, list2, list3, list4);
    }

    /**
     * Tokenizes the character span {@code [i, i2)} and records every token
     * with the out tag.
     *
     * @param cArr  the full text
     * @param i     start offset of the span (inclusive)
     * @param i2    end offset of the span (exclusive)
     * @param list  receives the tokens
     * @param list2 receives one out tag per token
     * @param list3 receives token start offsets, or {@code null} to skip offsets
     * @param list4 receives token end offsets
     */
    void outBioTag(char[] cArr, int i, int i2, List<String> list, List<String> list2, List<Integer> list3, List<Integer> list4) {
        Tokenizer tokenizer = mTokenizerFactory.tokenizer(cArr, i, i2 - i);
        for (String token = tokenizer.nextToken(); token != null; token = tokenizer.nextToken()) {
            list.add(token);
            addOffsets(tokenizer, i, list3, list4);
            list2.add(mOutTag);
        }
    }

    /**
     * Tokenizes the character span {@code [i, i2)} of a chunk and records the
     * first token with the begin tag and every later token with the in tag of
     * the chunk's type.
     *
     * @param cArr  the full text
     * @param str   the chunk type
     * @param i     start offset of the chunk (inclusive)
     * @param i2    end offset of the chunk (exclusive)
     * @param list  receives the tokens
     * @param list2 receives the tags
     * @param list3 receives token start offsets, or {@code null} to skip offsets
     * @param list4 receives token end offsets
     * @throws IllegalArgumentException if the span contains no token
     */
    void chunkBioTag(char[] cArr, String str, int i, int i2, List<String> list, List<String> list2, List<Integer> list3, List<Integer> list4) {
        int length = i2 - i;
        Tokenizer tokenizer = mTokenizerFactory.tokenizer(cArr, i, length);

        String first = tokenizer.nextToken();
        if (first == null) {
            throw new IllegalArgumentException("Chunks must contain at least one token. Found chunk with yield=|" + new String(cArr, i, length) + "|");
        }
        list.add(first);
        addOffsets(tokenizer, i, list3, list4);
        list2.add(mBeginTagPrefix + str);

        String inTag = mInTagPrefix + str;
        for (String token = tokenizer.nextToken(); token != null; token = tokenizer.nextToken()) {
            list.add(token);
            addOffsets(tokenizer, i, list3, list4);
            list2.add(inTag);
        }
    }

    /**
     * Records the offsets of the tokenizer's most recent token, shifted by
     * {@code i} so they are relative to the full text. Does nothing when the
     * start list is {@code null}.
     *
     * @param tokenizer tokenizer that has just produced a token
     * @param i         offset of the tokenized span within the full text
     * @param list      receives the token start offset, or {@code null} to skip
     * @param list2     receives the token end offset
     */
    void addOffsets(Tokenizer tokenizer, int i, List<Integer> list, List<Integer> list2) {
        if (list != null) {
            int start = i + tokenizer.lastTokenStartPosition();
            int end = i + tokenizer.lastTokenEndPosition();
            list.add(start);
            list2.add(end);
        }
    }

    /**
     * Substitutes a {@link Serializer} wrapping this codec as the object to
     * be written when this codec is serialized.
     *
     * @return a new serializer for this codec
     */
    Object writeReplace() {
        return new Serializer(this);
    }

    /**
     * Delegates unchanged to the superclass implementation.
     *
     * @param stringTagging the tagging to test
     * @return the superclass result
     */
    @Override // com.aliasi.chunk.AbstractTagChunkCodec, com.aliasi.chunk.TagChunkCodec
    public /* bridge */ /* synthetic */ boolean isDecodable(StringTagging stringTagging) {
        return super.isDecodable(stringTagging);
    }

    /**
     * Delegates unchanged to the superclass implementation.
     *
     * @param chunking the chunking to test
     * @return the superclass result
     */
    @Override // com.aliasi.chunk.AbstractTagChunkCodec, com.aliasi.chunk.TagChunkCodec
    public /* bridge */ /* synthetic */ boolean isEncodable(Chunking chunking) {
        return super.isEncodable(chunking);
    }

    /**
     * Delegates unchanged to the superclass implementation.
     *
     * @return the superclass result
     */
    @Override // com.aliasi.chunk.AbstractTagChunkCodec
    public /* bridge */ /* synthetic */ TokenizerFactory tokenizerFactory() {
        return super.tokenizerFactory();
    }
}
