package com.aliasi.suffixarray;

import com.aliasi.tokenizer.Tokenization;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.Strings;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:lib/lingpipe-4.1.0.jar:com/aliasi/suffixarray/DocumentTokenSuffixArray.class */
/**
 * A token suffix array built over a collection of named documents.
 *
 * <p>The documents are ordered by sorted identifier and concatenated into a
 * single character buffer; each document is followed by a space, the document
 * boundary token, and another space. One {@link TokenSuffixArray} is built
 * over the tokenization of that buffer, and this class keeps the bookkeeping
 * needed to map between document identifiers and positions.
 */
public class DocumentTokenSuffixArray {
    private final TokenSuffixArray mTsa;
    /** Token index at which each document starts; parallel to {@code mDocIds}. */
    private final int[] mDocStarts;
    /** Character offset in the concatenated text at which each document starts; parallel to {@code mDocIds}. */
    private final int[] mDocTextStarts;
    /** Document identifiers in sorted order. */
    private final String[] mDocIds;
    /** Maps a document identifier to its index in {@code mDocIds}. */
    private final Map<String, Integer> mDocIdToIndex;

    /**
     * Builds the suffix array for the specified documents.
     *
     * @param map mapping from document identifier to document text
     * @param tokenizerFactory tokenizer factory used to tokenize the texts
     * @param i forwarded unchanged as the second argument of the
     *     {@link TokenSuffixArray} constructor (presumably the maximum suffix
     *     length -- confirm against that class)
     * @param str document boundary token inserted after every document
     * @throws IllegalArgumentException if tokenizing the boundary token
     *     surrounded by single spaces does not yield exactly the boundary
     *     token itself
     * @throws NullPointerException if a document text in the map is null
     */
    public DocumentTokenSuffixArray(Map<String, String> map, TokenizerFactory tokenizerFactory, int i, String str) {
        String paddedBoundary = " " + str + " ";
        String[] boundaryTokens = tokenizerFactory.tokenizer(paddedBoundary.toCharArray(), 0, paddedBoundary.length()).tokenize();
        if (boundaryTokens.length != 1 || !boundaryTokens[0].equals(str)) {
            throw new IllegalArgumentException("Tokenizer factory must convert boundary token to self. Found documentBoundaryToken=|" + str + "| tokenizerFactory=" + tokenizerFactory + " result of tokenizing boundary token=|" + Arrays.asList(boundaryTokens) + "|");
        }
        this.mDocIds = (String[]) map.keySet().toArray(Strings.EMPTY_STRING_ARRAY);
        Arrays.sort(this.mDocIds);

        int numDocs = this.mDocIds.length;
        this.mDocStarts = new int[numDocs];
        this.mDocTextStarts = new int[numDocs];
        this.mDocIdToIndex = new HashMap<String, Integer>(map.size());

        // First pass: record where each document starts, both in tokens and in
        // characters. The two units differ, so they are tracked separately.
        int tokenPos = 0;
        int charPos = 0;
        for (int d = 0; d < numDocs; d++) {
            String docId = this.mDocIds[d];
            String docText = map.get(docId);
            this.mDocIdToIndex.put(docId, Integer.valueOf(d));
            this.mDocStarts[d] = tokenPos;
            this.mDocTextStarts[d] = charPos;
            // +1 token for the boundary token that follows the document.
            tokenPos += tokenCount(tokenizerFactory, docText) + 1;
            // +2 characters for the spaces on either side of the boundary token.
            charPos += docText.length() + str.length() + 2;
        }

        // Second pass: lay out "<doc> <boundary> " for every document.
        char[] buffer = new char[charPos];
        int pos = 0;
        for (String docId : this.mDocIds) {
            String docText = map.get(docId);
            docText.getChars(0, docText.length(), buffer, pos);
            pos += docText.length();
            buffer[pos++] = ' ';
            str.getChars(0, str.length(), buffer, pos);
            pos += str.length();
            buffer[pos++] = ' ';
        }
        this.mTsa = new TokenSuffixArray(new Tokenization(buffer, 0, buffer.length, tokenizerFactory), i, str);
    }

    /**
     * Returns the token suffix array built over the concatenated documents.
     *
     * @return the underlying token suffix array
     */
    public TokenSuffixArray suffixArray() {
        return this.mTsa;
    }

    /**
     * Returns the identifier of the document whose recorded start is the
     * largest one not exceeding the specified position.
     *
     * <p>NOTE(review): the document starts consulted here are accumulated from
     * token counts, whereas the range check below is made against the
     * character length of the text. Confirm with callers which unit the
     * argument is expected to be in.
     *
     * @param i position to look up
     * @return identifier of the document containing the position
     * @throws IndexOutOfBoundsException if the position is negative or greater
     *     than the length of the underlying text, or if there are no documents
     */
    public String textPositionToDocId(int i) {
        int textLength = this.mTsa.tokenization().text().length();
        if (i < 0 || i > textLength) {
            throw new IndexOutOfBoundsException("Position must be >= 0 and <= text.length=" + textLength + " Found textPosition=" + i);
        }
        return this.mDocIds[largestWithoutGoingOver(this.mDocStarts, i)];
    }

    /**
     * Returns the text of the document with the specified identifier, cut out
     * of the concatenated text underlying the suffix array.
     *
     * @param str document identifier
     * @return the text of the document, without the trailing boundary token
     * @throws NullPointerException if there is no document with the identifier
     */
    public String documentText(String str) {
        Integer index = this.mDocIdToIndex.get(str);
        if (index == null) {
            throw new NullPointerException("Unknown document identifier=" + str);
        }
        String text = this.mTsa.tokenization().text();
        int docIndex = index.intValue();
        // Character offsets must be used here: the text is indexed by
        // character, not by token.
        int start = this.mDocTextStarts[docIndex];
        int nextStart = docIndex + 1 == this.mDocTextStarts.length
                ? text.length()
                : this.mDocTextStarts[docIndex + 1];
        // Drop the " <boundary> " suffix that follows every document.
        int end = nextStart - this.mTsa.documentBoundaryToken().length() - 2;
        return text.substring(start, end);
    }

    /**
     * Returns the number of documents in this collection.
     *
     * @return number of documents
     */
    public int numDocuments() {
        return this.mDocStarts.length;
    }

    /**
     * Returns an unmodifiable view of the document identifiers.
     *
     * @return unmodifiable set of document identifiers
     */
    public Set<String> documentNames() {
        return Collections.unmodifiableSet(this.mDocIdToIndex.keySet());
    }

    /**
     * Returns the index of the first token of the specified document.
     *
     * @param str document identifier
     * @return token index at which the document starts, or {@code -1} if there
     *     is no document with the identifier
     */
    public int docStartToken(String str) {
        int docIndex = Arrays.binarySearch(this.mDocIds, str);
        return docIndex < 0 ? -1 : this.mDocStarts[docIndex];
    }

    /**
     * Returns the token index just past the last token of the specified
     * document, i.e. the index of the boundary token that follows it.
     *
     * <p>For the last document the value is derived from the suffix array
     * length; for the others from the start of the next document. The result
     * is never less than 1. NOTE(review): that floor means an empty first
     * document reports an end of 1 rather than 0 -- confirm this is intended.
     *
     * @param str document identifier
     * @return end token index, or {@code -1} if there is no document with the
     *     identifier
     */
    public int docEndToken(String str) {
        int docIndex = Arrays.binarySearch(this.mDocIds, str);
        if (docIndex < 0) {
            return -1;
        }
        boolean isLast = docIndex + 1 == this.mDocIds.length;
        int boundaryIndex = isLast
                ? this.mTsa.suffixArrayLength() - 1
                : this.mDocStarts[docIndex + 1] - 1;
        return Math.max(1, boundaryIndex);
    }

    /**
     * Returns an index of the largest value in the array that does not exceed
     * the specified value. The array must be sorted in ascending order for the
     * result to be meaningful.
     *
     * @param iArr values sorted in ascending order
     * @param i value to look up
     * @return index of the largest element less than or equal to the value, or
     *     {@code -1} if the array is empty or every element exceeds the value
     */
    public static int largestWithoutGoingOver(int[] iArr, int i) {
        if (iArr.length == 0 || i < iArr[0]) {
            return -1;
        }
        int last = iArr.length - 1;
        if (i >= iArr[last]) {
            return last;
        }
        // Invariant: iArr[low] <= i, and every index >= high holds a value > i.
        int low = 0;
        int high = iArr.length;
        while (low + 1 < high) {
            // Unsigned shift keeps the midpoint correct even if low + high overflows.
            int mid = (low + high) >>> 1;
            if (i < iArr[mid]) {
                high = mid;
            } else if (i == iArr[mid]) {
                return mid;
            } else {
                low = mid;
            }
        }
        return low;
    }

    /**
     * Counts the tokens the specified factory produces for the specified text.
     *
     * @param tokenizerFactory factory used to create the tokenizer
     * @param str text to tokenize
     * @return number of tokens produced
     */
    static int tokenCount(TokenizerFactory tokenizerFactory, String str) {
        int count = 0;
        Iterator<String> it = tokenizerFactory.tokenizer(str.toCharArray(), 0, str.length()).iterator();
        while (it.hasNext()) {
            it.next();
            count++;
        }
        return count;
    }
}
