package com.aliasi.tokenizer;

/* loaded from: input_file:lib/lingpipe-4.1.0.jar:com/aliasi/tokenizer/IndoEuropeanTokenizer.class */
/**
 * Tokenizer over a slice of a character array that splits text into
 * alphanumeric words, numbers, and punctuation.
 *
 * <p>Whitespace (as defined by {@link Character#isWhitespace(char)}) separates
 * tokens and is never part of one. Starting from the first non-whitespace
 * character, a token is formed as follows:
 * <ul>
 *   <li>a run of {@code '.'}, a run of {@code '-'}, or a run of {@code '='}
 *       is a single token;</li>
 *   <li>two consecutive single quotes, or two consecutive backticks, are a
 *       single token; a lone quote or backtick is a token by itself;</li>
 *   <li>a letter starts a maximal run of letters and digits;</li>
 *   <li>a digit starts a number: a run of digits which may contain
 *       {@code '.'} or {@code ','} separators, each of which must be
 *       immediately followed by a digit. If a letter is met before any
 *       separator has been consumed, the token continues as a run of letters
 *       and digits instead;</li>
 *   <li>any other character is a single-character token.</li>
 * </ul>
 *
 * <p>Characters in the Devanagari block are treated as letters in addition to
 * whatever {@link Character#isLetter(char)} accepts.
 *
 * <p>Instances hold mutable cursor state and make no attempt at
 * synchronization.
 */
class IndoEuropeanTokenizer extends Tokenizer {
    /** First code unit of the Unicode Devanagari block (U+0900). */
    private static final int DEVANAGARI_FIRST = 0x0900;

    /** Last code unit of the Unicode Devanagari block (U+097F). */
    private static final int DEVANAGARI_LAST = 0x097F;

    /** Underlying characters; not copied by the array constructor. */
    private final char[] mChars;

    /** Index one past the last character of the slice. */
    private final int mLastPosition;

    /** Index of the first character of the slice. */
    private final int mStartPosition;

    /** Index of the next character to be consumed. */
    private int mPosition;

    /** Index in {@link #mChars} at which the token being built starts. */
    private int mTokenStart = -1;

    /** Slice-relative start of the last returned token, or -1 if none. */
    private int mLastTokenStartPosition = -1;

    /** Slice-relative end (exclusive) of the last returned token, or -1 if none. */
    private int mLastTokenEndPosition = -1;

    /**
     * Creates a tokenizer over the specified slice of a character array.
     * The array is used directly, not copied.
     *
     * @param cs underlying characters
     * @param offset index of the first character of the slice
     * @param length number of characters in the slice
     * @throws IllegalArgumentException if the offset or length is negative,
     *     or if the slice extends past the end of the array
     * @throws NullPointerException if the array is {@code null}
     */
    public IndoEuropeanTokenizer(char[] cs, int offset, int length) {
        // Compare against cs.length - length rather than offset + length so
        // that very large arguments cannot overflow past the check.
        if (offset < 0 || length < 0 || offset > cs.length - length) {
            throw new IllegalArgumentException("Illegal slice. cs.length=" + cs.length + " offset=" + offset + " length=" + length);
        }
        this.mChars = cs;
        this.mStartPosition = offset;
        this.mPosition = offset;
        this.mLastPosition = offset + length;
    }

    /**
     * Creates a tokenizer over all of the characters of the specified string.
     *
     * @param str text to tokenize
     * @throws NullPointerException if the string is {@code null}
     */
    public IndoEuropeanTokenizer(String str) {
        this(str.toCharArray(), 0, str.length());
    }

    /**
     * Creates a tokenizer over the current contents of the specified builder.
     *
     * @param sb builder whose contents are tokenized
     * @throws NullPointerException if the builder is {@code null}
     */
    public IndoEuropeanTokenizer(StringBuilder sb) {
        this(sb.toString());
    }

    /**
     * Returns the start of the most recently returned token, measured in
     * characters from the start of the slice.
     *
     * @return start offset of the last token, or -1 if no token has been
     *     returned yet
     */
    @Override
    public int lastTokenStartPosition() {
        return this.mLastTokenStartPosition;
    }

    /**
     * Returns the end (exclusive) of the most recently returned token,
     * measured in characters from the start of the slice.
     *
     * @return end offset of the last token, or -1 if no token has been
     *     returned yet
     */
    @Override
    public int lastTokenEndPosition() {
        return this.mLastTokenEndPosition;
    }

    /**
     * Consumes and returns the whitespace starting at the current position.
     *
     * @return the run of whitespace characters consumed, possibly empty
     */
    @Override
    public String nextWhitespace() {
        int start = this.mPosition;
        skipWhitespace();
        return new String(this.mChars, start, this.mPosition - start);
    }

    /**
     * Returns the next token, skipping any whitespace that precedes it.
     *
     * @return the next token, or {@code null} if only whitespace (or nothing)
     *     remains in the slice
     */
    @Override
    public String nextToken() {
        skipWhitespace();
        if (!hasMoreCharacters()) {
            return null;
        }
        this.mTokenStart = this.mPosition;
        char first = this.mChars[this.mPosition++];
        switch (first) {
            case '.':
            case '-':
            case '=':
                // A run of the same punctuation character is one token.
                while (currentCharEquals(first)) {
                    this.mPosition++;
                }
                return currentToken();
            case '\'':
            case '`':
                // At most one repeat is absorbed, so a doubled quote or
                // backtick becomes a single two-character token.
                if (currentCharEquals(first)) {
                    this.mPosition++;
                }
                return currentToken();
            default:
                break;
        }
        // The letter test must come first: some characters satisfy both
        // isLetter (via the Devanagari range) and Character.isDigit.
        if (isLetter(first)) {
            return alphaNumToken();
        }
        if (Character.isDigit(first)) {
            return numToken();
        }
        return currentToken();
    }

    /**
     * Tokenizes the specified string and returns all of its tokens.
     *
     * @param str text to tokenize
     * @return the tokens produced by the inherited {@code tokenize()} method
     */
    public static String[] tokenize(String str) {
        return new IndoEuropeanTokenizer(str).tokenize();
    }

    /** Returns true if the character is a letter or lies in the Devanagari block. */
    private static boolean isLetter(char c) {
        return Character.isLetter(c) || devanagari(c);
    }

    /** Returns true if the character lies in the Devanagari block. */
    private static boolean devanagari(char c) {
        return c >= DEVANAGARI_FIRST && c <= DEVANAGARI_LAST;
    }

    /** Returns true if the cursor has not reached the end of the slice. */
    private boolean hasMoreCharacters() {
        return this.mPosition < this.mLastPosition;
    }

    /** Returns the character under the cursor; callers check bounds first. */
    private char currentChar() {
        return this.mChars[this.mPosition];
    }

    /** Returns true if a character remains and it equals the one specified. */
    private boolean currentCharEquals(char c) {
        return hasMoreCharacters() && currentChar() == c;
    }

    /** Advances the cursor past any whitespace. */
    private void skipWhitespace() {
        while (hasMoreCharacters() && Character.isWhitespace(currentChar())) {
            this.mPosition++;
        }
    }

    /**
     * Records the slice-relative positions of the token spanning from the
     * token start to the cursor, and returns its text.
     */
    private String currentToken() {
        int length = this.mPosition - this.mTokenStart;
        this.mLastTokenStartPosition = this.mTokenStart - this.mStartPosition;
        this.mLastTokenEndPosition = this.mLastTokenStartPosition + length;
        return new String(this.mChars, this.mTokenStart, length);
    }

    /** Extends the current token over a maximal run of letters and digits. */
    private String alphaNumToken() {
        while (hasMoreCharacters() && (isLetter(currentChar()) || Character.isDigit(currentChar()))) {
            this.mPosition++;
        }
        return currentToken();
    }

    /**
     * Extends a token that began with a digit. Digits are consumed until a
     * letter (the token continues as alphanumeric), a {@code '.'} or
     * {@code ','} (the token continues as a punctuated number), or any other
     * character (the token ends) is met.
     */
    private String numToken() {
        while (hasMoreCharacters()) {
            char c = currentChar();
            if (isLetter(c)) {
                this.mPosition++;
                return alphaNumToken();
            }
            if (!Character.isDigit(c)) {
                return (c == '.' || c == ',') ? numPunctToken() : currentToken();
            }
            this.mPosition++;
        }
        return currentToken();
    }

    /**
     * Extends a numeric token across digits and {@code '.'}/{@code ','}
     * separators. A separator is only consumed when a digit immediately
     * follows it, so trailing punctuation is left for the next token.
     */
    private String numPunctToken() {
        while (hasMoreCharacters()) {
            char c = currentChar();
            if (Character.isDigit(c)) {
                this.mPosition++;
                continue;
            }
            if (c != '.' && c != ',') {
                return currentToken();
            }
            int next = this.mPosition + 1;
            if (next >= this.mLastPosition || !Character.isDigit(this.mChars[next])) {
                // Separator not followed by a digit: it is not part of the number.
                return currentToken();
            }
            this.mPosition = next;
        }
        return currentToken();
    }
}
