package com.mandarintools;

import edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenImpl;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenList;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenListImpl;
import edu.harvard.wcfia.yoshikoder.document.tokenizer.Tokenizer;
import java.text.BreakIterator;
import java.util.Locale;

/* loaded from: input_file:com/mandarintools/BITokenizerImpl.class */
/**
 * {@link Tokenizer} that splits text into words using the JDK's
 * {@link BreakIterator#getWordInstance(Locale) word BreakIterator} for a
 * configurable locale.
 *
 * <p>Only spans that begin with a letter or digit are reported as tokens;
 * whitespace and punctuation spans produced by the break iterator are skipped.
 */
public class BITokenizerImpl implements Tokenizer {
    /** Locale used to select the word-break rules; replaceable via {@link #setLocale(Locale)}. */
    private Locale locale;

    /**
     * Creates a tokenizer that uses the word-break rules of the given locale.
     *
     * @param locale locale handed to {@link BreakIterator#getWordInstance(Locale)}
     */
    public BITokenizerImpl(Locale locale) {
        this.locale = locale;
    }

    /** Creates a tokenizer for the JVM's default locale. */
    public BITokenizerImpl() {
        this(Locale.getDefault());
    }

    /**
     * Returns the locales handled by this tokenizer.
     *
     * @return a fresh one-element array holding the current locale
     */
    @Override
    public Locale[] getLocales() {
        return new Locale[]{this.locale};
    }

    /**
     * Returns the locale currently used for word breaking.
     *
     * @return the current locale
     */
    public Locale getLocale() {
        return this.locale;
    }

    /**
     * Replaces the locale used for subsequent calls to {@link #getTokens(String)}.
     *
     * @param locale the new locale
     */
    public void setLocale(Locale locale) {
        this.locale = locale;
    }

    /**
     * Splits {@code str} into word tokens.
     *
     * <p>Each token carries the matched text, its start offset in {@code str}
     * and the offset just past its last character. A span is kept only when its
     * first code point is a letter or digit.
     *
     * @param str the text to tokenize
     * @return the tokens in order of appearance; empty when no span qualifies
     * @throws NullPointerException if {@code str} is {@code null}
     */
    @Override
    public TokenList getTokens(String str) {
        TokenListImpl tokens = new TokenListImpl();

        // A fresh iterator per call keeps the tokenizer free of per-call state.
        BreakIterator words = BreakIterator.getWordInstance(this.locale);
        words.setText(str);

        int start = words.first();
        for (int end = words.next(); end != BreakIterator.DONE; start = end, end = words.next()) {
            String span = str.substring(start, end);
            // Test the full code point, not the first UTF-16 unit: a word that
            // begins with a supplementary character (e.g. a CJK Extension B
            // ideograph) starts with a surrogate, which is never a letter.
            if (Character.isLetterOrDigit(span.codePointAt(0))) {
                tokens.add(new TokenImpl(span, start, end));
            }
        }
        return tokens;
    }
}
