package edu.usc.ict.npc.editor.model.processor.text;

import com.leuski.lucene.analysis.AlphaNumTokenFilter;
import com.leuski.lucene.analysis.ApostrophFilter;
import com.leuski.lucene.analysis.EmptyTextFilter;
import com.leuski.lucene.analysis.IrregularVerbs;
import com.leuski.lucene.analysis.StripTokenTypeFilter;
import com.leuski.lucene.analysis.UniversalTokenizer;
import com.leuski.lucene.analysis.XMLSentenceFilter;
import edu.usc.ict.dialog.model.Category;
import edu.usc.ict.dialog.model.Utterance;
import edu.usc.ict.npc.editor.model.UtteranceIndexer;
import edu.usc.ict.npc.editor.model.processor.Processor;
import edu.usc.ict.npc.editor.model.processor.ProcessorProvider;
import edu.usc.ict.saso.framebank.AbstractAnalyzer;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.KStemFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;

/* loaded from: input_file:edu/usc/ict/npc/editor/model/processor/text/TextProcessorProvider.class */
public class TextProcessorProvider extends ProcessorProvider {

    /* loaded from: input_file:edu/usc/ict/npc/editor/model/processor/text/TextProcessorProvider$Analyzer.class */
    /**
     * Analyzer for plain-text input. Text is tokenized with a {@link UniversalTokenizer} and
     * the tokens are then passed through the filter chain assembled by
     * {@link #makeFilterPipeline(String, TokenStream)}.
     */
    public static class Analyzer extends AbstractAnalyzer {
        /** When {@code true}, a {@link KStemFilter} stage is added to the chain. */
        private boolean mStemming = true;
        /** When {@code true}, an {@link IrregularVerbs.ConflationFilter} stage is added to the chain. */
        private boolean mConflatingVerbs = true;
        /** Words given to the {@link StopFilter}; that stage is skipped when this is null or empty. */
        private String[] mStopWords = {"the", "a", "an"};

        /**
         * Builds the filtered token stream for text supplied through a reader.
         *
         * @param str    forwarded unchanged to {@link #makeFilterPipeline(String, TokenStream)}
         * @param reader source of the text to tokenize
         * @return the tokenizer output wrapped in the filter chain
         * @throws RuntimeException wrapping the {@link IOException} if the tokenizer cannot be created
         */
        public TokenStream tokenStream(String str, Reader reader) {
            final UniversalTokenizer tokenizer;
            try {
                tokenizer = new UniversalTokenizer(reader);
            } catch (IOException cause) {
                // Surface the checked failure as unchecked, keeping the original cause.
                throw new RuntimeException(cause);
            }
            return makeFilterPipeline(str, tokenizer);
        }

        /**
         * Builds the filtered token stream for text that is already held in a string.
         *
         * @param str  forwarded unchanged to {@link #makeFilterPipeline(String, TokenStream)}
         * @param str2 the text to tokenize
         * @return the tokenizer output wrapped in the filter chain
         */
        public TokenStream tokenStream(String str, String str2) {
            UniversalTokenizer tokenizer = new UniversalTokenizer(str2);
            return makeFilterPipeline(str, tokenizer);
        }

        /* JADX INFO: Access modifiers changed from: protected */
        /**
         * Wraps a token stream in the filter chain of this analyzer. Stages, in the order the
         * tokens pass through them: {@link LowerCaseFilter}, {@link ApostrophFilter},
         * {@link XMLSentenceFilter}, {@link AlphaNumTokenFilter}, then the optional
         * {@link KStemFilter}, {@link IrregularVerbs.ConflationFilter} and {@link StopFilter}
         * stages, then two {@link StripTokenTypeFilter} stages and a final {@link EmptyTextFilter}.
         *
         * @param str         not read by this implementation
         * @param tokenStream the raw token source to wrap
         * @return the outermost filter of the assembled chain
         */
        public TokenStream makeFilterPipeline(String str, TokenStream tokenStream) {
            // Mandatory front of the chain, innermost stage first.
            LowerCaseFilter lowerCased = new LowerCaseFilter(tokenStream);
            ApostrophFilter apostrophes = new ApostrophFilter(lowerCased);
            XMLSentenceFilter sentences = new XMLSentenceFilter(apostrophes);
            TokenStream pipeline = new AlphaNumTokenFilter(sentences);

            // Optional stages, each wrapping whatever has been built so far.
            if (mStemming) {
                pipeline = new KStemFilter(pipeline);
            }
            if (mConflatingVerbs) {
                pipeline = new IrregularVerbs.ConflationFilter(pipeline);
            }
            final boolean hasStopWords = mStopWords != null && mStopWords.length != 0;
            if (hasStopWords) {
                pipeline = new StopFilter(pipeline, mStopWords);
            }

            // Mandatory tail: two strip stages for the listed token types, then EmptyTextFilter.
            StripTokenTypeFilter withoutSentenceEdges =
                    new StripTokenTypeFilter(pipeline, new String[]{"beginningOfSentence", "endOfSentence"});
            StripTokenTypeFilter withoutSentenceBreaks =
                    new StripTokenTypeFilter(withoutSentenceEdges, new String[]{"sentenceBreak"});
            return new EmptyTextFilter(withoutSentenceBreaks);
        }

        /**
         * Returns the fixed label of this analyzer.
         *
         * @return the string {@code "Text"}
         */
        public String toString() {
            return "Text";
        }
    }

    /* loaded from: input_file:edu/usc/ict/npc/editor/model/processor/text/TextProcessorProvider$Indexer.class */
    /**
     * {@link Processor} subclass used for text. It adds no behaviour of its own: each
     * constructor passes its arguments straight to the {@code Processor} constructor with the
     * same parameter list.
     *
     * @param <C> the utterance type this processor is parameterized with
     */
    public static class Indexer<C extends Utterance> extends Processor<C> {

        /**
         * Creates an indexer from an analyzer and a set of categories.
         *
         * @param analyzer analyzer handed to {@link Processor}
         * @param set      categories handed to {@link Processor}
         */
        public Indexer(final org.apache.lucene.analysis.Analyzer analyzer, final Set<Category> set) {
            super(analyzer, set);
        }

        /**
         * Creates an indexer from an analyzer, a string and a set of categories.
         *
         * @param analyzer analyzer handed to {@link Processor}
         * @param str      string handed to {@link Processor}; its meaning is defined there
         * @param set      categories handed to {@link Processor}
         */
        public Indexer(final org.apache.lucene.analysis.Analyzer analyzer, final String str,
                final Set<Category> set) {
            super(analyzer, str, set);
        }
    }

    /* renamed from: newInstance, reason: merged with bridge method [inline-methods] */
    /**
     * Creates the indexer supplied by this provider: an {@link Indexer} built around a newly
     * constructed {@link Analyzer}.
     *
     * @return a new indexer; {@code null} is passed as its category set
     */
    public UtteranceIndexer<Utterance> m30newInstance() {
        // Explicit type argument instead of the raw type, so the returned value is a properly
        // typed Indexer<Utterance> and not the result of an unchecked conversion.
        return new Indexer<Utterance>(new Analyzer(), null);
    }
}
