package opennlp.tools.formats.brat;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import java.util.stream.Collectors;
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.formats.AbstractSampleStreamFactory;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.sentdetect.NewlineSentenceDetector;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.ObjectStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;

/* loaded from: input_file:WEB-INF/lib/opennlp-tools-1.9.4.jar:opennlp/tools/formats/brat/BratNameSampleStreamFactory.class */
/**
 * Stream factory that builds an {@link ObjectStream} of {@link NameSample}s from a directory of
 * brat annotated documents. It is registered for the {@code "brat"} format via
 * {@link #registerFactory()}.
 */
public class BratNameSampleStreamFactory extends AbstractSampleStreamFactory<NameSample> {

    /**
     * Command line parameters understood by this factory.
     *
     * <p>{@link #getTokenizerModel()} and {@link #getRuleBasedTokenizer()} are mutually
     * exclusive; {@link BratNameSampleStreamFactory#create(String[])} terminates the tool when
     * both are supplied.
     */
    interface Parameters {
        /** Directory containing the brat data. */
        @ArgumentParser.ParameterDescription(valueName = "bratDataDir", description = "location of brat data dir")
        File getBratDataDir();

        /** Annotation configuration file handed to {@code AnnotationConfiguration.parse}. */
        @ArgumentParser.ParameterDescription(valueName = "annConfFile")
        File getAnnotationConfig();

        /** Optional sentence detector model; a newline based detector is used when absent. */
        @ArgumentParser.OptionalParameter
        @ArgumentParser.ParameterDescription(valueName = "modelFile")
        File getSentenceDetectorModel();

        /** Optional statistical tokenizer model. */
        @ArgumentParser.OptionalParameter
        @ArgumentParser.ParameterDescription(valueName = "modelFile")
        File getTokenizerModel();

        /** Optional rule based tokenizer name; {@code "simple"} and {@code "whitespace"} are accepted. */
        @ArgumentParser.OptionalParameter
        @ArgumentParser.ParameterDescription(valueName = "name")
        String getRuleBasedTokenizer();

        /** Flag forwarded to the {@code BratDocumentStream} constructor; defaults to {@code false}. */
        @ArgumentParser.OptionalParameter(defaultValue = "false")
        @ArgumentParser.ParameterDescription(valueName = "value")
        Boolean getRecursive();

        /** Optional comma separated list of name types. */
        @ArgumentParser.OptionalParameter
        @ArgumentParser.ParameterDescription(valueName = "names")
        String getNameTypes();
    }

    protected BratNameSampleStreamFactory() {
        super(Parameters.class);
    }

    /**
     * Checks whether every supplied reference is set.
     *
     * @param objArr the references to inspect
     * @return {@code true} if none of the elements is {@code null}, {@code false} otherwise
     */
    private boolean notNull(Object... objArr) {
        for (Object obj : objArr) {
            if (obj == null) {
                return false;
            }
        }
        return true;
    }

    /**
     * Creates the name sample stream described by the given command line arguments.
     *
     * @param strArr the raw command line arguments, parsed against {@link Parameters}
     * @return a {@code BratNameSampleStream} over the configured brat data directory
     * @throws TerminateToolException if both a rule based and a statistical tokenizer are
     *     requested, if the annotation configuration or the brat documents cannot be read, if a
     *     model fails to load, or if the rule based tokenizer name is not recognized
     */
    @Override // opennlp.tools.cmdline.ObjectStreamFactory
    public ObjectStream<NameSample> create(String[] strArr) {
        Parameters parameters = ArgumentParser.parse(strArr, Parameters.class);
        if (notNull(parameters.getRuleBasedTokenizer(), parameters.getTokenizerModel())) {
            throw new TerminateToolException(-1, "Either use rule based or statistical tokenizer!");
        }

        // The two I/O steps are guarded separately so that each failure reports its own
        // message and exit code, and so that the causing exception is preserved.
        AnnotationConfiguration annotationConfig;
        try {
            annotationConfig = AnnotationConfiguration.parse(parameters.getAnnotationConfig());
        } catch (IOException e) {
            throw new TerminateToolException(1, "Failed to parse annotation.conf file!", e);
        }

        BratDocumentStream documents;
        try {
            documents = new BratDocumentStream(
                    annotationConfig, parameters.getBratDataDir(), parameters.getRecursive(), null);
        } catch (IOException e) {
            throw new TerminateToolException(-1, e.getMessage(), e);
        }

        SentenceDetector sentenceDetector = createSentenceDetector(parameters);
        Tokenizer tokenizer = createTokenizer(parameters);
        Set<String> nameTypes = parseNameTypes(parameters.getNameTypes());

        return new BratNameSampleStream(sentenceDetector, tokenizer, documents, nameTypes);
    }

    /**
     * Loads the statistical sentence detector if a model was given, otherwise falls back to a
     * {@link NewlineSentenceDetector}.
     */
    private static SentenceDetector createSentenceDetector(Parameters parameters) {
        File modelFile = parameters.getSentenceDetectorModel();
        if (modelFile == null) {
            return new NewlineSentenceDetector();
        }
        try {
            return new SentenceDetectorME(new SentenceModel(modelFile));
        } catch (IOException e) {
            throw new TerminateToolException(-1, "Failed to load sentence detector model!", e);
        }
    }

    /**
     * Selects the tokenizer: a statistical one if a model was given, otherwise the named rule
     * based tokenizer, otherwise the whitespace tokenizer.
     */
    private static Tokenizer createTokenizer(Parameters parameters) {
        File modelFile = parameters.getTokenizerModel();
        if (modelFile != null) {
            try {
                return new TokenizerME(new TokenizerModel(modelFile));
            } catch (IOException e) {
                throw new TerminateToolException(-1, "Failed to load tokenizer model!", e);
            }
        }

        String ruleBasedTokenizer = parameters.getRuleBasedTokenizer();
        if (ruleBasedTokenizer == null) {
            return WhitespaceTokenizer.INSTANCE;
        }
        if ("simple".equals(ruleBasedTokenizer)) {
            return SimpleTokenizer.INSTANCE;
        }
        if ("whitespace".equals(ruleBasedTokenizer)) {
            return WhitespaceTokenizer.INSTANCE;
        }
        throw new TerminateToolException(-1, "Unkown tokenizer: " + ruleBasedTokenizer);
    }

    /**
     * Splits a comma separated list of name types into a set of trimmed entries.
     *
     * @return the set of entries, or {@code null} if the argument is {@code null} or splits into
     *     no entries; the {@code null} is handed to {@code BratNameSampleStream} unchanged
     *     (NOTE(review): presumably that means "no filtering" - confirm against that class)
     */
    private static Set<String> parseNameTypes(String nameTypes) {
        if (nameTypes == null) {
            return null;
        }
        String[] entries = nameTypes.split(",");
        if (entries.length == 0) {
            return null;
        }
        return Arrays.stream(entries).map(String::trim).collect(Collectors.toSet());
    }

    /** Registers this factory for {@link NameSample} streams under the format name {@code "brat"}. */
    public static void registerFactory() {
        StreamFactoryRegistry.registerFactory(NameSample.class, "brat", new BratNameSampleStreamFactory());
    }
}
