/*
 * Decompiled with CFR 0.152.
 */
package org.apache.stanbol.commons.opennlp;

import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
import opennlp.tools.chunker.Chunker;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinder;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTagger;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.InvalidFormatException;
import org.apache.commons.io.IOUtils;
import org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Loads OpenNLP models through a {@link DataFileProvider} and creates the
 * matching OpenNLP components (sentence detector, tokenizer, POS tagger,
 * chunker, name finder).
 * <p>
 * Successfully loaded models are cached by file name in {@link #models}. The
 * cache is a plain {@link HashMap}; this class does not synchronize access
 * to it.
 */
public class OpenNLP {
    private static final String DOWNLOAD_ROOT = "http://opennlp.sourceforge.net/models-1.5/";
    private static final Charset UTF8 = Charset.forName("UTF-8");

    private final Logger log = LoggerFactory.getLogger(this.getClass());
    private DataFileProvider dataFileProvider;
    /** Cache of already loaded models, keyed by the model file name. */
    protected Map<String, Object> models = new HashMap<String, Object>();

    /**
     * Creates an instance without a {@link DataFileProvider}. A provider has
     * to be supplied via {@link #bindDataFileProvider(DataFileProvider)}
     * before the default {@link #lookupModelStream(String, Map)} can be used.
     */
    public OpenNLP() {
    }

    /**
     * Creates an instance that loads model files via the given provider.
     *
     * @param dataFileProvider the provider used to look up model files
     */
    public OpenNLP(DataFileProvider dataFileProvider) {
        this();
        this.dataFileProvider = dataFileProvider;
    }

    /**
     * Loads the sentence detection model <code>{language}-sent.bin</code>.
     *
     * @param language the language prefix of the model file
     * @return the model or <code>null</code> if the model file is not available
     * @throws InvalidFormatException if the model data are not valid
     * @throws IOException on any error while reading the model data
     */
    public SentenceModel getSentenceModel(String language) throws InvalidFormatException, IOException {
        return this.initModel(String.format("%s-sent.bin", language), SentenceModel.class);
    }

    /**
     * Creates a sentence detector for the given language.
     *
     * @param language the language prefix of the model file
     * @return a new {@link SentenceDetectorME} or <code>null</code> if no
     *         sentence model is available
     * @throws IOException on any error while loading the model
     */
    public SentenceDetector getSentenceDetector(String language) throws IOException {
        SentenceModel sentModel = this.getSentenceModel(language);
        if (sentModel == null) {
            this.log.debug("No Sentence Detection Model for language '{}'", language);
            return null;
        }
        return new SentenceDetectorME(sentModel);
    }

    /**
     * Loads the name finder model <code>{language}-ner-{type}.bin</code>.
     *
     * @param type the entity type part of the model file name
     * @param language the language prefix of the model file
     * @return the model or <code>null</code> if the model file is not available
     * @throws InvalidFormatException if the model data are not valid
     * @throws IOException on any error while reading the model data
     */
    public TokenNameFinderModel getNameModel(String type, String language) throws InvalidFormatException, IOException {
        return this.initModel(String.format("%s-ner-%s.bin", language, type), TokenNameFinderModel.class);
    }

    /**
     * Creates a name finder for the given entity type and language.
     *
     * @param type the entity type part of the model file name
     * @param language the language prefix of the model file
     * @return a new {@link NameFinderME} or <code>null</code> if no model is
     *         available
     * @throws IOException on any error while loading the model
     */
    public TokenNameFinder getNameFinder(String type, String language) throws IOException {
        TokenNameFinderModel model = this.getNameModel(type, language);
        if (model == null) {
            this.log.debug("TokenNameFinder model for type {} and langauge {} not present", type, language);
            return null;
        }
        return new NameFinderME(model);
    }

    /**
     * Loads the tokenizer model <code>{language}-token.bin</code>.
     *
     * @param language the language prefix of the model file
     * @return the model or <code>null</code> if the model file is not available
     * @throws InvalidFormatException if the model data are not valid
     * @throws IOException on any error while reading the model data
     */
    public TokenizerModel getTokenizerModel(String language) throws InvalidFormatException, IOException {
        return this.initModel(String.format("%s-token.bin", language), TokenizerModel.class);
    }

    /**
     * Returns a tokenizer for the given language. This method never returns
     * <code>null</code>: if the language is <code>null</code>, no tokenizer
     * model is available, or loading the model fails with an
     * {@link IOException}, {@link SimpleTokenizer#INSTANCE} is returned.
     *
     * @param language the language prefix of the model file, may be
     *        <code>null</code>
     * @return a {@link TokenizerME} for the language or the simple tokenizer
     */
    public Tokenizer getTokenizer(String language) {
        Tokenizer tokenizer = null;
        if (language != null) {
            try {
                TokenizerModel model = this.getTokenizerModel(language);
                if (model != null) {
                    // reuse the model just obtained instead of a second lookup
                    tokenizer = new TokenizerME(model);
                }
            }
            catch (IOException e) {
                // also covers InvalidFormatException, which is an IOException
                this.log.warn("Unable to load Tokenizer Model for " + language + ": Will use Simple Tokenizer instead", e);
            }
        }
        if (tokenizer == null) {
            this.log.debug("Use Simple Tokenizer for language {}", language);
            tokenizer = SimpleTokenizer.INSTANCE;
        } else {
            this.log.debug("Use ME Tokenizer for language {}", language);
        }
        return tokenizer;
    }

    /**
     * Loads the part-of-speech model for the given language. The perceptron
     * model (<code>{language}-pos-perceptron.bin</code>) is tried first; if it
     * is missing or fails to load, the maxent model
     * (<code>{language}-pos-maxent.bin</code>) is tried.
     *
     * @param language the language prefix of the model file
     * @return the model or <code>null</code> if neither model is available
     * @throws IOException if loading the maxent model fails. When loading the
     *         perceptron model failed as well, that first exception is the one
     *         thrown.
     * @throws InvalidFormatException if the model data are not valid
     */
    public POSModel getPartOfSpeachModel(String language) throws IOException, InvalidFormatException {
        POSModel model = null;
        IOException perceptronFailure = null;
        try {
            model = this.initModel(String.format("%s-pos-perceptron.bin", language), POSModel.class);
        }
        catch (IOException e) {
            perceptronFailure = e;
            this.log.warn("Unable to laod preceptron based POS model for " + language, e);
        }
        if (model == null) {
            this.log.debug("No perceptron based POS model for language {} available. Will try to load maxent model", language);
            try {
                model = this.initModel(String.format("%s-pos-maxent.bin", language), POSModel.class);
            }
            catch (IOException e) {
                // report the failure of the preferred model if there was one
                throw perceptronFailure != null ? perceptronFailure : e;
            }
        }
        return model;
    }

    /**
     * Creates a part-of-speech tagger for the given language.
     *
     * @param language the language prefix of the model file
     * @return a new {@link POSTaggerME} or <code>null</code> if no POS model
     *         is available
     * @throws IOException on any error while loading the model
     */
    public POSTagger getPartOfSpeechTagger(String language) throws IOException {
        POSModel posModel = this.getPartOfSpeachModel(language);
        if (posModel == null) {
            this.log.debug("No POS Model for language '{}'", language);
            return null;
        }
        return new POSTaggerME(posModel);
    }

    /**
     * Loads (or returns the cached) model with the given file name.
     *
     * @param <T> the model type
     * @param modelType the class of the model; it needs a public constructor
     *        taking an {@link InputStream}
     * @param modelName the file name of the model
     * @param properties additional properties passed to the model lookup, may
     *        be <code>null</code>. Missing "Description", "Model Type" and
     *        "Download Location" entries are filled with defaults on a copy;
     *        the passed map is not modified.
     * @return the model or <code>null</code> if the model file is not available
     * @throws InvalidFormatException if the model data are not valid
     * @throws IOException on any error while reading the model data
     * @throws IllegalStateException if a model of an incompatible type is
     *         already cached under that name or the model can not be created
     */
    public <T> T getModel(Class<T> modelType, String modelName, Map<String, String> properties) throws InvalidFormatException, IOException {
        return this.initModel(modelName, modelType, properties);
    }

    /**
     * Loads the chunker model <code>{language}-chunker.bin</code>.
     *
     * @param language the language prefix of the model file
     * @return the model or <code>null</code> if the model file is not available
     * @throws InvalidFormatException if the model data are not valid
     * @throws IOException on any error while reading the model data
     */
    public ChunkerModel getChunkerModel(String language) throws InvalidFormatException, IOException {
        return this.initModel(String.format("%s-chunker.bin", language), ChunkerModel.class);
    }

    /**
     * Creates a chunker for the given language.
     *
     * @param language the language prefix of the model file
     * @return a new {@link ChunkerME} or <code>null</code> if no chunker model
     *         is available
     * @throws IOException on any error while loading the model
     */
    public Chunker getChunker(String language) throws IOException {
        ChunkerModel chunkerModel = this.getChunkerModel(language);
        if (chunkerModel == null) {
            this.log.debug("No Chunker Model for language {}", language);
            return null;
        }
        return new ChunkerME(chunkerModel);
    }

    /** Same as {@link #initModel(String, Class, Map)} without extra properties. */
    private <T> T initModel(String name, Class<T> modelType) throws InvalidFormatException, IOException {
        return this.initModel(name, modelType, null);
    }

    /**
     * Returns the cached model for the name or loads it by reflectively
     * calling the <code>(InputStream)</code> constructor of the model type
     * with the stream returned by {@link #lookupModelStream(String, Map)}.
     *
     * @return the model, or <code>null</code> if the lookup threw an
     *         {@link IOException} or returned no stream
     * @throws IllegalStateException if the cached model has an incompatible
     *         type or the model instance can not be created
     */
    private <T> T initModel(String name, Class<T> modelType, Map<String, String> modelProperties) throws InvalidFormatException, IOException {
        Object cached = this.models.get(name);
        if (cached != null) {
            if (modelType.isInstance(cached)) {
                return modelType.cast(cached);
            }
            throw new IllegalStateException(String.format("Incompatible Model Types for name '%s': present=%s | requested=%s", name, cached.getClass(), modelType));
        }

        // work on a copy so that the map of the caller is never modified
        Map<String, String> properties = modelProperties != null
                ? new HashMap<String, String>(modelProperties)
                : new HashMap<String, String>();
        putDefault(properties, "Description", "Statistical model for OpenNLP");
        putDefault(properties, "Model Type", modelType.getSimpleName());
        putDefault(properties, "Download Location", DOWNLOAD_ROOT + name);

        InputStream modelDataStream;
        try {
            modelDataStream = this.lookupModelStream(name, properties);
        }
        catch (IOException e) {
            // an unavailable model is reported as null, but keep the cause in the log
            this.log.debug("Unable to load Resource " + name + " via the DataFileProvider", e);
            return null;
        }
        if (modelDataStream == null) {
            this.log.debug("Unable to load Resource {} via the DataFileProvider", name);
            return null;
        }

        T built;
        try {
            Constructor<T> constructor = modelType.getConstructor(InputStream.class);
            built = constructor.newInstance(modelDataStream);
        }
        catch (SecurityException e) {
            throw creationFailure(modelType, name, e);
        }
        catch (NoSuchMethodException e) {
            throw creationFailure(modelType, name, e);
        }
        catch (IllegalArgumentException e) {
            throw creationFailure(modelType, name, e);
        }
        catch (InstantiationException e) {
            throw creationFailure(modelType, name, e);
        }
        catch (IllegalAccessException e) {
            throw creationFailure(modelType, name, e);
        }
        catch (InvocationTargetException e) {
            // unwrap the checked exceptions declared by this method
            Throwable cause = e.getCause();
            if (cause instanceof InvalidFormatException) {
                throw (InvalidFormatException) cause;
            }
            if (cause instanceof IOException) {
                throw (IOException) cause;
            }
            throw creationFailure(modelType, name, e);
        }
        finally {
            IOUtils.closeQuietly(modelDataStream);
        }
        this.models.put(name, built);
        return built;
    }

    /** Adds the key/value pair only if the map does not contain the key yet. */
    private static void putDefault(Map<String, String> properties, String key, String value) {
        if (!properties.containsKey(key)) {
            properties.put(key, value);
        }
    }

    /** Builds the exception used for all failures to instantiate a model. */
    private static IllegalStateException creationFailure(Class<?> modelType, String name, Throwable cause) {
        return new IllegalStateException(String.format("Unable to create %s for %s!", modelType.getSimpleName(), name), cause);
    }

    /**
     * Looks up the data of a model via the {@link DataFileProvider}.
     * Subclasses may override this to load models from another source.
     *
     * @param modelName the file name of the model
     * @param properties the properties passed on to the provider
     * @return the stream as returned by the provider; the caller in this
     *         class treats <code>null</code> as "not available"
     * @throws IOException if thrown by the provider
     * @throws IllegalStateException if no {@link DataFileProvider} is set
     */
    protected InputStream lookupModelStream(String modelName, Map<String, String> properties) throws IOException {
        // read the field once: it is reset to null by unbindDataFileProvider
        DataFileProvider provider = this.dataFileProvider;
        if (provider == null) {
            throw new IllegalStateException("No DataFileProvider available to load model '" + modelName + "'");
        }
        return provider.getInputStream(null, modelName, properties);
    }

    /**
     * Replaces unwanted bytes of the UTF-8 encoded text with spaces: all
     * bytes with an unsigned value below 32 except tab, line feed and
     * carriage return, and all bytes with an unsigned value of 253 or above.
     * All other bytes, including those of multi-byte encoded characters, are
     * kept unchanged.
     *
     * @param text the text to clean, may be <code>null</code>
     * @return the cleaned text or <code>null</code> if the text was
     *         <code>null</code>
     */
    protected static String removeNonUtf8CompliantCharacters(String text) {
        if (text == null) {
            return null;
        }
        byte[] bytes = text.getBytes(UTF8);
        for (int i = 0; i < bytes.length; ++i) {
            // Java bytes are signed: mask to the unsigned value, otherwise every
            // byte of a multi-byte character is negative and would be replaced
            int ch = bytes[i] & 0xFF;
            boolean inAllowedRange = ch > 31 && ch < 253;
            boolean allowedControl = ch == '\t' || ch == '\n' || ch == '\r';
            if (!inAllowedRange && !allowedControl) {
                bytes[i] = (byte) ' ';
            }
        }
        return new String(bytes, UTF8);
    }

    /**
     * Sets the provider used to look up model files.
     *
     * @param dataFileProvider the provider to use
     */
    protected void bindDataFileProvider(DataFileProvider dataFileProvider) {
        this.dataFileProvider = dataFileProvider;
    }

    /**
     * Clears the provider, but only if the given instance is the one
     * currently in use.
     *
     * @param dataFileProvider the provider to remove
     */
    protected void unbindDataFileProvider(DataFileProvider dataFileProvider) {
        if (this.dataFileProvider == dataFileProvider) {
            this.dataFileProvider = null;
        }
    }
}

