/*
 * Decompiled with CFR 0.152.
 */
package org.apache.stanbol.enhancer.engines.opennlp.token.impl;

import java.util.Collections;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.Span;
import org.apache.stanbol.commons.opennlp.OpenNLP;
import org.apache.stanbol.enhancer.nlp.NlpProcessingRole;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
import org.apache.stanbol.enhancer.nlp.model.Section;
import org.apache.stanbol.enhancer.nlp.model.Token;
import org.apache.stanbol.enhancer.nlp.utils.LanguageConfiguration;
import org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class OpenNlpTokenizerEngine
extends AbstractEnhancementEngine<RuntimeException, RuntimeException>
implements ServiceProperties {
    public static final String CONFIG_LANGUAGES = "org.apache.stanbol.enhancer.token.languages";
    private static final String MODEL_NAME_PARAM = "model";
    private static final String SIMPLE_MODEL_NAME = "SIMPLE";
    private static final Map<String, Object> SERVICE_PROPERTIES;
    private static Logger log;
    private LanguageConfiguration languageConfig = new LanguageConfiguration("org.apache.stanbol.enhancer.token.languages", new String[]{"*"});
    private OpenNLP openNLP;
    private AnalysedTextFactory analysedTextFactory;

    public int canEnhance(ContentItem ci) throws EngineException {
        Map.Entry entry = NlpEngineHelper.getPlainText((EnhancementEngine)this, (ContentItem)ci, (boolean)false);
        if (entry == null || entry.getValue() == null) {
            return 0;
        }
        String language = NlpEngineHelper.getLanguage((EnhancementEngine)this, (ContentItem)ci, (boolean)false);
        if (language == null) {
            return 0;
        }
        if (!this.languageConfig.isLanguage(language)) {
            log.trace(" > can NOT enhance ContentItem {} because language {} is not enabled by this engines configuration", (Object)ci, (Object)language);
            return 0;
        }
        if (this.getTokenizer(language) == null) {
            log.trace(" > can NOT tokenize plain text of {} because the tokenizer for language {} is not available.", (Object)ci, (Object)language);
            return 0;
        }
        log.trace(" > can enhance ContentItem {} with language {}", (Object)ci, (Object)language);
        return 2;
    }

    public void computeEnhancements(ContentItem ci) throws EngineException {
        AnalysedText at = NlpEngineHelper.initAnalysedText((EnhancementEngine)this, (AnalysedTextFactory)this.analysedTextFactory, (ContentItem)ci);
        String language = NlpEngineHelper.getLanguage((EnhancementEngine)this, (ContentItem)ci, (boolean)true);
        Tokenizer tokenizer = this.getTokenizer(language);
        if (tokenizer == null) {
            log.warn("Tokenizer for language {} is no longer available. This might happen if the model becomes unavailable during enhancement. If this happens more often it might also indicate an bug in the used EnhancementJobManager implementation as the availability is also checked in the canEnhance(..) method of this Enhancement Engine.");
            return;
        }
        Iterator<AnalysedText> sections = at.getSentences();
        if (!sections.hasNext()) {
            sections = Collections.singleton(at).iterator();
        }
        while (sections.hasNext()) {
            Section section = (Section)sections.next();
            Span[] tokenSpans = tokenizer.tokenizePos(section.getSpan());
            for (int i = 0; i < tokenSpans.length; ++i) {
                Token token = section.addToken(tokenSpans[i].getStart(), tokenSpans[i].getEnd());
                log.trace(" > add {}", (Object)token);
            }
        }
    }

    public Map<String, Object> getServiceProperties() {
        return SERVICE_PROPERTIES;
    }

    protected void activate(ComponentContext ce) throws ConfigurationException {
        log.info("activating POS tagging engine");
        super.activate(ce);
        Dictionary properties = ce.getProperties();
        this.languageConfig.setConfiguration(properties);
    }

    protected void deactivate(ComponentContext context) {
        this.languageConfig.setDefault();
        super.deactivate(context);
    }

    private Tokenizer getTokenizer(String language) throws EngineException {
        TokenizerModel model;
        String modelName = this.languageConfig.getParameter(language, MODEL_NAME_PARAM);
        if (modelName == null) {
            return this.openNLP.getTokenizer(language);
        }
        if (SIMPLE_MODEL_NAME.equals(modelName)) {
            return SimpleTokenizer.INSTANCE;
        }
        try {
            model = (TokenizerModel)this.openNLP.getModel(TokenizerModel.class, modelName, null);
        }
        catch (Exception e) {
            throw new EngineException("Error while loading the configured OpenNLP TokenizerModel '" + modelName + "' (" + ((Object)((Object)this)).getClass().getSimpleName() + " | name=" + this.getName() + ")!", (Throwable)e);
        }
        if (model == null) {
            throw new EngineException("The configured OpenNLP TokenizerModel '" + modelName + " is not available' (" + ((Object)((Object)this)).getClass().getSimpleName() + " | name=" + this.getName() + ")!");
        }
        return new TokenizerME(model);
    }

    static {
        HashMap<String, Integer> props = new HashMap<String, Integer>();
        props.put("org.apache.stanbol.enhancer.engine.order", ServiceProperties.ORDERING_NLP_TOKENIZING);
        props.put("org.apache.stanbol.enhancer.engine.nlp.role", (Integer)NlpProcessingRole.Tokenizing);
        SERVICE_PROPERTIES = Collections.unmodifiableMap(props);
        log = LoggerFactory.getLogger(OpenNlpTokenizerEngine.class);
    }

    protected void bindOpenNLP(OpenNLP openNLP) {
        this.openNLP = openNLP;
    }

    protected void unbindOpenNLP(OpenNLP openNLP) {
        if (this.openNLP == openNLP) {
            this.openNLP = null;
        }
    }

    protected void bindAnalysedTextFactory(AnalysedTextFactory analysedTextFactory) {
        this.analysedTextFactory = analysedTextFactory;
    }

    protected void unbindAnalysedTextFactory(AnalysedTextFactory analysedTextFactory) {
        if (this.analysedTextFactory == analysedTextFactory) {
            this.analysedTextFactory = null;
        }
    }
}

