/*
 * Decompiled with CFR 0.152.
 */
package org.apache.any23.encoding;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.any23.encoding.EncodingDetector;
import org.apache.any23.encoding.EncodingUtils;
import org.apache.tika.detect.TextStatistics;
import org.apache.tika.parser.txt.CharsetDetector;
import org.apache.tika.parser.txt.CharsetMatch;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.DocumentType;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.PseudoTextElement;
import org.jsoup.nodes.TextNode;
import org.jsoup.parser.ParseError;
import org.jsoup.parser.ParseErrorList;
import org.jsoup.parser.Parser;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;

public class TikaEncodingDetector
implements EncodingDetector {
    private static final String TAG_CHARS = "< />";
    private static final byte[] TAG_BYTES = "< />".getBytes(StandardCharsets.UTF_8);
    private static final Node[] EMPTY_NODES = new Node[0];

    public String guessEncoding(InputStream input) throws IOException {
        return this.guessEncoding(input, null);
    }

    private static Charset guessEncoding(InputStream is, Charset declared) throws IOException {
        TextStatistics stats;
        if (!is.markSupported()) {
            is = new BufferedInputStream(is);
        }
        if ((stats = TikaEncodingDetector.computeAndReset(is, EncodingUtils::stats)).looksLikeUTF8()) {
            return StandardCharsets.UTF_8;
        }
        if ((declared = EncodingUtils.correctVariant(stats, declared)) != null) {
            return declared;
        }
        String iso_8859_1 = TikaEncodingDetector.computeAndReset(is, EncodingUtils::iso_8859_1);
        Charset xmlCharset = EncodingUtils.xmlCharset(stats, iso_8859_1);
        if (xmlCharset != null) {
            return xmlCharset;
        }
        ParseErrorList htmlErrors = ParseErrorList.tracking((int)Integer.MAX_VALUE);
        Document doc = TikaEncodingDetector.parseFragment(iso_8859_1, htmlErrors);
        Charset htmlCharset = EncodingUtils.htmlCharset(stats, (Element)doc);
        if (htmlCharset != null) {
            return htmlCharset;
        }
        if (stats.countEightBit() == 0) {
            return StandardCharsets.UTF_8;
        }
        long openTags = TikaEncodingDetector.countTags((Node)doc);
        long badTags = htmlErrors.stream().map(ParseError::getErrorMessage).filter(err -> err != null && err.matches(".*'[</>]'.*")).count();
        boolean filterInput = true;
        if (openTags < 5L || openTags / 5L < badTags) {
            filterInput = false;
        } else {
            String wholeText = TikaEncodingDetector.wholeText((Node)doc);
            if (wholeText.length() < 100 && iso_8859_1.length() > 600) {
                filterInput = false;
            } else {
                iso_8859_1 = wholeText;
            }
        }
        byte[] text = iso_8859_1.getBytes(StandardCharsets.ISO_8859_1);
        CharsetDetector icu4j = new CharsetDetector(text.length);
        icu4j.setText(text);
        for (CharsetMatch match : icu4j.detectAll()) {
            try {
                Charset charset = EncodingUtils.forName(match.getName());
                if (filterInput && !TAG_CHARS.equals(new String(TAG_BYTES, charset)) || (charset = EncodingUtils.correctVariant(stats, charset)) == null) continue;
                return charset;
            }
            catch (Exception exception) {
                // empty catch block
            }
        }
        return EncodingUtils.correctVariant(stats, StandardCharsets.ISO_8859_1);
    }

    public String guessEncoding(InputStream is, String contentType) throws IOException {
        Charset charset = EncodingUtils.contentTypeCharset(contentType);
        return TikaEncodingDetector.guessEncoding(is, charset).name();
    }

    private static <E> E computeAndReset(InputStream is, InputStreamFunction<E> function) throws IOException {
        is.mark(Integer.MAX_VALUE);
        try {
            E e = function.compute(is);
            return e;
        }
        finally {
            is.reset();
        }
    }

    private static Document parseFragment(String html, ParseErrorList errors) {
        Node[] childNodes;
        Document doc = new Document("");
        for (Node node : childNodes = Parser.parseFragment((String)html, null, (String)"", (ParseErrorList)errors).toArray(EMPTY_NODES)) {
            if (node.parentNode() != null) {
                node.remove();
            }
            doc.appendChild(node);
        }
        return doc;
    }

    private static long countTags(Node node) {
        final long[] ret = new long[]{0L};
        NodeTraversor.traverse((NodeVisitor)new NodeVisitor(){

            public void head(Node node, int depth) {
                if (node instanceof Document || node instanceof PseudoTextElement) {
                    return;
                }
                if (node instanceof Element || node instanceof DocumentType || node instanceof Comment) {
                    ret[0] = ret[0] + (node.childNodeSize() == 0 ? 1L : 2L);
                }
            }

            public void tail(Node node, int depth) {
            }
        }, (Node)node);
        return ret[0];
    }

    private static String wholeText(Node node) {
        final StringBuilder sb = new StringBuilder();
        NodeTraversor.traverse((NodeVisitor)new NodeVisitor(){

            public void head(Node node, int depth) {
                if (node instanceof TextNode) {
                    sb.append(((TextNode)node).getWholeText());
                } else if (node instanceof DataNode) {
                    String data = ((DataNode)node).getWholeData();
                    do {
                        if (!"script".equalsIgnoreCase(node.nodeName())) continue;
                        if (node.attr("type").toLowerCase().contains("json")) {
                            sb.append(data);
                        }
                        break;
                    } while (!"style".equalsIgnoreCase(node.nodeName()) && (node = node.parentNode()) != null);
                } else if (node instanceof Comment) {
                    String data = ((Comment)node).getData();
                    if (!data.contains("<!") && !data.contains("<?")) {
                        sb.append(data);
                    }
                } else if (node instanceof Element) {
                    sb.append(node.attr("content"));
                }
            }

            public void tail(Node node, int depth) {
            }
        }, (Node)node);
        return sb.toString();
    }

    @FunctionalInterface
    private static interface InputStreamFunction<E> {
        public E compute(InputStream var1) throws IOException;
    }
}

