/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hadoop.hive.ql.exec.tez;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.tez.ColumnarSplitSizeEstimator;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.split.SplitLocationProvider;
import org.apache.hadoop.mapred.split.SplitSizeEstimator;
import org.apache.hadoop.mapred.split.TezGroupedSplit;
import org.apache.hadoop.mapred.split.TezMapredSplitsGrouper;
import org.apache.hive.com.google.common.collect.ArrayListMultimap;
import org.apache.hive.com.google.common.collect.Lists;
import org.apache.hive.com.google.common.collect.Multimap;
import org.apache.tez.dag.api.TaskLocationHint;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Groups input splits for Tez and derives task location hints for them.
 *
 * <p>Splits are first partitioned into numbered source groups ("buckets"), a
 * task count is estimated for every bucket, and each bucket is then handed to
 * {@link TezMapredSplitsGrouper} to be combined into grouped splits.
 */
public class SplitGrouper {
    private static final Logger LOG = LoggerFactory.getLogger(SplitGrouper.class);

    /** Lookup cache passed to {@code HiveFileFormatUtils.getPartitionDescFromPathRecursively}. */
    private static final Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = new ConcurrentHashMap<>();

    private final TezMapredSplitsGrouper tezGrouper = new TezMapredSplitsGrouper();

    /**
     * Groups the splits of every bucket independently.
     *
     * @param conf configuration forwarded to the Tez grouper
     * @param bucketSplitMultimap raw splits keyed by bucket id
     * @param availableSlots number of task slots available
     * @param waves multiplier applied to {@code availableSlots} when estimating task counts
     * @param splitLocationProvider location provider forwarded to the Tez grouper
     * @return the grouped splits keyed by the same bucket ids
     * @throws IOException if the Tez grouper fails
     */
    public Multimap<Integer, InputSplit> group(Configuration conf, Multimap<Integer, InputSplit> bucketSplitMultimap, int availableSlots, float waves, SplitLocationProvider splitLocationProvider) throws IOException {
        Map<Integer, Integer> bucketTaskMap = estimateBucketSizes(availableSlots, waves, bucketSplitMultimap.asMap());
        Multimap<Integer, InputSplit> bucketGroupedSplitMultimap = ArrayListMultimap.create();
        for (Integer bucketId : bucketSplitMultimap.keySet()) {
            InputSplit[] rawSplits = bucketSplitMultimap.get(bucketId).toArray(new InputSplit[0]);
            int desiredTasks = bucketTaskMap.get(bucketId).intValue();
            InputSplit[] groupedSplits = tezGrouper.getGroupedSplits(conf, rawSplits, desiredTasks, HiveInputFormat.class.getName(), (SplitSizeEstimator) new ColumnarSplitSizeEstimator(), splitLocationProvider);
            LOG.info("Original split count is {} grouped split count is {}, for bucket: {}", rawSplits.length, groupedSplits.length, bucketId);
            bucketGroupedSplitMultimap.putAll(bucketId, Arrays.asList(groupedSplits));
        }
        return bucketGroupedSplitMultimap;
    }

    /**
     * Builds one {@link TaskLocationHint} per split, in the order of {@code splits}.
     *
     * <ul>
     *   <li>A {@link TezGroupedSplit} with a non-null rack yields a rack-only hint.</li>
     *   <li>A split without locations yields a hint with neither nodes nor racks.</li>
     *   <li>Otherwise the hint carries the split's locations; when
     *       {@code consistentLocations} is set and the split is a {@link FileSplit}
     *       with more than one location, the locations are sorted and rotated by a
     *       hash of the file path and start offset.</li>
     * </ul>
     *
     * @param splits the splits to create hints for
     * @param consistentLocations whether to order file split locations deterministically
     * @return the location hints, one per split
     * @throws IOException if a split fails to report its locations
     */
    public List<TaskLocationHint> createTaskLocationHints(InputSplit[] splits, boolean consistentLocations) throws IOException {
        List<TaskLocationHint> locationHints = Lists.newArrayListWithCapacity(splits.length);
        for (InputSplit split : splits) {
            String rack = split instanceof TezGroupedSplit ? ((TezGroupedSplit) split).getRack() : null;
            if (rack != null) {
                locationHints.add(TaskLocationHint.createTaskLocationHint(null, Collections.singleton(rack)));
                continue;
            }
            String[] locations = split.getLocations();
            if (locations == null || locations.length == 0) {
                locationHints.add(TaskLocationHint.createTaskLocationHint(null, null));
                continue;
            }
            LinkedHashSet<String> locationSet;
            if (consistentLocations && locations.length > 1 && split instanceof FileSplit) {
                locationSet = rotateLocations((FileSplit) split, locations);
            } else {
                locationSet = new LinkedHashSet<>(Arrays.asList(locations));
            }
            locationHints.add(TaskLocationHint.createTaskLocationHint(locationSet, null));
        }
        return locationHints;
    }

    /**
     * Sorts {@code locations} in place and returns them as an ordered set that
     * starts at an index derived from the split's path and start offset and
     * wraps around the sorted array.
     */
    private static LinkedHashSet<String> rotateLocations(FileSplit fileSplit, String[] locations) {
        Arrays.sort(locations);
        int hashCode = Objects.hash(fileSplit.getPath(), fileSplit.getStart());
        // floorMod keeps the start index in [0, length) even for a negative hash;
        // the plain % operator would produce a negative index and fail below.
        int startIndex = Math.floorMod(hashCode, locations.length);
        LinkedHashSet<String> locationSet = new LinkedHashSet<>(locations.length);
        for (int i = 0; i < locations.length; ++i) {
            locationSet.add(locations[(startIndex + i) % locations.length]);
        }
        return locationSet;
    }

    /**
     * Convenience overload of
     * {@link #generateGroupedSplits(JobConf, Configuration, InputSplit[], float, int, String, boolean, SplitLocationProvider)}
     * that passes a {@code null} input name and allows grouping across files.
     */
    public Multimap<Integer, InputSplit> generateGroupedSplits(JobConf jobConf, Configuration conf, InputSplit[] splits, float waves, int availableSlots, SplitLocationProvider locationProvider) throws Exception {
        return generateGroupedSplits(jobConf, conf, splits, waves, availableSlots, null, true, locationProvider);
    }

    /**
     * Partitions {@code splits} into source groups and groups each of them.
     *
     * <p>A new source group is started whenever {@code schemaEvolved} reports a
     * boundary between a split and the first split of the current group.
     *
     * @param jobConf job configuration used to look up the {@link MapWork} and passed to {@link #group}
     * @param conf not read by this method
     * @param splits the splits to group; each one is cast to {@link FileSplit}
     * @param waves multiplier applied to {@code availableSlots} when estimating task counts
     * @param availableSlots number of task slots available
     * @param inputName name of the merge work to look up, or {@code null} to use the map work
     * @param groupAcrossFiles when {@code false}, splits of different files never share a group
     * @param locationProvider location provider forwarded to the Tez grouper
     * @return the grouped splits keyed by source group number
     * @throws Exception if partition lookup or grouping fails
     */
    public Multimap<Integer, InputSplit> generateGroupedSplits(JobConf jobConf, Configuration conf, InputSplit[] splits, float waves, int availableSlots, String inputName, boolean groupAcrossFiles, SplitLocationProvider locationProvider) throws Exception {
        MapWork work = populateMapWork(jobConf, inputName);
        Multimap<Integer, InputSplit> bucketSplitMultiMap = ArrayListMultimap.create();
        int i = 0;
        InputSplit prevSplit = null;
        for (InputSplit s : splits) {
            // Only the split that opens a group becomes the reference for later comparisons.
            if (schemaEvolved(s, prevSplit, groupAcrossFiles, work)) {
                ++i;
                prevSplit = s;
            }
            bucketSplitMultiMap.put(i, s);
        }
        LOG.info("# Src groups for split generation: {}", i + 1);
        return group(jobConf, bucketSplitMultiMap, availableSlots, waves, locationProvider);
    }

    /**
     * Estimates how many tasks each bucket should be grouped into.
     *
     * <p>When every split is a {@link FileSplit}, {@code availableSlots * waves}
     * tasks are shared between the buckets in proportion to their byte size, with
     * a minimum of one task per bucket. As soon as any split is not a
     * {@link FileSplit}, sizes are ignored and every bucket is assigned
     * {@code availableSlots * waves} tasks.
     *
     * @return a task count for every bucket id in {@code bucketSplitMap}
     */
    private Map<Integer, Integer> estimateBucketSizes(int availableSlots, float waves, Map<Integer, Collection<InputSplit>> bucketSplitMap) {
        Map<Integer, Long> bucketSizeMap = new HashMap<>();
        Map<Integer, Integer> bucketTaskMap = new HashMap<>();
        int unsizedTaskCount = (int) ((float) availableSlots * waves);
        long totalSize = 0L;
        boolean earlyExit = false;
        for (Map.Entry<Integer, Collection<InputSplit>> bucket : bucketSplitMap.entrySet()) {
            Integer bucketId = bucket.getKey();
            long size = 0L;
            for (InputSplit s : bucket.getValue()) {
                if (!(s instanceof FileSplit)) {
                    bucketTaskMap.put(bucketId, unsizedTaskCount);
                    earlyExit = true;
                    continue;
                }
                long length = ((FileSplit) s).getLength();
                size += length;
                totalSize += length;
            }
            bucketSizeMap.put(bucketId, size);
        }
        if (earlyExit) {
            // Buckets made up solely of file splits were not recorded above; give them
            // the same count so that callers find an entry for every bucket id.
            for (Integer bucketId : bucketSplitMap.keySet()) {
                if (!bucketTaskMap.containsKey(bucketId)) {
                    bucketTaskMap.put(bucketId, unsizedTaskCount);
                }
            }
            return bucketTaskMap;
        }
        for (Map.Entry<Integer, Long> bucket : bucketSizeMap.entrySet()) {
            long bucketSize = bucket.getValue().longValue();
            int numEstimatedTasks = 0;
            if (totalSize != 0L) {
                numEstimatedTasks = (int) ((float) availableSlots * waves * (float) bucketSize / (float) totalSize);
            }
            LOG.info("Estimated number of tasks: {} for bucket {}", numEstimatedTasks, bucket.getKey());
            if (numEstimatedTasks == 0) {
                numEstimatedTasks = 1;
            }
            bucketTaskMap.put(bucket.getKey(), numEstimatedTasks);
        }
        return bucketTaskMap;
    }

    /**
     * Returns the merge work registered under {@code inputName} when a name is
     * given and such work exists, otherwise the job's map work.
     */
    private static MapWork populateMapWork(JobConf jobConf, String inputName) {
        MapWork work = null;
        if (inputName != null) {
            work = (MapWork) Utilities.getMergeWork((Configuration) jobConf, inputName);
        }
        if (work == null) {
            work = Utilities.getMapWork((Configuration) jobConf);
        }
        return work;
    }

    /**
     * Decides whether {@code s} must start a new source group.
     *
     * <p>With {@code groupAcrossFiles} disabled and a previous split present, a
     * new group starts exactly when the two file paths differ. Otherwise a new
     * group starts when the input format class or the deserializer class name of
     * the split's partition differs from that of {@code prevSplit}; with no
     * previous split the previous values are {@code null}.
     *
     * @param s the split being placed; must be a {@link FileSplit}
     * @param prevSplit the split that opened the current group, or {@code null}
     * @param groupAcrossFiles whether splits of different files may share a group
     * @param work supplies the path-to-partition mapping
     * @return {@code true} if {@code s} starts a new group
     * @throws IOException if the partition lookup fails
     */
    private boolean schemaEvolved(InputSplit s, InputSplit prevSplit, boolean groupAcrossFiles, MapWork work) throws IOException {
        Path path = ((FileSplit) s).getPath();
        PartitionDesc pd = HiveFileFormatUtils.getPartitionDescFromPathRecursively(work.getPathToPartitionInfo(), path, cache);
        String currentDeserializerClass = pd.getDeserializerClassName();
        Class<? extends InputFormat> currentInputFormatClass = pd.getInputFileFormatClass();
        Class<? extends InputFormat> previousInputFormatClass = null;
        String previousDeserializerClass = null;
        if (prevSplit != null) {
            Path prevPath = ((FileSplit) prevSplit).getPath();
            if (!groupAcrossFiles) {
                return !path.equals(prevPath);
            }
            PartitionDesc prevPD = HiveFileFormatUtils.getPartitionDescFromPathRecursively(work.getPathToPartitionInfo(), prevPath, cache);
            previousDeserializerClass = prevPD.getDeserializerClassName();
            previousInputFormatClass = prevPD.getInputFileFormatClass();
        }
        boolean retval = currentInputFormatClass != previousInputFormatClass
                || !currentDeserializerClass.equals(previousDeserializerClass);
        LOG.debug("Adding split {} to src new group? {}", path, retval);
        return retval;
    }
}

