/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hadoop.examples.terasort;

import java.io.DataOutput;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.examples.terasort.TeraInputFormat;
import org.apache.hadoop.examples.terasort.TeraScheduler;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/**
 * Input format for TeraSort-style jobs that yields {@code Text} keys and
 * {@code Text} values, and that can sample its own input to produce the
 * partition file consumed by the sort.
 *
 * <p>The result of the most recent {@link #getSplits(JobContext)} call is
 * cached in static fields, keyed by the identity of the job context. The cache
 * is not synchronized.
 *
 * <p>NOTE(review): this file is decompiler output. The anonymous sampler
 * thread class inside {@link #writePartitionFile(JobContext, Path)} was not
 * recovered, and the nested types referenced here ({@code TextSampler},
 * {@code SamplerThreadGroup}, {@code TeraRecordReader}) are declared elsewhere.
 */
public class TeraInputFormat
extends FileInputFormat<Text, Text> {
    /** File name used for the partition list. */
    static final String PARTITION_FILENAME = "_partition.lst";
    /** Configuration key: upper bound on the number of splits that are sampled. */
    private static final String NUM_PARTITIONS = "mapreduce.terasort.num.partitions";
    /** Configuration key: total number of records to sample across all sampled splits. */
    private static final String SAMPLE_SIZE = "mapreduce.terasort.partitions.sample";
    // Record layout constants; KEY_LENGTH + VALUE_LENGTH == RECORD_LENGTH.
    // Units are presumably bytes -- confirm against the record reader.
    static final int KEY_LENGTH = 10;
    static final int VALUE_LENGTH = 90;
    static final int RECORD_LENGTH = 100;
    /** Job context whose splits are currently cached in {@link #lastResult}. */
    private static MRJobConfig lastContext = null;
    /** Splits computed for {@link #lastContext}; only meaningful when that field matches. */
    private static List<InputSplit> lastResult = null;

    /**
     * Samples the job input and writes the resulting partition boundaries to
     * {@code partFile}.
     *
     * <p>The number of sampled splits is the smaller of the
     * {@code mapreduce.terasort.num.partitions} setting (default 10) and the
     * number of input splits. One reader thread is started per sampled split;
     * once all of them have finished, the keys returned by
     * {@code sampler.createPartitions(...)} are written to the file in order.
     *
     * @param job      job whose configuration and input are sampled
     * @param partFile destination of the partition list; overwritten if present
     * @throws IllegalArgumentException if there is nothing to sample (no input
     *         splits, or a non-positive sample-count setting)
     * @throws InterruptedException if the calling thread is interrupted while
     *         waiting for a sampler thread; the interrupt flag is restored
     * @throws Throwable any failure recorded by the sampler thread group, or
     *         any I/O failure while computing splits or writing the file
     */
    public static void writePartitionFile(JobContext job, Path partFile) throws Throwable {
        long t1 = System.currentTimeMillis();
        Configuration conf = job.getConfiguration();
        TeraInputFormat inFormat = new TeraInputFormat();
        TextSampler sampler = new TextSampler();
        int partitions = job.getNumReduceTasks();
        long sampleSize = conf.getLong(SAMPLE_SIZE, 100000L);
        List<InputSplit> splits = inFormat.getSplits(job);
        long t2 = System.currentTimeMillis();
        System.out.println("Computing input splits took " + (t2 - t1) + "ms");
        int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
        System.out.println("Sampling " + samples + " splits of " + splits.size());
        if (samples <= 0) {
            // Fail with a descriptive message instead of the division by zero
            // (or negative array size) that the arithmetic below would produce.
            throw new IllegalArgumentException("Cannot sample partitions: " + NUM_PARTITIONS
                    + " resolves to " + samples + " sample(s) over " + splits.size()
                    + " input split(s)");
        }
        long recordsPerSample = sampleSize / (long)samples;
        int sampleStep = splits.size() / samples;
        Thread[] samplerReader = new Thread[samples];
        SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
        for (int i = 0; i < samples; ++i) {
            int idx = i;
            // NOTE(review): the decompiler could not recover the anonymous Thread
            // subclass created here; restore it from the original source. It
            // presumably captures the locals declared above (job, inFormat,
            // sampler, splits, recordsPerSample, sampleStep, threadGroup, idx),
            // so their names are intentionally left unchanged.
            samplerReader[i] = new /* Unavailable Anonymous Inner Class!! */;
            samplerReader[i].start();
        }
        FileSystem outFs = partFile.getFileSystem(conf);
        // Arguments after the path follow FileSystem.create: overwrite=true,
        // 64 KiB buffer, replication 10, file system default block size.
        FSDataOutputStream writer = outFs.create(partFile, true, 65536, (short)10, outFs.getDefaultBlockSize(partFile));
        try {
            for (int i = 0; i < samples; ++i) {
                try {
                    samplerReader[i].join();
                }
                catch (InterruptedException e) {
                    // Do not carry on with samplers that may still be running:
                    // restore the interrupt flag and let the caller handle it.
                    Thread.currentThread().interrupt();
                    throw e;
                }
                Throwable failure = threadGroup.getThrowable();
                if (failure != null) {
                    throw failure;
                }
            }
            for (Text split : sampler.createPartitions(partitions)) {
                split.write((DataOutput)writer);
            }
        }
        finally {
            // Always release the stream, including when a sampler failed.
            writer.close();
        }
        long t3 = System.currentTimeMillis();
        System.out.println("Computing parititions took " + (t3 - t2) + "ms");
    }

    /**
     * Creates the reader used to iterate over the records of a split.
     *
     * @param split   the split to read (not inspected here)
     * @param context the task attempt context (not inspected here)
     * @return a new {@code TeraRecordReader}
     * @throws IOException declared for interface compatibility
     */
    @Override
    public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException {
        return new TeraRecordReader();
    }

    /**
     * Returns the input splits for {@code job}, reusing the cached result when
     * called again with the very same context object.
     *
     * <p>Base splits come from {@code FileInputFormat}; when the
     * {@code TeraScheduler.USE} setting is true (the default) they are replaced
     * by the splits produced by {@code TeraScheduler}. The cache is only
     * updated after the whole computation succeeds, so a failed call can never
     * leave a stale or partial result behind for the same context.
     *
     * @param job the job context to compute splits for
     * @return the (possibly cached) list of splits
     * @throws IOException if computing the splits fails
     */
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        if (job == lastContext) {
            return lastResult;
        }
        long t1 = System.currentTimeMillis();
        List<InputSplit> result = super.getSplits(job);
        long t2 = System.currentTimeMillis();
        System.out.println("Spent " + (t2 - t1) + "ms computing base-splits.");
        if (job.getConfiguration().getBoolean(TeraScheduler.USE, true)) {
            TeraScheduler scheduler = new TeraScheduler(result.toArray(new FileSplit[0]), job.getConfiguration());
            result = scheduler.getNewFileSplits();
            long t3 = System.currentTimeMillis();
            System.out.println("Spent " + (t3 - t2) + "ms computing TeraScheduler splits.");
        }
        // Publish the cache entry only now that the result is complete.
        lastResult = result;
        lastContext = job;
        return result;
    }
}

