/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.text;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;

public class TextParagraphSplittingJob
extends AbstractJob {
    public int run(String[] strings) throws Exception {
        Configuration originalConf = this.getConf();
        Job job = this.prepareJob(new Path(originalConf.get("mapred.input.dir")), new Path(originalConf.get("mapred.output.dir")), SequenceFileInputFormat.class, SplitMap.class, Text.class, Text.class, Reducer.class, Text.class, Text.class, SequenceFileOutputFormat.class);
        job.setNumReduceTasks(0);
        job.waitForCompletion(true);
        return 1;
    }

    public static void main(String[] args) throws Exception {
        ToolRunner.run((Tool)new TextParagraphSplittingJob(), (String[])args);
    }

    public static class SplitMap
    extends Mapper<Text, Text, Text, Text> {
        protected void map(Text key, Text text, Mapper.Context context) throws IOException, InterruptedException {
            Text outText = new Text();
            int loc = 0;
            while (loc >= 0 && loc < text.getLength()) {
                int nextLoc = text.find("\n\n", loc + 1);
                if (nextLoc > 0) {
                    outText.set(text.getBytes(), loc, nextLoc - loc);
                    context.write((Object)key, (Object)outText);
                }
                loc = nextLoc;
            }
        }
    }
}

