jogam5 · October 22, 2021 01:29
diff --git a/Grep.java b/Grep.java
 package my.midterm;

 /*
 The 'Overview' section of the tutorial below gives a concise, high-level summary of Hadoop MapReduce:
 https://hadoop.apache.org/docs/r1.2.1/mapred_tutorial.html
 */

 import java.io.*;
 import java.util.*;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.*;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
 import java.util.ArrayList;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.io.Text;

 public class Grep {
    public static class Map extends Mapper<LongWritable, Text, Text, Text> {
        /*
           Rationale:
           Map extends Mapper class, where the method map() (one of many maps tasks)
           is called each time a  line is processed. When the line contains the
           string "pattern", "context.write()" is called to generate an output
           similar to {"dark", "the sky is dark"}.
           The final output of Map is similar to:
           {"dark", "the sky is dark"}, {"dark", "dark knight"}, ...
        */

        /* Supplied pattern */
        String pattern ="dark";

        private final Text keyOutput = new Text();
        private final Text valueOutput = new Text();

        public void map(LongWritable key, Text value, Context context) throws IOException,
            InterruptedException {

            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                if (tokenizer.nextToken().contains(pattern)) {
                    // System.out.println(line);
                    keyOutput.set(pattern);
                    valueOutput.set(line);
                    context.write(keyOutput, valueOutput);
                    break;
                }
            }
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        /*
            Rationale:
            Reduce receives a lists of key:value pairs from Map (i.e.
            {"dark", "the sky is dark"}, {"dark", "dark knight"}, ... ).
            For each unique key, the object "Iterable<Text> values" is
            looped over in order to aggregate all the values for the same key.
            In this case, all the values are stored in an ArrayList that
            will be reordered to output the lines in the file that contain
            the word specified in "dark".
         */
        private final Text result = new Text();
        public void reduce(Text key, Iterable<Text> values, Context context) throws
                IOException, InterruptedException {
            // System.out.println("---> Enter REDUCE");

            List<String> lines = new ArrayList<>();
            for (Text val : values) {
                // System.out.println(val);
                lines.add(val.toString());
            }
            /* Reverse order of elements in arrayList */
            Collections.reverse(lines);

            result.set(lines.toString());
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf, "grep");
        job.setJarByClass(Grep.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.waitForCompletion(true);
    }
 }
	package my.midterm;

	/*
	The 'Overview' section of the tutorial below gives a concise, high-level summary of Hadoop MapReduce:
	https://hadoop.apache.org/docs/r1.2.1/mapred_tutorial.html
	*/

	import java.io.*;
	import java.util.*;
	import org.apache.hadoop.fs.Path;
	import org.apache.hadoop.io.*;
	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.mapreduce.Job;
	import org.apache.hadoop.mapreduce.Mapper;
	import org.apache.hadoop.mapreduce.Reducer;
	import java.util.ArrayList;
	import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
	import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
	import org.apache.hadoop.io.Text;

	public class Grep {
	public static class Map extends Mapper<LongWritable, Text, Text, Text> {
	/*
	Rationale:
	Map extends Mapper class, where the method map() (one of many maps tasks)
	is called each time a line is processed. When the line contains the
	string "pattern", "context.write()" is called to generate an output
	similar to {"dark", "the sky is dark"}.
	The final output of Map is similar to:
	{"dark", "the sky is dark"}, {"dark", "dark knight"}, ...
	*/

	/* Supplied pattern */
	String pattern ="dark";

	private final Text keyOutput = new Text();
	private final Text valueOutput = new Text();

	public void map(LongWritable key, Text value, Context context) throws IOException,
	InterruptedException {

	String line = value.toString();
	StringTokenizer tokenizer = new StringTokenizer(line);
	while (tokenizer.hasMoreTokens()) {
	if (tokenizer.nextToken().contains(pattern)) {
	// System.out.println(line);
	keyOutput.set(pattern);
	valueOutput.set(line);
	context.write(keyOutput, valueOutput);
	break;
	}
	}
	}
	}

	public static class Reduce extends Reducer<Text, Text, Text, Text> {
	/*
	Rationale:
	Reduce receives a lists of key:value pairs from Map (i.e.
	{"dark", "the sky is dark"}, {"dark", "dark knight"}, ... ).
	For each unique key, the object "Iterable<Text> values" is
	looped over in order to aggregate all the values for the same key.
	In this case, all the values are stored in an ArrayList that
	will be reordered to output the lines in the file that contain
	the word specified in "dark".
	*/
	private final Text result = new Text();
	public void reduce(Text key, Iterable<Text> values, Context context) throws
	IOException, InterruptedException {
	// System.out.println("---> Enter REDUCE");

	List<String> lines = new ArrayList<>();
	for (Text val : values) {
	// System.out.println(val);
	lines.add(val.toString());
	}
	/* Reverse order of elements in arrayList */
	Collections.reverse(lines);

	result.set(lines.toString());
	context.write(key, result);
	}
	}

	public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();

	Job job = Job.getInstance(conf, "grep");
	job.setJarByClass(Grep.class);

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(Text.class);

	job.setMapperClass(Map.class);
	job.setReducerClass(Reduce.class);

	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));
	job.waitForCompletion(true);
	}
	}