Execute command
----------------------------
>hadoop jar MapReduce-0.0.1-SNAPSHOT.jar com.my.cert.example.WordCount -D mapred.reduce.tasks=2 word file:///c:/hadoop/test1
If we need to pass multiple configuration parameters, we can specify multiple -D options as below.
hadoop jar MapReduce-0.0.1-SNAPSHOT.jar com.my.cert.example.WordCount -D mapred.reduce.tasks=2 -D mapred.map.tasks=2 word file:///c:/hadoop/test1
The command sets the number of reduce tasks to 2 and writes the result to my local Windows directory file:///c:/hadoop/test1.
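Note: on Hadoop 2.x and later the old property names are deprecated; mapreduce.job.reduces replaces mapred.reduce.tasks and mapreduce.job.maps replaces mapred.map.tasks (the old names still work and are mapped to the new ones). Assuming the same jar and paths, the equivalent command with the newer name would be:
>hadoop jar MapReduce-0.0.1-SNAPSHOT.jar com.my.cert.example.WordCount -D mapreduce.job.reduces=2 word file:///c:/hadoop/test1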
"word" is input directory having text files and contains data something like below
text.txt
-----------------------------------
A A A
B B B
C C C
---------------------------
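For reference, the job sums the occurrences of each word, so for the sample input above the output should contain the counts below (with mapred.reduce.tasks=2 the lines are spread across two part files, part-r-00000 and part-r-00001, by the default hash partitioner; key and count are tab separated):
-----------------------------------
A	3
B	3
C	3
---------------------------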
We can pass MapReduce configuration parameters (such as -D mapred.reduce.tasks=2) to the driver class from the command line. For these -D options to be picked up, the driver class must "extends Configured implements Tool", as in the Java MapReduce program below.
public class WordCount extends Configured implements Tool
-------
package com.my.cert.example;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCount extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        // ToolRunner parses the -D options with GenericOptionsParser and
        // passes only the remaining arguments to run().
        int res = ToolRunner.run(new Configuration(), new WordCount(), args);
        System.exit(res);
    }

    public static class ProjectionMapper extends
            Mapper<LongWritable, Text, Text, LongWritable> {

        private Text word = new Text();
        private LongWritable one = new LongWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the line on whitespace and emit (word, 1) for every token.
            String[] split = value.toString().split("\\s+");
            for (String text : split) {
                word.set(text);
                context.write(word, one);
            }
        }
    }

    public static class LongSumReducer extends
            Reducer<Text, LongWritable, Text, LongWritable> {

        private LongWritable result = new LongWritable();

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values,
                Context context) throws IOException, InterruptedException {
            // Sum the counts emitted by the mappers for this word.
            long sum = 0;
            for (LongWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        // After GenericOptionsParser has stripped the -D options,
        // args[0] is the input directory and args[1] the output directory.
        Path inputPath = new Path(args[0]);
        Path outputDir = new Path(args[1]);

        // getConf() already contains the -D values passed on the command line.
        Configuration conf = this.getConf();

        // Create the job (Job.getInstance(conf, "WordCount") on newer Hadoop versions).
        Job job = new Job(conf, "WordCount");
        job.setJarByClass(WordCount.class);

        // Set up the mapper and reducer. Do not hard-code the number of reduce
        // tasks here: job.setNumReduceTasks(1) would override the value passed
        // with -D mapred.reduce.tasks=2.
        job.setMapperClass(WordCount.ProjectionMapper.class);
        job.setReducerClass(WordCount.LongSumReducer.class);

        // Output key / value types (also used for the map output here).
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Input
        FileInputFormat.addInputPath(job, inputPath);
        job.setInputFormatClass(TextInputFormat.class);

        // Output
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setOutputFormatClass(TextOutputFormat.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }
}
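As a quick sanity check (this snippet is not part of the original program), the value passed with -D can be read back inside run(), because ToolRunner applies GenericOptionsParser to the arguments and the parsed options end up in the configuration returned by getConf():
-------
// Inside run(): prints "2" when the job is launched with
// -D mapred.reduce.tasks=2 as in the commands above.
Configuration conf = getConf();
System.out.println("reduce tasks: " + conf.get("mapred.reduce.tasks"));
-------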