1/02/2015

Example of setting MapReduce properties from the command prompt using -D

Execute command
----------------------------

>hadoop jar MapReduce-0.0.1-SNAPSHOT.jar com.my.cert.example.WordCount -D mapred.reduce.tasks=2  word  file:///c:/hadoop/test1

If we need to set multiple properties, we pass multiple -D options as below.

hadoop jar MapReduce-0.0.1-SNAPSHOT.jar com.my.cert.example.WordCount -D mapred.reduce.tasks=2 -D mapred.map.tasks=2 word file:///c:/hadoop/test1

The command sets the number of reduce tasks to 2 and writes the result to my Windows local directory file:///c:/hadoop/test1.

"word" is input directory having text files  and contains data something like below

text.txt
-----------------------------------
A A A
B B B
C C C

---------------------------
We can pass/set MapReduce configuration parameters (such as -D mapred.reduce.tasks=2) to the driver class. The class must "extends Configured implements Tool", as in the Java MapReduce program below:

public class WordCount extends Configured implements Tool
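
ToolRunner runs the GenericOptionsParser behind the scenes: it strips the -D options out of the argument list and applies them to the Configuration returned by getConf(), so inside run() only the input and output paths remain in args. A minimal sketch to see this (the println calls are only for illustration):

 @Override
 public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  // prints 2 when launched with -D mapred.reduce.tasks=2
  System.out.println("mapred.reduce.tasks = " + conf.get("mapred.reduce.tasks"));
  System.out.println(args[0]); // word
  System.out.println(args[1]); // file:///c:/hadoop/test1
  return 0;
 }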

-------
package com.my.cert.example;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCount extends Configured implements Tool {
 public static void main(String[] args) throws Exception {
  
   int res = ToolRunner.run(new Configuration(), new WordCount(), args);
   System.exit(res);
 }

 public static class ProjectionMapper extends
   Mapper<LongWritable, Text, Text, LongWritable> {
  private Text word = new Text();
  private LongWritable count = new LongWritable();

  @Override
  protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
   // split each line on whitespace and emit (word, 1) for every token
   String[] split = value.toString().split("\\s+");
   for (String text : split) {
    word.set(text);
    count.set(1);
    context.write(word, count);
   }
  }
 }

 public static class LongSumReducer extends
   Reducer<Text, LongWritable, Text, LongWritable> {

  private LongWritable result = new LongWritable();
  
  @Override
  public void reduce(Text key, Iterable<LongWritable> values,
    Context context) throws IOException, InterruptedException {
   long sum = 0;
   for (LongWritable val : values) {
    sum += val.get();
   }
   
   result.set(sum);
   context.write(key, result);
  }
 }


 @Override
 public int run(String[] args) throws Exception {

  Path inputPath = new Path(args[0]);
  Path outputDir = new Path(args[1]);

  // Create configuration
  Configuration conf = this.getConf();

  // Create job
  Job job = Job.getInstance(conf, "WordCount");
  job.setJarByClass(WordCount.class);

  // Setup MapReduce
  job.setMapperClass(WordCount.ProjectionMapper.class);
  job.setReducerClass(WordCount.LongSumReducer.class);
  // no setNumReduceTasks() call here: hard-coding a reducer count in
  // the driver would override the value passed with -D mapred.reduce.tasks

  // Specify key / value
  // the map output key/value classes default to the output classes below
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);

  // Input
  FileInputFormat.addInputPath(job, inputPath);
  job.setInputFormatClass(TextInputFormat.class);

  // Output
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputFormatClass(TextOutputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
 }
}
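
The same property can also be set from the driver code instead of the command line. A sketch (note this runs after the GenericOptionsParser, so it would overwrite a value passed with -D):

 Configuration conf = this.getConf();
 conf.setInt("mapred.reduce.tasks", 2); // same effect as -D mapred.reduce.tasks=2
 Job job = Job.getInstance(conf, "WordCount");

Equivalently, job.setNumReduceTasks(2) can be called after the job is created; hard-coding either one in the driver defeats the purpose of passing the value with -D.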
