12/30/2014

Hadoop Java MapReduce: Sort by Value


Input (test.txt)
------------
1,50
2,20
3,30
4,10
5,15
6,25
7,55
8,35
9,70

output
----------------------------
9 70
7 55
1 50
8 35
3 30
6 25
2 20
5 15
4 10

How it works: the mapper swaps each key and value so that the shuffle phase sorts the records by the original value, a custom comparator reverses that sort into descending order, and the reducer swaps the two fields back before writing the output.


package com.my.cert.example;

import java.nio.ByteBuffer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.IntWritable.Comparator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class ValueSortExp {
 public static void main(String[] args) throws Exception {

  Path inputPath = new Path("C:\\hadoop\\test\\test.txt");
  Path outputDir = new Path("C:\\hadoop\\test\\test1");

  // Path inputPath = new Path(args[0]);
  // Path outputDir = new Path(args[1]);

  // Create configuration
  Configuration conf = new Configuration(true);

  // Create job
  Job job = new Job(conf, "sort by value");
  job.setJarByClass(ValueSortExp.class);

  // Setup MapReduce
  job.setMapperClass(ValueSortExp.MapTask.class);
  job.setReducerClass(ValueSortExp.ReduceTask.class);
  job.setNumReduceTasks(1);

  // Specify key / value
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  job.setSortComparatorClass(IntComparator.class);
  // Input
  FileInputFormat.addInputPath(job, inputPath);
  job.setInputFormatClass(TextInputFormat.class);

  // Output
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputFormatClass(TextOutputFormat.class);

  /*
   * // Delete the output directory if it already exists
   * FileSystem hdfs = FileSystem.get(conf);
   * if (hdfs.exists(outputDir))
   *  hdfs.delete(outputDir, true);
   *
   * // Execute job
   * int code = job.waitForCompletion(true) ? 0 : 1;
   * System.exit(code);
   */

  // Execute job
  int code = job.waitForCompletion(true) ? 0 : 1;
  System.exit(code);

 }
 
 /*
  * Sort comparator for the map output keys (the original values): it decodes
  * the serialized IntWritable bytes and negates the comparison so the keys
  * come out in descending order.
  */
 public static class IntComparator extends WritableComparator {

     public IntComparator() {
         super(IntWritable.class);
     }

     @Override
     public int compare(byte[] b1, int s1, int l1,
             byte[] b2, int s2, int l2) {

         Integer v1 = ByteBuffer.wrap(b1, s1, l1).getInt();
         Integer v2 = ByteBuffer.wrap(b2, s2, l2).getInt();

         return v1.compareTo(v2) * (-1);
     }
 }

 public static class MapTask extends
   Mapper<LongWritable, Text, IntWritable, IntWritable> {
  public void map(LongWritable key, Text value, Context context)
    throws java.io.IOException, InterruptedException {
   String line = value.toString();
   String[] tokens = line.split(","); // comma is the delimiter between key and value
   int keypart = Integer.parseInt(tokens[0]);
   int valuePart = Integer.parseInt(tokens[1]);
   // Swap key and value: emitting the value as the map output key makes the
   // framework sort the records by value during the shuffle.
   context.write(new IntWritable(valuePart), new IntWritable(keypart));

  }
 }

 public static class ReduceTask extends
   Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
  public void reduce(IntWritable key, Iterable<IntWritable> list, Context context)
    throws java.io.IOException, InterruptedException {

   // The reduce key is the original value; each element of the list is an
   // original key that carried that value. Swap them back for the output.
   for (IntWritable value : list) {
    context.write(value, key);
   }

  }
 }

}
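The comparator above works because an IntWritable serializes to exactly four big-endian bytes, so ByteBuffer can decode the raw key bytes directly. As a rough alternative sketch (not part of the original post), the same descending order could be obtained by reusing IntWritable's built-in raw comparator, which the code already imports, and simply negating its result:

 // Alternative sketch (hypothetical, not in the original post): reuse
 // IntWritable's own raw comparator and negate its result to sort descending.
 public static class DescendingIntComparator extends IntWritable.Comparator {
     @Override
     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
         return -super.compare(b1, s1, l1, b2, s2, l2);
     }
 }

Registering it with job.setSortComparatorClass(DescendingIntComparator.class) would have the same effect as the IntComparator used above. On a real cluster the hard-coded C:\hadoop\test paths would normally be replaced by the commented-out args[0] and args[1] lines, and the packaged jar launched with the hadoop jar command.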

1 comment:

  1. This works fine as the input K, V pairs are unique values, but this gives problems with duplicate keys while grouping. Correct me if I am wrong. Thanks
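Note on the comment above: because the mapper emits the value as the map output key, input rows that share the same value are grouped under one reduce key, and the reducer loop writes one output line per original key, so none of them are lost. For instance, if the input also contained a hypothetical row 10,70, the output would include both 9 70 and 10 70, although the relative order of those two lines is not guaranteed without a secondary sort. Duplicate first-column keys are never grouped at all, since they travel through the shuffle as map output values.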