1/21/2015

Hadoop big data: Converting Text files to Sequence File

Hadoop big data: Converting a text file to a SequenceFile.

package com.my.hadoop.example3;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;



/**
 * Converts a line-oriented text file into a Hadoop {@link SequenceFile}.
 *
 * <p>Each line of the input becomes one record: the key is the zero-based
 * line number ({@link IntWritable}) and the value is the line's text
 * ({@link Text}), without its line terminator.
 */
public class ConvertTextToSequenceFile {

    /** Input path used when no first command-line argument is given. */
    private static final String DEFAULT_INPUT = "word/test1.txt";

    /** Output path used when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT = "outputSEQ.lz";

    private ConvertTextToSequenceFile() {
        // Command-line entry point only; not meant to be instantiated.
    }

    /**
     * Reads the input text file line by line and appends every line to a
     * newly created sequence file.
     *
     * @param args optional: {@code args[0]} is the input text file and
     *             {@code args[1]} the output sequence file; missing entries
     *             fall back to {@code word/test1.txt} and {@code outputSEQ.lz}
     * @throws IOException if the input cannot be read or the output cannot
     *                     be written
     * @throws InstantiationException never thrown; kept so the declared
     *                     signature stays unchanged for existing callers
     * @throws IllegalAccessException never thrown; kept so the declared
     *                     signature stays unchanged for existing callers
     */
    public static void main(String[] args) throws IOException,
            InstantiationException, IllegalAccessException {
        Configuration conf = new Configuration();
        conf.addResource(new Path("C:/hadoop-2.5.1/etc/hadoop/core-site.xml"));
        conf.addResource(new Path("C:/hadoop-2.5.1/etc/hadoop/hdfs-site.xml"));

        int argCount = (args == null) ? 0 : args.length;
        Path inputFile = new Path(argCount > 0 ? args[0] : DEFAULT_INPUT);
        Path outputFile = new Path(argCount > 1 ? args[1] : DEFAULT_OUTPUT);

        // Key and value objects are reused for every record.
        IntWritable key = new IntWritable();
        Text value = new Text();

        // Resources are closed in reverse declaration order, so the writer is
        // flushed and closed BEFORE the file system it writes through. Closing
        // the file system first can silently lose the tail of the output.
        try (FileSystem fs = FileSystem.get(conf);
             FSDataInputStream in = fs.open(inputFile);
             // Decode explicitly as UTF-8: DataInputStream.readLine() maps
             // each byte to one char and corrupts multi-byte characters.
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(in, StandardCharsets.UTF_8));
             SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                     SequenceFile.Writer.file(fs.makeQualified(outputFile)),
                     SequenceFile.Writer.keyClass(IntWritable.class),
                     SequenceFile.Writer.valueClass(Text.class))) {
            int lineNumber = 0;
            String line;
            // readLine() returns null at end of stream, which is a reliable
            // termination test (unlike available() > 0).
            while ((line = reader.readLine()) != null) {
                key.set(lineNumber++);
                value.set(line);
                writer.append(key, value);
            }
        }

        // Reached only when every resource was closed without an exception.
        System.out.println("SEQUENCE FILE CREATED SUCCESSFULLY........");
    }
}

No comments:

Post a Comment