Hadoop big data: Converting a text file to a SequenceFile.
package com.my.hadoop.example3;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

/**
 * Converts a line-oriented text file into a Hadoop {@link SequenceFile}.
 *
 * <p>Each line of the input becomes one record: the key is the zero-based line
 * number ({@link IntWritable}) and the value is the line content ({@link Text}).
 */
public class ConvertTextToSequenceFile {

    /** Input path used when no first command-line argument is given. */
    private static final String DEFAULT_INPUT = "word/test1.txt";

    /** Output path used when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT = "outputSEQ.lz";

    /**
     * Reads the input text file line by line and appends every line to a new
     * sequence file.
     *
     * @param args optional; {@code args[0]} overrides the input path and
     *             {@code args[1]} overrides the output path. With no arguments the
     *             original hard-coded paths are used.
     * @throws IOException if the input cannot be read or the sequence file cannot be
     *                     written or closed
     * @throws InstantiationException never thrown by this implementation; kept in the
     *                                signature for compatibility with existing callers
     * @throws IllegalAccessException never thrown by this implementation; kept in the
     *                                signature for compatibility with existing callers
     */
    public static void main(String[] args)
            throws IOException, InstantiationException, IllegalAccessException {
        Configuration conf = new Configuration();
        conf.addResource(new Path("C:/hadoop-2.5.1/etc/hadoop/core-site.xml"));
        conf.addResource(new Path("C:/hadoop-2.5.1/etc/hadoop/hdfs-site.xml"));

        Path inputFile = new Path(argOrDefault(args, 0, DEFAULT_INPUT));
        Path outputFile = new Path(argOrDefault(args, 1, DEFAULT_OUTPUT));

        // Writables are reused across records; append() serializes them immediately.
        IntWritable key = new IntWritable();
        Text value = new Text();

        // Resources are closed in reverse declaration order, so the writer is flushed
        // and closed BEFORE the FileSystem. Closing the FileSystem first can make the
        // writer's final flush fail and leave a truncated file.
        try (FileSystem fs = FileSystem.get(conf);
             FSDataInputStream rawInput = fs.open(inputFile);
             // Decode explicitly: DataInputStream.readLine() is deprecated and maps
             // each byte to a char. NOTE(review): assumes the input is UTF-8 - confirm.
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(rawInput, StandardCharsets.UTF_8));
             SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                     SequenceFile.Writer.file(outputFile),
                     SequenceFile.Writer.keyClass(IntWritable.class),
                     SequenceFile.Writer.valueClass(Text.class))) {
            int count = 0;
            String line;
            // readLine() returning null is the reliable end-of-stream signal;
            // available() only estimates bytes readable without blocking.
            while ((line = reader.readLine()) != null) {
                key.set(count++);
                value.set(line);
                writer.append(key, value);
            }
        }

        // Reached only if every append and every close() succeeded.
        System.out.println("SEQUENCE FILE CREATED SUCCESSFULLY........");
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }
}
No comments:
Post a Comment