Hadoop 处理 SequenceFile 文件

代码为:

package sequence;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.io.IOUtils;

/**
 * Command-line utility that reads records from a Hadoop SequenceFile and
 * prints each one to standard output as {@code key<TAB>value}.
 *
 * <p>Usage: {@code SeqRead [path] [maxRecords]}. Both arguments are optional;
 * with no arguments the tool prints the first record of {@link #DEFAULT_PATH}.
 */
public class SeqRead {
	/** SequenceFile that is read when no path argument is supplied. */
	private static final String DEFAULT_PATH = "/qy/sourcedata/sina-blog-08/part00";

	/** Number of records printed when no limit argument is supplied. */
	private static final long DEFAULT_MAX_RECORDS = 1;

	/**
	 * Opens the SequenceFile and prints up to the requested number of records.
	 *
	 * @param args optional arguments: {@code args[0]} is the SequenceFile path
	 *             (defaults to {@link #DEFAULT_PATH}); {@code args[1]} is the
	 *             maximum number of records to print (defaults to
	 *             {@link #DEFAULT_MAX_RECORDS}; values of zero or less print nothing)
	 * @throws NumberFormatException if {@code args[1]} is not a valid long
	 * @throws Exception if the file cannot be opened or read; the failure is
	 *                   propagated so the process exits with a non-zero status
	 *                   instead of being logged and ignored
	 */
	public static void main(String[] args) throws Exception{
		String location = args.length > 0 ? args[0] : DEFAULT_PATH;
		long maxRecords = args.length > 1 ? Long.parseLong(args[1]) : DEFAULT_MAX_RECORDS;

		Configuration conf = new Configuration();

		Path path = new Path(location);

		// Resolve the filesystem from the path itself: a scheme-less path uses the
		// default filesystem configured in conf, while a fully qualified URI
		// selects the filesystem it names.
		FileSystem fs = path.getFileSystem(conf);

		SequenceFile.Reader reader = null;

		try{
			reader = new SequenceFile.Reader(fs, path, conf);

			// The key/value classes are recorded in the file header, so reusable
			// holder instances are created reflectively from what the reader reports.
			Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
			Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

			// The limit is checked before reader.next() so no record is consumed
			// once the requested number has been printed.
			for (long printed = 0; printed < maxRecords && reader.next(key, value); printed++) {
				System.out.println(key+"\t"+value);
			}
		}finally{
			// closeStream tolerates a null reader (constructor failure).
			IOUtils.closeStream(reader);
		}
	}
}

执行方法(若 jar 的 MANIFEST 中未指定 Main-Class,需在 jar 文件名之后追加主类名 sequence.SeqRead):


hadoop jar filename.jar

你可能感兴趣的:(hadoop处理sequenceFile文件)