Reading and writing HDFS files with the simple API

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class TestRead {

	/**
	 * Reads an HDFS file line by line (decoded as GBK) and copies the lines to a local file.
	 *
	 * @param args args[0] = HDFS file path, args[1] = Hadoop configuration file, args[2] = local output file
	 * @throws Throwable
	 */
	public static void main(String[] args) throws Throwable {
		String path = args[0];
		String confFile = args[1];
		Configuration conf = new Configuration();
		FileInputStream fis = new FileInputStream(confFile);
		conf.addResource(fis);
		FileSystem fileSystem = FileSystem.get(conf);
		// Open the HDFS file and wrap it in a buffered, GBK-decoding reader
		FSDataInputStream fs = fileSystem.open(new Path(path));
		BufferedReader bis = new BufferedReader(new InputStreamReader(fs, "GBK"));
		// Buffered writer for the local output file
		FileWriter fos = new FileWriter(args[2]);
		BufferedWriter bw = new BufferedWriter(fos);
		String temp;
		while ((temp = bis.readLine()) != null) {
			bw.write(temp);
			bw.newLine(); // readLine() strips the line terminator, so add it back
			System.out.println(temp);
			if (temp.startsWith("</doc>")) {
				break;
			}
			// Stop once a line containing the string "上海" (Shanghai) is found
			if (temp.indexOf("上海") > -1) {
				System.out.println("found it, coming");
				break;
			}
		}
		bw.close();
		bis.close();
		fileSystem.close();
	}

}
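The class above only covers the read path. For completeness, here is a minimal write sketch using FileSystem.create(); the class name TestWrite, the text written, and the command-line argument are assumptions for illustration, and the charset is fixed to GBK to mirror the read example.

import java.io.BufferedWriter;
import java.io.OutputStreamWriter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TestWrite {

	public static void main(String[] args) throws Throwable {
		// args[0] = HDFS path to create (hypothetical, e.g. /tmp/demo.txt)
		Configuration conf = new Configuration();
		FileSystem fileSystem = FileSystem.get(conf);
		// create() returns an FSDataOutputStream; wrap it in a buffered, GBK-encoding writer
		FSDataOutputStream out = fileSystem.create(new Path(args[0]), true); // true = overwrite
		BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, "GBK"));
		bw.write("hello hdfs"); // sample content, just for illustration
		bw.newLine();
		bw.close();
		fileSystem.close();
	}

}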

 

A few points to note:

1: When reading and writing files, it is best to wrap the streams with BufferedReader/BufferedWriter (the BufferedXXX classes).

2: If you read bytes and write them straight to a file, there is no character-set issue (see the byte-copy sketch after this list).

3: If you read characters, watch the character-set encoding; for example, here I configure the reader to decode GBK:

InputStreamReader(fs, "GBK")
