Configuration conf = context.getConfiguration(); FileSystem fs = FileSystem.get(conf); // FSDataInputStream fin = fs.open(new Path(conf.get("emotionPath"))); FSDataInputStream fin = fs.open(new Path("/user/lvxinjian/negative.txt")); BufferedReader in = null; String line; try { in = new BufferedReader(new InputStreamReader(fin, "UTF-8")); while ((line = in.readLine()) != null) { wordSet.add(line); } System.out.println(wordSet.size()); } finally { if(in != null) in.close(); }
public class GetSentenceWithPos { public void read () throws IOException { Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); SequenceFile.Reader sreader = null; try { sreader = new SequenceFile.Reader(fs, new Path("/user/lvxinjian/tfidf/mediafile/dictionary.file-0"),conf); Text key = new Text();// key 和 value的类型要和当前读取文件的key val 一致 IntWritable val = new IntWritable(); HashMap<Integer , String> WordList = new HashMap<Integer, String>();//词典 System.out.println("load dictionary 0..."); while (sreader.next(key, val)) { WordList.put(val.get(),key.toString()); } System.out.println("load dictionary 1..."); sreader = null; sreader = new SequenceFile.Reader(fs, new Path("/user/lvxinjian/tfidf/mediafile/dictionary.file-1"),conf); while (sreader.next(key, val)) { WordList.put(val.get(),key.toString()); } System.out.println("load dictionary 2..."); sreader = null; sreader = new SequenceFile.Reader(fs, new Path("/user/lvxinjian/tfidf/mediafile/dictionary.file-2"),conf); while (sreader.next(key, val)) { WordList.put(val.get(),key.toString()); } Configuration conf1 = new Configuration();; FileSystem fs2 = FileSystem.get(conf1); FSDataInputStream fin = fs2.open(new Path("/user/lvxinjian/showTfidf49AllData/part-r-00000")); BufferedReader in = null; String line; System.out.println("load wordindex_count..."); ArrayList<String> wordInfo = new ArrayList<String>(); //mapreduce结果 in = new BufferedReader(new InputStreamReader(fin, "UTF-8")); while ((line = in.readLine()) != null) { wordInfo.add(line); } System.out.println("sizef:\t"+ wordInfo.size()); System.out.println("get word ..."); ArrayList<String> lstResult = new ArrayList<String>(); int count = 0; for(String str : wordInfo){ if(count % 1000 == 0) System.out.println(count); count++; String [] arr = str.split("\t"); if(arr.length != 2) continue; if(WordList.containsKey(Integer.parseInt(arr[0]))){ String word = WordList.get(Integer.parseInt(arr[0])); lstResult.add(word + "\t" + arr[1]); } } System.out.println("saving...."); FileTool.SaveListToFile(lstResult, "./2013052802.txt", false, Charset.forName("utf-8")); } finally { IOUtils.closeStream(sreader); } } static public void main(String [] args) { try { GetSentenceWithPos getSentenceWithPos = new GetSentenceWithPos(); getSentenceWithPos.read(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }