利用 Apache POI 自带的 WordExtractor 读取 Word 文档内容

package com.linewell.asmp;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;

import org.apache.poi.hwpf.extractor.WordExtractor;

/**
 * Small demo that extracts the plain text of a Word document with
 * Apache POI's {@code WordExtractor}.
 */
public class TestMain{

	/** Document read by the no-argument {@link #getContent()}. */
	private static final String DEFAULT_DOC = "H:/test.doc";

	/**
	 * Extracts the text of the default document ({@code H:/test.doc}).
	 *
	 * @return the extracted text, or an empty string if the document could
	 *         not be read
	 */
	public static String getContent(){
		return getContent(DEFAULT_DOC);
	}

	/**
	 * Extracts the text of the Word document at the given path.
	 *
	 * <p>This method is best-effort: any failure (missing file, unreadable
	 * or unsupported document) is reported on {@code System.err} and an
	 * empty string is returned instead of propagating the exception.
	 *
	 * @param docPath path of the document to read; {@code null} or empty
	 *                yields an empty result
	 * @return the text returned by {@code WordExtractor.getTextFromPieces()},
	 *         or an empty string on failure; never {@code null}
	 */
	public static String getContent(String docPath){
		if (docPath == null || docPath.isEmpty()) {
			return "";
		}

		// try-with-resources guarantees the file stream is closed even when
		// extraction throws.
		try (InputStream in = new FileInputStream(new File(docPath))) {
			// NOTE(review): the extractor itself is not closed here; confirm
			// whether the POI version in use requires an explicit close().
			WordExtractor extractor = new WordExtractor(in);
			String text = extractor.getTextFromPieces();
			return text == null ? "" : text;
		} catch (Exception e) {
			// Broad catch kept on purpose so every failure degrades to an
			// empty result. Print the exception itself rather than only
			// getMessage(), which may be null and hides the exception type.
			System.err.println("Failed to read " + docPath + ": " + e);
			return "";
		}
	}

	/**
	 * Prints the text of the default document to standard output.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		System.out.println(getContent());
	}

}

你可能感兴趣的:(利用apache 自带 WordExtractor 读取 word内容)