撰写本文时,Apache POI 的最新版本为 3.8,包含以下 jar 包:
poi-3.8-20120326.jar
poi-examples-3.8-20120326.jar
poi-excelant-3.8-20120326.jar
poi-ooxml-3.8-20120326.jar
poi-ooxml-schemas-3.8-20120326.jar
poi-scratchpad-3.8-20120326.jar
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
public class WordDemo extends HttpServlet {
private static final long serialVersionUID = 1L;
public void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
this.doPost(request, response);
}
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
//从硬盘读取一个doc文档
InputStream in = new FileInputStream("F:\\test.doc");
//类从word文档中提取文本,非特殊情况下,都将使用getParagraphText()与getText()
WordExtractor word = new WordExtractor(in);
//获取段文本
String [] strArray = word.getParagraphText();
String str = word.getText();
for(int i=0 ; i<strArray.length ; i++){
System.out.println(strArray[i]+"\ti循环:"+i);
}
System.out.println(str +"\t --");
//这个构造函数从InputStream中加载Word文档。
HWPFDocument doc = new HWPFDocument((InputStream)new FileInputStream("F:\\test.doc"));
//这个类为HWPF对象模型,对文档范围段操作
Range range = doc.getRange(); //
//看看此文档有多少个段落
int num = range.numParagraphs();
System.out.println(num+"段");
//得到word数据流
byte [] dataStream = doc.getDataStream();
System.out.println("数据流长度:"+dataStream.length);
//用于在一段范围内获得段落数
int numChar = range.numCharacterRuns();
System.out.println("CharacterRuns 数:"+numChar);
//负责图像提取 和 确定一些文件某块是否包含嵌入的图像。
PicturesTable table = new PicturesTable(doc, dataStream, null);
for(int j=0 ; j<numChar ; j++){
//这个类表示一个文本运行,有着共同的属性。
CharacterRun run = range.getCharacterRun(j);
//是否存在图片
boolean bool = table.hasPicture(run);
System.out.println("是否存在图片:"+bool);
if(bool){
//返回图片对象绑定到指定的CharacterRun
Picture pic = table.extractPicture(run, true);
//图片的内容字节写入到指定的输出流。
pic.writeImageContent(new FileOutputStream("F:\\"+j+".bmp"));
System.out.println("成功提取图片"+j+":");
}
}
request.getRequestDispatcher("ok.jsp").forward(request, response);
}
}
经测试,以上代码可正常运行,并能成功提取文档中的图片。