word转HTML 基本版

同时支持doc和docx,话不多说,直接上代码
项目依赖 pom.xml

<dependency>
    <groupId>commons-lang</groupId>
    <artifactId>commons-lang</artifactId>
    <version>2.6</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.6</version>
</dependency>
<dependency>
    <groupId>net.sf.jtidy</groupId>
    <artifactId>jtidy</artifactId>
    <version>r938</version>
</dependency>

正式代码是

package com.zbj.spring.boot.util;

import lombok.Cleanup;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;

/**
 * Converts a Word document to an HTML file, extracting embedded pictures
 * into a separate image directory.
 *
 * <p>Both the OOXML format ({@code .docx}, handled through
 * {@link XHTMLConverter}) and the binary format ({@code .doc}, handled
 * through {@link WordToHtmlConverter}) are supported. The generated HTML is
 * written as UTF-8 in both cases.
 *
 * @author weigang
 * @create 2017-10-13
 **/
public class WordToHtml {

    /** Location of the generated HTML file. */
    private static final String HTML_PATH = "D:/test/index.html";

    /** Directory that receives the pictures extracted from the document. */
    private static final String IMAGE_PATH = "D:/test/image/";

    /** Path suffix (matched case-insensitively) that selects the OOXML converter. */
    private static final String DOCX_SUFFIX = "docx";

    /** Charset used for the generated HTML, identical for both formats. */
    private static final String HTML_ENCODING = "utf-8";

    /**
     * Demo entry point: converts a sample document and prints any failure.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Point this at a file whose name ends in "docx" to exercise the OOXML branch.
        String sourceFileName = "D:/test/员工劳动合同.doc";
        try {
            convertWordToHtml(sourceFileName);
        } catch (IOException | ParserConfigurationException | TransformerException e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts the Word document at {@code path} to HTML.
     *
     * <p>A path ending in {@code docx} (in any letter case) is read as an OOXML
     * document; every other path is read as a binary {@code .doc} document.
     * A {@code null}, empty or whitespace-only path is ignored.
     *
     * @param path location of the source document; may be blank, in which case nothing happens
     * @throws IOException if the source cannot be read or the output cannot be written
     * @throws ParserConfigurationException if no DOM builder is available for the {@code .doc} conversion
     * @throws TransformerException if the converted {@code .doc} DOM cannot be serialized
     */
    public static void convertWordToHtml(String path) throws IOException, ParserConfigurationException, TransformerException {
        if (StringUtils.isBlank(path)) {
            return;
        }

        // regionMatches returns false for a negative offset, so paths shorter
        // than the suffix simply fall through to the .doc branch.
        int suffixLength = DOCX_SUFFIX.length();
        boolean isDocx = path.regionMatches(true, path.length() - suffixLength, DOCX_SUFFIX, 0, suffixLength);

        if (isDocx) { // Word 2007 and later
            convertDocx(path);
        } else { // Word 2003 and earlier
            convertDoc(path);
        }
    }

    /** Converts an OOXML ({@code .docx}) document to UTF-8 encoded XHTML. */
    private static void convertDocx(String path) throws IOException {
        // try-with-resources releases the source file handle even when parsing fails.
        try (FileInputStream input = new FileInputStream(path)) {
            XWPFDocument document = new XWPFDocument(input);

            XHTMLOptions options = XHTMLOptions.create();
            options.setExtractor(new FileImageExtractor(new File(IMAGE_PATH)));
            options.URIResolver(new BasicURIResolver("image"));

            // The output file is opened only after the document parsed successfully,
            // so a broken input does not truncate an existing HTML file.
            // The charset is explicit: the platform default would make the result
            // depend on the machine running the conversion.
            try (FileOutputStream output = new FileOutputStream(HTML_PATH);
                 OutputStreamWriter writer = new OutputStreamWriter(output, HTML_ENCODING)) {
                XHTMLConverter converter = (XHTMLConverter) XHTMLConverter.getInstance();
                converter.convert(document, writer, options);
            }
        }
    }

    /** Converts a binary ({@code .doc}) document to UTF-8 encoded HTML. */
    private static void convertDoc(String path) throws IOException, ParserConfigurationException, TransformerException {
        Document htmlDocument;

        // try-with-resources releases the source file handle even when parsing fails.
        try (FileInputStream input = new FileInputStream(path)) {
            HWPFDocument wordDocument = new HWPFDocument(input);
            Document target = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            WordToHtmlConverter converter = new WordToHtmlConverter(target);

            // Save each picture and return its path relative to the HTML file.
            converter.setPicturesManager((content, pictureType, name, width, height) -> {
                File imageDir = new File(IMAGE_PATH);
                if (!imageDir.exists()) {
                    imageDir.mkdirs();
                }
                try (FileOutputStream out = new FileOutputStream(new File(imageDir, name))) {
                    out.write(content);
                } catch (Exception e) {
                    // Best effort: a picture that cannot be saved must not abort the conversion.
                    e.printStackTrace();
                }
                return "image/" + name;
            });

            converter.processDocument(wordDocument);
            htmlDocument = converter.getDocument();
        }

        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, HTML_ENCODING);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(new File(HTML_PATH)));
    }
}

对于word文件,在网上随便下载个合同或者文件即可

你可能感兴趣的:(Java,html,word,转换)