PDF 转 HTML

1. 引入 Gradle 依赖

// pdf2dom 1.8 — presumably the artifact that supplies the org.fit.pdfdom classes used by the code below
implementation 'net.sf.cssbox:pdf2dom:1.8'

2. 自定义处理图片(及字体)资源的 handler,实现 HtmlResourceHandler 接口

package com.easipass.cms.config;

import com.alibaba.fastjson.JSONObject;
import com.easipass.cms.service.ApiService;
import com.easipass.commoncore.util.JsonUtils;
import org.fit.pdfdom.resource.HtmlResource;
import org.fit.pdfdom.resource.HtmlResourceHandler;
import java.io.IOException;

/**
 * {@link HtmlResourceHandler} that uploads each resource extracted from a PDF
 * through {@link ApiService#uploadFile} and returns the URL under which the
 * uploaded file can be referenced from the generated HTML.
 *
 * <p>NOTE(review): every resource is uploaded under a fixed {@code .png} name.
 * If this handler is also registered as a font handler, fonts will be stored
 * and linked with a {@code .png} extension as well — confirm whether that is
 * intended.
 */
public class UploadImageHandler implements HtmlResourceHandler {

    /** File name sent to the upload service for every resource. */
    private static final String FILE_NAME = "pdfConvertPng.png";

    /** Extension of {@link #FILE_NAME} without the dot, appended to the returned URL. */
    private static final String FILE_EXTENSION =
            FILE_NAME.substring(FILE_NAME.lastIndexOf(".") + 1);

    final ApiService apiService;
    private final String devPath;

    /**
     * @param initApiService service used to upload the resource bytes
     * @param initDevPath    prefix prepended to the uploaded file id when building the returned URL
     */
    public UploadImageHandler(ApiService initApiService, String initDevPath) {
        this.apiService = initApiService;
        this.devPath = initDevPath;
    }

    /**
     * Uploads the resource data and builds its URL as
     * {@code devPath + <"data" field of the upload response> + ".png"}.
     *
     * @param htmlResource resource whose bytes are uploaded
     * @return URL to embed in the generated HTML
     * @throws RuntimeException if the upload or the response parsing fails; the
     *                          original exception is attached as the cause
     */
    @Override
    public String handleResource(HtmlResource htmlResource) throws IOException {
        try {
            String response = apiService.uploadFile(FILE_NAME, htmlResource.getData());
            JSONObject result = JsonUtils.jsonToBean(response);
            // The "data" field of the upload response is used as the stored file's id.
            String bigDataId = (String) result.get("data");
            return devPath + bigDataId + "." + FILE_EXTENSION;
        } catch (Exception e) {
            // Keep the original message for compatibility, but chain the cause so the
            // real stack trace is not lost.
            throw new RuntimeException(e.getMessage(), e);
        }
    }

}

3. Controller 接口

/**
     * Converts an uploaded PDF into an HTML string. Images and fonts found in the
     * PDF are passed to an {@link UploadImageHandler}, and the URLs it returns are
     * referenced from the generated HTML.
     *
     * @param file the uploaded PDF file
     * @return the generated HTML, or {@code null} if the conversion failed
     */
    @PostMapping("/pdf2dom/convertPdfToHtml")
    public String convertPdfToHtml(@RequestBody MultipartFile file) {
        // Both the upload stream and the PDF document are closed automatically,
        // including when parsing or the reflection step below throws.
        try (InputStream is = file.getInputStream();
             PDDocument pdf = PDDocument.load(is)) {

            // Register the upload handler for both images and fonts.
            PDFDomTreeConfig config = PDFDomTreeConfig.createDefaultConfig();
            UploadImageHandler handler = new UploadImageHandler(apiService, devPath);
            config.setImageHandler(handler);
            config.setFontHandler(handler);

            PDFDomTree parser = new PDFDomTree(config) {
                /**
                 * Builds the page {@code <div>} sized from the current media box
                 * (width/height swapped for 90/270 degree rotation), with hidden
                 * overflow and horizontal centering.
                 */
                @Override
                public Element createPageElement() {
                    String pstyle = "";
                    PDRectangle layout = getCurrentMediaBox();
                    if (layout != null) {
                        float w = layout.getWidth();
                        float h = layout.getHeight();
                        final int rot = pdpage.getRotation();
                        if (rot == 90 || rot == 270) {
                            // Rotated page: swap width and height.
                            float x = w;
                            w = h;
                            h = x;
                        }

                        pstyle = "width:" + w + UNIT + ";" + "height:" + h + UNIT + ";";
                        pstyle += "overflow:hidden;";
                        pstyle += "margin:0 auto;";
                    } else {
                        log.warn("No media box found");
                    }

                    Element el = doc.createElement("div");
                    el.setAttribute("id", "page_" + (pagecnt++));
                    el.setAttribute("class", "page");
                    el.setAttribute("style", pstyle);
                    return el;
                }
            };

            // Replace the parser's defaultStyle CSS via reflection.
            Field field = PDFDomTree.class.getDeclaredField("defaultStyle");
            field.setAccessible(true);
            field.set(parser, ".page{position:relative; margin:0.5em}\n" +
                    ".p,.r{position:absolute;}\n" +
                    // disable text-shadow fallback for text stroke if stroke supported by browser
                    "@supports(-webkit-text-stroke: 1px black) {" +
                    ".p{text-shadow:none !important;}" +
                    "}");

            Writer output = new StringWriter();
            parser.writeText(pdf, output);
            return output.toString();
        } catch (Exception e) {
            // Callers receive null on failure (unchanged contract); log the full cause
            // instead of printing the stack trace to stderr.
            log.error("Failed to convert PDF to HTML", e);
            return null;
        }
    }

你可能感兴趣的:(java,pdf,html,java)