import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.InputStream; import java.io.OutputStream; import java.util.List; import org.docx4j.Docx4J; import org.docx4j.convert.out.FOSettings; import org.docx4j.fonts.IdentityPlusMapper; import org.docx4j.fonts.Mapper; import org.docx4j.fonts.PhysicalFont; import org.docx4j.fonts.PhysicalFonts; import org.docx4j.openpackaging.exceptions.Docx4JException; import org.docx4j.openpackaging.exceptions.InvalidFormatException; import org.docx4j.openpackaging.packages.WordprocessingMLPackage; import org.docx4j.wml.P; import org.docx4j.wml.R; import org.docx4j.wml.STBrType; public class WordUtil { /** * * @param xwpfDocument poi包里的类 * @return * @throws IOException * @throws Docx4JException */ // public static WordprocessingMLPackage loadMLPackage( // XWPFDocument xwpfDocument) throws IOException, // Docx4JException { // InputStream is = null; // try (ByteArrayOutputStream out = new ByteArrayOutputStream();) { // xwpfDocument.write(out); // is = new ByteArrayInputStream(out.toByteArray()); // return WordprocessingMLPackage.load(is); // } finally { // IOUtils.closeQuietly(is); // } // } /** * 将word输出到某个pdf文件savePdfPath中 * @param wordMLPackage * @param savePdfPath * @throws Exception */ public static void convertWordToPdf(WordprocessingMLPackage wordMLPackage,String savePdfPath) throws Exception{ OutputStream os = new java.io.FileOutputStream(savePdfPath); convertWordToPdf(wordMLPackage,os); } /** * 将word文件保存输出到输出流 * @param wordMLPackage * @param os * @throws Exception */ public static void convertWordToPdf(WordprocessingMLPackage wordMLPackage,OutputStream os ) throws Exception { // Font regex (optional) // Set regex if you want to restrict to some defined subset of fonts // Here we have to do this before calling createContent, // since that discovers fonts String regex = null; // Windows: // String // regex=".*(calibri|camb|cour|arial|symb|times|Times|zapf).*"; 
regex=".*(simsun|calibri|camb|cour|courier new|arial|times|comic|georgia|impact|LSANS|pala|tahoma|trebuc|verdana|symbol|webdings|wingdings|palatino).*"; // Mac // String // regex=".*(Courier New|Arial|Times New Roman|Comic Sans|Georgia|Impact|Lucida Console|Lucida Sans Unicode|Palatino Linotype|Tahoma|Trebuchet|Verdana|Symbol|Webdings|Wingdings|MS Sans Serif|MS Serif).*"; //PhysicalFonts.setRegex(regex); PhysicalFonts.setRegex(regex); // Set up font mapper (optional) Mapper fontMapper = new IdentityPlusMapper(); // .. example of mapping font Times New Roman which doesn't have certain Arabic glyphs // eg Glyph "ي" (0x64a, afii57450) not available in font "TimesNewRomanPS-ItalicMT". // eg Glyph "ج" (0x62c, afii57420) not available in font "TimesNewRomanPS-ItalicMT". // to a font which does PhysicalFont font = PhysicalFonts.get("arial unicode ms"); // make sure this is in your regex (if any)!!! if (font!=null) { fontMapper.put(Mapper.FONT_FALLBACK, font); } //fontMapper.getFontMappings().put("Libian SC Regular", PhysicalFonts.getPhysicalFonts().get("SimSun")); wordMLPackage.setFontMapper(fontMapper); // FO exporter setup (required) // .. the FOSettings object FOSettings foSettings = Docx4J.createFOSettings(); foSettings.setWmlPackage(wordMLPackage); // Specify whether PDF export uses XSLT or not to create the FO // (XSLT takes longer, but is more complete). // Don't care what type of exporter you use Docx4J.toFO(foSettings, os, Docx4J.FLAG_EXPORT_PREFER_XSL); // Prefer the exporter, that uses a xsl transformation // Docx4J.toFO(foSettings, os, Docx4J.FLAG_EXPORT_PREFER_XSL); // Prefer the exporter, that doesn't use a xsl transformation (= uses a visitor) // .. 
faster, but not yet at feature parity // Docx4J.toFO(foSettings, os, Docx4J.FLAG_EXPORT_PREFER_NONXSL); } // public InputStream transOutputStreamToInputStream(ByteArrayOutputStream out){ // InputStream is = null; // is = new ByteArrayInputStream(out.toByteArray()); // return is; // ByteArrayOutputStream out = new ByteArrayOutputStream() // } /** * 创建分页符 * @return */ public static P getPageBreak() { P p = new P(); R r = new R(); org.docx4j.wml.Br br = new org.docx4j.wml.Br(); br.setType(STBrType.PAGE); r.getContent().add(br); p.getContent().add(r); return p; } /** * 合并成word文档 * * @param newMLPackage * @param srcMLPackage * @param b * @throws InvalidFormatException */ public static WordprocessingMLPackage mergeWord2010Util(List<WordprocessingMLPackage> srcMLPackageList, boolean b) throws InvalidFormatException { WordprocessingMLPackage newMLPackage = WordprocessingMLPackage.createPackage(); int l=srcMLPackageList.size(); for(int i=0;i<l;i++){ WordprocessingMLPackage item = srcMLPackageList.get(i); List<Object> list = item.getMainDocumentPart().getContent(); for (Object obj : list) { newMLPackage.getMainDocumentPart().addObject(obj); } if (b) {// 判断是否需要换页符 if( i != (l-1)){ newMLPackage.getMainDocumentPart().addObject(WordUtil.getPageBreak()); } } } if ( l == 0) return null; return newMLPackage; } /** * 将WordprocessingMLPackage导出到输出流 * @param wordMLPackage * @param baos * @throws Docx4JException */ public static void convertWordprocessingMLPackageToOutputStream(WordprocessingMLPackage wordMLPackage,ByteArrayOutputStream out) throws Docx4JException{ FOSettings foSettings = Docx4J.createFOSettings(); foSettings.setWmlPackage(wordMLPackage); // want the fo document as the result. 
foSettings.setApacheFopMime(FOSettings.INTERNAL_FO_MIME); // ByteArrayOutputStream baos = new ByteArrayOutputStream(); //Don't care what type of exporter you use // Docx4J.toFO(foSettings, os, Docx4J.FLAG_NONE); //Prefer the exporter, that uses a xsl transformation Docx4J.toFO(foSettings, out, Docx4J.FLAG_EXPORT_PREFER_XSL); } /** * 将wordMLPackage转成输出流 * @param wordMLPackage * @return * @throws Docx4JException */ public InputStream wordprocessingMLPackageToInputStream(WordprocessingMLPackage wordMLPackage) throws Docx4JException{ InputStream is = null; ByteArrayOutputStream out = new ByteArrayOutputStream(); convertWordprocessingMLPackageToOutputStream(wordMLPackage, out); is = new ByteArrayInputStream(out.toByteArray()); return is; } }
// Test class for the utility (WordUtilTest.java)
import java.util.ArrayList; import java.util.List; import myutils.WordUtil; import org.docx4j.openpackaging.packages.WordprocessingMLPackage; public class WordUtilTest { private static String[] docnames = {"\\myexamples\\file_1.docx","\\myexamples\\file_2.docx"}; private static String out_put_docnames = "\\myexamples\\files.docx"; private static String out_put_pdf = "\\myexamples\\files.pdf"; public static void main(String[] args) throws Exception { WordUtilTest.mergeWord2010UtilTest(); } public static void mergeWord2010UtilTest() throws Exception{ List<WordprocessingMLPackage> srcPkgList=new ArrayList<WordprocessingMLPackage>(); for(int i = 0,l = docnames.length;i<l;i++){ String docName = docnames[i]; String docPath = System.getProperty("user.dir")+docName; WordprocessingMLPackage wmlSourcePkg = WordprocessingMLPackage.load(new java.io.File(docPath)); srcPkgList.add(wmlSourcePkg); } // XWPFDocument document = new XWPFDocument(); // ByteArrayOutputStream out = new ByteArrayOutputStream(); // document.write(out ); // InputStream is = new ByteArrayInputStream(out.toByteArray()); WordprocessingMLPackage newMLPackage = WordUtil.mergeWord2010Util( srcPkgList, true); //保存成文件 //newMLPackage.save(new File( System.getProperty("user.dir")+out_put_docnames)); // ByteArrayOutputStream out = new ByteArrayOutputStream(); // WordUtil.convertWordprocessingMLPackageToOutputStream(newMLPackage, out); // System.out.println(out.toByteArray().length); //转成PDF WordUtil.convertWordToPdf(newMLPackage, System.getProperty("user.dir")+out_put_pdf); } }