Loading... 代码如下 ```dart package com.cong.quartz.util; import org.apache.commons.io.FileUtils; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.converter.PicturesManager; import org.apache.poi.hwpf.converter.WordToHtmlConverter; import org.apache.poi.hwpf.usermodel.Picture; import org.apache.poi.hwpf.usermodel.PictureType; import org.apache.poi.xwpf.converter.core.BasicURIResolver; import org.apache.poi.xwpf.converter.core.FileImageExtractor; import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter; import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.w3c.dom.Document; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import java.io.*; import java.util.List; /** * word 转换成html */ public class WordToHtml { /** * docx转换成html */ public static void Word2007ToHtml(String datestr,String id,String docpath,String htmlpath, String docname, String htmlname) throws IOException { ///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/doc/202010/0e5b5829881647248a58c4fd94cb41e3.docx final String file = docpath + "/" + datestr + "/" + docname; File f = new File(file); if (!f.exists()) { System.out.println("Sorry File does not Exists!"); } else { if (f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")) { // 1) 加载word文档生成 XWPFDocument对象 InputStream in = new FileInputStream(f); XWPFDocument document = new XWPFDocument(in); // 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录) ///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/html/202010/80c0ce15d6e940408b157f00f10d523d/image5.png File htmlFolderFile = new File(htmlpath + "/" + datestr); if(!htmlFolderFile.exists()){ htmlFolderFile.mkdirs(); } File imageFolderFile = new File(htmlpath + "/" + datestr+ "/" + id); if(!imageFolderFile.exists()){ imageFolderFile.mkdirs(); } //XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile)); XHTMLOptions options = XHTMLOptions.create();; options.setExtractor(new FileImageExtractor(imageFolderFile)); //图片位置---这里需要改变 options.URIResolver(new BasicURIResolver(id)); options.setIgnoreStylesIfUnused(false); options.setFragment(true); // 2) Prepare XHTML options (here we set the IURIResolver to load images from a "word/media" folder) // 3) 将 XWPFDocument转换成XHTML ///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/html/202010/00b4fe3d59ac486187f2f5173e359075.html String targetFileName = htmlpath + "/" + datestr + "/" + htmlname; OutputStreamWriter outputStreamWriter = new OutputStreamWriter(new FileOutputStream(targetFileName), "utf-8"); XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance(); xhtmlConverter.convert(document, outputStreamWriter, options); //OutputStream out = new FileOutputStream(new File(htmlpath + "/" + datestr + "/" + htmlname)); //XHTMLConverter.getInstance().convert(document, out, options); } else { System.out.println("Enter only MS Office 2007+ files"); } } } /** * doc转换成html */ public static void convert2Html(String datestr,String id,String docpath,String htmlpath,String docname, String htmlname) throws TransformerException, IOException, ParserConfigurationException { ///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/doc/202010/0e5b5829881647248a58c4fd94cb41e3.docx File htmlFolderFile = new File(htmlpath + "/" + datestr); if(!htmlFolderFile.exists()){ htmlFolderFile.mkdirs(); } File imageFolderFile = new File(htmlpath + "/" + datestr+ "/" + id); if(!imageFolderFile.exists()){ imageFolderFile.mkdirs(); } HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(docpath+"/"+datestr+"/"+docname));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile)); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter( DocumentBuilderFactory.newInstance().newDocumentBuilder() .newDocument()); wordToHtmlConverter.setPicturesManager( new PicturesManager() { public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches ) { return htmlpath + "/" + datestr + "/" + id + "/" +suggestedName; } } ); wordToHtmlConverter.processDocument(wordDocument); //save pictures List pics=wordDocument.getPicturesTable().getAllPictures(); if(pics!=null){ for(int i=0;i<pics.size();i++){ Picture pic = (Picture)pics.get(i); System.out.println(); try { ///mnt/ChinaApp/Tomcat/apache-tomcat-8-8060/webapps/PWebService/html/202010/80c0ce15d6e940408b157f00f10d523d/image5.png pic.writeImageContent(new FileOutputStream(htmlpath + "/" + datestr + "/" + id + "/" + pic.suggestFullFileName())); } catch (FileNotFoundException e) { e.printStackTrace(); } } } Document htmlDocument = wordToHtmlConverter.getDocument(); ByteArrayOutputStream out = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(out); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); //serializer.setOutputProperty(OutputKeys.ENCODING, "GB2312"); serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); out.close(); String content = new String(out.toByteArray()); String imgWebPath = htmlpath + "/" + datestr + "/" + id; content = content.replace(imgWebPath, id); String targetFilePath = htmlpath + "/" + datestr + "/" +htmlname; FileUtils.writeStringToFile(new File(targetFilePath), content, "utf-8"); //writeFile(new String(out.toByteArray()), htmlpath + "/" + datestr + "/" +htmlname); } public static void writeFile(String content, String path) { FileOutputStream fos = null; BufferedWriter bw = null; try { File file = new File(path); fos = new FileOutputStream(file); // bw = new BufferedWriter(new OutputStreamWriter(fos,"GB2312")); bw = new BufferedWriter(new OutputStreamWriter(fos,"UTF-8")); bw.write(content); } catch (FileNotFoundException fnfe) { fnfe.printStackTrace(); } catch (IOException ioe) { ioe.printStackTrace(); } finally { try { if (bw != null) bw.close(); if (fos != null) fos.close(); } catch (IOException ie) { } } } } ``` 最后修改:2022 年 01 月 10 日 © 允许规范转载 打赏 赞赏作者 支付宝微信 赞 如果觉得我的文章对你有用,请随意赞赏