将 Word 文档转换为 HTML,并把文档中的图片上传到 OSS、替换为 OSS 链接。首先加入 Maven 依赖:
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.15</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.15</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.15</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.15</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>xdocreport</artifactId>
<version>1.0.6</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.3</version>
</dependency>
POI 已经发布到 4.x 版本了,为什么这里还要用 3.15 呢?因为下面的代码如果使用高版本的 POI 会抛出 ClassNotFoundException(xdocreport 1.0.6 的转换器是基于 POI 3.x 编写的)。其实根本的原因是我还不清楚高版本 POI 转换 HTML 的方法(笑)
- word2007版本
/**
 * Converts a Word 2007+ (.docx) document to an XHTML string, replacing each embedded
 * image reference with the string returned by {@code OSSUploadUtil.upload}.
 *
 * <p>Images are extracted into the local {@code ./word-images} directory by
 * {@code FileImageExtractor}; the URI resolver then reads each extracted file and
 * uploads it under the name {@code <currentTimeMillis>_<fileName>}.
 *
 * <p>NOTE(review): the extraction directory is shared by all calls, so concurrent
 * conversions of documents with identically named images may overwrite each other's
 * files — confirm whether this method is ever called concurrently.
 *
 * @param in stream containing the .docx document
 * @return the generated XHTML decoded as UTF-8, or {@code "#"} if an
 *         {@link IOException} occurs during conversion; a single image whose upload
 *         fails gets {@code "#"} as its source instead
 */
public String docxToHtml(InputStream in) {
    String imagePath = "./word-images";
    File imageFile = new File(imagePath);
    if (!imageFile.exists() && !imageFile.mkdirs()) {
        // Not fatal by itself: a missing directory surfaces below as a failed image upload.
        log.warn("Could not create image directory {}", imageFile.getAbsolutePath());
    }
    // try-with-resources releases the OPC package held by the document.
    try (XWPFDocument document = new XWPFDocument(in)) {
        XHTMLOptions options = XHTMLOptions.create();
        // Write embedded images to disk so the resolver below can read them back.
        options.setExtractor(new FileImageExtractor(imageFile));
        options.URIResolver((uri) -> {
            log.info("docx 图片地址:{}", uri);
            // Resolve against the extraction directory with File(parent, child) so the path
            // is valid on every OS; a hard-coded "\\" separator only works on Windows.
            File imgFile = new File(imageFile, uri);
            // Timestamp prefix keeps uploads from different documents from colliding.
            String imgName = System.currentTimeMillis() + "_" + imgFile.getName();
            // Assumes OSSUploadUtil.upload consumes the stream before returning — TODO confirm.
            try (InputStream is = new FileInputStream(imgFile)) {
                return OSSUploadUtil.upload(imgName, is);
            } catch (IOException e) {
                log.error("Failed to upload docx image {}", imgFile, e);
            }
            return "#";
        });
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        XHTMLConverter converter = new XHTMLConverter();
        converter.convert(document, out, options);
        return new String(out.toByteArray(), StandardCharsets.UTF_8);
    } catch (IOException e) {
        log.error("Failed to convert docx to HTML", e);
        return "#";
    }
}
- word2003版本
public String docToHtml(InputStream in) { //加载word文档生成XWPF对象 try { HWPFDocument doc = new HWPFDocument(in); org.w3c.dom.Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document); //保存图片,并返回图片的相对路径 //将图片上传,调用上传的方法 wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> { try { String upload = OSSUploadUtil.upload(name, new ByteArrayInputStream(content)); return upload; } catch (UnsupportedEncodingException | FileNotFoundException e) { return "#"; } }); wordToHtmlConverter.processDocument(doc); org.w3c.dom.Document htmlDocument = wordToHtmlConverter.getDocument(); DOMSource domSource = new DOMSource(htmlDocument); ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); StreamResult streamResult = new StreamResult(byteArrayOutputStream); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); return new String(byteArrayOutputStream.toByteArray(), "UTF-8"); } catch (IOException | ParserConfigurationException | TransformerException e) { log.error("", e); } return null; }
注意:本文归作者所有,未经作者允许,不得转载