今日主题:使用 Java 解析 Word 文档内容
1. 依赖
<!-- Maven dependencies for parsing Word documents with Apache POI. -->

<!-- Apache POI core library. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.16</version>
</dependency>
<!-- Apache POI OOXML support; keep its version aligned with the core poi artifact. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.16</version>
</dependency>

<!-- The article notes that only the two dependencies above are actually needed;
     the remaining three are listed for completeness. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.16</version>
</dependency>
<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-collections4</artifactId>
    <version>4.1</version>
</dependency>
实际上只需要前面两个依赖(poi 和 poi-ooxml)就够了,后面三个可以省略。
2. 实现
代码语言:javascript复制/**
* @author : xujp0818
* @date : 2021/7/3 20:49
*/
@RestController
public class WordAction {
@Autowired
private WordContextReadService wordContextReadService;
/**
* word文档上传解析
*
* @return
*/
@PostMapping("/wordUpload")
public String wordUpload(@RequestParam("wordUploader") MultipartFile mFile) {
OutputStream os = null;
try {
// 段落
int word = 0;
// 总段落字数
int wordNum = 0;
// 图片
int picture = 0;
// 视频
int video = 0;
String buffer = "";
XWPFDocument xwpfDocument = new XWPFDocument(mFile.getInputStream());
POIXMLTextExtractor ex = new XWPFWordExtractor(xwpfDocument);
buffer = ex.getText();
ex.close();
List<String> stringList = DocUtil.pharaNum(buffer);
if (!CollectionUtils.isEmpty(stringList)) {
word = stringList.size();
}
String wordString = String.join("", stringList);
wordNum = wordString.length();
List<XWPFParagraph> paragraphList = xwpfDocument.getParagraphs();
List<String> picList = new ArrayList<>();
// 统计素材元素个数
for (int i = 0; i < paragraphList.size(); i ) {
List<XWPFRun> runList = paragraphList.get(i).getRuns();
for (XWPFRun run : runList) {
CTR ctr = run.getCTR();
String params = ctr.toString();
if (params.contains("a:graphicData")) {
picture ;
List<String> pictureIds = wordContextReadService
.readImageInParagraph(paragraphList.get(i));
if (CollectionUtils
.isNotEmpty(pictureIds)) {
for (String pictureId : pictureIds) {
picList.add(pictureId);
}
}
} else if (params.contains("v:imagedata")) {
video ;
}
}
}
int count = 0;
int aa = 0;
for (String s : stringList) {
if (StringUtils.isNotBlank(s)) {
if (s.contains("视频地址:")) {
System.out.println("视频:" s);
}
} else {
String picId = picList.get(aa);
XWPFPictureData pictureData = xwpfDocument
.getPictureDataByID(picId);
byte[] data = pictureData.getData();
aa ;
//将该图片的字节数组保存到本地的一张图片
wordContextReadService.write(data,"e:/test/" System.currentTimeMillis() ".png");
}
}
System.out.println("文字字数:" wordNum ",图片:" picture ",视频:" video);
} catch (Exception e) {
e.printStackTrace();
return "上传失败";
}
return "上传成功";
}
}
如果想了解详细的操作步骤,请观看我的 B 站视频:https://www.bilibili.com/video/BV1Uv411H7Ds?share_source=copy_web