POI读取Word内容及格式,看这一篇就够了!

2021-02-02 10:57:45 浏览数 (1)

PS:获取示例代码请直接拉到最后

效果

pom依赖

代码语言:xml
<!-- Apache POI core module. -->
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi</artifactId>
      <version>4.1.2</version>
    </dependency>
    <!-- Hutool utility collection; presumably the source of the Base64.encode(byte[]) helper
         used by the picture snippets (confirm against the sample project's imports). -->
    <dependency>
      <groupId>cn.hutool</groupId>
      <artifactId>hutool-all</artifactId>
      <version>5.5.7</version>
    </dependency>

    <!-- OOXML support (XWPF for .docx). Keep the version identical to the core poi artifact. -->
    <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml</artifactId>
      <version>4.1.2</version>
    </dependency>
    <!-- Reduced ("lite") set of generated schema classes shipped with each POI release. -->
    <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml-schemas</artifactId>
      <version>4.1.2</version>
    </dependency>
    <!-- Full set of generated schema classes (CT* beans). The POI FAQ pairs ooxml-schemas 1.4
         with POI 4.x; 1.1 was built for POI 3.7-3.13 and does not match the 4.1.2 jars above. -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>ooxml-schemas</artifactId>
      <version>1.4</version>
    </dependency>
    <!-- Scratchpad module; keep the version identical to the core poi artifact. -->
    <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-scratchpad</artifactId>
      <version>4.1.2</version>
    </dependency>

加载文档

代码语言:java
  InputStream is = new FileInputStream("C:\Users\10386\Desktop\word-正确文档2.docx");
  XWPFDocument doc = new XWPFDocument(is);

页眉页脚

代码语言:java
    // Print the text of the document's default header and default footer.
    // The policy, the header and the footer are each null-checked; a missing part is
    // reported as an empty string instead of failing with a NullPointerException.
    XWPFHeaderFooterPolicy headerFooterPolicy = doc.getHeaderFooterPolicy();
    // Default header text
    String header = "";
    if (headerFooterPolicy != null && headerFooterPolicy.getDefaultHeader() != null) {
        header = headerFooterPolicy.getDefaultHeader().getText();
    }
    System.out.println("***页眉 ***" + header);
    // Default footer text
    String footer = "";
    if (headerFooterPolicy != null && headerFooterPolicy.getDefaultFooter() != null) {
        footer = headerFooterPolicy.getDefaultFooter().getText();
    }
    // Print the footer here; the original printed the header variable a second time.
    System.out.println("***页脚 ***" + footer);

页边距

代码语言:java
    // Read the four page margins from the section properties attached to the document body.
    // NOTE(review): getSectPr() / getPgMar() are dereferenced without a null check, so this
    // presumably assumes the document defines body-level section properties - confirm.
    // NOTE(review): the values are presumably in twips (1/20 pt), the usual unit for
    // page-margin attributes - confirm before converting to cm or inches.
    CTDocument1 ctdoc =  doc.getDocument();
    int top = ctdoc.getBody().getSectPr().getPgMar().getTop().intValue();
    int bottom = ctdoc.getBody().getSectPr().getPgMar().getBottom().intValue();
    int left = ctdoc.getBody().getSectPr().getPgMar().getLeft().intValue();
    int right = ctdoc.getBody().getSectPr().getPgMar().getRight().intValue();

获取标题

代码语言:java
   // The first paragraph of the document is treated as the title.
   List<XWPFParagraph> paras = doc.getParagraphs(); // list of the document's paragraphs
    // Extract the title's formatting; getParagraph(...) is a helper from the full sample
    // code and is not part of this snippet.
    List<Map<String, String>> list = getParagraph(paras.get(0));
    // The string concatenation operator was missing here.
    System.out.println("标题信息===" + list);

获取表格

代码语言:java
        // Dump one table: for every cell print its merge information and the formatting of its
        // first paragraph, then print the overall row count and the largest per-row cell count.
        // `element` comes from the surrounding code and is expected to be an XWPFTable.
        XWPFTable table = (XWPFTable) element;
        List<XWPFTableRow> xwpfTableRows = table.getRows();
        int row_count = xwpfTableRows.size();
        // Number of cells in each row (typed list instead of a raw ArrayList).
        List<Integer> cell_count = new ArrayList<>();
        int row_index = 1;
        for (XWPFTableRow xwpfTableRow : xwpfTableRows) {
          List<XWPFTableCell> xwpfTableCells = xwpfTableRow.getTableCells();
          cell_count.add(xwpfTableCells.size());
          System.out.println("第" + row_index + "行");
          int cell_index = 1;
          for (XWPFTableCell xwpfTableCell : xwpfTableCells) {
            // Is the cell merged horizontally, and across how many grid columns?
            // The cell properties element is optional, so check for it before reading gridSpan.
            CTDecimalNumber cellspan = xwpfTableCell.getCTTc().isSetTcPr()
                ? xwpfTableCell.getCTTc().getTcPr().getGridSpan()
                : null;
            boolean gridspan = cellspan != null;
            String gridspan_num = gridspan ? cellspan.getVal().toString() : "0";
            // Only the first paragraph of the cell is reported.
            List<XWPFParagraph> xwpfParagraphs = xwpfTableCell.getParagraphs();
            XWPFParagraph paragraph = xwpfParagraphs.get(0);
            System.out.println("第" + cell_index + "个单元格,合并标志:" + gridspan + ",合并个数:" + gridspan_num
                + "文字:" + getParagraph(paragraph));
            cell_index++;
          }
          row_index++;
        }
        // Collections.max throws on an empty collection, so a table without rows reports 0 columns.
        int max_cells = cell_count.isEmpty() ? 0 : Collections.max(cell_count);
        System.out.println("表格为:row_count===" + row_count + "行" + max_cells + "列");

获取图片

代码语言:java
// Print the first picture embedded in the run (if any) as a Base64 string.
// `run` is the XWPFRun supplied by the surrounding code.
List<XWPFPicture> embedded = run.getEmbeddedPictures();
if (!embedded.isEmpty()) {
    XWPFPictureData firstPictureData = embedded.get(0).getPictureData();
    byte[] rawBytes = firstPictureData.getData();
    System.out.println(Base64.encode(rawBytes));
}

缩进方式计算

代码语言:java
//Determine the indentation mode first, then convert the raw values.
        // Results: ind_type is "hang", "first" or "none"; ind is the hanging / first-line amount;
        // ind_left and ind_right are the left and right indents.
        // A raw value that is a multiple of 567 is treated as centimetres, anything else as
        // character units. NOTE(review): the divisors 240 and 210 are presumably per-character
        // widths tied to a specific font size - confirm against the target documents.
        // ind_hang is declared as in the original snippet but is not assigned here.
        double ind = -1, ind_left = -1, ind_right = -1, ind_hang = -1;
        String ind_type = "";
        // The getters report -1 for "not set". Map that to 0 so the sentinel does not leak into
        // the results as -1/210 now that every division is floating-point.
        int left_raw = para.getIndentationLeft() == -1 ? 0 : para.getIndentationLeft();
        int right_raw = para.getIndentationRight() == -1 ? 0 : para.getIndentationRight();
        int hanging_raw = para.getIndentationHanging();
        if (hanging_raw != -1) {//hanging indent
            ind_type = "hang";
            if (hanging_raw % 567 == 0) {//hanging amount expressed in centimetres
                ind = hanging_raw / 567.0;
                ind_left = (left_raw - 567.0 * ind) / 210.0;
            } else {//hanging amount expressed in characters
                // 240.0 / 210.0 instead of 240 / 210: integer division dropped the fraction.
                ind = hanging_raw / 240.0;
                ind_left = (left_raw - hanging_raw) / 210.0;
            }
        } else {//first-line indent, or no indent at all
            int first_line_raw = para.getFirstLineIndent();
            if (first_line_raw == -1) {
                ind_type = "none";
                ind = 0;
            } else {
                ind_type = "first";
                ind = first_line_raw % 567 == 0 ? first_line_raw / 567.0 : first_line_raw / 240.0;
            }
            ind_left = left_raw / 210.0;
        }
        // The right indent is converted the same way in every mode.
        ind_right = right_raw / 210.0;

段落格式

代码语言:java
// Collect the paragraph-level formatting of the second paragraph into a String map.
List<XWPFParagraph> paras = doc.getParagraphs(); // list of the document's paragraphs
 XWPFParagraph  para =  paras.get(1);
 List<XWPFRun> runsLists = para.getRuns();
        List<Map<String, String>> list = new ArrayList<>();
        Map<String, String> titile = new HashMap<>();
        titile.put("Text", para.getText());// full text of the paragraph
        titile.put("Alignment", para.getAlignment().toString());
        // `+ ""` converts each value to a String; the `+` operators were missing.
        titile.put("SpacingBetween", para.getSpacingBetween() + "");// line spacing
        titile.put("SpacingBeforeLines", para.getSpacingBeforeLines() + "");// spacing before the paragraph
        titile.put("SpacingAfterLines", para.getSpacingAfterLines() + "");// spacing after the paragraph
        titile.put("NumLevelText", para.getNumLevelText() + "");// automatic numbering format

文字属性

代码语言:java
      // Collect the character formatting of every run of the paragraph, one map per run,
      // and append each map to `list` (declared by the surrounding code).
      List<XWPFRun> runsLists = para.getRuns();
        for (XWPFRun run : runsLists) {
            // Print the first embedded picture of the run, if any, as Base64.
            List<XWPFPicture> pictures = run.getEmbeddedPictures();
            if (pictures.size() > 0) {
                XWPFPicture picture = pictures.get(0);
                XWPFPictureData pictureData = picture.getPictureData();
                System.out.println(Base64.encode(pictureData.getData()));
            }

            Map<String, String> titile_map = new HashMap<>();
            titile_map.put("content", run.getText(0));
            String bold = Boolean.toString(run.isBold());// bold
            titile_map.put("Bold", bold);
            String color = run.getColor();// font colour
            titile_map.put("Color", color);

            String fontFamily = run.getFontFamily(XWPFRun.FontCharRange.hAnsi);// font family for the hAnsi range
            titile_map.put("FontFamily", fontFamily);

            String fontName = run.getFontName();// font name
            titile_map.put("FontName", fontName);

            String fontSize = run.getFontSize() + "";// font size
            titile_map.put("FontSize", fontSize);

            String underline = run.getUnderline().name();// underline pattern
            titile_map.put("Underline", underline);

            String underlineColor = run.getUnderlineColor();// underline colour
            titile_map.put("UnderlineColor", underlineColor);

            String italic = Boolean.toString(run.isItalic());// italic
            titile_map.put("Italic", italic);
            list.add(titile_map);
        }

示例代码:

链接:https://pan.baidu.com/s/1mjVQ3x22MZJi1OZw0H6fuQ

提取码:6lu6

0 人点赞