java 利用 pdfbox 实现PDF转为图片

这个是我之前在上海做发票业务工单时弄的，记录一下。当时需求是要将发票的pdf转化为图片展示，并支持长按图片进行保存。

1.引用jar包

代码语言：xml

    <!-- PDFBox core: provides PDDocument, PDFRenderer and ImageType used by PDF2ImageUtil. -->
    <!-- NOTE: PDF2ImageUtil also imports org.icepdf.* classes; the ICEpdf artifact is not listed here and must be added separately if tranferPDF2Img is kept. -->
    <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.16</version>
        </dependency>
        <!-- FontBox: font handling library; its package org.apache.fontbox.util.autodetect is where the custom UnixFontDirFinder is placed. -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>fontbox</artifactId>
            <version>2.0.16</version>
        </dependency>

2.实现代码：

代码语言：java

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.icepdf.core.exceptions.PDFException;
import org.icepdf.core.exceptions.PDFSecurityException;
import org.icepdf.core.pobjects.Document;
import org.icepdf.core.pobjects.Page;
import org.icepdf.core.util.GraphicsRenderingHints;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;

public class PDF2ImageUtil {
    /**
     * 经过测试,dpi为96,100,105,120,150,200中,
     * 120,150,200显示效果较为清晰,体积稳定,dpi越高图片体积越大
     * 分享遇到的坑：
     * 1.如何解决 Linux 环境下乱码的问题：重写 UnixFontDirFinder 类，修改 Linux 环境下获取字体文件的路径，改为取项目里的字体文件（使用 pdfbox 转图片时的方法，使用 icepdf 请自行研究）
     * 2.如果后续遇到乱码的问题，查看日志看看缺少什么字体，然后将字体文件上传到项目的 src/main/resources/fonts 目录下即可
     */
    public static final float DEFAULT_DPI = 200;
    public static final String DEFAULT_FORMAT = "jpg";

    /**
     * pdf转图片，demo
     * （使用 pdfbox）
     * @param pdfPath PDF路径
     * @imgPath img路径
     * @page_end 要转换的页码，也可以定义开始页码和结束页码，根据需求自行添加
     */
    public static void pdfToImage(String pdfPath, String imgPath,int page_end) {
        try {
            //图像合并使用参数
            // 总宽度
            int width = 0;
            // 保存一张图片中的RGB数据
            int[] singleImgRGB;
            int shiftHeight = 0;
            //保存每张图片的像素值
            BufferedImage imageResult = null;
            //利用PdfBox生成图像
            PDDocument pdDocument = PDDocument.load(new File(pdfPath));
            PDFRenderer renderer = new PDFRenderer(pdDocument);
            //循环每个页码
            for (int i = 0, len = pdDocument.getNumberOfPages(); i < len; i  ) {
                if (i==page_end) {
                    BufferedImage image = renderer.renderImageWithDPI(i, DEFAULT_DPI, ImageType.RGB);
                    int imageHeight = image.getHeight();
                    int imageWidth = image.getWidth();
                    //计算高度和偏移量
                    //使用第一张图片宽度;
                    width = imageWidth;
                    //保存每页图片的像素值
                    imageResult = new BufferedImage(width, imageHeight, BufferedImage.TYPE_INT_RGB);
                    //这里有高度，可以将imageHeight*len，我这里值提取一页所以不需要
                    singleImgRGB = image.getRGB(0, 0, width, imageHeight, null, 0, width);
                    // 写入流中
                    imageResult.setRGB(0, shiftHeight, width, imageHeight, singleImgRGB, 0, width);
                }else i**粗体**f(i>page_end) {
                    continue;
                }

            }

            pdDocument.close();
            // 写图片
            ImageIO.write(imageResult, DEFAULT_FORMAT, new File(imgPath));

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * 将PDF转化为图片
     * （使用 pdfbox）
     * @param pdDocument PDF对象
     * @param page_end 要转换的页码，发票一般是一页，取第一页
     * @return
     */
    public static BufferedImage pdfToImage(PDDocument pdDocument,int page_end) {
        //保存每张图片的像素值
        BufferedImage imageResult = null;
        try {
            //图像合并使用参数
            // 总宽度
            int width = 0;
            // 保存一张图片中的RGB数据
            int[] singleImgRGB;
            int shiftHeight = 0;

            //利用PdfBox生成图像
            PDFRenderer renderer = new PDFRenderer(pdDocument);

            //循环每个页码
            for (int i = 0, len = pdDocument.getNumberOfPages(); i < len; i  ) {
                if (i==page_end) {
                    BufferedImage image = renderer.renderImageWithDPI(i, DEFAULT_DPI, ImageType.RGB);
                    int imageHeight = image.getHeight();
                    int imageWidth = image.getWidth();
                    //计算高度和偏移量
                    //使用第一张图片宽度;
                    width = imageWidth;
                    //保存每页图片的像素值
                    imageResult = new BufferedImage(width, imageHeight, BufferedImage.TYPE_INT_RGB);
                    //这里有高度，可以将imageHeight*len，我这里值提取一页所以不需要
                    singleImgRGB = image.getRGB(0, 0, width, imageHeight, null, 0, width);
                    // 写入流中
                    imageResult.setRGB(0, shiftHeight, width, imageHeight, singleImgRGB, 0, width);
                }else if(i>page_end) {
                    continue;
                }

            }

            pdDocument.close();

        } catch (Exception e) {
            e.printStackTrace();
        }
        return imageResult;
    }

    /**
     * 将pdf转为图片（不建议使用）
     *（使用 icepdf）
     * @param pdfContent pdf数据流
     * @param zoom 缩略图显示倍数，1表示不缩放，0.3则缩小到30%，倍数越大越清晰，图片也越大，转换得也越慢
     * @return
     * @throws PDFException
     * @throws PDFSecurityException
     * @throws IOException
     */
    public static ByteArrayOutputStream tranferPDF2Img(byte[] pdfContent, float zoom) throws PDFException, PDFSecurityException, IOException {
        Document document = null;
        float rotation = 0f;// 旋转角度
        if(pdfContent == null){
            throw new RuntimeException("pdf文件内容不能为空");
        }
        ByteArrayInputStream bin = new ByteArrayInputStream(pdfContent);
        ByteArrayOutputStream out =  new ByteArrayOutputStream();
        document = new Document();
        document.setInputStream(bin, null);
        int maxPages = document.getPageTree().getNumberOfPages();

        for (int i = 0; i < maxPages; i  ) {
            BufferedImage img = null;
            try {
                img = (BufferedImage) document.getPageImage(i, GraphicsRenderingHints.SCREEN, Page.BOUNDARY_CROPBOX, rotation, zoom);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }

            try {
                ImageIO.write(img, DEFAULT_FORMAT, out);
            } catch (IOException e) {
                throw new RuntimeException("pdf内容读取异常", e);
            }
            img.flush();

        }
        return out;
    }

}

3.获取到流后调用方法转为图片返回给前台

代码语言：java

// During development the PDF can be read straight from disk; in test/production
// load it from an InputStream instead.
// Backslashes must be escaped inside a Java string literal.
PDDocument pdDocument = PDDocument.load(new File("F:\\destop\\1.pdf"));
BufferedImage bufferedImage = PDF2ImageUtil.pdfToImage(pdDocument, 0);
// pdfToImage returns null when the page could not be rendered; ImageIO.write rejects a null image.
if (bufferedImage == null) {
    throw new IllegalStateException("PDF page could not be rendered");
}
// outImage is the destination (File or OutputStream) provided by the surrounding code.
ImageIO.write(bufferedImage, "jpg", outImage);

4.遇到的问题

当时放到测试环境后一直显示乱码，看了下报错是说字体不存在。第一个想法是在主机上安装字体，但是又有问题了，生产不可能这样吧，运维也不同意啊。

想想还是研究研究 pdfbox 的源码吧，分析后发现它是根据不同系统来读取字体的文件夹的，然后一个同事建议我重写读写 Linux 系统文件的类，指向我们项目的文件夹，然后在项目新建一个文件夹来存放需要的字体。（感谢耀光！）

Linux 读取的是以下这几个目录： "/usr/local/fonts", "/usr/local/share/fonts", "/usr/share/fonts", "/usr/X11R6/lib/X11/fonts"

MAC： "~/Library/Fonts/"（当前用户主目录下）, "/Library/Fonts/", "/System/Library/Fonts/", "/Network/Library/Fonts/"

Windows: C:\Windows\Fonts

说干就干，将目录指向我新建的 fonts 文件夹，果然 ok 了。需要注意的是，后面如果 pdf 用到了新的字体，就需要将对应的字体文件下载下来，放到该目录下。

代码语言：java

package org.apache.fontbox.util.autodetect;

import com.ai.ecs.h5.view.common.pdf.PdfController;

public class UnixFontDirFinder extends NativeFontDirFinder {
    public UnixFontDirFinder() {
    }

    protected String[] getSearchableDirectories() {
        return new String[]{PdfController.class.getResource("/").getPath() "/fonts/"};
    }
}

如何解决 Linux 环境下乱码的问题：重写 UnixFontDirFinder 类，修改 Linux 环境下获取字体文件的路径，改为取项目里的字体文件

Copyright: 采用知识共享署名4.0 国际许可协议进行许可 Links: https://cloud.tencent.com/developer/article/2020434

linux windows 费用中心

0 人点赞