CSharp中PDF转图片、Word转图片

2023-04-27 15:23:33 浏览数 (2)

使用PDFRender4NET

无水印DLL

链接:https://pan.baidu.com/s/1HILw9Ztl6xNr4kMB1HGuWQ 提取码:psvm

工具类

代码语言:javascript复制
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using O2S.Components.PDFRender4NET;

namespace Z.Tools
{
    /// <summary>
    /// Helpers for inspecting PDF files and rendering their pages to image files
    /// with PDFRender4NET.
    /// </summary>
    public class PdfHelper
    {
        /// <summary>
        /// Rendering quality presets. The numeric value is multiplied by 56 and passed
        /// to the renderer as the resolution, so larger values give sharper images.
        /// </summary>
        public enum Definition
        {
            Low = 2,
            Standard = 3,
            High = 5,
            SuperHigh = 8,
            UltraHigh = 10
        }

        /// <summary>
        /// Gets the number of pages in a PDF file.
        /// </summary>
        /// <param name="pdfInputPath">Path of the PDF file.</param>
        /// <returns>The page count reported by the PDF library.</returns>
        public static int GetPageNum(string pdfInputPath)
        {
            PDFFile pdfFile = PDFFile.Open(pdfInputPath);
            try
            {
                return pdfFile.PageCount;
            }
            finally
            {
                // Release the file handle even if reading the page count fails.
                pdfFile.Dispose();
            }
        }

        /// <summary>
        /// Renders a range of pages of a PDF document to image files named
        /// <c>pdfimg{page}.{format}</c> inside <paramref name="imageOutputPath"/>.
        /// </summary>
        /// <param name="pdfInputPath">Path of the PDF file.</param>
        /// <param name="imageOutputPath">Output directory; created if it does not exist.</param>
        /// <param name="startPageNum">1-based first page to convert; values below 1 are treated as 1.</param>
        /// <param name="endPageNum">
        /// 1-based last page to convert (inclusive); values below 1 or beyond the last page
        /// are treated as the last page.
        /// </param>
        /// <param name="imageFormat">Image format to save; JPEG when null.</param>
        /// <param name="definition">Rendering quality; larger values give sharper images.</param>
        /// <returns>The paths of the image files written, in page order.</returns>
        public static List<string> ConvertPdfToImage(
            string pdfInputPath,
            string imageOutputPath,
            int startPageNum = 1,
            int endPageNum = int.MaxValue,
            ImageFormat imageFormat = null,
            Definition definition = Definition.Standard
        )
        {
            const string imageName = "pdfimg";
            List<string> imgList = new List<string>();
            if (imageFormat == null)
            {
                imageFormat = ImageFormat.Jpeg;
            }

            PDFFile pdfFile = PDFFile.Open(pdfInputPath);
            try
            {
                if (!Directory.Exists(imageOutputPath))
                {
                    Directory.CreateDirectory(imageOutputPath);
                }

                int pageCount = pdfFile.PageCount;
                if (pageCount <= 0)
                {
                    // Nothing to render; avoids asking the renderer for page index -1.
                    return imgList;
                }

                // Clamp both bounds into [1, pageCount] before ordering them.
                if (startPageNum <= 0)
                {
                    startPageNum = 1;
                }

                if (startPageNum > pageCount)
                {
                    startPageNum = pageCount;
                }

                if (endPageNum <= 0 || endPageNum > pageCount)
                {
                    endPageNum = pageCount;
                }

                // A reversed range is swapped (a temporary is required; assigning the two
                // variables to each other in sequence would collapse the range).
                if (startPageNum > endPageNum)
                {
                    int swap = startPageNum;
                    startPageNum = endPageNum;
                    endPageNum = swap;
                }

                for (int i = startPageNum; i <= endPageNum; i++)
                {
                    // Path.Combine works whether or not the directory ends with a separator.
                    string filePath = Path.Combine(imageOutputPath, imageName + i + "." + imageFormat);

                    // The renderer takes a 0-based page index.
                    using (Bitmap pageImage = pdfFile.GetPageImage(i - 1, 56 * (int)definition))
                    {
                        pageImage.Save(filePath, imageFormat);
                    }

                    imgList.Add(filePath);
                }
            }
            finally
            {
                pdfFile.Dispose();
            }

            return imgList;
        }
    }
}

调用

代码语言:javascript复制
// Print the number of pages in the sample PDF.
var pageNum = PdfHelper.GetPageNum(@"D:\Tools\Doc\Test\水印.pdf");
Console.WriteLine($@"文档页数:{pageNum}");

// Render every page to the output directory and print the generated file paths, one per line.
var imgList = PdfHelper.ConvertPdfToImage(@"D:\Tools\Doc\Test\水印.pdf", @"D:\Tools\Doc\Test\Pic\");
Console.WriteLine(string.Join("\n", imgList));

Word转图片

代码语言:javascript复制
Install-Package Aspose.Words -Version 19.10.0

调用前设置授权

代码语言:javascript复制
// Load the Aspose license from a file deployed with the application instead of
// embedding Base64-encoded license data in source code. The file name below is a
// placeholder: point it at your own license file.
new License().SetLicense("Aspose.Total.lic");

调用

代码语言:javascript复制
using System;
using System.Collections.Generic;
using System.Drawing.Imaging;
using System.IO;
using Aspose.Words;
using Aspose.Words.Saving;

namespace Z.Utils
{
    /// <summary>
    /// Renders pages of a Word document to image files using Aspose.Words.
    /// </summary>
    public class Word2Img
    {
        /// <summary>
        /// Converts a range of pages of a Word document to images, one file per page.
        /// Files are named <c>{imageName}_{n}.{imageFormat}</c>, where n starts at 1 for the
        /// first converted page. Example:
        /// <c>ConvertWordToImage(@"F:\File.doc", @"F:\", "ImageFile", 1, 20, ImageFormat.Png, 256)</c>.
        /// </summary>
        /// <param name="wordInputPath">Path of the Word document.</param>
        /// <param name="imageOutputPath">
        /// Output directory; when null or blank, the directory of the Word document is used.
        /// The directory is created if it does not exist.
        /// </param>
        /// <param name="imageName">
        /// Base file name without extension; when null or blank, a random GUID is used.
        /// </param>
        /// <param name="startPageNum">
        /// 1-based first page to convert; values below 1 are treated as 1.
        /// </param>
        /// <param name="endPageNum">
        /// 1-based last page to convert (inclusive); values below 1 or beyond the last page
        /// are treated as the last page.
        /// </param>
        /// <param name="imageFormat">Image format to save; PNG when null.</param>
        /// <param name="resolution">
        /// Rendering resolution, larger is sharper; values of 0 or less default to 128.
        /// </param>
        /// <returns>The paths of the image files written, in page order.</returns>
        /// <exception cref="Exception">
        /// Thrown for any failure while loading or saving; the original error is available
        /// through <see cref="Exception.InnerException"/>.
        /// </exception>
        public static List<string> ConvertWordToImage(
            string wordInputPath,
            string imageOutputPath,
            string imageName,
            int startPageNum,
            int endPageNum,
            ImageFormat imageFormat,
            float resolution
        )
        {
            // Paths of the generated images.
            List<string> images = new List<string>();
            try
            {
                Document doc = new Document(wordInputPath);

                if (string.IsNullOrWhiteSpace(imageOutputPath))
                {
                    // GetDirectoryName yields null/empty for a bare file name; an empty
                    // directory makes the output land in the current working directory.
                    imageOutputPath = Path.GetDirectoryName(wordInputPath) ?? string.Empty;
                }

                if (imageOutputPath.Length > 0 && !Directory.Exists(imageOutputPath))
                {
                    Directory.CreateDirectory(imageOutputPath);
                }

                if (string.IsNullOrWhiteSpace(imageName))
                {
                    imageName = Guid.NewGuid().ToString("N");
                }

                int pageCount = doc.PageCount;
                if (pageCount <= 0)
                {
                    return images;
                }

                // Clamp both bounds into [1, pageCount], then put them in ascending order.
                startPageNum = Math.Min(Math.Max(startPageNum, 1), pageCount);
                if (endPageNum <= 0 || endPageNum > pageCount)
                {
                    endPageNum = pageCount;
                }

                if (startPageNum > endPageNum)
                {
                    // A real swap needs a temporary; assigning the two variables to each
                    // other in sequence would collapse the range to a single page.
                    int swap = startPageNum;
                    startPageNum = endPageNum;
                    endPageNum = swap;
                }

                imageFormat ??= ImageFormat.Png;

                if (resolution <= 0)
                {
                    resolution = 128;
                }

                ImageSaveOptions imageSaveOptions = new ImageSaveOptions(GetSaveFormat(imageFormat))
                {
                    Resolution = resolution
                };

                // NOTE(review): for TIFF output Aspose may render every page from PageIndex
                // onward into one multi-frame file unless PageCount is limited - confirm.
                int num = 1;
                for (int i = startPageNum; i <= endPageNum; i++)
                {
                    // PageIndex is 0-based.
                    imageSaveOptions.PageIndex = i - 1;

                    string filePath = Path.Combine(imageOutputPath, imageName) + "_" + num + "." + imageFormat;
                    doc.Save(filePath, imageSaveOptions);
                    images.Add(filePath);

                    num++;
                }
            }
            catch (Exception ex)
            {
                string message = "The document appears to be corrupted and cannot be loaded.".Equals(ex.Message)
                    ? "文件似乎已损坏,无法加载。"
                    : "文件被占用请关闭后重新导入";

                // Keep the original exception as InnerException so the real cause is not lost.
                throw new Exception(message, ex);
            }

            return images;
        }

        /// <summary>
        /// Maps a System.Drawing image format to the matching Aspose.Words save format.
        /// </summary>
        /// <param name="imageFormat">The image format to map.</param>
        /// <returns>
        /// The save format for PNG, JPEG, TIFF or BMP; <c>SaveFormat.Unknown</c> for any other format.
        /// </returns>
        private static SaveFormat GetSaveFormat(ImageFormat imageFormat)
        {
            if (imageFormat.Equals(ImageFormat.Png))
            {
                return SaveFormat.Png;
            }

            if (imageFormat.Equals(ImageFormat.Jpeg))
            {
                return SaveFormat.Jpeg;
            }

            if (imageFormat.Equals(ImageFormat.Tiff))
            {
                return SaveFormat.Tiff;
            }

            if (imageFormat.Equals(ImageFormat.Bmp))
            {
                return SaveFormat.Bmp;
            }

            return SaveFormat.Unknown;
        }
    }
}

使用程序Poppler

https://blog.alivate.com.au/poppler-windows/

转换PDF为图片

代码语言:javascript复制
pdftoppm.exe -jpeg "D:\Tools\Doc\Test\水印.pdf" D:\Tools\Doc\Test\Pic

这种方式转换的速度快,并且相同质量的前提下,文件比较小。

提取PDF中的图片

代码语言:javascript复制
pdfimages.exe -j -p "D:\Tools\Doc\Test\水印.pdf" D:\Tools\Doc\Test\Pic

使用程序Ghostscript

https://www.ghostscript.com/

这种方式效果较好,就是引用的exe和dll会增加将近11M。

gs.exe的同级目录下运行下面的命令

获取页数

代码语言:javascript复制
./gs -q -dNODISPLAY -c "(D:/Project/Node/Pdf2PngforWindows/doc/1.pdf) (r) file runpdfbegin pdfpagecount = quit"

转换某页

代码语言:javascript复制
./gs -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -sDEVICE=png16m -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r100 -dFirstPage=1 -dLastPage=1 -sOutputFile=C:\Users\ADMINI~1\AppData\Local\Temp\tmp-8468uujQmrsdRJ21.png "D:\Project\Node\Pdf2PngforWindows/doc/1.pdf"

我们就可以根据页数进行逐页转换。

相关参数如下说明:

  • -dQUIET, 安静的意思,指代执行过程中尽可能少的输出日志等信息。(也可以简写为-q)
  • -dNOSAFER, 通过命令行运行
  • -dBATCH, 执行到最后一页后退出
  • -dNOPAUSE, 每一页转换之间没有停顿
  • -dNOPROMPT, 没有相关提示
  • -dFirstPage=1, 从第几页开始
  • -dLastPage=5, 到第几页结束
  • -sDEVICE=png16m, 转换输出的文件类型装置,默认值为x11alpha
  • -g720x1280, 图片像素(-g<width>x<height>),一般不指定,使用默认输出
  • -r300, 图片分辨率(即图片解析度为300dpi),默认值好像是72
  • -sOutputFile=/opt/shanhy/error1png/%d.png, 图片输出路径,使用 %d 或 %ld 输出页数

0 人点赞