使用PDFRender4NET
无水印DLL
链接:https://pan.baidu.com/s/1HILw9Ztl6xNr4kMB1HGuWQ 提取码:psvm
工具类
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using O2S.Components.PDFRender4NET;
namespace Z.Tools
{
    /// <summary>
    /// Helpers that read a PDF through PDFRender4NET: counting pages and rendering
    /// pages to image files.
    /// </summary>
    public class PdfHelper
    {
        /// <summary>
        /// Rendering quality presets. The numeric value is multiplied by 56 and passed as
        /// the second argument of <c>PDFFile.GetPageImage</c> (presumably the DPI - confirm
        /// against the PDFRender4NET documentation), so larger values give sharper images.
        /// </summary>
        public enum Definition
        {
            Low = 2,
            Standard = 3,
            High = 5,
            SuperHigh = 8,
            UltraHigh = 10
        }

        /// <summary>Base name of every generated image; the 1-based page number is appended.</summary>
        private const string ImageName = "pdfimg";

        /// <summary>Factor applied to the <see cref="Definition"/> value when rendering a page.</summary>
        private const int ResolutionPerDefinitionUnit = 56;

        /// <summary>
        /// Gets the number of pages of a PDF document.
        /// </summary>
        /// <param name="pdfInputPath">Path of the PDF file.</param>
        /// <returns>The page count reported by the opened document.</returns>
        public static int GetPageNum(string pdfInputPath)
        {
            PDFFile pdfFile = PDFFile.Open(pdfInputPath);
            try
            {
                return pdfFile.PageCount;
            }
            finally
            {
                // Release the document even when reading the page count fails.
                pdfFile.Dispose();
            }
        }

        /// <summary>
        /// Renders a range of pages of a PDF document to image files.
        /// </summary>
        /// <param name="pdfInputPath">Path of the PDF file.</param>
        /// <param name="imageOutputPath">Directory that receives the images; created when missing.</param>
        /// <param name="startPageNum">1-based first page to convert; values below 1 are treated as 1.</param>
        /// <param name="endPageNum">
        /// 1-based last page to convert (inclusive); values below 1 or beyond the page count
        /// are treated as the last page.
        /// </param>
        /// <param name="imageFormat">Image format of the output files; JPEG when null.</param>
        /// <param name="definition">Quality preset; larger values produce sharper images.</param>
        /// <returns>
        /// The paths of the written files, in page order. Each file is named
        /// <c>pdfimg{page}.{imageFormat}</c> inside <paramref name="imageOutputPath"/>.
        /// </returns>
        public static List<string> ConvertPdfToImage(
            string pdfInputPath,
            string imageOutputPath,
            int startPageNum = 1,
            int endPageNum = int.MaxValue,
            ImageFormat imageFormat = null,
            Definition definition = Definition.Standard
        )
        {
            List<string> imgList = new List<string>();
            if (imageFormat == null)
            {
                imageFormat = ImageFormat.Jpeg;
            }

            PDFFile pdfFile = PDFFile.Open(pdfInputPath);
            try
            {
                if (!Directory.Exists(imageOutputPath))
                {
                    Directory.CreateDirectory(imageOutputPath);
                }

                int pageCount = pdfFile.PageCount;
                if (pageCount <= 0)
                {
                    // Nothing to render; avoids asking for page index -1 below.
                    return imgList;
                }

                if (startPageNum <= 0)
                {
                    startPageNum = 1;
                }
                if (endPageNum <= 0)
                {
                    endPageNum = pageCount;
                }
                if (startPageNum > endPageNum)
                {
                    // A reversed range is accepted: swap the bounds through a temporary.
                    int swapped = startPageNum;
                    startPageNum = endPageNum;
                    endPageNum = swapped;
                }
                // Clamp after the swap so neither bound can point past the last page.
                if (startPageNum > pageCount)
                {
                    startPageNum = pageCount;
                }
                if (endPageNum > pageCount)
                {
                    endPageNum = pageCount;
                }

                for (int i = startPageNum; i <= endPageNum; i++)
                {
                    // Path.Combine inserts the separator when the directory has no trailing one.
                    string filePath = Path.Combine(imageOutputPath, ImageName + i + "." + imageFormat);

                    // GetPageImage takes a 0-based page index.
                    using (Bitmap pageImage = pdfFile.GetPageImage(i - 1, ResolutionPerDefinitionUnit * (int)definition))
                    {
                        pageImage.Save(filePath, imageFormat);
                    }
                    imgList.Add(filePath);
                }
            }
            finally
            {
                pdfFile.Dispose();
            }
            return imgList;
        }
    }
}
调用
// Example: print the page count, then render every page to JPEG files.
// The paths are samples; adjust them to your environment.
var pageNum = PdfHelper.GetPageNum(@"D:\Tools\Doc\Test\水印.pdf");
Console.WriteLine($@"文档页数:{pageNum}");
var imgList = PdfHelper.ConvertPdfToImage(@"D:\Tools\Doc\Test\水印.pdf", @"D:\Tools\Doc\Test\Pic\");
// One generated file path per line.
Console.WriteLine(string.Join("\n", imgList));
Word转图片
Install-Package Aspose.Words -Version 19.10.0
调用前设置授权
// Apply the Aspose.Words license once at start-up, before the first conversion call.
// License data must not be embedded in source code: ship the license file issued to you
// by Aspose next to the application and load it at run time.
// "Aspose.Words.lic" is a placeholder; pass the name or full path of your own license file.
new License().SetLicense("Aspose.Words.lic");
调用
using System;
using System.Collections.Generic;
using System.Drawing.Imaging;
using System.IO;
using Aspose.Words;
using Aspose.Words.Saving;
namespace Z.Utils
{
    /// <summary>
    /// Renders the pages of a Word document to image files using Aspose.Words.
    /// </summary>
    public class Word2Img
    {
        /// <summary>
        /// Converts a range of pages of a Word document to image files, one file per page.
        /// Example:
        /// <c>ConvertWordToImage(@"F:\File.doc", @"F:\", "ImageFile", 1, 20, ImageFormat.Png, 256)</c>.
        /// </summary>
        /// <param name="wordInputPath">Path of the Word document.</param>
        /// <param name="imageOutputPath">
        /// Output directory; created when missing. When null or blank, the directory that
        /// contains the Word document is used.
        /// </param>
        /// <param name="imageName">
        /// Image base name without extension. When null or blank, a freshly generated GUID
        /// (32 hex digits) is used.
        /// </param>
        /// <param name="startPageNum">1-based first page to convert; values below 1 are treated as 1.</param>
        /// <param name="endPageNum">
        /// 1-based last page to convert (inclusive); values below 1 or beyond the page count
        /// are treated as the last page.
        /// </param>
        /// <param name="imageFormat">Output image format; PNG when null.</param>
        /// <param name="resolution">
        /// Value assigned to <c>ImageSaveOptions.Resolution</c>; 128 is used when the value
        /// is zero or negative. Larger values give sharper images.
        /// </param>
        /// <returns>
        /// The paths of the written files. Files are named
        /// <c>{imageName}_{n}.{imageFormat}</c>, where n counts from 1 for the first converted
        /// page regardless of <paramref name="startPageNum"/>.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown for any failure while loading or saving. The message is one of two fixed
        /// texts; the original exception is available through <c>InnerException</c>.
        /// </exception>
        public static List<string> ConvertWordToImage(
            string wordInputPath,
            string imageOutputPath,
            string imageName,
            int startPageNum,
            int endPageNum,
            ImageFormat imageFormat,
            float resolution
        )
        {
            // Paths of the images written so far.
            List<string> images = new List<string>();
            try
            {
                Document doc = new Document(wordInputPath);

                if (string.IsNullOrWhiteSpace(imageOutputPath))
                {
                    // Resolve to a full path first so a bare file name still yields a usable directory.
                    imageOutputPath = Path.GetDirectoryName(Path.GetFullPath(wordInputPath))
                                      ?? Directory.GetCurrentDirectory();
                }
                // No-op when the directory already exists.
                Directory.CreateDirectory(imageOutputPath);

                if (string.IsNullOrWhiteSpace(imageName))
                {
                    imageName = Guid.NewGuid().ToString("N");
                }

                int pageCount = doc.PageCount;
                if (pageCount <= 0)
                {
                    return images;
                }

                if (startPageNum <= 0)
                {
                    startPageNum = 1;
                }
                if (endPageNum <= 0)
                {
                    endPageNum = pageCount;
                }
                if (startPageNum > endPageNum)
                {
                    // A reversed range is accepted: swap the bounds.
                    (startPageNum, endPageNum) = (endPageNum, startPageNum);
                }
                // Clamp after the swap so neither bound can point past the last page.
                if (startPageNum > pageCount)
                {
                    startPageNum = pageCount;
                }
                if (endPageNum > pageCount)
                {
                    endPageNum = pageCount;
                }

                imageFormat ??= ImageFormat.Png;
                if (resolution <= 0)
                {
                    resolution = 128;
                }

                ImageSaveOptions imageSaveOptions = new ImageSaveOptions(GetSaveFormat(imageFormat))
                {
                    Resolution = resolution
                };

                // num is the sequence number used in the file name; i is the 1-based page number.
                int num = 1;
                for (int i = startPageNum; i <= endPageNum; i++, num++)
                {
                    // PageIndex is 0-based.
                    imageSaveOptions.PageIndex = i - 1;
                    string imagePath = Path.Combine(imageOutputPath, imageName) + "_" + num + "." + imageFormat;
                    doc.Save(imagePath, imageSaveOptions);
                    images.Add(imagePath);
                }
            }
            catch (Exception ex)
            {
                // Callers get the same exception type and messages as before; the original
                // failure is preserved as InnerException so it can still be diagnosed.
                string message = string.Equals(
                    ex.Message,
                    "The document appears to be corrupted and cannot be loaded.",
                    StringComparison.Ordinal)
                    ? "文件似乎已损坏,无法加载。"
                    : "文件被占用请关闭后重新导入";
                throw new Exception(message, ex);
            }
            return images;
        }

        /// <summary>
        /// Maps a GDI+ image format to the matching Aspose.Words save format.
        /// </summary>
        /// <param name="imageFormat">PNG, JPEG, TIFF or BMP.</param>
        /// <returns>
        /// The corresponding <c>SaveFormat</c>, or <c>SaveFormat.Unknown</c> for any other format.
        /// </returns>
        private static SaveFormat GetSaveFormat(ImageFormat imageFormat)
        {
            if (imageFormat.Equals(ImageFormat.Png))
            {
                return SaveFormat.Png;
            }
            if (imageFormat.Equals(ImageFormat.Jpeg))
            {
                return SaveFormat.Jpeg;
            }
            if (imageFormat.Equals(ImageFormat.Tiff))
            {
                return SaveFormat.Tiff;
            }
            if (imageFormat.Equals(ImageFormat.Bmp))
            {
                return SaveFormat.Bmp;
            }
            return SaveFormat.Unknown;
        }
    }
}
使用程序Poppler
https://blog.alivate.com.au/poppler-windows/
转换PDF为图片
pdftoppm.exe -jpeg "D:\Tools\Doc\Test\水印.pdf" D:\Tools\Doc\Test\Pic
这种方式转换的速度快,并且相同质量的前提下,文件比较小。
提取PDF中的图片
pdfimages.exe -j -p "D:\Tools\Doc\Test\水印.pdf" D:\Tools\Doc\Test\Pic
使用程序Ghostscript
https://www.ghostscript.com/
这种方式效果较好,就是引用的exe和dll会增加将近11M。
在 gs.exe 的同级目录下运行下面的命令
获取页数
./gs -q -dNODISPLAY -c "(D:/Project/Node/Pdf2PngforWindows/doc/1.pdf) (r) file runpdfbegin pdfpagecount = quit"
转换某页
./gs -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -sDEVICE=png16m -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r100 -dFirstPage=1 -dLastPage=1 -sOutputFile=C:\Users\ADMINI~1\AppData\Local\Temp\tmp-8468uujQmrsdRJ21.png "D:\Project\Node\Pdf2PngforWindows/doc/1.pdf"
我们就可以根据页数进行逐页转换。
相关参数如下说明:
-dQUIET,安静的意思,指代执行过程中尽可能少的输出日志等信息。(也可以简写为 -q)
-dNOSAFER,通过命令行运行
-dBATCH,执行到最后一页后退出
-dNOPAUSE,每一页转换之间没有停顿
-dNOPROMPT,没有相关提示
-dFirstPage=1,从第几页开始
-dLastPage=5,到第几页结束
-sDEVICE=png16m,转换输出的文件类型装置,默认值为 x11alpha
-g720x1280,图片像素(-g<width>x<height>),一般不指定,使用默认输出
-r300,图片分辨率(即图片解析度为300dpi),默认值好像是72
-sOutputFile=/opt/shanhy/error1png/%d.png,图片输出路径,使用 %d 或 %ld 输出页数