文章背景:为了文件传输的方便,有时需要对pdf文件进行压缩。针对pdf压缩,一种方法是借助第三方库PyMuPDF进行图片提取和压缩,最后合并生成一个新的PDF。该方法并非对所有pdf文件都适用,且压缩后的pdf画质可能会变差。
本文借助Ghostscript对pdf文件进行压缩。因此,电脑上需要事先安装Ghostscript软件。
Ghostscript是一套基于Adobe PostScript及可移植文档格式(PDF)等页面描述语言而编译成的自由软件。许可协议有两个:GNU通用公共许可协议(免费,GPL Ghostscript)和Aladdin Free Public License(收费)。
代码1:(pdf_compressor.py)
#!/usr/bin/env python3
# Author: Theeko74
# Contributor(s): skjerns
# Oct, 2021
# MIT license -- free to use as you want, cheers.
"""
Simple python wrapper script to use Ghostscript function to compress PDF files.
Compression levels:
0: default
1: prepress
2: printer
3: ebook
4: screen
Dependency: Ghostscript.
On MacOSX install via command line `brew install ghostscript`.
"""
import argparse
import subprocess
import os.path
import sys
import shutil
def compress(input_file_path, output_file_path, power=0):
    """Compress a PDF through the Ghostscript command line interface.

    Args:
        input_file_path: Path of the existing PDF file to compress.
        output_file_path: Path handed to Ghostscript as ``-sOutputFile``.
        power: Compression level, an integer from 0 to 4 selecting the
            ``-dPDFSETTINGS`` preset (0 ``/default``, 1 ``/prepress``,
            2 ``/printer``, 3 ``/ebook``, 4 ``/screen``).

    Raises:
        SystemExit: With status 1, after printing an error, when the input
            path is not a file, does not end in ``.pdf`` (case-insensitive),
            or ``power`` is not a known level.
        FileNotFoundError: Propagated from ``get_ghostscript_path`` when no
            Ghostscript executable is found.
        subprocess.CalledProcessError: When Ghostscript exits with a
            non-zero status.
    """
    quality = {
        0: '/default',
        1: '/prepress',
        2: '/printer',
        3: '/ebook',
        4: '/screen'
    }

    # Basic controls
    # Check if valid path
    if not os.path.isfile(input_file_path):
        print("Error: invalid path for input PDF file")
        sys.exit(1)

    # Check if file is a PDF by extension. endswith() requires the dot, so a
    # file literally named "pdf" is no longer mistaken for a PDF.
    if not input_file_path.lower().endswith('.pdf'):
        print("Error: input file is not a PDF")
        sys.exit(1)

    # Reject unknown levels up front instead of failing later with a KeyError.
    if power not in quality:
        print("Error: compression level must be an integer from 0 to 4")
        sys.exit(1)

    gs = get_ghostscript_path()
    print("Compress PDF...")
    initial_size = os.path.getsize(input_file_path)

    # check_call raises on a non-zero exit status, so a failed Ghostscript run
    # is reported as such rather than as a missing or bogus output file.
    subprocess.check_call([gs, '-sDEVICE=pdfwrite', '-dCompatibilityLevel=1.4',
                           '-dPDFSETTINGS={}'.format(quality[power]),
                           '-dNOPAUSE', '-dQUIET', '-dBATCH',
                           '-sOutputFile={}'.format(output_file_path),
                           input_file_path])

    final_size = os.path.getsize(output_file_path)
    # Skip the ratio for a zero-byte input to avoid dividing by zero.
    if initial_size:
        ratio = 1 - (final_size / initial_size)
        print("Compression by {0:.0%}.".format(ratio))
    print("Final file size is {0:.1f}MB".format(final_size / 1024 / 1024))
    print("Done.")
def get_ghostscript_path():
    """Return the location of the first Ghostscript executable on the PATH.

    The names ``gs``, ``gswin32`` and ``gswin64`` are probed in that order
    with ``shutil.which``; the first one that resolves wins.

    Raises:
        FileNotFoundError: When none of the candidate names resolves.
    """
    candidates = ['gs', 'gswin32', 'gswin64']
    # Lazy generator: probing stops at the first candidate that resolves.
    lookups = (shutil.which(candidate) for candidate in candidates)
    located = next((hit for hit in lookups if hit), None)
    if located is None:
        listed = "/".join(candidates)
        raise FileNotFoundError(
            'No GhostScript executable was found on path ({})'.format(listed)
        )
    return located
def main():
    """Command-line entry point: parse arguments and compress one PDF.

    Without ``-o/--out`` the result is written to ``temp.pdf`` and then copied
    over the input file (optionally keeping a ``*_BACKUP`` copy of the
    original first). With ``--open`` the resulting file is passed to the
    ``open`` command afterwards.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument('input', help='Relative or absolute path of the input PDF file')
    parser.add_argument('-o', '--out', help='Relative or absolute path of the output PDF file')
    parser.add_argument('-c', '--compress', type=int, help='Compression level from 0 to 4')
    parser.add_argument('-b', '--backup', action='store_true', help="Backup the old PDF file")
    parser.add_argument('--open', action='store_true', default=False,
                        help='Open PDF after compression')
    args = parser.parse_args()

    # In case no compression level is specified, default is 2 '/ printer'.
    # Test against None: level 0 is a valid choice and must not be replaced.
    if args.compress is None:
        args.compress = 2
    if not 0 <= args.compress <= 4:
        parser.error('compression level must be between 0 and 4')

    # In case no output file is specified, store in temp file. Remember this
    # as a flag so that an explicit "-o temp.pdf" is not treated as in-place.
    in_place = not args.out
    if in_place:
        args.out = 'temp.pdf'

    # Run
    compress(args.input, args.out, power=args.compress)

    # In case no output file is specified, erase original file
    if in_place:
        if args.backup:
            # Insert the suffix before the real extension only, whatever its
            # case, rather than rewriting every ".pdf" found in the path.
            stem, extension = os.path.splitext(args.input)
            shutil.copyfile(args.input, stem + "_BACKUP" + extension)
        shutil.copyfile(args.out, args.input)
        os.remove(args.out)

    # In case we want to open the file after compression. In in-place mode the
    # temp file no longer exists, so the result is always at the input path.
    # NOTE(review): `open` is presumably the macOS launcher; other platforms
    # would need a different command -- confirm before relying on --open.
    if args.open:
        subprocess.call(['open', args.input if in_place else args.out])
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
代码2:(compressPDF_tkinter.py)
# -*- coding: UTF-8 -*-
# Tkinter界面,压缩PDF文件
from tkinter import *
import os
from tkinter import filedialog
from tkinter import messagebox
from pdf_compressor import compress
def getPDF():
    """Let the user pick a file and show its path in the path entry box.

    The entry box is cleared first in every case. If the dialog returns an
    empty string, an info box is shown and the entry stays empty.
    """
    # Obtain the file path through the file dialog
    chosen = filedialog.askopenfilename()
    FilePath_result.delete(0, END)
    if chosen == "":
        messagebox.showinfo("提示","未选中pdf文件!")
        return
    FilePath_result.insert(0, chosen)
def comPDF(PDFpath):
    """Compress the PDF at *PDFpath* and display the sizes before and after.

    The result is written to ``compressed.pdf`` in the current working
    directory. Nothing is compressed, and an info box explains why, when the
    path is empty, does not exist, does not end in ``.pdf``, or when
    ``compressed.pdf`` already exists.

    Args:
        PDFpath: Path of the PDF to compress, as typed or picked in the GUI.
    """
    if PDFpath == "":
        messagebox.showinfo("提示","文件路径为空!")
        return
    if not os.path.exists(PDFpath):
        messagebox.showinfo("提示","该路径不存在,请确认!")
        return
    # Same case-insensitive ".pdf" test that compress() applies, so the GUI
    # neither rejects "FILE.PDF" nor lets a name such as "foopdf" through.
    if not PDFpath.lower().endswith('.pdf'):
        messagebox.showinfo("提示","不是pdf文件,请确认!")
        return

    # Size of the original file, in kb
    original_kb = os.path.getsize(PDFpath) // 1024
    Size_rusult.config(text=str(original_kb))

    # Path of the compressed file
    new_path = os.path.join(os.getcwd(), "compressed.pdf")
    if os.path.exists(new_path):
        # Refuse to overwrite an earlier result; show the offending path on
        # its own line.
        messagebox.showinfo("提示","该路径已存在,请确认!\n" + new_path)
        return

    # Compress the file
    compress(PDFpath, new_path, power=0)
    compressed_kb = os.path.getsize(new_path) // 1024
    Size_comp_result.config(text=str(compressed_kb))
    path_comp_result.config(text=new_path)
    messagebox.showinfo("提示","Done!")
# Build the main window; column 1 absorbs any extra horizontal space.
root = Tk()
root.title("pdf compressor")
root.columnconfigure(1, weight=1)

# Buttons: choose the input file, then compress whatever path the entry holds.
btn1 = Button(root, text="获取待压缩的pdf文件", command=getPDF, bg="AliceBlue")
btn2 = Button(root, text="压缩文件", command=lambda: comPDF(FilePath_result.get()), bg="Beige")

# Caption / value widget pairs. The names are referenced by getPDF and comPDF,
# so they are kept exactly as they are (including the "Size_rusult" spelling).
FilePath = Label(root, text="压缩前的文件路径", relief="raised")
FilePath_result = Entry(root, width=50)
Size = Label(root, text="PDF文件大小(kb)", relief="raised")
Size_rusult = Label(root, text="", relief="raised")
Size_comp = Label(root, text="压缩后的文件大小(kb)", relief="raised")
Size_comp_result = Label(root, text="", relief="raised")
path_comp = Label(root, text="压缩后文件的路径:", relief="raised")
path_comp_result = Label(root, text="", relief="raised")

# Lay out the widgets on the grid. W+E stretches a widget across the full
# width of its cell; "sticky" is the documented name of the option.
btn1.grid(row=0, column=0, padx=5, pady=5, sticky=W+E)
btn2.grid(row=0, column=1, padx=5, pady=5, sticky=E)
FilePath.grid(row=1, column=0, padx=5, pady=5, sticky=W+E)
FilePath_result.grid(row=1, column=1, padx=5, sticky=W+E)
Size.grid(row=2, column=0, padx=5, pady=5, sticky=W+E)
Size_rusult.grid(row=2, column=1, padx=5, sticky=W+E)
Size_comp.grid(row=3, column=0, padx=5, pady=5, sticky=W+E)
Size_comp_result.grid(row=3, column=1, padx=5, sticky=W+E)
path_comp.grid(row=4, column=0, padx=5, pady=5, sticky=W+E)
path_comp_result.grid(row=4, column=1, padx=5, sticky=W+E)

root.mainloop()
运行界面:
效果演示:
http://mpvideo.qpic.cn/0b2e5aaagaaaiuadzxx6urqvb2gdapuaaaya.f10002.mp4?dis_k=a1fc7ee8875901f98cd70b5773b566c8&dis_t=1663653776&vid=wxv_2255866590980374534&format_id=10002&support_redirect=0&mmversion=false
参考资料:
[1] python实现PDF压缩(https://blog.csdn.net/qq_40507857/article/details/116501856)
[2] Ghostscript 9.55.0 for Windows (64 bit)(https://www.ghostscript.com/releases/gsdnld.html)
[3] Ghostscript(https://zh.wikipedia.org/wiki/Ghostscript)
[4] windows下ghostscript (gs)安装(https://blog.csdn.net/jasmine______001/article/details/105433002)
[5] Pdfc -- PDF Compressor(https://github.com/theeko74/pdfc)
[6] Python GUI设计:tkinter菜鸟编程(https://item.jd.com/12667860.html)