Python小工具 | 如何自动下载、压缩并批量替换文章中的外链图片

2022-09-02 08:44:57 浏览数 (1)

由于前段时间JsdelivrCDN加速的崩盘,博主存在Github上的图片全部访问失败,文章阅读体验极差,于是2022年以后的文章全部采用npm做图床,在此之前的文章由于数量过多并没有替换。再加上之前一直忙着研一阶段的期末考,懒得动博客。幸好这段时间有空,于是写了个python小工具来替换博客中外链图片,具体流程如下:

  • 首先,遍历hexo/source/_posts文件夹中的全部文章,把里面含有https://cdn.jsdelivr.net/gh的图片全部下载下来,并替换链接为npm的CDN链接。
  • 其次,对下载下来的图片做简单的压缩。
  • 最后,把压缩后的图片上传npm。

总体流程非常简单,下面是具体实现。

1. 下载并替换图片外链

代码语言:python
import os
import os.path as osp
import shutil as sh
import  re
from tqdm.auto import tqdm
import requests

def makedir(root):
    """Create the directory ``root`` if nothing exists at that path yet.

    Missing parent directories are created as well, so nested paths such as
    ``a/b/c`` work. If something already exists at ``root`` the call is a
    no-op.

    :param root: path of the directory to create.
    """
    if not osp.exists(root):
        # makedirs handles nested paths; exist_ok guards against another
        # process creating the directory between the check and the call.
        os.makedirs(root, exist_ok=True)

def save_img(root,img_url):
    """Download ``img_url`` into the directory ``root`` unless already present.

    The local file name is the last ``/``-separated component of the URL.

    :param root: destination directory (with or without a trailing separator).
    :param img_url: URL of the image to fetch.
    :return: ``0`` when the file is available locally (already present or
        freshly downloaded), ``1`` when the download or the write failed.
    """
    path = osp.join(root, img_url.split('/')[-1])
    if osp.exists(path):
        print(path + "文件已存在!")
        return 0
    try:
        # A short-lived session, closed as soon as the request is done.
        with requests.Session() as s:
            # The timeout keeps one dead host from hanging the whole batch.
            r = s.get(img_url, timeout=30)
            # Treat HTTP errors (404, 5xx, ...) as failures instead of
            # saving the error page under an image file name.
            r.raise_for_status()
        with open(path,'wb') as f:
            f.write(r.content)
    except Exception as e:
        # Best effort: report the failure and let the caller carry on with
        # the remaining images.
        print(img_url + ", 爬取失败!")
        print("    reason:", repr(e))
        return 1
    print(img_url + "已下载")
    return 0

def findimg(line,ori_pre):
    """Find the first image URL in ``line`` that starts with ``ori_pre``.

    A URL is taken to run from ``ori_pre`` up to the first recognised image
    extension (jfif, svg, webp, gif, jpeg, jpg, png; case-insensitive).

    :param line: a line of text to search.
    :param ori_pre: URL prefix the image link must start with.
    :return: ``(url, file_name)`` for the first match, where ``file_name`` is
        the last ``/``-separated component of the URL, or ``(None, None)``
        when the line contains no such image URL.
    """
    # The prefix and the extension dots are escaped: an unescaped "." matches
    # any character, which would cut "myjpgfile.png" short at "myjpg".
    pattern = re.escape(ori_pre) + r".*?\.(?:jfif|svg|webp|gif|jpeg|jpg|png)"
    match = re.search(pattern, line, flags=re.IGNORECASE)
    if match is None:
        print("已自动忽略:",line)
        return None, None
    img = match.group(0)
    name = img.split('/')[-1]
    return img,name

def changeurl(ori_root,save_root,down_root,ori_pre,new_pre):
    """Copy text files while rewriting image links and downloading the images.

    Every file found at ``ori_root`` is read line by line. For each line that
    contains ``ori_pre``, the image URL reported by ``findimg`` is downloaded
    into ``down_root`` via ``save_img``; when that succeeds the URL in the
    line is replaced by ``new_pre`` + the image file name. The resulting
    lines are written to a file of the same name in ``save_root``; the
    originals are not modified.

    Only the single URL returned by ``findimg`` is handled per line.

    :param ori_root: a directory of files to process, or a single file path.
    :param save_root: directory receiving the rewritten copies.
    :param down_root: directory receiving the downloaded images.
    :param ori_pre: URL prefix to look for; must end with ``/``.
    :param new_pre: URL prefix to substitute; must end with ``/``.
    """
    assert ori_pre.endswith('/') and new_pre.endswith('/')
    if osp.isdir(ori_root):
        # Only regular files can be opened below; skip sub-directories.
        files = [name for name in os.listdir(ori_root)
                 if osp.isfile(osp.join(ori_root, name))]
    else:
        # osp.split is used instead of str.split(file), which cuts at the
        # first occurrence of the file name anywhere in the path.
        ori_root, file = osp.split(ori_root)
        files = [file]
    makedir(save_root)
    makedir(down_root)
    for file in tqdm(files):
        print("Starting... ",file)
        with open(osp.join(ori_root, file),'r',encoding = 'utf-8') as f:
            content = f.readlines()
        with open(osp.join(save_root, file),'w',encoding='utf-8') as f:
            for line in content:
                if ori_pre in line:
                    img,name = findimg(line,ori_pre)
                    if img is not None:
                        change = new_pre + name
                        print(line,"==>",line.replace(img,change))
                        code = save_img(down_root,img)
                        # Keep the old link when the download failed, so the
                        # copy never points at an image that was not fetched.
                        if code == 0:
                            line = line.replace(img,change)
                f.write(line)


def main():
    """Run ``changeurl`` with the paths and URL prefixes hard-coded here."""
    changeurl(
        ori_root='./hexo/source/_posts/',
        save_root='./markdown/',
        down_root='./download/',
        ori_pre='https://cdn.jsdelivr.net/gh/',
        new_pre='https://npm.elemecdn.com/justlovesmile-post@1.0.3/',
    )


if __name__ == "__main__":
    main()

2. 压缩图片

由第一步已经下载好图片了,这一步需要对图片进行简单的压缩,这里我采用最简单的图片缩放,通过缩小图片来压缩(有损),需要无损压缩的可以用软件或者其他方法(百度、CSDN)。

代码语言:python
from PIL import Image
from glob import glob
import os
from tqdm import tqdm
import shutil
import sys
from itertools import chain
 
from multiprocessing import Pool
 
# image_dir = "image_dir"  (the input directory is read from stdin in the __main__ block instead)
# Working directories, relative to the current working directory. Each one is
# wiped and recreated by clean_dir() in the __main__ block.
# Scratch space for the intermediate, progressively shrunk copy of an image.
template_dir = 'template'
# Final results: images already below the target size, or their shrunk copies.
output_dir = 'output'
# Copies of the original images whose processing raised an exception.
error_dir = 'error'
 
 
def clean_dir(dir_name):
    """Leave ``dir_name`` as an existing, empty directory.

    Anything already at that path is removed recursively before the
    directory is created again.

    :param dir_name: path of the directory to reset.
    """
    already_there = os.path.exists(dir_name)
    if already_there:
        shutil.rmtree(dir_name)
    os.makedirs(dir_name)
 
 
# image_file_list = glob(f"{image_dir}/*")
# image_file_list
 
 
def imagesize(filepath):
    """
    Return the on-disk size of a file in kilobytes.

    :param filepath: path of the file to measure.
    :return: the size in bytes divided by 1024, as a float.
    """
    size_in_bytes = os.stat(filepath).st_size
    return size_in_bytes / 1024
 
 
def compress_image(image_path, target_size=500):
    """Shrink an image until its file size is no larger than ``target_size`` KB.

    A file already smaller than ``target_size`` is copied to ``output_dir``
    unchanged. Otherwise the image is scaled to 90% of its width and height,
    repeatedly, until the saved file is no larger than the target; the
    intermediate file lives in ``template_dir`` and the final one is copied
    to ``output_dir``. This is lossy: the pixel dimensions shrink.

    If anything goes wrong, the original file is copied to ``error_dir`` and
    a message is printed; the exception is not propagated.

    :param image_path: path of the image to process.
    :param target_size: size limit in kilobytes.
    :return: ``None``.
    """
    # basename copes with either path separator, unlike split(os.sep).
    image_name = os.path.basename(image_path)
    template_image = os.path.join(template_dir, image_name)
    output_image = os.path.join(output_dir, image_name)
    error_image = os.path.join(error_dir, image_name)

    try:
        if imagesize(image_path) < target_size:
            shutil.copyfile(image_path, output_image)
            return

        # The first pass reads the original; later passes re-read the
        # intermediate file produced by the previous pass.
        source = image_path
        while True:
            with Image.open(source) as img:
                width, height = img.size
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
                # same filter under its current name.
                shrunk = img.resize((int(width * 0.9), int(height * 0.9)), Image.LANCZOS)
            # Save after the source handle is closed: from the second pass
            # on, source and destination are the same file.
            shrunk.save(template_image)
            if imagesize(template_image) <= target_size:
                break
            source = template_image

        shutil.copyfile(template_image, output_image)
    except Exception as e:
        shutil.copyfile(image_path, error_image)
        print(f'文件保存失败: {image_path}')
        # Show the cause so failures can be diagnosed rather than silently
        # filling the error directory.
        print(f'    reason: {e!r}')
 
 
if __name__ == '__main__':
    # Start from empty working directories (any previous results are deleted).
    for i in [template_dir, output_dir, error_dir]:
        clean_dir(i)

    image_dir = input('dir path:')
    target_size = int(input('target size (kb):'))

    # Only files directly inside image_dir with these extensions are processed.
    image_file_list = list(chain.from_iterable(
        glob(os.path.join(image_dir, i)) for i in ['*.png', '*.jpg', '*.jpeg']))

    # Serial alternative to the pool below:
    # for temp_image_path in tqdm(image_file_list):
    #     compress_image(temp_image_path, target_size)

    print(f'\n\n文件保存父目录: {os.getcwd()}\n'
          f'输出文件位置:{os.path.join(os.getcwd(), output_dir)}\n\n')

    # parallel
    pbar = tqdm(total=len(image_file_list))
    # The context manager tears the pool down on exit; every result is
    # collected inside the block, so no task is cut short.
    with Pool(processes=10) as P:
        res_temp = [P.apply_async(func=compress_image, args=(i, target_size),
                                  callback=lambda _: pbar.update(1))
                    for i in image_file_list]
        # get() blocks until the task has finished and re-raises any
        # exception that escaped the worker.
        for res in res_temp:
            res.get()
    pbar.close()

3. 上传npm

到这一步就很简单了,把之前压缩好的图片放在一个文件夹里,如:

代码语言:text
- mj-img
    - 1.jpg
    - 2.png
      ...
    - xxx.jpeg

之前用过npm的,可以直接在文件夹里打开bash,依次输入 npm init 和 npm publish。

没有使用过npm的,可以参考Akilar的npm图床使用技巧.

0 人点赞