【教程】多进程下载百度旋转验证码图片-制作数据集

2023-11-09 10:13:51 浏览数 (1)

转载请注明出处:小锋学长生活大爆炸[xfxuezhang.cn] 数据集制作辅助工具:【工具】旋转图片-数据集制作工具, 开源!

效果展示:

直接上代码,开箱即用(当然 selenium、requests、webdriver-manager 这几个库需要自己装一下):

代码语言:python
import os
import time
import requests
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver import ActionChains
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver 
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from multiprocessing import Process


# Download the rotating-captcha image behind a URL
def get_img(url):
    """Download the image at ``url`` and save it as a ``.jpg`` under ``images/``.

    The request is sent with a fixed set of browser-like headers. Nothing is
    written unless the server answers with HTTP 200. A failed request is
    reported on stdout and skipped, so one bad download does not take the
    calling worker down.

    Args:
        url: Address of the captcha image to fetch.

    Returns:
        None.
    """
    # NOTE(review): the Host and Cookie values are hard-coded; the cookie looks
    # session-specific and may be stale -- confirm whether it is still needed.
    header = {
        "Host": "passport.baidu.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0",
        "Accept": "image/avif,image/webp,*/*",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Accept-Encoding": "gzip, deflate, br",
        "Referer": "https://wappass.baidu.com/",
        "Connection": "keep-alive",
        "Cookie": 'Hm_lvt_3eecc7feff77952670b7c24e952e8773=1666849322,1666919008,1666961940,1667175865; Hm_lpvt_3eecc7feff77952670b7c24e952e8773=1667186488; token="MTY2NzE4NzczNS4yMTEzMjg1OmQwNDNhNmZiZTA4MjlmOGY1YjE0MjA0NmViN2M1NTdkM2MyYWY3NzE="; sessionid=aa6zibdmfbs5cwzh6x62niw7fbqe5pon',
        "Sec-Fetch-Dest": "image",
        "Sec-Fetch-Mode": "no-cors",
        "Sec-Fetch-Site": "same-site",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    try:
        # A timeout keeps a stalled connection from hanging the worker forever.
        response = requests.get(url=url, headers=header, timeout=10)
    except requests.RequestException as exc:
        print(f"image download failed: {url} ({exc})")
        return

    if response.status_code != 200:
        return

    # Several processes download at once, so a whole-second timestamp is not
    # unique; nanoseconds plus the process id avoid overwriting other images.
    os.makedirs("images", exist_ok=True)
    file_name = f"{time.time_ns()}_{os.getpid()}.jpg"
    with open(os.path.join("images", file_name), 'wb') as f:
        f.write(response.content)

def main():
    """Worker loop: open the captcha page in Chrome and download its image.

    Starts a Chrome driver, then repeatedly loads the captcha page, waits for
    the slider button and the rotating image to appear, reads the image's
    ``src`` attribute and passes it to ``get_img``. The loop has no exit
    condition; it ends only when an exception escapes (for example a wait
    timeout) or the process is terminated. The browser is shut down in every
    case.
    """
    captcha_url = 'https://wappass.baidu.com/static/captcha/tuxing.html?&ak=c27bbc89afca0463650ac9bde68ebe06&backurl=https://www.baidu.com/s?cl=3&tn=baidutop10&fr=top1000&wd=%E6%B6%88%E9%98%B2%E6%88%98%E5%A3%AB%E8%BF%9E%E5%A4%9C%E7%AD%91%E5%9D%9D%E5%BA%94%E5%AF%B9%E6%B4%AA%E5%B3%B0%E8%BF%87%E5%A2%83&rsv_idx=2&rsv_dl=fyb_n_homepage&hisfilter=1&logid=8309940529500911554&signature=4bce59041938b160b7c24423bde0b518&timestamp=1624535702'
    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
    try:
        driver.implicitly_wait(5)

        while True:
            # Load the captcha page (presumably each load serves a new image
            # -- verify against the site).
            driver.get(captcha_url)
            # Wait for the slider button to appear
            WebDriverWait(driver, 10).until(lambda x: x.find_element(By.XPATH, value='//div[@class="passMod_slide-btn "]'))
            time.sleep(1)
            # Wait for the captcha image; until() hands back the element the
            # lambda found, so no second lookup is needed.
            captcha_img = WebDriverWait(driver, 10).until(lambda x: x.find_element(By.XPATH, value='//img[@class="passMod_spin-background"]'))
            img_src = captcha_img.get_attribute('src')
            # Download the image
            get_img(img_src)
    finally:
        # Always close the browser so no Chrome/chromedriver process is left
        # behind when the loop is interrupted or fails.
        driver.quit()


if __name__ == '__main__':
    # Download Baidu rotating-captcha images with several worker processes.
    # The output directory is created here, before any worker is started.
    if not os.path.exists('images'):
        os.mkdir('images')
    worker_count = 5
    for i in range(worker_count):
        print(f'进程{i}启动')
        # Each worker runs main() in its own process
        Process(target=main, name=f"work_{i}").start()

0 人点赞