scrapy爬取搜狗图片

2020-03-12 16:18:46 浏览数 (1)

代码语言:python复制
# -*- coding: utf-8 -*-
from urllib.parse import urlencode
import json
import scrapy
import os
import re
import urllib.request

class SougouimgSpider(scrapy.Spider):
    """Crawl Sogou image search (pic.sogou.com) for a keyword and save each result image
    into a local '搜狗图片' folder next to this file.

    Note: the original blog post's HTML extraction stripped every '+' operator;
    they are restored here (string concatenation, regex quantifier).
    """
    name = 'sougouimg'
    allowed_domains = ['pic.sogou.com']
    start_urls = ['https://pic.sogou.com/']

    def parse(self, response):
        """Entry point: schedule one AJAX search request per result page.

        ``range(1, endpage)`` crawls pages 1..endpage-1 (4 pages here).
        """
        endpage = 5  # exclusive upper bound for the page loop
        keywords = r'哆啦A梦'
        for page in range(1, endpage):
            yield scrapy.Request(self.geturl(keywords, page), callback=self.sougou)

    def sougou(self, response):
        """Parse the JSON AJAX response and download every picture it lists.

        Assumes the response body is JSON with an 'items' list whose entries
        carry a 'pic_url' key — matches Sogou's ajax result format at the time
        of writing; verify if the endpoint changes.
        """
        js = json.loads(response.text)
        for item in js['items']:  # was 'list', which shadowed the builtin
            self.savve(item['pic_url'])

    def geturl(self, keywords, page):
        """Build the AJAX search URL for *keywords* at the given *page*.

        Sogou serves 48 results per page, so 'start' is the result offset.
        """
        param = {
            'query': keywords,
            'mode': '1',
            'start': page * 48,  # result offset: 48 images per page
            'reqType': 'ajax',
            'reqFrom': 'result',
            'tn': '0'
        }
        return 'https://pic.sogou.com/pics?' + urlencode(param)

    def savve(self, img_url):
        """Download *img_url* into the '搜狗图片' directory beside this script.

        The file name is the last path segment of the URL. Failures are
        reported but do not abort the crawl. (Method name keeps the original
        'savve' spelling so existing callers are unaffected.)
        """
        # os.path.join is portable; the original back-slash concatenation was
        # both broken (missing '+') and Windows-only.
        path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '搜狗图片')
        if not os.path.exists(path):
            os.makedirs(path)
        # Last non-slash run at the end of the URL = the file name.
        reg = re.compile(r'[^/]+$')
        title = reg.findall(img_url)[0]
        dest = os.path.join(path, title)
        try:
            urllib.request.urlretrieve(img_url, dest)
        except Exception:
            print(title + "下载失败")
        else:
            # 'else' (not 'finally'): only report success when no exception fired.
            print(title + "下载完毕")

by浅枫沐雪

0 人点赞