items.py:
fishPicId = scrapy.Field() #图片url地址。要是个列表
image_path = scrapy.Field() #->>保存img绝对路径。
spider.py:
item['fishPicId'] = [img_src] # ImagesPipeline用到的是图片的url列表
settings.py:
import os

# Item pipelines enabled for this project, mapped to their order value.
# Scrapy runs pipelines in ascending order of this value, so
# DownloadImagesPipeline (100) sees each item before LxspiderPipeline (300).
ITEM_PIPELINES = {
    'LXSpider.pipelines.LxspiderPipeline': 300,
    'LXSpider.pipelines.DownloadImagesPipeline': 100,
}

# Name of the item field (declared in items.py) that holds the image URLs.
IMAGES_URLS_FIELD = "fishPicId"

# Absolute path of the directory that contains this settings module.
project_dir = os.path.abspath(os.path.dirname(__file__))

# Downloaded images are stored in an "images" directory next to this module.
IMAGES_STORE = os.path.join(project_dir, 'images')

# Optional size filters, left disabled:
# IMAGES_MIN_HEIGHT = 1  # minimum height of images to download
# IMAGES_MIN_WIDTH = 1  # minimum width of images to download
pipelines.py:
'''保存图片绝对路径'''
from LXSpider.images.full import img_abspath
from scrapy.pipelines.images import ImagesPipeline
class DownloadImagesPipeline(ImagesPipeline):
    """Image pipeline that records on the item where its image was saved."""

    def item_completed(self, results, item, info):
        """Store the saved image's path in ``item['image_path']``.

        Args:
            results: Iterable of ``(ok, value)`` pairs, one per requested
                image. For a successful download ``value`` is subscripted
                with ``'path'`` to obtain the stored file's path.
            item: The scraped item; its ``'image_path'`` entry is set when
                at least one download succeeded.
            info: Unused here; accepted to match the pipeline hook signature.

        Returns:
            The same ``item``, so later pipelines can keep processing it.
        """
        for ok, value in results:
            if not ok:
                # A failed download carries an error object rather than file
                # info, so there is no 'path' entry to read.
                continue
            # NOTE(review): img_abspath is presumably a directory prefix that
            # already ends with a path separator -- confirm where it is defined.
            # '/' is rewritten to '\' to produce a Windows-style path; if
            # several downloads succeed, the last one wins.
            item['image_path'] = img_abspath + str(value['path']).replace('/', '\\')
        return item