scrapy startproject todo
scrapy genspider -t basic todolist 192.168.126.181
cd todo
vi items.py
import scrapy
class TodoItem(scrapy.Item):
    """Container for one scraped to-do entry.

    Both fields are plain ``scrapy.Field()`` declarations with no
    serializer or default configured.
    """

    # Text of the to-do entry.
    todo = scrapy.Field()
    # Second value scraped for the entry; "riqi" is presumably pinyin for
    # "date" -- TODO confirm against the page being scraped.
    riqi = scrapy.Field()
vi todolist.py
# -*- coding: utf-8 -*-
import scrapy
from todo.items import TodoItem
class TodolistSpider(scrapy.Spider):
    """Spider that scrapes to-do entries from the simpletodo page.

    It requests the single URL in ``start_urls`` and turns each table row
    that carries at least two ``<td class="todo">`` cells into a TodoItem.
    """

    name = 'todolist'
    allowed_domains = ['192.168.126.181']
    start_urls = ['http://192.168.126.181:9999/simpletodo']

    def parse(self, response):
        """Build one TodoItem per qualifying table row of the response.

        Args:
            response: The downloaded page to extract rows from.

        Returns:
            A list of TodoItem objects, in document order. Rows with fewer
            than two ``td.todo`` cells are skipped.
        """
        items = []
        for row in response.xpath('//table//tr'):
            cells = row.xpath('.//td[@class="todo"]')
            # Both fields are read by position, so a row needs at least two
            # matching cells. Checking only for "non-empty" would let a
            # one-cell row through and raise IndexError on cells[1].
            if len(cells) < 2:
                continue
            item = TodoItem()
            # extract_first() returns the default instead of raising when a
            # cell contains no text node (e.g. an empty <td>).
            item["todo"] = cells[0].xpath("./text()").extract_first(default="")
            item["riqi"] = cells[1].xpath("./text()").extract_first(default="")
            items.append(item)
        return items
scrapy crawl todolist