示例
做了一个简单的爬虫,使用 Python 3。涉及代理的使用、关闭 SSL 验证、返回 JSON 的处理。功能:用来查火车票。
import urllib.request
import json
import codecs
import time
import datetime
import ssl

# WARNING: this disables TLS certificate verification for every HTTPS
# request made through urllib in this process. Acceptable only for a
# throwaway demo script; never do this in production code.
ssl._create_default_https_context = ssl._create_unverified_context
def GetInfo():
    """Poll the 12306 left-ticket API until a second-class seat appears.

    Loops forever, querying every 8 seconds through an HTTPS proxy, and
    returns as soon as any train reports a sellable second-class
    ("ze_num") count. Network/parse failures are printed and retried.

    Returns:
        None -- returning at all means a seat was found.
    """
    while True:
        try:
            # NOTE(review): credentials are hard-coded in the proxy URL --
            # replace them, or drop the handler on a direct connection.
            proxy_handler = urllib.request.ProxyHandler(
                {'https': 'http://y003460:password@172.18.32.221:8080'})
            opener = urllib.request.build_opener(proxy_handler)
            urllib.request.install_opener(opener)
            # The original text wrapped this mid-expression
            # ("urllib.request\n.urlopen"), a SyntaxError; the call is
            # reassembled here as one expression.
            resp = urllib.request.urlopen(
                'https://kyfw.12306.cn/otn/leftTicket/queryT?'
                'leftTicketDTO.train_date=2016-10-01'
                '&leftTicketDTO.from_station=SZQ&leftTicketDTO.to_station=LDQ&'
                'purpose_codes=ADULT', timeout=8)
            reader = codecs.getreader("utf-8")
            train_result = json.load(reader(resp))
            # print(train_result)
            train_datas = train_result['data']
            for item in train_datas:
                train_single_data = item['queryLeftNewDTO']
                print(train_single_data['station_train_code'], "二等",
                      train_single_data['ze_num'])
                # "无" and "-" mean no second-class seat; any other value
                # (a count or "有") counts as a hit.
                if (train_single_data['ze_num'] != "无"
                        and train_single_data['ze_num'] != "-"):
                    return
            nowtime = datetime.datetime.now()
            print(nowtime.strftime("%Y-%m-%d %H:%M:%S-%f"))
            time.sleep(8)
        except Exception as errors:
            print("一个错误", errors)
            # Fix: the original retried immediately after an error,
            # hammering the server in a tight loop; back off like the
            # success path does.
            time.sleep(8)
# Script entry: blocks until GetInfo() finds an available second-class
# seat (it only returns on success), then reports the find.
GetInfo()
print("找到了")
技术
获取网页
py2
# Python 2 variant: fetch a URL through an (empty, i.e. direct) proxy
# handler and decode the response body as UTF-8.
# NOTE(review): URL and args are defined elsewhere in the post -- this
# snippet is not self-contained.
proxy_handler = urllib2.ProxyHandler({})
opener = urllib2.build_opener(proxy_handler)
urllib2.install_opener(opener)
# download text
req = URL.format(args[1])
res_data = urllib2.urlopen(req)
res = res_data.read()
res = res.decode("utf-8")
py3
# Python 3 variant of the same download: an empty ProxyHandler (direct
# connection), with the response stream decoded via a codecs reader.
# NOTE(review): URL and args are defined elsewhere in the post -- this
# snippet is not self-contained.
proxy_handler = urllib.request.ProxyHandler({})
opener = urllib.request.build_opener(proxy_handler)
urllib.request.install_opener(opener)
# download text
resp = urllib.request.urlopen(URL.format(args[1]))
reader = codecs.getreader("utf-8")
res = reader(resp).read()