Python notes: Implementing a simple crawler in Python

2019-11-22 11:45:18

Example

A simple crawler, written in Python 3. It covers using a proxy, disabling SSL verification, and handling a JSON response. Purpose: checking train ticket availability.

Code (Python):
import urllib.request
import json
import codecs
import time,datetime
import ssl

# Globally disable SSL certificate verification for HTTPS requests
ssl._create_default_https_context = ssl._create_unverified_context

def GetInfo():
    # Poll the 12306 ticket API every few seconds until second-class seats are found
    while True:
        try:
            # Route requests through an HTTP proxy (credentials are part of the proxy URL)
            proxy_handler = urllib.request.ProxyHandler({'https': 'http://y003460:password@172.18.32.221:8080'})
            opener = urllib.request.build_opener(proxy_handler)
            urllib.request.install_opener(opener)

            # Query remaining tickets from the 12306 left-ticket API
            resp = urllib.request.urlopen(
                'https://kyfw.12306.cn/otn/leftTicket/queryT?'
                'leftTicketDTO.train_date=2016-10-01'
                '&leftTicketDTO.from_station=SZQ&leftTicketDTO.to_station=LDQ&'
                'purpose_codes=ADULT', timeout=8)

            # Decode the response as UTF-8 and parse the JSON body
            reader = codecs.getreader("utf-8")
            train_result = json.load(reader(resp))

            # print(train_result)
            train_datas = train_result['data']
            for item in train_datas:
                train_single_data = item['queryLeftNewDTO']
                # ze_num is the number of second-class ("二等") seats left
                print(train_single_data['station_train_code'], "second class:", train_single_data['ze_num'])
                # "无" (none) and "-" both mean no seats; anything else means tickets were found
                if train_single_data['ze_num'] != "无" and train_single_data['ze_num'] != "-":
                    return
            # Log this poll's timestamp, then wait before querying again
            nowtime = datetime.datetime.now()
            print(nowtime.strftime("%Y-%m-%d %H:%M:%S-%f"))
            time.sleep(8)
        except Exception as errors:
            print("Error:", errors)

GetInfo()
print("找到了")
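
The ssl._create_default_https_context monkey-patch above disables certificate checks for every HTTPS request in the process. If that is too broad, a narrower option (a minimal sketch; the URL below is just a placeholder) is to pass an unverified SSL context to urlopen for a single request:

Code (Python):
import ssl
import urllib.request

# Build an SSL context that skips certificate verification (use with care)
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Only this request skips verification; other requests in the process are unaffected
resp = urllib.request.urlopen('https://kyfw.12306.cn/', timeout=8, context=ctx)
print(resp.getcode())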

Techniques

Fetching a web page

py2

Code (Python):
import urllib2

# An empty dict means no proxy is used
proxy_handler = urllib2.ProxyHandler({})
opener = urllib2.build_opener(proxy_handler)
urllib2.install_opener(opener)

# download text (URL and args are assumed to be defined by the caller)
req = URL.format(args[1])
res_data = urllib2.urlopen(req)
res = res_data.read()
res = res.decode("utf-8")

py3

Code (Python):
import codecs
import urllib.request

# An empty dict means no proxy is used
proxy_handler = urllib.request.ProxyHandler({})
opener = urllib.request.build_opener(proxy_handler)
urllib.request.install_opener(opener)

# download text (URL and args are assumed to be defined by the caller)
resp = urllib.request.urlopen(URL.format(args[1]))
reader = codecs.getreader("utf-8")
res = reader(resp).read()
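
Both snippets assume that URL (a format string with one {} placeholder) and args (for example sys.argv) are defined elsewhere. A minimal self-contained sketch of how they might be wired together in Python 3, reusing the codecs reader to parse a JSON response directly (the endpoint here is only an illustration):

Code (Python):
import sys
import json
import codecs
import urllib.request

# Hypothetical endpoint; any API returning JSON works the same way
URL = 'https://kyfw.12306.cn/otn/leftTicket/queryT?leftTicketDTO.train_date={}'
args = sys.argv  # args[1] would be the query date, e.g. 2016-10-01

proxy_handler = urllib.request.ProxyHandler({})  # empty dict = no proxy
opener = urllib.request.build_opener(proxy_handler)
urllib.request.install_opener(opener)

resp = urllib.request.urlopen(URL.format(args[1]), timeout=8)
reader = codecs.getreader("utf-8")
data = json.load(reader(resp))  # parse the JSON body without reading it into a string first
print(data)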
