1. Accessing Pages
# Open a URL in the system's default browser
import webbrowser
webbrowser.open('http://www.baidu.com/')

pip3 install requests

# Download a text file with requests
import requests
res = requests.get('http://www.gutenberg.org/cache/epub/1112/pg1112.txt')
res.status_code == requests.codes.ok  # True or False
len(res.text)          # the body is kept in a variable
print(res.text[:250])
res.raise_for_status()  # raises an exception if the download failed; returns nothing on success

playFile = open('a.txt', 'wb')  # write in binary mode to preserve the text's Unicode encoding
for chunk in res.iter_content(100000):  # chunk size in bytes
    playFile.write(chunk)
playFile.close()

pip3 install sqlalchemy

# Define a table, insert rows, and query it with SQLAlchemy
import sqlalchemy as sa
engine = sa.create_engine('sqlite://')  # in-memory SQLite database
meta = sa.MetaData()
zoo = sa.Table('zoo', meta,
               sa.Column('critter', sa.String, primary_key=True),
               sa.Column('count', sa.Integer),
               sa.Column('damages', sa.Float))
meta.create_all(engine)
with engine.begin() as conn:  # current SQLAlchemy executes through a connection, not the engine
    conn.execute(zoo.insert().values(critter='bear', count=2, damages=1000.0))
    conn.execute(zoo.insert().values(critter='weasel', count=1, damages=2000.0))
    result = conn.execute(zoo.select())  # like SELECT *
    rows = result.fetchall()
print(rows)
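The select() above returns every row; a WHERE clause can be added to the same expression. A minimal sketch, reusing the engine and zoo table defined above (the 'bear' filter value is only an illustration):

# Build SELECT * FROM zoo WHERE critter = 'bear' and iterate over the matching rows
query = sa.select(zoo).where(zoo.c.critter == 'bear')
with engine.connect() as conn:
    for row in conn.execute(query):
        print(row.critter, row.count, row.damages)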
# Fetch a page with the standard library's urllib
import urllib.request as ur
url = 'http://www.iheartquotes.com/api/v1/random'
conn = ur.urlopen(url)
print(conn)
data = conn.read()  # read the response body
print(data)
conn.status  # HTTP status code
print(conn.getheader('Content-Type'))  # format of the returned data
for key, value in conn.getheaders():   # list every HTTP header
    print(key, value)
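Note that urlopen() hands back bytes, not str, so the body usually has to be decoded before use. A minimal sketch, assuming the server names a charset in its Content-Type header and falling back to UTF-8 when it does not (the quote API above may no longer be online; treat the URL as a placeholder):

import urllib.request as ur

conn = ur.urlopen('http://www.iheartquotes.com/api/v1/random')
raw = conn.read()                                         # bytes, not str
charset = conn.headers.get_content_charset() or 'utf-8'   # charset from Content-Type, UTF-8 fallback
print(raw.decode(charset))                                # now ordinary text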
pip3 install requests

# The same request is shorter with requests
import requests
url = 'http://www.iheartquotes.com/api/v1/random'
resp = requests.get(url)
resp  # <Response [200]>
print(resp.text)
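When an endpoint returns JSON, requests can decode it directly with resp.json(), which raises a ValueError on a malformed body. A minimal sketch against httpbin.org, a public echo service that is not part of the notes above:

import requests

resp = requests.get('https://httpbin.org/get', params={'q': 'python'})  # params become the query string
resp.raise_for_status()  # fail loudly on HTTP errors
data = resp.json()       # parse the JSON body into a dict
print(data['args'])      # {'q': 'python'}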
2. Filtering Pages
pip3 install beautifulsoup4

# Parse downloaded HTML with Beautiful Soup
import requests, bs4
res = requests.get('http://nostarch.com')
res.raise_for_status()
noStarchSoup = bs4.BeautifulSoup(res.text, 'html.parser')  # name the parser explicitly

# A soup can also be built from a local file
exampleFile = open('example.html')
exampleSoup = bs4.BeautifulSoup(exampleFile, 'html.parser')

# CSS selectors on a soup object
elems = exampleSoup.select('p #author')  # elements with id="author" inside a <p>
exampleSoup.select('p')[0]               # take only the first <p>
elems[0].get('id')                       # returns the value of the id attribute
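Putting the two sections together: download a page with requests, parse it with Beautiful Soup, and pull out every link. A minimal sketch, assuming http://nostarch.com is reachable and using the standard library's 'html.parser' backend:

import requests, bs4

res = requests.get('http://nostarch.com')
res.raise_for_status()
soup = bs4.BeautifulSoup(res.text, 'html.parser')
for a in soup.select('a[href]'):  # every <a> tag that actually has an href
    print(a.get('href'), a.get_text(strip=True))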


