1. Accessing web pages
import webbrowser
webbrowser.open('http://www.baidu.com/')  # launch the default browser at the given URL
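A small sketch of a common variant: opening a browser search for whatever is on the command line. The wd query parameter for Baidu search is an assumption worth verifying.
import sys
import urllib.parse
import webbrowser

query = ' '.join(sys.argv[1:]) or 'python'  # fall back to a default query
# Assumption: Baidu's search endpoint takes the query in the wd parameter.
webbrowser.open('https://www.baidu.com/s?wd=' + urllib.parse.quote_plus(query))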
pip3 install requests
import requests
res = requests.get('http://www.gutenberg.org/cache/epub/1112/pg1112.txt')
res.status_code == requests.codes.ok  # evaluates to True or False
len(res.text)  # the downloaded text is kept on the response object and can be saved to a variable
print(res.text[:250])
res.raise_for_status()  # raises an exception if the download failed; returns nothing on success
playFile = open('a.txt', 'wb')  # write in binary mode to preserve the text's Unicode encoding
for chunk in res.iter_content(100000):  # each chunk is at most the given number of bytes
    playFile.write(chunk)
playFile.close()
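The same download reads a little more safely with a context manager, which closes the file even if an exception is raised; a sketch assuming the same URL and chunk size:
import requests

res = requests.get('http://www.gutenberg.org/cache/epub/1112/pg1112.txt')
res.raise_for_status()
with open('a.txt', 'wb') as playFile:  # the file is closed automatically
    for chunk in res.iter_content(100000):
        playFile.write(chunk)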
pip3 install sqlalchemy
import sqlalchemy as sa
conn = sa.create_engine('sqlite://')  # 'sqlite://' with no path is an in-memory SQLite database
meta = sa.MetaData()
zoo = sa.Table('zoo', meta,
               sa.Column('critter', sa.String, primary_key=True),
               sa.Column('count', sa.Integer),
               sa.Column('damages', sa.Float))
meta.create_all(conn)
conn.execute(zoo.insert(('bear', 2, 1000.0))))  # note: this engine.execute()/tuple style is SQLAlchemy 1.x; 2.0 removed it
conn.execute(zoo.insert(('weasel', 1, 2000.0)))
result = conn.execute(zoo.select())  # equivalent to SELECT * FROM zoo
rows = result.fetchall()
print(rows)
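A filtered query follows the same pattern; a sketch, assuming the zoo table above and SQLAlchemy 1.x:
# Select a single row: WHERE critter = 'bear'
result = conn.execute(zoo.select().where(zoo.c.critter == 'bear'))
print(result.fetchall())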
# Fetching a web API with the standard library's urllib
import urllib.request as ur
url = 'http://www.iheartquotes.com/api/v1/random'
conn = ur.urlopen(url)
print(conn)
data = conn.read()  # read the raw page data
print(data)
conn.status  # HTTP status code
print(conn.getheader('Content-Type'))  # format of the returned data
for key, value in conn.getheaders():  # list every HTTP header
    print(key, value)
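urlopen() returns bytes; a small sketch of turning them into text, assuming the body is UTF-8 encoded (check the Content-Type header's charset first):
import urllib.request as ur

conn = ur.urlopen('http://www.iheartquotes.com/api/v1/random')
text = conn.read().decode('utf-8')  # assumption: the server sends UTF-8
print(text)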
pip3 install requests
import requests
url = 'http://www.iheartquotes.com/api/v1/random'
resp = requests.get(url)
resp  # <Response [200]>
print(resp.text)
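requests exposes the same information as the urllib calls above through convenient attributes; a short sketch:
import requests

resp = requests.get('http://www.iheartquotes.com/api/v1/random')
print(resp.status_code)              # HTTP status code, like conn.status
print(resp.headers['Content-Type'])  # headers behave like a dict
print(resp.text)                     # body already decoded to str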
2. Filtering page content
pip3 install beautifulsoup4
import requests,bs4
res = requests.get('http://nostarch.com')
res.raise_for_status()
noStarchSoup = bs4.BeautifulSoup(res.text, 'html.parser')  # pass a parser explicitly to avoid a warning
exampleFile = open('example.html')
exampleSoup = bs4.BeautifulSoup(exampleFile, 'html.parser')  # a soup can also be built from a file object
exampleSoup.select('p #author')  # elements with id="author" inside a <p>
firstPara = exampleSoup.select('p')[0]  # take only the first matching element
firstPara.get('id')  # returns the value of the element's id attribute
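A sketch of inspecting a matched element, assuming example.html contains something like <span id="author">...</span>:
import bs4

with open('example.html') as f:
    exampleSoup = bs4.BeautifulSoup(f, 'html.parser')
elems = exampleSoup.select('#author')  # CSS selector for id="author"
if elems:                              # select() returns [] when nothing matches
    print(elems[0].getText())          # the text inside the tag
    print(elems[0].attrs)              # the tag's attributes as a dict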