# Python: 获取豆瓣 Top250 电影数据 (fetch Douban Top 250 movie data)
from urllib import request
import re
from bs4 import BeautifulSoup
from distutils.filelist import findall
import re
# Scrape the Douban Top 250 movie list and print, for every movie, its title,
# rating score, rating-count text and detail-page URL.
#
# The list is fetched as 10 pages of 25 movies each; the `start` query
# parameter is the zero-based offset of the first movie on the page.

# NOTE(review): Douban is reported to reject urllib's default User-Agent
# (HTTP 418), so a browser-like one is sent — confirm against the live site.
HEADERS = {'User-Agent': 'Mozilla/5.0'}

for i in range(1, 11):
    url_i = 'https://movie.douban.com/top250?start=' + str((i - 1) * 25) + '&filter='

    # Download the raw page source; the context manager guarantees the
    # HTTP response is closed even if reading fails.
    with request.urlopen(request.Request(url_i, headers=HEADERS)) as page:
        contents = page.read()

    # Page banner, e.g. "------------------第1页------------------".
    print('\n' + '------------------' + '第' + str(i) + '页' + '------------------' + '\n')

    soup = BeautifulSoup(contents, "html.parser")
    # Each movie entry is expected inside a <div class="info"> element.
    for tag in soup.find_all('div', class_='info'):
        # First <span class="title"> holds the primary title.
        m_name = tag.find('span', class_='title').get_text()
        m_rating_score = float(tag.find('span', class_='rating_num').get_text())
        # The fourth <span> inside <div class="star"> carries the
        # rating-count text (presumably "N人评价" — verify against the markup).
        m_people = tag.find('div', class_="star")
        m_span = m_people.find_all('span')
        m_peoplecount = m_span[3].contents[0]
        # First link in the entry points at the movie's detail page.
        m_url = tag.find('a').get('href')
        print(m_name + " " + str(m_rating_score) + " " + m_peoplecount + " " + m_url)
# 运行结果 (sample output): see res001.png