bs4爬取豆瓣top250数据

2020-03-10 09:02:53 浏览数 (1)

python获取豆瓣top250电影数据

代码语言:python
from  urllib import request
import re
from bs4 import BeautifulSoup
from distutils.filelist import findall
import re

# Walk through result pages 1..10 of the Douban Top 250 listing and print,
# for every movie entry found: title, rating score, vote-count text and URL.
for i in range(1, 11):
    # The `start` query parameter advances by 25 per page (0, 25, ..., 225).
    url_i = 'https://movie.douban.com/top250?start=' + str((i - 1) * 25) + '&filter='
    # Fetch the raw page source; the context manager closes the HTTP
    # response even if reading raises.
    with request.urlopen(url_i) as page:
        contents = page.read()
    # Page separator banner.
    print('\n' + '------------------' + '第' + str(i) + '页' + '------------------' + '\n')
    soup = BeautifulSoup(contents, "html.parser")
    # The script treats each <div class="info"> as one movie entry.
    for tag in soup.find_all('div', class_='info'):
        # First <span class="title"> inside the entry.
        m_name = tag.find('span', class_='title').get_text()
        m_rating_score = float(tag.find('span', class_='rating_num').get_text())
        m_people = tag.find('div', class_="star")
        m_span = m_people.find_all('span')
        # Assumes the 4th <span> in the "star" block holds the vote-count
        # text -- depends on the page markup; verify if the layout changes.
        m_peoplecount = m_span[3].contents[0]
        # First link in the entry is used as the movie's URL.
        m_url = tag.find('a').get('href')
        print(m_name + "        " + str(m_rating_score) + "           " + m_peoplecount + "    " + m_url)

运行结果

res001.png

0 人点赞