这两天试着学了一下爬虫,刚学会了爬取静态网页,就趁热现学现卖,做了一个基金查询的demo。
基金数据来自网易财经基金页面,其URL格式为:
"http://quotes.money.163.com/fund/jzzs_{code}_{page}.html?start={start}&end={end}&sort=TDATE&order=desc".format(
code=code,page="0",start=start,end=end)
如 "http://quotes.money.163.com/fund/jzzs_001630_0.html?start=2009-02-22&end=2019-10-29&sort=TDATE&order=desc"
其中code为基金代码,例如"001630";start和end为起始日期和截止日期,格式为 "yyyy-MM-dd"
爬取到的基金净值数据用PyQt的表格控件展示,再用matplotlib绘图并嵌入UI界面。
通过基金代码查询到的新的基金的名称和代码信息会存入文件,以供下次打开程序时程序下拉框自动加载。
代码如下:
import sys
from PyQt5.QtWidgets import *
from PyQt5.QtGui import QColor, QFont, QIcon,QPixmap,QRegExpValidator
from PyQt5.QtCore import Qt, QSize,QDate,QRegExp
import pickle
import requests
import re
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg,NavigationToolbar2QT
from matplotlib.figure import Figure
#import numpy as np
class Canvas(FigureCanvasQTAgg):
    """Matplotlib canvas embedded in the Qt UI that plots one value series.

    The canvas owns a single subplot, exposed as ``self.axes``.
    """

    def __init__(self, parent=None, width=5, height=4, dpi=100):
        """Create the figure and its single subplot.

        Args:
            parent: Qt parent widget, or None.
            width: Figure width in inches.
            height: Figure height in inches.
            dpi: Figure resolution in dots per inch.
        """
        fig = Figure(figsize=(width, height), dpi=dpi)
        fig.set_tight_layout(True)
        self.axes = fig.add_subplot(111)
        self._style_ticks()
        FigureCanvasQTAgg.__init__(self, fig)
        self.setParent(parent)
        FigureCanvasQTAgg.updateGeometry(self)

    def _style_ticks(self):
        """Rotate the x tick labels by 90 degrees and point the ticks inwards."""
        self.axes.tick_params(axis='x', rotation=90, direction="in")

    def update_figure(self, x, y, title):
        """Redraw the plot with a new series.

        The series is plotted in the reverse of the order given; the caller in
        this file requests its data with ``order=desc`` in the query URL.

        Args:
            x: Sequence of x values (date strings in this file's caller).
            y: Sequence of y values, same length as ``x``.
            title: Title shown above the plot.
        """
        # Reverse copies: reversing in place would also flip the caller's
        # lists, which the caller keeps as attributes after plotting.
        xs = list(reversed(x))
        ys = list(reversed(y))

        self.axes.cla()  # drop the previously drawn series
        # cla() resets the axes and may discard the tick styling applied in
        # __init__, so apply it again for every redraw.
        self._style_ticks()
        self.axes.set_title(title, fontsize=18)
        self.axes.plot(xs, ys)
        self.axes.scatter(xs, ys, marker='o')
        self.axes.set_ylabel("基金净值[元]")
        self.axes.grid(lw=0.5, ls="--", alpha=0.5)
        self.draw()
class MainWindow(QMainWindow):
    """Main window of the fund net-value query tool.

    The window downloads a fund's net-value history pages, shows the rows in
    a table tab and plots the unit net value in a plot tab. Fund codes and
    names that were queried successfully are kept in ``self.funds`` and
    pickled to ``FUNDS_FILE`` when the window is closed.
    """

    # File holding the pickled {fund code: fund name} dict between runs.
    FUNDS_FILE = "info.obj"
    # History page URL; ``page`` starts at 0.
    URL_TEMPLATE = ("http://quotes.money.163.com/fund/jzzs_{code}_{page}.html"
                    "?start={start}&end={end}&sort=TDATE&order=desc")
    # Seconds to wait for the server so a dead connection cannot block the
    # GUI thread forever.
    DOWNLOAD_TIMEOUT = 10

    def __init__(self, parent=None):
        """Build the widgets, actions, toolbar and menus.

        Args:
            parent: Qt parent widget, or None.
        """
        super().__init__(parent)
        self.funds = self._load_funds()
        self.setWindowTitle("A股基金查询工具【数据来源于网易财经,python爬虫demo】")
        self.create_table()
        self.create_canvas()
        self.setup_centralWidget()
        self.createActions()
        self.setup_toolBar()
        self.setup_menuBar()
        self.statusBar().showMessage("ready")
        # No fund is considered chosen until the user picks or types one.
        self.code = None

    @classmethod
    def _load_funds(cls):
        """Return the saved {code: name} dict, or an empty dict if unavailable.

        NOTE(security): pickle can execute arbitrary code while loading. This
        is acceptable only because the file is written by closeEvent of this
        same program; do not point FUNDS_FILE at untrusted data.
        """
        try:
            with open(cls.FUNDS_FILE, "rb") as f:
                funds = pickle.load(f)
        except (FileNotFoundError, EOFError, pickle.UnpicklingError):
            # First run or a damaged file: start with no known funds.
            return {}
        return funds if isinstance(funds, dict) else {}

    def create_table(self):
        """Create the read-only, sortable four-column result table."""
        self.table = QTableWidget()
        self.table.setEditTriggers(QAbstractItemView.NoEditTriggers)
        HorizontalHeaderLabels = ["公布日期", "单位净值", "累计净值", "增长率"]
        columns = len(HorizontalHeaderLabels)
        self.table.setColumnCount(columns)
        self.rows = 100
        self.table.setRowCount(self.rows)
        self.headerWidth = (100, 80, 80, 80)
        self.table.setSortingEnabled(True)
        self.table.horizontalHeader().setStyleSheet(
            "QHeaderView::section{background-color:rgb(180,180,250);}")
        # The loop stops before the last column, which keeps its default width.
        for i in range(columns - 1):
            self.table.setColumnWidth(i, self.headerWidth[i])
        self.table.setHorizontalHeaderLabels(HorizontalHeaderLabels)

    @staticmethod
    def _centered_item(text):
        """Return a table item showing ``text`` centred in its cell."""
        item = QTableWidgetItem(text)
        item.setTextAlignment(Qt.AlignHCenter | Qt.AlignVCenter)
        return item

    def update_table(self):
        """Fill the table from self.date, self.net, self.acc_net and self.rate."""
        rows = len(self.rate)
        # Inserting items while sorting is enabled can move a row as soon as
        # its first cell is set, scattering the remaining cells over other
        # rows. Switch sorting off while filling and back on afterwards.
        self.table.setSortingEnabled(False)
        self.table.clearContents()
        if rows > self.rows:
            self.table.setRowCount(rows)
        records = zip(self.date, self.net, self.acc_net, self.rate)
        for row, (date, net, acc_net, rate) in enumerate(records):
            self.table.setItem(row, 0, self._centered_item(date))
            self.table.setItem(row, 1, self._centered_item(str(net)))
            self.table.setItem(row, 2, self._centered_item(str(acc_net)))
            rate_item = self._centered_item(rate)  # the rate is kept as text
            # Rates starting with a minus sign are green, all others red.
            # startswith() also copes with an empty cell.
            if rate.strip().startswith("-"):
                rate_item.setForeground(QColor("green"))
            else:
                rate_item.setForeground(QColor("red"))
            self.table.setItem(row, 3, rate_item)
        self.table.setSortingEnabled(True)

    def create_canvas(self):
        """Create the matplotlib canvas used by the plot tab."""
        self.canvas = Canvas(self)

    def setup_centralWidget(self):
        """Install a tab widget with the table tab and the plot tab."""
        self.tabWidget = QTabWidget()
        self.tabWidget.addTab(self.table, "Table ")

        vlayout = QVBoxLayout()
        navigation_toolbar = NavigationToolbar2QT(self.canvas, self)
        vlayout.addWidget(self.canvas)
        vlayout.addWidget(navigation_toolbar)
        plotWidget = QWidget()
        plotWidget.setLayout(vlayout)
        self.tabWidget.addTab(plotWidget, "Plot")

        self.tabWidget.setCurrentIndex(1)  # open on the plot tab
        self.setCentralWidget(self.tabWidget)

    def createActions(self):
        """Create the exit, query and about actions."""
        self.exitAction = QAction("E&xit", self)
        self.exitAction.triggered.connect(self.close)
        self.queryAction = QAction("查询", self)
        self.queryAction.triggered.connect(self.query)
        self.helpAboutAction = QAction("About", self)
        # Key sequences join modifier and key with '+'.
        self.helpAboutAction.setShortcut("Ctrl+H")
        self.helpAboutAction.triggered.connect(self.showAboutDlg)

    def setup_menuBar(self):
        """Create the File and Help menus."""
        fileMenu = self.menuBar().addMenu("&File")
        fileMenu.addAction(self.exitAction)
        helpMenu = self.menuBar().addMenu("&Help")
        helpMenu.addAction(self.helpAboutAction)

    def showAboutDlg(self):
        """Show the about dialog."""
        QMessageBox.about(self, u"title",
                          u"Version: 0.1\n"
                          u"author: wsp")

    def name_selected(self):
        """Take the fund code from the combo box's current entry.

        Entries have the form "<code> <name>", so the code is the text before
        the first space.
        """
        self.name = self.comboName.currentText()
        self.code = self.name.split(" ")[0]
        print(self.name, self.code)

    def closeEvent(self, event):
        """Ask for confirmation, then save the known funds and close."""
        reply = QMessageBox.question(self, '提示', "是否要退出程序?",
                                     QMessageBox.Yes | QMessageBox.No,
                                     QMessageBox.No)
        if reply == QMessageBox.Yes:
            # Persist the code/name dict so the combo box can be refilled on
            # the next start.
            with open(self.FUNDS_FILE, "wb") as f:
                pickle.dump(self.funds, f)
            event.accept()
        else:
            event.ignore()

    @staticmethod
    def download(url, user_agent='wswp', num_retries=2, proxies=None):
        """Download ``url`` and return the response text.

        Args:
            url: Address to fetch.
            user_agent: Value sent in the User-Agent header.
            num_retries: How many more attempts are allowed after a 5xx reply.
            proxies: Optional proxies mapping passed to ``requests.get``.

        Returns:
            The response body as text, or None if the request failed or the
            server answered with a status code of 400 or above.
        """
        print("Downloading: ", url)
        headers = {'User-Agent': user_agent}
        try:
            resp = requests.get(url, headers=headers, proxies=proxies,
                                timeout=MainWindow.DOWNLOAD_TIMEOUT)
        except requests.exceptions.RequestException as e:
            print('Download error: ', e)
            return None
        if resp.status_code >= 400:
            print("Download error: ", resp.text)
            if num_retries > 0 and 500 <= resp.status_code < 600:
                # Server-side errors may be transient: try again.
                return MainWindow.download(url, user_agent,
                                           num_retries - 1, proxies)
            return None
        return resp.text

    @classmethod
    def _page_url(cls, code, page, start, end):
        """Return the history URL for one page of a fund's records."""
        return cls.URL_TEMPLATE.format(code=code, page=str(page),
                                       start=start, end=end)

    @staticmethod
    def _fund_name(soup, fallback):
        """Return the text before the first "_" in <title>, else ``fallback``."""
        title = soup.find(name="title")
        if title is None:
            return fallback
        return title.text.split("_")[0]

    @staticmethod
    def _page_count(soup):
        """Return the number of result pages announced by the pager (>= 1)."""
        pager = soup.find(name="div", attrs={"class": "mod_pages"})
        if pager is None:
            return 1
        links = pager.find_all(name="a")
        try:
            # The second-to-last link is read as the last page number
            # (presumably the final link is a "next page" link -- TODO confirm).
            return max(1, int(links[-2].text))
        except (IndexError, ValueError):
            return 1

    def _append_rows(self, soup):
        """Append every complete four-cell record found in ``soup``.

        All <td> cells of the page are read in document order and grouped in
        fours: date, unit net value, accumulated net value, growth rate.
        """
        cells = [td.text.strip() for td in soup.find_all("td")]
        # Step over complete groups only, so the four lists stay equally long.
        for first in range(0, len(cells) - 3, 4):
            date, net, acc_net, rate = cells[first:first + 4]
            try:
                net_value = float(net)
                acc_net_value = float(acc_net)
            except ValueError:
                print("Skipping unparsable row: ", cells[first:first + 4])
                continue
            self.date.append(date)
            self.net.append(net_value)
            self.acc_net.append(acc_net_value)
            self.rate.append(rate)

    def query(self):
        """Download the selected fund's records and refresh table and plot.

        On success the fund is added to ``self.funds`` and to the combo box
        if it was not known before.
        """
        if not self.code:
            QMessageBox.critical(self, "错误", "基金代码为空或格式错误!")
            self.codeInput.setFocus()
            return
        # Work on a local copy: combo box signals fired further down may
        # reassign self.code while this method is still running.
        code = self.code
        start = self.start.text()
        end = self.end.text()

        html = self.download(self._page_url(code, 0, start, end))
        if html is None:
            QMessageBox.critical(self, "错误", "爬不到有效信息,请检查基金代码是否有误!")
            return
        soup = BeautifulSoup(html, 'html.parser')
        self.name = self._fund_name(soup, code)
        pages = self._page_count(soup)
        print("pages:", pages)

        self.date, self.net, self.acc_net, self.rate = [], [], [], []
        self._append_rows(soup)
        for page in range(1, pages):
            html = self.download(self._page_url(code, page, start, end))
            if html is None:
                # Keep what was collected so far instead of crashing.
                print("Download error: page skipped: ", page)
                continue
            self._append_rows(BeautifulSoup(html, 'html.parser'))

        self.update_table()
        self.canvas.update_figure(x=self.date, y=self.net,
                                  title="%s (%s) 净值走势" % (self.name, code))

        itemText = code + " " + self.name
        if code not in self.funds:
            self.funds[code] = self.name
            self.comboName.addItem(itemText)
        self.comboName.setCurrentText(itemText)

    def codeInputFinished(self):
        """Take the fund code typed into the code field."""
        self.code = self.codeInput.text()

    def setup_toolBar(self):
        """Create the toolbar: fund selector, code field, dates, query button."""
        label0 = QLabel("选择基金:")
        self.comboName = QComboBox()
        for fund_code, fund_name in sorted(self.funds.items()):
            self.comboName.addItem(fund_code + " " + fund_name)
        # Connected after the initial fill, so the fill emits nothing to the
        # slot.
        self.comboName.currentIndexChanged[int].connect(self.name_selected)
        self.comboName.setStatusTip("选择基金")

        label_ = QLabel(" 基金代码:")
        self.codeInput = QLineEdit()
        # Fund codes are exactly six digits.
        validator = QRegExpValidator(QRegExp(r"^\d{6}$"))
        self.codeInput.setValidator(validator)
        self.codeInput.setFixedWidth(50)
        self.codeInput.editingFinished.connect(self.codeInputFinished)

        label1 = QLabel(" 起始日期")
        self.start = QDateEdit()
        self.start.setCalendarPopup(True)
        self.start.setDisplayFormat("yyyy-MM-dd")
        label2 = QLabel(" 截止日期")
        self.end = QDateEdit()
        self.end.setCalendarPopup(True)
        self.end.setDisplayFormat("yyyy-MM-dd")

        # Default range: the last three months; neither date may pass today.
        today = QDate.currentDate()
        self.start.setMaximumDate(today)
        self.start.setDate(today.addMonths(-3))
        self.end.setDate(today)
        self.end.setMaximumDate(today)

        toolbar0 = self.addToolBar("选择")
        toolbar0.addWidget(label0)
        toolbar0.addWidget(self.comboName)
        toolbar0.addWidget(label_)
        toolbar0.addWidget(self.codeInput)
        toolbar0.addWidget(label1)
        toolbar0.addWidget(self.start)
        toolbar0.addWidget(label2)
        toolbar0.addWidget(self.end)
        toolbar0.addSeparator()
        self.queryButton = QPushButton("查询")
        self.queryButton.clicked.connect(self.query)
        toolbar0.addWidget(self.queryButton)
        toolbar0.addSeparator()
if __name__ == '__main__':
    # Script entry point: create the Qt application, show the main window and
    # hand the event loop's return value to the interpreter as exit status.
    app = QApplication(sys.argv)
    mw = MainWindow()
    mw.show()
    sys.exit(app.exec_())