python 爬虫示例--基金查询demo

2019-11-05 00:56:52 浏览数 (2)

这两天试着学了一下爬虫,刚学会了爬取静态网页,就趁热现学现卖,做了一个基金查询的demo。

基金数据来自网易财经基金页面,其URL格式为:

代码语言:python
"http://quotes.money.163.com/fund/jzzs_{code}_{page}.html?start={start}&end={end}&sort=TDATE&order=desc".format(
            code=code,page="0",start=start,end=end)
如 "http://quotes.money.163.com/fund/jzzs_001630_0.html?start=2009-02-22&end=2019-10-29&sort=TDATE&order=desc" 

其中code为基金代码,例如"001630";start和end为起始日期和截止日期,格式为 "yyyy-MM-dd"

爬取到的基金净值数据用PyQt5的表格控件展示,再用matplotlib绘制成图,嵌入UI界面。

通过基金代码查询到的新的基金的名称和代码信息会存入文件,以供下次打开程序时程序下拉框自动加载。

代码如下:

代码语言:python
import sys
from PyQt5.QtWidgets import *
from PyQt5.QtGui import QColor, QFont, QIcon,QPixmap,QRegExpValidator
from PyQt5.QtCore import Qt, QSize,QDate,QRegExp
import pickle
import requests
import re
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg,NavigationToolbar2QT
from matplotlib.figure import Figure
#import numpy as np

class Canvas(FigureCanvasQTAgg):
    """Matplotlib canvas embedded in the Qt UI that plots one net-value series."""

    def __init__(self, parent=None, width=5, height=4, dpi=100):
        """Create a figure with a single axes and attach the canvas to *parent*.

        Args:
            parent: Qt parent widget, or None.
            width: Figure width in inches.
            height: Figure height in inches.
            dpi: Figure resolution in dots per inch.
        """
        fig = Figure(figsize=(width, height), dpi=dpi)
        fig.set_tight_layout(True)
        self.axes = fig.add_subplot(111)
        self._style_ticks()
        FigureCanvasQTAgg.__init__(self, fig)  # initialise the Qt/matplotlib base class
        self.setParent(parent)
        FigureCanvasQTAgg.updateGeometry(self)

    def _style_ticks(self):
        """Rotate the x tick labels by 90 degrees so the date labels do not overlap."""
        self.axes.tick_params(axis='x', rotation=90, direction="in")

    def update_figure(self, x, y, title):
        """Redraw the plot from scratch.

        The series are plotted in reverse of the order given (callers pass
        newest-first data, so the plot runs oldest-to-newest).

        Args:
            x: Sequence of x values (date strings in this program).
            y: Sequence of y values, same length as *x*.
            title: Title shown above the axes.
        """
        # Work on reversed copies: reversing in place would silently reorder
        # the caller's lists, which it still uses after this call.
        xs = list(reversed(x))
        ys = list(reversed(y))
        self.axes.cla()  # drop whatever was plotted before
        # cla() may reset the tick styling chosen in __init__, so apply it again.
        self._style_ticks()
        self.axes.set_title(title, fontsize=18)
        self.axes.plot(xs, ys)
        self.axes.scatter(xs, ys, marker='o')
        self.axes.set_ylabel("基金净值[元]")
        self.axes.grid(lw=0.5, ls="--", alpha=0.5)
        self.draw()  # repaint the canvas
 
class MainWindow(QMainWindow):
    def __init__(self, parent = None):
        """Load the saved fund list and build the whole main-window UI.

        Args:
            parent: Optional Qt parent widget.
        """
        super().__init__(parent)
        # The {code: name} dict of previously queried funds is persisted with pickle.
        # NOTE(security): unpickling can execute arbitrary code; info.obj must
        # only ever be a file written by this program.
        try:
            with open("info.obj", "rb") as fund_file:
                self.funds = pickle.load(fund_file)
        except FileNotFoundError:
            # First run: nothing has been saved yet, start with an empty list.
            self.funds = {}
        self.setWindowTitle("A股基金查询工具【数据来源于网易财经,python爬虫demo】")
        self.create_table()
        self.create_canvas()
        self.setup_centralWidget()
        self.createActions()
        self.setup_toolBar()
        self.setup_menuBar()
        self.statusBar().showMessage("ready")
        self.code = None
        # The combo box already displays its first entry, but no change signal
        # fires for it; sync self.code so that fund can be queried right away.
        if self.comboName.count() > 0:
            self.name_selected()
       
        #self.resize(800,500)
    def create_table(self):
        """Build the read-only, sortable four-column table for the net-value rows."""
        header_labels = ["公布日期", "单位净值","累计净值","增长率"]
        column_count = len(header_labels)
        self.rows = 100  # initial number of rows
        self.headerWidth = (100,80,80,80)

        table = QTableWidget()
        table.setEditTriggers(QAbstractItemView.NoEditTriggers)
        table.setColumnCount(column_count)
        table.setRowCount(self.rows)
        table.setSortingEnabled(True)
        table.horizontalHeader().setStyleSheet("QHeaderView::section{background-color:rgb(180,180,250);}")
        # Widths are applied to every column except the last one.
        for column, width in enumerate(self.headerWidth[:column_count - 1]):
            table.setColumnWidth(column, width)
        table.setHorizontalHeaderLabels(header_labels)
        self.table = table
    
    def update_table(self):
        """Refill the table from self.date, self.net, self.acc_net and self.rate.

        Each row shows date, unit net value, accumulated net value and growth
        rate. The growth-rate cell is green when the text starts with "-" and
        red otherwise.
        """
        records = list(zip(self.date, self.net, self.acc_net, self.rate))
        table = self.table
        # Inserting items while sorting is enabled lets Qt move rows during the
        # fill, so cells can land in the wrong row. Disable it, then restore.
        sorting_was_enabled = table.isSortingEnabled()
        table.setSortingEnabled(False)
        table.clearContents()
        # Never go below the initial row count, but shrink again after a long result.
        table.setRowCount(max(len(records), self.rows))
        centered = Qt.AlignHCenter | Qt.AlignVCenter
        for row, (date, net, acc_net, rate) in enumerate(records):
            texts = (date, str(net), str(acc_net), rate)  # rate stays as scraped text
            for column, text in enumerate(texts):
                item = QTableWidgetItem(text)
                item.setTextAlignment(centered)
                if column == 3:
                    # startswith() is safe on an empty string, unlike rate[0].
                    colour = "green" if rate.startswith("-") else "red"
                    item.setForeground(QColor(colour))
                table.setItem(row, column, item)
        table.setSortingEnabled(sorting_was_enabled)
            
    def create_canvas(self):
        """Create the plotting canvas with this window as its parent."""
        canvas = Canvas(parent=self)
        self.canvas = canvas
       
    def setup_centralWidget(self):
        """Install the central tab widget: the data table and the plot page."""
        # Plot page: canvas on top, matplotlib navigation toolbar underneath.
        nav_toolbar = NavigationToolbar2QT(self.canvas, self)
        plot_layout = QVBoxLayout()
        plot_layout.addWidget(self.canvas)
        plot_layout.addWidget(nav_toolbar)
        plot_page = QWidget()
        plot_page.setLayout(plot_layout)

        tabs = QTabWidget()
        tabs.addTab(self.table, "Table ")
        tabs.addTab(plot_page, "Plot")
        tabs.setCurrentIndex(1)  # start on the plot tab
        self.tabWidget = tabs
        self.setCentralWidget(tabs)  # make the tabs the window's central widget
    
    def createActions(self):
        """Create the exit, query and about actions and connect their handlers."""
        self.exitAction = QAction("E&xit",self)
        self.exitAction.triggered.connect(self.close)
        self.queryAction = QAction("查询",self)
        self.queryAction.triggered.connect(self.query)

        self.helpAboutAction = QAction("About",self)
        # Key-sequence strings join modifier and key with "+"; the previous
        # "Ctrl H" (plus sign lost) did not describe a usable shortcut.
        self.helpAboutAction.setShortcut("Ctrl+H")
        self.helpAboutAction.triggered.connect(self.showAboutDlg)
    
    def setup_menuBar(self):
        """Populate the menu bar with a File menu (exit) and a Help menu (about)."""
        menu_bar = self.menuBar()
        menu_bar.addMenu("&File").addAction(self.exitAction)
        menu_bar.addMenu("&Help").addAction(self.helpAboutAction)
    
    def showAboutDlg(self):
        """Show the modal About box with the version and author."""
        # The version and author belong on separate lines; the text previously
        # read "0.1n" because the backslash of the newline escape was lost.
        QMessageBox.about(self,u"title",
                          u"Version:  0.1\n"
                          u"author:  wsp")
       
    def name_selected(self):
        """Store the combo box text in self.name and its leading code in self.code."""
        selected_text = self.comboName.currentText()
        self.name = selected_text
        # Entries look like "<code> <name>"; everything before the first space is the code.
        self.code = selected_text.partition(" ")[0]
        print(self.name,self.code)
    
    def closeEvent(self, event):
        """Ask for confirmation, save the fund list, then accept or ignore the close.

        Args:
            event: The close event passed in by Qt.
        """
        reply = QMessageBox.question(self, '提示',"是否要退出程序?",
                                     QMessageBox.Yes | QMessageBox.No,QMessageBox.No)
        if reply != QMessageBox.Yes:
            event.ignore()
            return
        # Persist the {code: name} dict so the combo box can be refilled next time.
        # The with-block guarantees the file is flushed and closed.
        try:
            with open("info.obj", "wb") as fund_file:
                pickle.dump(self.funds, fund_file)
        except OSError as err:
            # A failed save must not raise out of the event handler or block closing.
            QMessageBox.warning(self, '提示', "基金列表保存失败: %s" % err)
        event.accept()

    @staticmethod
    def download(url,user_agent='wswp',num_retries=2,proxies=None):
        """Fetch *url* and return the response body as text, or None on failure.

        Args:
            url: Address to fetch.
            user_agent: Value sent in the User-Agent header.
            num_retries: How many extra attempts to make after a 5xx response.
            proxies: Optional proxies mapping passed through to requests.

        Returns:
            The response text for a status below 400, otherwise None. None is
            also returned when the request itself fails (connection error,
            timeout, ...).
        """
        headers = {'User-Agent' : user_agent}
        attempts_left = max(num_retries, 0) + 1
        while attempts_left > 0:
            attempts_left -= 1
            print("Downloading: ", url)
            try:
                # The timeout keeps a stalled server from blocking the caller forever.
                resp = requests.get(url, headers=headers, proxies=proxies, timeout=10)
            except requests.exceptions.RequestException as e:
                # requests exceptions carry no ".reason" attribute; print the exception itself.
                print('Download error: ', e)
                return None
            if resp.status_code < 400:
                return resp.text
            print("Download error: ", resp.text)
            # Only server-side (5xx, including 500) errors are worth retrying.
            if not 500 <= resp.status_code < 600:
                return None
        return None
   
    def query(self):
        """Download the net-value history of the current fund and show it.

        Reads the fund code from self.code and the date range from the two
        date editors, fetches every result page, stores the parsed columns in
        self.date / self.net / self.acc_net / self.rate, refreshes the table
        and the plot, and records a newly seen fund in self.funds and the
        combo box.
        """
        code = self.code
        if not code:  # None (nothing chosen yet) or an empty input
            QMessageBox.critical(self, "错误", "基金代码为空或格式错误!")
            self.codeInput.setFocus()
            return

        start = self.start.text()
        end = self.end.text()
        url_template = ("http://quotes.money.163.com/fund/jzzs_{code}_{page}.html"
                        "?start={start}&end={end}&sort=TDATE&order=desc")

        html = self.download(url_template.format(code=code, page="0", start=start, end=end))
        if html is None:
            QMessageBox.critical(self, "错误", "爬不到有效信息,请检查基金代码是否有误!")
            return
        soup = BeautifulSoup(html, 'html.parser')

        # The fund name is the part of the page title before the first underscore.
        title_tag = soup.find(name="title")
        name = title_tag.text.split("_")[0] if title_tag is not None else code

        pages = self._count_pages(soup)
        print("pages:", pages)

        rows = self._extract_rows(soup)
        for page in range(1, pages):
            html = self.download(url_template.format(code=code, page=str(page), start=start, end=end))
            if html is None:
                # Keep what has been fetched so far instead of crashing on a None page.
                QMessageBox.warning(self, "错误", "部分页面下载失败,数据可能不完整!")
                break
            rows.extend(self._extract_rows(BeautifulSoup(html, 'html.parser')))

        self.name = name
        self.date = [row[0] for row in rows]      # date strings
        self.net = [row[1] for row in rows]       # unit net value
        self.acc_net = [row[2] for row in rows]   # accumulated net value
        self.rate = [row[3] for row in rows]      # growth rate, kept as text

        self.update_table()
        # Pass copies so the stored columns stay aligned whatever the canvas does with them.
        self.canvas.update_figure(x=list(self.date), y=list(self.net),
                                  title="%s (%s) 净值走势"%(self.name,self.code))
        itemText = self.code + " " + self.name
        if self.code not in self.funds:
            self.funds[self.code] = self.name
            self.comboName.addItem(itemText)
        self.comboName.setCurrentText(itemText)

    @staticmethod
    def _count_pages(soup):
        """Return the number of result pages announced by the pager, at least 1."""
        pager = soup.find(name="div", attrs = {"class": "mod_pages"})
        if pager is None:
            return 1
        anchors = pager.find_all(name="a")
        if len(anchors) < 2:
            return 1
        # The second-to-last link is read as the highest page number.
        try:
            return max(int(anchors[-2].text), 1)
        except ValueError:
            return 1

    @staticmethod
    def _extract_rows(soup):
        """Return (date, net, acc_net, rate) tuples built from the page's <td> cells."""
        cells = [td.text.strip() for td in soup.find_all("td")]
        rows = []
        # Cells are consumed four at a time; an incomplete trailing group is ignored.
        for first in range(0, len(cells) - 3, 4):
            date, net, acc_net, rate = cells[first:first + 4]
            try:
                rows.append((date, float(net), float(acc_net), rate))
            except ValueError:
                print("Skipping unparsable row: ", cells[first:first + 4])
        return rows
       
    def codeInputFinished(self):
        """Copy the text of the fund-code input box into self.code."""
        entered_code = self.codeInput.text()
        self.code = entered_code
    
    def setup_toolBar(self):
        """Build the tool bar: fund combo box, code input, date range and query button."""
        label0 = QLabel("选择基金:")
        self.comboName = QComboBox()
        # One "<code> <name>" entry per saved fund, ordered by code.
        for code, name in sorted(self.funds.items()):
            self.comboName.addItem(code + " " + name)
        # Connected only after filling, so the initial items fire no selection signal.
        self.comboName.currentIndexChanged[int].connect(self.name_selected)
        self.comboName.setStatusTip("选择基金")

        label_ = QLabel("   基金代码:")
        self.codeInput = QLineEdit()
        # Exactly six digits. The backslash matters: "^d{6}$" would only match "dddddd".
        regExp = QRegExp(r"^\d{6}$")
        validator = QRegExpValidator(regExp)
        self.codeInput.setValidator(validator)
        self.codeInput.setFixedWidth(50)
        self.codeInput.editingFinished.connect(self.codeInputFinished)

        label1 = QLabel("   起始日期")
        self.start= QDateEdit()
        self.start.setCalendarPopup(True)
        self.start.setDisplayFormat("yyyy-MM-dd")
        label2 = QLabel("   截止日期")
        self.end= QDateEdit()
        self.end.setCalendarPopup(True)
        self.end.setDisplayFormat("yyyy-MM-dd")
        today = QDate.currentDate()
        self.start.setMaximumDate(today)  # neither date may lie in the future
        self.start.setDate(today.addMonths (-3))  # default range: the last three months
        self.end.setDate(today)
        self.end.setMaximumDate(today)

        toolbar0 = self.addToolBar("选择")
        toolbar0.addWidget(label0)
        toolbar0.addWidget(self.comboName)
        toolbar0.addWidget(label_)
        toolbar0.addWidget(self.codeInput)
        toolbar0.addWidget(label1)
        toolbar0.addWidget(self.start)
        toolbar0.addWidget(label2)
        toolbar0.addWidget(self.end)
        toolbar0.addSeparator()
        self.queryButton = QPushButton("查询")
        self.queryButton.clicked.connect(self.query)
        toolbar0.addWidget(self.queryButton)
        toolbar0.addSeparator()
    
       
# Script entry point: create the Qt application, show the main window and
# hand control to the event loop until the window is closed.
if __name__ == '__main__':
    application = QApplication(sys.argv)
    main_window = MainWindow()
    main_window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)

0 人点赞