首先pyecharts是个可视化的好工具,pyecharts已做了变更目前是1.6.2版本,但工具的灵活运用学习过程还是很痛苦的。
其次通过pandas处理数据集问题,在pandas进行数据清洗转换汇总过滤不是不可能,但学习曲线有点长,因此采用read_sql,通过数据库来进行数据转换,某些地方还是用到了pandas的透视表
再次数据需要在pandas、numpy和list之间不断转换,以适配图表的数据格式要求。
最后,最重要的是选择最合适的可视化图表类型。
关于数据是从网上下载的一份中国各省市自治区1950-2010年总体情况表,只保留了14个字段11个指标,通过基于某些计算要求,新加了一张省市自治区名称转换表,以实现map功能和基于行政区划的统计。
表名称:chinaeconomy 中国各省市自治区1950-2010年总体情况表
REPORTYEAR年度
PROVINCEID省市编号
PROVINCENAME省市名称
GDP地区国内生产总值(亿元)
EVERAGEGDP人均国内生产总值(元)
PRIMARYINDUSTRYVALUE各地区第一产业增加值(亿元)
SECONDINDUSTRYVALUE各地区第二产业增加值(亿元)
THIRDINDUSTRYVALUE各地区第三产业增加值(亿元)
IMPORTEXPORTVALUE进出口总额(万美元)
EXPORTVALUE出口总额(万美元)
POPULATION年底总人口数(万人)
BIRTHRATE人口出生率(‰)
DEADRATE人口死亡率(‰)
GROWTHRATE人口自然增长率(‰)
------------------------------------------------------
表名称:provinceinfo 中国各省市自治区对应echart中的省市自治区简写和行政区域
PROVINCENAME 省市名称
ECHARTPROVINCENAME echart省市名称
AREANAME 行政区划
基本SQL语句
select b.areaname,b.echartprovincename,a.reportyear,
a.gdp,a.everagegdp,
a.primaryindustryvalue,a.secondindustryvalue,a.thirdindustryvalue,
a.importexportvalue,a.exportvalue,
a.population,a.birthrate,a.deadrate,a.growthrate
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
码示例
代码语言:javascript复制import cx_Oracle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pyecharts.charts importBar
from pyecharts import options as opts
from pyecharts.globals importThemeType
from pyecharts.faker importFaker
from pyecharts.commons.utils importJsCode
from pyecharts.charts importEffectScatter
from pyecharts.charts importHeatMap
from pyecharts.charts importLine
from pyecharts.charts importScatter
from pyecharts.globals importSymbolType
# theme 定制图表样式,共内置15种
# ThemeType.WHITE|ThemeType.LIGHT|ThemeType.DARK|ThemeType.CHALK|ThemeType.ESSOS|
# ThemeType.INFOGRAPHIC|ThemeType.MACARONS|ThemeType.PURPLE_PASSION|ThemeType.ROMA|ThemeType.ROMANTIC|
# ThemeType.SHINE|ThemeType.VINTAGE|ThemeType.WALDEN|ThemeType.WESTEROS|Type.WONDERLAND
# 全局配置项
# InitOpts:初始化配置项
# TitleOpts:标题配置项
# AnimationOpts:Echarts画图动画配置项
# ToolBoxFeatureOpts:工具箱工具配置项
# ToolboxOpts:工具箱配置项
# BrushOpts:区域选择组件配置项
# DataZoomOpts:区域缩放配置项
# LegendOpts:图例配置项
# VisualMapOpts:视觉映射配置项
# TooltipOpts:提示框配置项
# AxisLineOpts: 坐标轴轴线配置项
# AxisTickOpts: 坐标轴刻度配置项
# AxisPointerOpts: 坐标轴指示器配置项
# AxisOpts:坐标轴配置项
# SingleAxisOpts:单轴配置项
# GraphicGroup:原生图形元素组件
# 系列配置项
# ItemStyleOpts:图元样式配置项
# TextStyleOpts:文字样式配置项
# LabelOpts:标签配置项
# LineStyleOpts:线样式配置项
# SplitLineOpts:分割线配置项
# MarkPointItem:标记点数据项
# MarkPointOpts:标记点配置项
# MarkLineItem:标记线数据项
# MarkLineOpts:标记线配置项
# MarkAreaItem: 标记区域数据项
# MarkAreaOpts: 标记区域配置项
# EffectOpts:涟漪特效配置项
# AreaStyleOpts:区域填充样式配置项
# SplitAreaOpts:分隔区域配置项
'''
数据情况,考虑到pandas对数据处理的不便,还是使用了标准的数据库方式
------------------------------------------------------
表名称:chinaeconomy 中国各省市自治区1950-2010年总体情况表
REPORTYEAR 年度
PROVINCEID 省市编号
PROVINCENAME 省市名称
GDP 地区国内生产总值(亿元)
EVERAGEGDP 人均国内生产总值(元)
PRIMARYINDUSTRYVALUE 各地区第一产业增加值(亿元)
SECONDINDUSTRYVALUE 各地区第二产业增加值(亿元)
THIRDINDUSTRYVALUE 各地区第三产业增加值(亿元)
IMPORTEXPORTVALUE 进出口总额(万美元)
EXPORTVALUE 出口总额(万美元)
POPULATION 年底总人口数(万人)
BIRTHRATE 人口出生率(‰)
DEADRATE 人口死亡率(‰)
GROWTHRATE 人口自然增长率(‰)
------------------------------------------------------
表名称:provinceinfo 中国各省市自治区对应echart中的省市自治区简写和行政区域
PROVINCENAME 省市名称
ECHARTPROVINCENAME echart省市名称
AREANAME 行政区划
------------------------------------------------------
基本SQL语句
select b.areaname,b.echartprovincename,a.reportyear,
a.gdp,a.everagegdp,
a.primaryindustryvalue,a.secondindustryvalue,a.thirdindustryvalue,
a.importexportvalue,a.exportvalue,
a.population,a.birthrate,a.deadrate,a.growthrate
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
'''
sql='''
select b.areaname,b.echartprovincename,a.reportyear,
a.gdp,a.everagegdp,
a.primaryindustryvalue,a.secondindustryvalue,a.thirdindustryvalue,
a.importexportvalue,a.exportvalue,a.importexportvalue-a.exportvalue as importvalue,
a.population,a.birthrate,a.deadrate,a.growthrate
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
'''
# 初始化
username = 'XXX'
password = 'XXX'
connstr ='127.0.0.1:1521/ORCL'
conn = cx_Oracle.connect(username,password,connstr) #连接数据库
# pandas.read_sql测试
df = pd.read_sql(sql,conn) #read_sql读取数据
print(df.head(5)) #测试前5行
# def bar_base_with_animation() -> Bar:
# c = (
# Bar(
# init_opts=opts.InitOpts( #初始化配置项
# theme=ThemeType.INFOGRAPHIC, #设置主题
# animation_opts=opts.AnimationOpts( #画图动画配置项
# animation_delay=1000, animation_easing="elasticOut"#动画设置
# ),
# bg_color={ #背景色
# "type": "pattern",
# "image": JsCode("img"), #背景图片
# "repeat": "repeat"} #是否重复
# )
# )
# .add_xaxis(Faker.choose()) #设置X轴数据
# .add_yaxis("商家A", Faker.values(), gap="10%") #设置Y周数据 gap不同系列柱间距离
# .add_yaxis("商家B", Faker.values(), gap="0%") #设置Y周数据 gap不同系列柱间距离
# .set_series_opts(itemstyle_opts={ #图元样式设置
# "normal": { #Bar-渐变圆柱
# "color": JsCode("""new echarts.graphic.LinearGradient(0, 0, 0, 1, [{
# offset: 0,
# color: 'rgba(0, 244, 255, 1)'
# }, {
# offset: 1,
# color: 'rgba(0, 77, 167, 1)'
# }], false)"""),
# "barBorderRadius": [30, 30, 30, 30],
# "shadowColor": 'rgb(0, 160, 221)',
# }})
# .set_global_opts( #全局配置项
# title_opts=opts.TitleOpts( #标题配置项
# title="Bar-动画配置基本示例", #报表标题
# subtitle="我是副标题", #报表副标题
# pos_left="center", #标题位置
# title_textstyle_opts=opts.TextStyleOpts(color="green"), #标题字体颜色
# subtitle_textstyle_opts=opts.TextStyleOpts(color="blue") #副标题字体颜色
# ),
# toolbox_opts=opts.ToolboxOpts(), #显示 ToolBox
# legend_opts = opts.LegendOpts(is_show=True,pos_left="right",pos_top="10%"),
# yaxis_opts=opts.AxisOpts(name="我是 Y 轴", #标轴配置项
# axislabel_opts=opts.LabelOpts(formatter="{value} /月") #坐标轴标签配置项,配置标签格式,Y 轴 formatter
# ),
# xaxis_opts=opts.AxisOpts(name="我是 X 轴", #标轴配置项
# axislabel_opts=opts.LabelOpts(rotate=45) #坐标轴标签配置项,配置倾斜度
# )
# )
# )
# c.add_js_funcs(
# """
# var img = newImage(); img.src = 'ditu.jpg';
# """
# )
# return c
# c=bar_base_with_animation()
# c.render()
# 柱状图 六大行政区划 进出口数据对比
sql='''
select b.areaname,
sum(a.exportvalue)/10000 as exportvalue,sum(a.importexportvalue-a.exportvalue)/10000 as importvalue
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
and a.reportyear=2010
group by b.areaname
order by 2
'''
df = pd.read_sql(sql,conn) #read_sql读取数据
xdata=df['AREANAME'].tolist()
exportdata=df['EXPORTVALUE'].round(2).tolist() #四舍五入,再转list
importdata=df['IMPORTVALUE'].round(2).tolist() #四舍五入,再转list
def bar_export_import_dif() -> Bar:
c = (
Bar(
# init_opts=opts.InitOpts( #初始化配置项
# theme=ThemeType.INFOGRAPHIC #设置主题
# )
)
.add_xaxis(xdata) #设置X轴数据
.add_yaxis("进口数据(亿)", exportdata, gap="10%") #设置Y周数据 gap不同系列柱间距离
.add_yaxis("出口数据(亿)", importdata, gap="0%") #设置Y周数据 gap不同系列柱间距离
.set_global_opts( #全局配置项
title_opts=opts.TitleOpts( #标题配置项
title="全国2010年进出口数据", #报表标题
subtitle="各行政区域对比", #报表副标题
pos_left="center", #标题位置
title_textstyle_opts=opts.TextStyleOpts(color="green"), #标题字体颜色
subtitle_textstyle_opts=opts.TextStyleOpts(color="blue") #副标题字体颜色
),
toolbox_opts=opts.ToolboxOpts(), #显示 ToolBox
legend_opts = opts.LegendOpts(is_show=True,pos_left="right",pos_top="7%")
)
)
return c
c=bar_export_import_dif()
c.render('bar_export_import_dif.html')
# 柱状图 六大行政区划 第1、2、3产业总体情况对比
sql='''
select b.areaname,
sum(a.primaryindustryvalue) as primaryindustryvalue,
sum(a.secondindustryvalue) as secondindustryvalue,
sum(a.thirdindustryvalue) as thirdindustryvalue
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
and a.reportyear=2010
group by b.areaname
order by 2
'''
df = pd.read_sql(sql,conn) #read_sql读取数据
xdata=df['AREANAME'].tolist()
primaryindustryvalue=df['PRIMARYINDUSTRYVALUE'].round(2).tolist() #四舍五入,再转list
secondindustryvalue=df['SECONDINDUSTRYVALUE'].round(2).tolist() #四舍五入,再转list
thirdindustryvalue=df['THIRDINDUSTRYVALUE'].round(2).tolist() #四舍五入,再转list
def bar_industry_dif() -> Bar:
c = (
Bar(
# init_opts=opts.InitOpts( #初始化配置项
# theme=ThemeType.INFOGRAPHIC #设置主题
# )
)
.add_xaxis(xdata) #设置X轴数据
.add_yaxis("第一产业(亿)", primaryindustryvalue, stack="stack1", gap="0", category_gap="40%") #设置Y周数据 gap不同系列柱间距离
.add_yaxis("第二产业(亿)", secondindustryvalue, stack="stack1", gap="0", category_gap="40%") #设置Y周数据 gap不同系列柱间距离
.add_yaxis("第三产业(亿)", thirdindustryvalue, stack="stack1", gap="0", category_gap="40%")
# .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
.set_global_opts( #全局配置项
title_opts=opts.TitleOpts( #标题配置项
title="全国2010年第一二三产业数据", #报表标题
subtitle="各行政区域对比", #报表副标题
pos_left="center", #标题位置
title_textstyle_opts=opts.TextStyleOpts(color="green"), #标题字体颜色
subtitle_textstyle_opts=opts.TextStyleOpts(color="blue") #副标题字体颜色
),
legend_opts = opts.LegendOpts(is_show=True,pos_left="right",pos_top="7%",orient='vertical')
)
)
return c
c=bar_industry_dif()
c.render('bar_industry_dif.html')
# 柱状图 六大行政区划出生率死亡率总体情况对比
sql='''
select b.areaname,
avg(a.birthrate) as birthrate,
avg(a.deadrate) as deadrate
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
and a.reportyear=2010
group by b.areaname
order by 2
'''
df = pd.read_sql(sql,conn) #read_sql读取数据
xdata=df['AREANAME'].tolist()
birthrate=df['BIRTHRATE'].round(2).tolist() #四舍五入,再转list
deadrate=df['DEADRATE'].round(2).tolist() #四舍五入,再转list
def bar_birth_dead() -> Bar:
c = (
Bar(
# init_opts=opts.InitOpts( #初始化配置项
# theme=ThemeType.INFOGRAPHIC #设置主题
# )
)
.add_xaxis(xdata) #设置X轴数据
.add_yaxis("出生率", birthrate, stack="stack1", gap="0", category_gap="40%") #设置Y周数据 gap不同系列柱间距离
.add_yaxis("死亡率", deadrate, stack="stack1", gap="0", category_gap="40%") #设置Y周数据 gap不同系列柱间距离
.reversal_axis() #旋转XY轴
# .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
.set_global_opts( #全局配置项
title_opts=opts.TitleOpts( #标题配置项
title="全国2010年出生率死亡率数据", #报表标题
subtitle="各行政区域对比", #报表副标题
pos_left="center", #标题位置
title_textstyle_opts=opts.TextStyleOpts(color="green"), #标题字体颜色
subtitle_textstyle_opts=opts.TextStyleOpts(color="blue") #副标题字体颜色
),
legend_opts = opts.LegendOpts(is_show=True,pos_left="right",pos_top="7%",orient='vertical')
)
)
return c
c=bar_birth_dead()
c.render('bar_birth_dead.html')
# 柱状图 六大行政区划出生率死亡率总体情况对比
sql='''
select b.echartprovincename,
a.birthrate as birthrate,
a.deadrate as deadrate
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
and a.reportyear=2010
order by 2
'''
df = pd.read_sql(sql,conn) #read_sql读取数据
xdata=df['ECHARTPROVINCENAME'].tolist()
birthrate=df['BIRTHRATE'].round(2).tolist() #四舍五入,再转list
deadrate=df['DEADRATE'].round(2).tolist() #四舍五入,再转list
def bar_province_birth_dead() -> Bar:
c = (
Bar(
init_opts=opts.InitOpts( #初始化配置项
theme=ThemeType.LIGHT, #设置主题
width="1000px",
height="600px"
)
)
.add_xaxis(xdata) #设置X轴数据
.add_yaxis("出生率", birthrate, stack="stack1", gap="0", category_gap="40%") #设置Y周数据 gap不同系列柱间距离
.add_yaxis("死亡率", deadrate, stack="stack1", gap="0", category_gap="40%") #设置Y周数据 gap不同系列柱间距离
.reversal_axis() #旋转XY轴
.set_global_opts( #全局配置项
title_opts=opts.TitleOpts( #标题配置项
title="全国2010年出生率死亡率数据", #报表标题
subtitle="各省市自治区对比", #报表副标题
pos_left="center", #标题位置
title_textstyle_opts=opts.TextStyleOpts(color="green",font_size=30), #标题字体颜色
subtitle_textstyle_opts=opts.TextStyleOpts(color="blue",font_size=20) #副标题字体颜色
),
yaxis_opts=opts.AxisOpts(name="各省市自治区", # 标轴配置项
axislabel_opts=opts.LabelOpts(font_size=8, interval=0,formatter="{value}") #设置x轴标签全部显示
),
legend_opts = opts.LegendOpts(is_show=True,pos_left="right",pos_top="7%",orient='vertical')
)
.set_series_opts( #Bar-MarkPoint(指定类型)
label_opts=opts.LabelOpts(is_show=False),
markline_opts=opts.MarkLineOpts(
data=[
opts.MarkLineItem(type_="max", name="最大值"),
opts.MarkLineItem(type_="min", name="最小值"),
opts.MarkLineItem(type_="average", name="平均值"),
]
)
# markpoint_opts=opts.MarkPointOpts(
# data=[
# opts.MarkPointItem(type_="max", name="最大值"),
# opts.MarkPointItem(type_="min", name="最小值"),
# opts.MarkPointItem(type_="average", name="平均值"),
# ]
# )
)
)
return c
c = bar_province_birth_dead()
c.render('bar_province_birth_dead.html')
# 柱状图 六大行政区划出生率死亡率总体情况对比,横坐标滑动
sql='''
select b.echartprovincename,
a.birthrate as birthrate,
a.deadrate as deadrate
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
and a.reportyear=2010
order by 2
'''
df = pd.read_sql(sql,conn) #read_sql读取数据
xdata=df['ECHARTPROVINCENAME'].tolist()
birthrate=df['BIRTHRATE'].round(2).tolist() #四舍五入,再转list
deadrate=df['DEADRATE'].round(2).tolist() #四舍五入,再转list
def bar_province_birth_dead_slide() -> Bar:
c = (
Bar(
init_opts=opts.InitOpts( #初始化配置项
theme=ThemeType.LIGHT, #设置主题
width="1000px",
height="600px"
)
)
.add_xaxis(xdata) #设置X轴数据
.add_yaxis("出生率", birthrate, stack="stack1", gap="0", category_gap="40%") #设置Y周数据 gap不同系列柱间距离
.add_yaxis("死亡率", deadrate, stack="stack1", gap="0", category_gap="40%") #设置Y周数据 gap不同系列柱间距离
.set_global_opts( #全局配置项
title_opts=opts.TitleOpts( #标题配置项
title="全国2010年出生率死亡率数据", #报表标题
subtitle="各省市自治区对比", #报表副标题
pos_left="center", #标题位置
title_textstyle_opts=opts.TextStyleOpts(color="green",font_size=30), #标题字体颜色
subtitle_textstyle_opts=opts.TextStyleOpts(color="blue",font_size=20) #副标题字体颜色
),
#datazoom_opts=opts.DataZoomOpts(), #滑动窗口
datazoom_opts=[opts.DataZoomOpts(), opts.DataZoomOpts(type_="inside")], #slider inside滑动窗口
xaxis_opts=opts.AxisOpts(name="各省市自治区", # 标轴配置项
axislabel_opts=opts.LabelOpts(font_size=8, interval=0,formatter="{value}",rotate=45) #设置x轴标签全部显示
),
legend_opts = opts.LegendOpts(is_show=True,pos_left="right",pos_top="7%",orient='vertical')
)
.set_series_opts( #Bar-MarkPoint(指定类型)
label_opts=opts.LabelOpts(is_show=False),
markline_opts=opts.MarkLineOpts(
data=[
opts.MarkLineItem(type_="max", name="最大值"),
opts.MarkLineItem(type_="min", name="最小值"),
opts.MarkLineItem(type_="average", name="平均值"),
]
)
# markpoint_opts=opts.MarkPointOpts(
# data=[
# opts.MarkPointItem(type_="max", name="最大值"),
# opts.MarkPointItem(type_="min", name="最小值"),
# opts.MarkPointItem(type_="average", name="平均值"),
# ]
# )
)
)
return c
c = bar_province_birth_dead_slide()
c.render('bar_province_birth_dead_slide.html')
# 柱状图 六大行政区划出生率死亡率总体情况对比,横坐标滑动
sql='''
select b.echartprovincename,
a.population as population
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
and a.reportyear=2010
order by 2
'''
df = pd.read_sql(sql,conn) #read_sql读取数据
xdata=df['ECHARTPROVINCENAME'].tolist()
population=df['POPULATION'].round(2).tolist() #四舍五入,再转list
def bar_population_histogram() -> Bar:
c = (
Bar()
.add_xaxis(xdata)
.add_yaxis("各省市自治区直辖市", population, category_gap=0) #直方图就是category_gap=0
.set_global_opts(title_opts=opts.TitleOpts(title="各省市自治区直辖市直方图"))
)
return c
c = bar_population_histogram()
c.render('bar_population_histogram.html')
# 涟漪特效散点图 各省市自治区人口
sql='''
select b.echartprovincename,b.areaname,
a.population as population
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
and a.reportyear=2010
order by b.areaname
'''
df = pd.read_sql(sql,conn) #read_sql读取数据
xdata=df['ECHARTPROVINCENAME'].tolist()
population=df['POPULATION'].round(2).tolist() #四舍五入,再转list
maxdata=max(population)
#huabei=df[df['AREANAME']=='西北地区']['POPULATION'].round(2).tolist()
def population_effectscatter_base() -> EffectScatter:
c = (
EffectScatter()
.add_xaxis(xdata)
.add_yaxis("人口", population)
.set_global_opts(title_opts=opts.TitleOpts(title="各省市自治区直辖市人口分布图"),
xaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True),
axislabel_opts=opts.LabelOpts(font_size=10, interval=0,
formatter="{value}", rotate=45)
),
yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
visualmap_opts=opts.VisualMapOpts(max_=maxdata),
)
)
return c
c = population_effectscatter_base()
c.render('population_effectscatter_base.html')
sql='''
select a.reportyear,b.echartprovincename,a.population
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
'''
df = pd.read_sql(sql,conn) #read_sql读取数据
res=pd.pivot_table(df,index=["ECHARTPROVINCENAME"],values=["POPULATION"],
columns=["REPORTYEAR"],aggfunc=[np.sum])
xdata= [colname[2] for colname in res.columns.tolist()] #时间
ydata= res.index.tolist() #省市自治区直辖市
newdata=[]
row,col=res.values.shape
maxdata=np.max(res.values)
mindata=np.min(res.values)
for i in range(row):
for j in range(col):
newdata.append([j,i,res.values[i,j]])
def heatmap_base() -> HeatMap:
c = (
HeatMap()
.add_xaxis(xdata)
.add_yaxis("series0", ydata, newdata,) #label_opts=opts.LabelOpts(is_show=True, position="inside"),)
.set_global_opts(
title_opts=opts.TitleOpts(title="HeatMap-基本示例"),
visualmap_opts=opts.VisualMapOpts(range_color=["#E5E8F3", "#6685CA", "#29487E"],max_=maxdata,min_=mindata),
yaxis_opts=opts.AxisOpts(name="", # 标轴配置项
axislabel_opts=opts.LabelOpts(font_size=8, interval=0, formatter="{value}")
# 设置x轴标签全部显示
),
)
)
return c
c = heatmap_base()
c.render('heatmap_base.html')
sql='''
select b.echartprovincename,
a.primaryindustryvalue,
a.secondindustryvalue,
a.thirdindustryvalue
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
and a.reportyear=2010
order by b.areaname
'''
df = pd.read_sql(sql,conn) #read_sql读取数据
xdata=df['ECHARTPROVINCENAME'].tolist()
primaryindustryvalue=df['PRIMARYINDUSTRYVALUE'].round(2).tolist() #四舍五入,再转list
secondindustryvalue=df['SECONDINDUSTRYVALUE'].round(2).tolist() #四舍五入,再转list
thirdindustryvalue=df['THIRDINDUSTRYVALUE'].round(2).tolist() #四舍五入,再转list
def line_base() -> Line:
c = (
Line()
.add_xaxis(xdata)
.add_yaxis("第一产业", primaryindustryvalue)
.add_yaxis("第二产业", secondindustryvalue)
.add_yaxis("第三产业", thirdindustryvalue)
.set_global_opts(title_opts=opts.TitleOpts(title="全国各省第一二三产业增加值"))
)
return c
c=line_base()
c.render('line_base.html')
def line_smooth() -> Line:
c = (
Line()
.add_xaxis(xdata)
.add_yaxis("第一产业", primaryindustryvalue, is_smooth=True)
.add_yaxis("第二产业", secondindustryvalue, is_smooth=True)
.add_yaxis("第三产业", thirdindustryvalue, is_smooth=True)
.set_global_opts(title_opts=opts.TitleOpts(title="Line-smooth"))
)
return c
c=line_smooth()
c.render('line_smooth.html')
sql='''
select b.areaname,a.reportyear,
sum(a.primaryindustryvalue) as primaryindustryvalue,
sum(a.secondindustryvalue) as secondindustryvalue,
sum(a.thirdindustryvalue) as thirdindustryvalue,
sum(a.primaryindustryvalue a.secondindustryvalue a.thirdindustryvalue) as industryvalue
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
group by b.areaname,a.reportyear
order by 2,1
'''
df = pd.read_sql(sql,conn) #read_sql读取数据
res=pd.pivot_table(df,index=["AREANAME"],values=["INDUSTRYVALUE"],
columns=["REPORTYEAR"],aggfunc=[np.sum])
# REPORTYEAR 195019511952... 200820092010
#AREANAME ...
#东北地区 0.00.083.9300... 28409.000031078.250037493.4800
#中南地区 0.00.0130.7400... 86223.216794397.1766112745.3439
#华东地区 0.00.0207.8800... 124093.5300136345.2800162031.3800
xdata= [str(colname[2]) for colname in res.columns.tolist()] #时间
#[1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1
ydata= res.index.tolist() #省市自治区直辖市
#['东北地区', '中南地区', '华东地区', '华北地区', '西北地区', '西南地区']
db=res.loc[['东北地区']].values[0].tolist()
zn=res.loc[['中南地区']].values[0].tolist()
hd=res.loc[['华东地区']].values[0].tolist()
hb=res.loc[['华北地区']].values[0].tolist()
xb=res.loc[['西北地区']].values[0].tolist()
xn=res.loc[['西南地区']].values[0].tolist()
# print(res.loc[['中南地区']])
# print(res.loc[['中南地区']].values)
# print(res.loc[['中南地区']].values[0].tolist())
def line_industry_smooth() -> Line:
c = (
Line()
.add_xaxis(xdata)
.add_yaxis("东北地区", db, is_smooth=True)
.add_yaxis("中南地区", zn, is_smooth=True)
.add_yaxis("华东地区", hd, is_smooth=True)
.add_yaxis("华北地区", hb, is_smooth=True)
.add_yaxis("西北地区", xb, is_smooth=True)
.add_yaxis("西南地区", xn, is_smooth=True)
.set_global_opts(title_opts=opts.TitleOpts(title="Line-smooth"))
)
return c
c=line_industry_smooth()
c.render('line_industry_smooth.html')
# 散点图 各省市自治区出生率,死亡率
sql='''
select b.echartprovincename,b.areaname,
a.birthrate as birthrate,
a.deadrate as deadrate
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
and a.reportyear=2010
order by b.areaname
'''
df = pd.read_sql(sql,conn) #read_sql读取数据
xdata=df['ECHARTPROVINCENAME'].tolist()
birthrate=df['BIRTHRATE'].round(2).tolist() #四舍五入,再转list
deadrate=df['DEADRATE'].round(2).tolist() #四舍五入,再转list
maxdata=max(birthrate)
def population_scatter() -> Scatter:
c = (
Scatter()
.add_xaxis(xdata)
.add_yaxis("人口出生率", birthrate)
.add_yaxis("人口死亡率", deadrate)
.set_global_opts(title_opts=opts.TitleOpts(title="各省市自治区直辖市人口出生率/死亡率情况"),
xaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True),
axislabel_opts=opts.LabelOpts(font_size=10, interval=0,
formatter="{value}", rotate=45)
),
yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
visualmap_opts=opts.VisualMapOpts(type_="size",max_=maxdata,min_=10),
)
)
return c
c = population_scatter()
c.render('population_scatter.html')
sql='''
select b.echartprovincename,b.areaname,
a.population as population,
a.birthrate as birthrate,
a.deadrate as deadrate
from chinaeconomy a,proviceinfo b
where a.provincename=b.provincename
and a.reportyear=2010
order by b.areaname
'''
df = pd.read_sql(sql,conn) #read_sql读取数据
xdata=df['ECHARTPROVINCENAME'].tolist()
population=df['POPULATION'].round(2).tolist() #四舍五入,再转list
birthrate=df['BIRTHRATE'].round(2).tolist() #四舍五入,再转list
deadrate=df['DEADRATE'].round(2).tolist() #四舍五入,再转list
maxdata=max(birthrate)
def overlap_bar_line() -> Bar:
bar = (
Bar()
.add_xaxis(xdata)
.add_yaxis("出生率", birthrate, stack="stack1")
.add_yaxis("死亡率", deadrate, stack="stack1")
.extend_axis(
yaxis=opts.AxisOpts(
axislabel_opts=opts.LabelOpts(formatter="{value}")#, interval=5
)
)
.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
.set_global_opts(
title_opts=opts.TitleOpts(title="Overlap-bar line(双 Y 轴)"),
yaxis_opts=opts.AxisOpts(
axislabel_opts=opts.LabelOpts(formatter="{value}%")
),
)
)
line = Line().add_xaxis(xdata).add_yaxis("人口总量", population, yaxis_index=1)
bar.overlap(line)
return bar
c = overlap_bar_line()
c.render('overlap_bar_line.html')