前言
上次我们爬取了拉勾网的数据,这次来对数据做分析:看看各公司的招聘信息中具体有哪些需求,并用可视化图表展示出来。
导入所需模块
代码语言:javascript复制import numpy as np
import pandas as pd
from pyecharts.charts import *
from pyecharts import options as opts
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']
from pyecharts.globals import ThemeType
有疑问的同学,或者想要Python相关资料的可以加群:1039649593 找管理员领取资料和一对一解答
清洗数据
导入数据
# Load the scraped job postings; the file is read as UTF-8.
df = pd.read_csv('data.csv', encoding='UTF-8')
# Preview the first rows (shown as the cell output when run in a notebook).
df.head()
查看整体性描述
# Column names, non-null counts and dtypes.
df.info()
# Summary statistics (pandas describes the numeric columns by default).
df.describe()
删除重复值
# Remove fully duplicated rows, modifying df in place.
df.drop_duplicates(inplace=True)
# Sanity check: count of duplicated rows still present (expected to be 0).
df.duplicated().sum()
提取地区和薪资信息
# Keep only the part of the region before the first '-'.
# The .str accessor leaves missing values as NaN instead of raising.
df['地区'] = df['地区'].str.split('-').str[0]

# Inspect the distinct values of the columns used below.
df['地区'].unique()
df['经验'].unique()
df['薪资'].unique()

# Pull the two salary bounds out of the salary text and convert them to floats.
# NOTE(review): presumably the salary text looks like "10-20K" (a single
# uppercase K after the upper bound) -- confirm against the data. With a form
# such as "10K-20K" both patterns would capture the first number.
# NOTE: the column names look inverted relative to their content: 'm_max'
# receives the first number in the string (the lower bound) and 'm_min' the
# number directly before 'K' (the upper bound). The names are kept unchanged
# for compatibility; the average below is unaffected by the swap.
df['m_max'] = df['薪资'].str.extract(r'(\d+)', expand=False).astype('float64')
df['m_min'] = df['薪资'].str.extract(r'(\d+)K', expand=False).astype('float64')

# Midpoint of the two bounds.
df['平均薪资'] = (df['m_max'] + df['m_min']) / 2
df.head()
可视化
每个地区的招聘数量
# Number of postings per region: non-null '标题' values counted in each group.
dq = df.groupby('地区')['标题'].count()
dq_index = dq.index.tolist()
dq_value = dq.values.tolist()

# Bar chart with one bar per region.
bar1 = Bar(
    init_opts=opts.InitOpts(
        width='800px',
        height='400px',
        theme=ThemeType.MACARONS,
    )
)
bar1.add_xaxis(dq_index)
bar1.add_yaxis('', dq_value, category_gap="50%")
bar1.set_global_opts(
    title_opts=opts.TitleOpts(title="每个地区的招聘数量"),
    # Rotate the x-axis labels by -50 degrees.
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-50)),
    # Colour scale (visual map) with its upper end fixed at 80.
    visualmap_opts=opts.VisualMapOpts(max_=80),
    # Draggable zoom slider along the axis.
    datazoom_opts=[opts.DataZoomOpts()],
)
bar1.render_notebook()
经验学历需求图
# NOTE(review): `jingyan` was used here without being defined anywhere earlier
# in the file. It is presumably the posting count per value of the '经验'
# column (the chart title is about experience/education) -- confirm.
jingyan = df.groupby('经验')['标题'].count()

# (label, count) pairs; .tolist() yields plain Python ints for the counts.
pair_1 = list(zip(jingyan.index.tolist(), jingyan.values.tolist()))

# Ring-shaped pie chart; each slice is labelled "name:percent%".
pie = Pie(
    init_opts=opts.InitOpts(
        theme=ThemeType.MACARONS,
        width='1000px',
        height='600px',
    )
)
pie.add('', pair_1, radius=['40%', '70%'])
pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%"))
pie.set_global_opts(
    # The title is positioned at the centre of the chart.
    title_opts=opts.TitleOpts(
        title="经验学历需求图",
        pos_left='center',
        pos_top='center',
        title_textstyle_opts=opts.TextStyleOpts(
            color='black',
            font_size=20,
            font_weight='bold',
        ),
    )
)
pie.render_notebook()
招聘公司所在领域
# NOTE(review): the chart title is about the companies' field, but the data is
# grouped by the '经验' column -- this looks like a copy/paste slip. The
# intended column is not visible here, so the key is left unchanged; confirm.
gongsi = df.groupby('经验')['标题'].count()

# [label, count] pairs for the chart series.
gongsi_pairs = [
    [label, count]
    for label, count in zip(gongsi.index.tolist(), gongsi.values.tolist())
]

# Rose-type pie chart (rosetype="radius") with the slice labels hidden.
pie1 = Pie(
    init_opts=opts.InitOpts(
        theme=ThemeType.MACARONS,
        width='1500px',
        height='600px',
    )
)
pie1.add(
    "",
    gongsi_pairs,
    radius=["20%", "80%"],
    center=["25%", "70%"],
    rosetype="radius",
    label_opts=opts.LabelOpts(is_show=False),
)
pie1.set_global_opts(title_opts=opts.TitleOpts(title="招聘公司所在领域"))
pie1.render_notebook()