Python代码|Python做数据可视化的代码

2020-07-02 10:15:11 浏览数 (1)

前言:

从代码中学习Python知识以及Python与数据相关的知识,是一个有效的方法。例如:想了解如何用Python做数据可视化,我们可以从互联网上找一些Python数据可视化的代码来阅读、调试和迁移。这样做的好处是突出实用性。同时,我们再结合联想的学习方法,对所用到的可视化函数做更深入的了解和使用。我借用《数据科学和人工智能》这个公众号,分享一些我在解决实际数据问题时从网上找到的Python代码,希望这些代码对大家有所帮助和启发。


Python做数据可视化代码

代码语言:python
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd

# Load the sample dataset and preview its first ten rows.
df = pd.read_csv('./datasets/temporal.csv')
df.head(10)


# In[2]:


# Descriptive statistics for the frame.
df.describe()


# In[3]:


# Concise summary of the frame (columns, dtypes, non-null counts).
df.info()


# In[5]:


# Raise pandas' display limits so wide/long frames are shown with less truncation.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)


# In[6]:


# Per-column format strings: a currency format for 'data science', a
# month-year format for 'Mes' and a percentage format for 'machine learning'.
format_dict = {
    'data science': '${0:,.2f}',
    'Mes': '{:%m-%Y}',
    'machine learning': '{:.2%}',
}
# The date format string needs real datetimes, so convert the month column first.
df['Mes'] = pd.to_datetime(df['Mes'])
# Show the first rows rendered with those formats.
preview = df.head()
preview.style.format(format_dict)


# In[7]:


# Reduced format dictionary: only the date column gets a custom format here.
format_dict = {'Mes': '{:%m-%Y}'}
# Highlight the maximum (dark green) and the minimum (red).
styled = df.head().style.format(format_dict)
styled = styled.highlight_max(color='darkgreen')
styled.highlight_min(color='#ff0000')


# In[8]:


# Colour gradient over two of the columns.
gradient_columns = ['data science', 'machine learning']
df.head(10).style.format(format_dict).background_gradient(subset=gradient_columns, cmap='BuGn')


# In[9]:


# In-cell bars for two of the columns.
bar_columns = ['data science', 'deep learning']
df.head().style.format(format_dict).bar(subset=bar_columns, color='red')


# In[10]:


# Styles can be combined: gradient on two columns plus a yellow highlight of the maximum.
gradient_columns = ['data science', 'machine learning']
styled = df.head(10).style.format(format_dict)
styled = styled.background_gradient(subset=gradient_columns, cmap='BuGn')
styled.highlight_max(color='yellow')


# In[11]:


from pandas_profiling import ProfileReport

# Build the automatic profiling report for the frame and write it to an HTML file.
prof = ProfileReport(df)
prof.to_file(output_file='report.html')

# The styling cells before this point select columns by names containing
# spaces ('data science'), while every plotting cell after it uses underscores
# ('data_science'); nothing else in the script renames the columns.
# NOTE(review): presumably the notebook relied on the profiling step renaming
# the columns in place -- confirm against the installed pandas_profiling.
# Renaming explicitly keeps the later cells working regardless of that side
# effect; it is a no-op when the names already use underscores.
df.rename(
    columns=lambda name: name.replace(' ', '_') if isinstance(name, str) else name,
    inplace=True,
)


# In[17]:


import matplotlib.pyplot as plt

# `label` only names the series for a legend; the legend itself is drawn by a
# separate call (plt.legend(), used in a later cell).
months = df['Mes']
plt.plot(months, df['data_science'], label='data science')


# In[14]:


# Display the whole frame.
df


# In[18]:


# Several series can share one chart: each plot call adds a line.
dates = df['Mes']
plt.plot(dates, df['data_science'], label='data science')
plt.plot(dates, df['machine_learning'], label='machine learning')
plt.plot(dates, df['deep_learning'], label='deep learning')


# In[19]:


# Same three lines, now with axis labels, a title, a grid and the legend.
for column, legend_text in (
    ('data_science', 'data science'),
    ('machine_learning', 'machine learning'),
    ('deep_learning', 'deep learning'),
):
    plt.plot(df['Mes'], df[column], label=legend_text)
plt.xlabel('Date')
plt.ylabel('Popularity')
plt.title('Popularity of AI terms by date')
plt.grid(True)
plt.legend()


# In[20]:


# 2x2 grid of subplots: histogram, scatter and two line charts.
fig, axes = plt.subplots(2, 2)
(ax_hist, ax_scatter), (ax_ml, ax_dl) = axes
ax_hist.hist(df['data_science'])
ax_scatter.scatter(df['Mes'], df['data_science'])
ax_ml.plot(df['Mes'], df['machine_learning'])
ax_dl.plot(df['Mes'], df['deep_learning'])


# In[21]:


# Format strings pick colour and marker: red line, blue squares, green triangles.
data_science = df['data_science']
plt.plot(df['Mes'], data_science, 'r-')
plt.plot(df['Mes'], data_science * 2, 'bs')
plt.plot(df['Mes'], data_science * 3, 'g^')


# In[23]:


# Scatter plot of one series against another.
plt.scatter(x=df['data_science'], y=df['machine_learning'])


# In[24]:


# Bar chart over the dates; `width` is given in x-axis data units.
plt.bar(x=df['Mes'], height=df['machine_learning'], width=20)


# In[25]:


# Histogram of the series split into 15 bins.
plt.hist(x=df['deep_learning'], bins=15)


# In[26]:


# Line chart of the three series with axis labels, a title and a grid,
# plus a free-text note and an arrow annotation.
plt.plot(df['Mes'], df['data_science'], label='data science')
plt.plot(df['Mes'], df['machine_learning'], label='machine learning')
plt.plot(df['Mes'], df['deep_learning'], label='deep learning')
plt.xlabel('Date')
plt.ylabel('Popularity')
plt.title('Popularity of AI terms by date')
plt.grid(True)
# Text/annotation coordinates use the same units as the plotted data. The x
# positions are passed as timestamps rather than bare strings so they land on
# the date axis without depending on string-to-date conversion by whichever
# unit converter happens to be registered.
# The mathtext needs the backslash: r'$\lambda$' renders the Greek letter,
# whereas '$lambda$' renders the word "lambda" in italics.
plt.text(x=pd.Timestamp('2010-01-01'), y=80, s=r'$\lambda=1, r^2=0.8$')
plt.annotate(
    'Notice something?',
    xy=(pd.Timestamp('2014-01-01'), 30),       # point the arrow targets
    xytext=(pd.Timestamp('2006-01-01'), 50),   # where the text is placed
    arrowprops={'facecolor': 'red', 'shrink': 0.05},
)


# In[28]:


import seaborn as sns

# Apply seaborn's default theme to subsequent plots.
sns.set()
# x and y are given by keyword together with `data`, matching the other
# seaborn calls in this script; passing the data vectors positionally is
# deprecated since seaborn 0.11 and rejected by seaborn 0.12+.
sns.scatterplot(x='Mes', y='data_science', data=df)


# In[29]:


# Relational plot: one panel per 'categorical' value, point colour taken from
# 'data_science' and point size from 'machine_learning'.
sns.relplot(x='Mes', y='deep_learning', hue='data_science', size='machine_learning', col='categorical', data=df)


# In[30]:


# Correlation heatmap. Only numeric/boolean columns are correlated: the frame
# also holds the datetime column 'Mes', and pandas 2.x no longer silently
# drops non-numeric columns in DataFrame.corr().
numeric_columns = df.select_dtypes(include=['number', 'bool'])
sns.heatmap(numeric_columns.corr(), annot=True, fmt='.2f')


# In[31]:


# Pairwise relationships between the columns of the frame.
sns.pairplot(df)


# In[32]:


# Same pair plot, coloured by the 'categorical' column.
sns.pairplot(df, hue='categorical')


# In[34]:


# Joint plot of two series.
sns.jointplot(x='data_science', y='machine_learning', data=df)


# In[35]:


# Violin plot of 'data_science' for each 'categorical' value.
sns.catplot(x='categorical', y='data_science', kind='violin', data=df)


# In[36]:


# Two seaborn scatter plots side by side on matplotlib axes that share the y axis.
fig, axes = plt.subplots(1, 2, sharey=True, figsize=(8, 4))
sns.scatterplot(x="Mes", y="deep_learning", hue="categorical", data=df, ax=axes[0])
axes[0].set_title('Deep Learning')
sns.scatterplot(x="Mes", y="machine_learning", hue="categorical", data=df, ax=axes[1])
axes[1].set_title('Machine Learning')


# In[37]:


from bokeh.plotting import figure, output_file, save

# Direct the next saved document to this HTML file.
output_file('data_science_popularity.html')


# In[38]:


# 'Mes' is converted with pd.to_datetime earlier in the script, so a datetime
# x axis is requested; the default linear axis would label the ticks with raw
# numbers instead of dates.
p = figure(
    title='data science',
    x_axis_label='Mes',
    y_axis_label='data science',
    x_axis_type='datetime',
)
# `legend_label` replaces the glyph keyword `legend`, which Bokeh deprecated
# in 1.4 and removed in 3.0.
p.line(df['Mes'], df['data_science'], legend_label='popularity', line_width=2)
save(p)


# In[40]:


from bokeh.layouts import gridplot

output_file('multiple_graphs.html')
# All three figures are sized with width/height; the first one previously
# mixed `width` with `plot_height`, a name Bokeh 3 no longer accepts.
s1 = figure(width=250, height=250, x_axis_type='datetime', title='data science')
s1.circle(df['Mes'], df['data_science'], size=10, color='navy', alpha=0.5)
# Shares both axis ranges with s1.
s2 = figure(width=250, height=250, x_axis_type='datetime',
            x_range=s1.x_range, y_range=s1.y_range, title='machine learning')
s2.triangle(df['Mes'], df['machine_learning'], size=10, color='red', alpha=0.5)
# Shares only the x range with s1.
s3 = figure(width=250, height=250, x_axis_type='datetime',
            x_range=s1.x_range, title='deep learning')
s3.square(df['Mes'], df['deep_learning'], size=5, color='green', alpha=0.5)
# Lay the three figures out in a single row and save them.
p = gridplot([[s1, s2, s3]])
save(p)


# References:
# - https://towardsdatascience.com/complete-guide-to-data-visualization-with-python-2dd74df12b5e

这份Python代码我已在notebook中调试并测试通过。

用到Python的库有pandas, pandas_profiling, matplotlib, seaborn和bokeh。

配套的数据集和notebook下载链接如下:

http://47.112.229.252:80/f/97e449826d0a44468a8e/

0 人点赞