数据科学和人工智能技术笔记 二十、数据可视化

2022-12-02 18:49:21 浏览数 (2)

二十、数据可视化

作者:Chris Albon 译者:飞龙 协议:CC BY-NC-SA 4.0

MatPlotLib 中的双向条形图

代码语言:javascript复制
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# 创建数据帧
raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
        'pre_score': [4, 24, 31, 2, 3],
        'mid_score': [25, 94, 57, 62, 70],
        'post_score': [5, 43, 23, 23, 51]}
df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score'])
df

first_name

pre_score

mid_score

post_score

0

Jason

4

25

5

1

Molly

24

94

43

2

Tina

31

57

23

3

Jake

2

62

23

4

Amy

3

70

51

代码语言:javascript复制
# 输入数据,特别是第二和
# 第三行,跳过第一列
x1 = df.ix[1, 1:]
x2 = df.ix[2, 1:]

# 创建条形标签
bar_labels = ['Pre Score', 'Mid Score', 'Post Score']

# 创建图形
fig = plt.figure(figsize=(8,6))

# 设置 y 的位置
y_pos = np.arange(len(x1))
y_pos = [x for x in y_pos]
plt.yticks(y_pos, bar_labels, fontsize=10)

# 在 y_pos 的位置上创建水平条形
plt.barh(y_pos, 
         # 使用数据 x1
         x1, 
         # 中心对齐
         align='center', 
         # 透明度为 0.4
         alpha=0.4, 
         # 颜色为绿色
         color='#263F13')

# 在 y_pos 的位置上创建水平条形
plt.barh(y_pos, 
         # 使用数据 -x2
         -x2,
         # 中心对齐
         align='center', 
         # 透明度为 0.4
         alpha=0.4, 
         # 颜色为绿色
         color='#77A61D')

# 注解和标签
plt.xlabel('Tina's Score: Light Green. Molly's Score: Dark Green')
t = plt.title('Comparison of Molly and Tina's Score')
plt.ylim([-1,len(x1) 0.1])
plt.xlim([-max(x2)-10, max(x1) 10])
plt.grid()

plt.show()

MatPlotLib 中的条形图

代码语言:javascript复制
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# 创建数据帧
raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
        'pre_score': [4, 24, 31, 2, 3],
        'mid_score': [25, 94, 57, 62, 70],
        'post_score': [5, 43, 23, 23, 51]}
df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score'])
df

first_name

pre_score

mid_score

post_score

0

Jason

4

25

5

1

Molly

24

94

43

2

Tina

31

57

23

3

Jake

2

62

23

4

Amy

3

70

51

代码语言:javascript复制
# 为每个变量创建得分均值的列表
mean_values = [df['pre_score'].mean(), df['mid_score'].mean(), df['post_score'].mean()]

# 创建变动列表,设为得分上下 .25
variance = [df['pre_score'].mean() * 0.25, df['pre_score'].mean() * 0.25, df['pre_score'].mean() * 0.25]

# 设置条形标签
bar_labels = ['Pre Score', 'Mid Score', 'Post Score']

# 创建条形的 x 位置
x_pos = list(range(len(bar_labels)))

# 在 x 位置上创建条形图
plt.bar(x_pos,
        # 使用 mean_values 中的数据
        mean_values, 
        # y-error 直线设置为变动
        yerr=variance, 
        # 中心对齐
        align='center',
        # 颜色
        color='#FFC222',
        # 透明度为 0.5
        alpha=0.5)

# 添加网格
plt.grid()

# 设置 y 轴高度
max_y = max(zip(mean_values, variance)) # returns a tuple, here: (3, 5)
plt.ylim([0, (max_y[0]   max_y[1]) * 1.1])

# 设置轴标签和标题
plt.ylabel('Score')
plt.xticks(x_pos, bar_labels)
plt.title('Mean Scores For Each Test')

plt.show()

Seaborn 中的调色板

代码语言:javascript复制
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# 创建数据帧
data = {'date': ['2014-05-01 18:47:05.069722', '2014-05-01 18:47:05.119994', '2014-05-02 18:47:05.178768', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.280592', '2014-05-03 18:47:05.332662', '2014-05-03 18:47:05.385109', '2014-05-04 18:47:05.436523', '2014-05-04 18:47:05.486877'], 
        'deaths_regiment_1': [34, 43, 14, 15, 15, 14, 31, 25, 62, 41],
        'deaths_regiment_2': [52, 66, 78, 15, 15, 5, 25, 25, 86, 1],
        'deaths_regiment_3': [13, 73, 82, 58, 52, 87, 26, 5, 56, 75],
        'deaths_regiment_4': [44, 75, 26, 15, 15, 14, 54, 25, 24, 72],
        'deaths_regiment_5': [25, 24, 25, 15, 57, 68, 21, 27, 62, 5],
        'deaths_regiment_6': [84, 84, 26, 15, 15, 14, 26, 25, 62, 24],
        'deaths_regiment_7': [46, 57, 26, 15, 15, 14, 26, 25, 62, 41]}
df = pd.DataFrame(data, columns = ['date', 'battle_deaths', 'deaths_regiment_1', 'deaths_regiment_2',
                                   'deaths_regiment_3', 'deaths_regiment_4', 'deaths_regiment_5',
                                   'deaths_regiment_6', 'deaths_regiment_7'])
df = df.set_index(df.date)

sns.palplot(sns.color_palette("deep", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("muted", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("bright", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("dark", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("colorblind", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("Paired", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("BuGn", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("GnBu", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("OrRd", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("PuBu", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("YlGn", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("YlGnBu", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("YlOrBr", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("YlOrRd", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("BrBG", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("PiYG", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("PRGn", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("PuOr", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("RdBu", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("RdGy", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("RdYlBu", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("RdYlGn", 10))

代码语言:javascript复制
sns.palplot(sns.color_palette("Spectral", 10))

代码语言:javascript复制
# 创建调色板并将其设为当前调色板
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
sns.set_palette(flatui)
sns.palplot(sns.color_palette())

代码语言:javascript复制
# 设置绘图颜色
sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df.deaths_regiment_4,
            df.deaths_regiment_5, df.deaths_regiment_6, df.deaths_regiment_7], color="#34495e")

# <matplotlib.axes._subplots.AxesSubplot at 0x116f5db70> 

使用 Seaborn 和 pandas 创建时间序列绘图

代码语言:javascript复制
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

data = {'date': ['2014-05-01 18:47:05.069722', '2014-05-01 18:47:05.119994', '2014-05-02 18:47:05.178768', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.230071', '2014-05-02 18:47:05.280592', '2014-05-03 18:47:05.332662', '2014-05-03 18:47:05.385109', '2014-05-04 18:47:05.436523', '2014-05-04 18:47:05.486877'], 
        'deaths_regiment_1': [34, 43, 14, 15, 15, 14, 31, 25, 62, 41],
        'deaths_regiment_2': [52, 66, 78, 15, 15, 5, 25, 25, 86, 1],
        'deaths_regiment_3': [13, 73, 82, 58, 52, 87, 26, 5, 56, 75],
        'deaths_regiment_4': [44, 75, 26, 15, 15, 14, 54, 25, 24, 72],
        'deaths_regiment_5': [25, 24, 25, 15, 57, 68, 21, 27, 62, 5],
        'deaths_regiment_6': [84, 84, 26, 15, 15, 14, 26, 25, 62, 24],
        'deaths_regiment_7': [46, 57, 26, 15, 15, 14, 26, 25, 62, 41]}
df = pd.DataFrame(data, columns = ['date', 'battle_deaths', 'deaths_regiment_1', 'deaths_regiment_2',
                                   'deaths_regiment_3', 'deaths_regiment_4', 'deaths_regiment_5',
                                   'deaths_regiment_6', 'deaths_regiment_7'])
df = df.set_index(df.date)

sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df.deaths_regiment_4,
            df.deaths_regiment_5, df.deaths_regiment_6, df.deaths_regiment_7], color="indianred")

# <matplotlib.axes._subplots.AxesSubplot at 0x1140be780> 

代码语言:javascript复制
# 带有置信区间直线,但是没有直线的时间序列绘图
sns.tsplot([df.deaths_regiment_1, df.deaths_regiment_2, df.deaths_regiment_3, df.deaths_regiment_4,
            df.deaths_regiment_5, df.deaths_regiment_6, df.deaths_regiment_7], err_style="ci_bars", interpolate=False)

# <matplotlib.axes._subplots.AxesSubplot at 0x116400668> 

使用 Seaborn 创建散点图

代码语言:javascript复制
import pandas as pd
%matplotlib inline
import random
import matplotlib.pyplot as plt
import seaborn as sns

# 创建空数据帧
df = pd.DataFrame()

# 添加列
df['x'] = random.sample(range(1, 1000), 5)
df['y'] = random.sample(range(1, 1000), 5)
df['z'] = [1,0,0,1,0]
df['k'] = ['male','male','male','female','female']

# 查看前几行数据
df.head()

x

y

z

k

0

466

948

1

male

1

832

481

0

male

2

978

465

0

male

3

510

206

1

female

4

848

357

0

female

代码语言:javascript复制
# 设置散点图样式
sns.set_context("notebook", font_scale=1.1)
sns.set_style("ticks")

# 创建数据帧的散点图
sns.lmplot('x', # 横轴
           'y', # 纵轴
           data=df, # 数据源
           fit_reg=False, # 不要拟合回归直线
           hue="z", # 设置颜色
           scatter_kws={"marker": "D", # 设置标记样式
                        "s": 100}) # 设置标记大小

# 设置标题
plt.title('Histogram of IQ')

# 设置横轴标签
plt.xlabel('Time')

# 设置纵轴标签
plt.ylabel('Deaths')

# <matplotlib.text.Text at 0x112b7bb70> 

MatPlotLib 中的分组条形图

代码语言:javascript复制
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
        'pre_score': [4, 24, 31, 2, 3],
        'mid_score': [25, 94, 57, 62, 70],
        'post_score': [5, 43, 23, 23, 51]}
df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score'])
df

first_name

pre_score

mid_score

post_score

0

Jason

4

25

5

1

Molly

24

94

43

2

Tina

31

57

23

3

Jake

2

62

23

4

Amy

3

70

51

代码语言:javascript复制
# 设置条形的位置和宽度
pos = list(range(len(df['pre_score']))) 
width = 0.25 

# 绘制条形
fig, ax = plt.subplots(figsize=(10,5))

# 使用 pre_score 数据,
# 在位置 pos 上创建条形
plt.bar(pos, 
        # 使用数据 df['pre_score']
        df['pre_score'], 
        # 宽度
        width, 
        # 透明度为 0.5
        alpha=0.5, 
        # 颜色
        color='#EE3224', 
        # 标签是 first_name 的第一个值
        label=df['first_name'][0]) 

# 使用 mid_score 数据,
# 在位置 pos   一定宽度上创建条形
plt.bar([p   width for p in pos], 
        # 使用数据 df['mid_score']
        df['mid_score'],
        # 宽度
        width, 
        # 透明度为 0.5
        alpha=0.5, 
        # 颜色
        color='#F78F1E', 
        # 标签是 first_name 的第二个值
        label=df['first_name'][1]) 

# 使用 post_score 数据,
# 在位置 pos   一定宽度上创建条形
plt.bar([p   width*2 for p in pos], 
        # 使用数据 df['post_score']
        df['post_score'], 
        # 宽度
        width, 
        # 透明度为 0.5
        alpha=0.5, 
        # 颜色
        color='#FFC222', 
        # 标签是 first_name 的第三个值
        label=df['first_name'][2]) 

# 设置纵轴标签
ax.set_ylabel('Score')

# 设置标题
ax.set_title('Test Subject Scores')

# 设置 x 刻度的位置
ax.set_xticks([p   1.5 * width for p in pos])

# 设置 x 刻度的标签
ax.set_xticklabels(df['first_name'])

# 设置横轴和纵轴的区域
plt.xlim(min(pos)-width, max(pos) width*4)
plt.ylim([0, max(df['pre_score']   df['mid_score']   df['post_score'])] )

# 添加图例并展示绘图
plt.legend(['Pre Score', 'Mid Score', 'Post Score'], loc='upper left')
plt.grid()
plt.show()

MatPlotLib 中的直方图

代码语言:javascript复制
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math

# 设置 ipython 的最大行数
pd.set_option('display.max_row', 1000)

# 将 ipython 的最大列宽设为 50
pd.set_option('display.max_columns', 50)

df = pd.read_csv('https://www.dropbox.com/s/52cb7kcflr8qm2u/5kings_battles_v1.csv?dl=1')
df.head()

name

year

battle_number

attacker_king

defender_king

attacker_1

attacker_2

attacker_3

attacker_4

defender_1

defender_2

defender_3

defender_4

attacker_outcome

battle_type

major_death

major_capture

attacker_size

defender_size

attacker_commander

defender_commander

summer

location

region

note

0

Battle of the Golden Tooth

298

1

Joffrey/Tommen Baratheon

Robb Stark

Lannister

NaN

NaN

NaN

Tully

NaN

NaN

NaN

win

pitched battle

1

0

15000

4000

Jaime Lannister

Clement Piper, Vance

1

Golden Tooth

The Westerlands

NaN

1

Battle at the Mummer’s Ford

298

2

Joffrey/Tommen Baratheon

Robb Stark

Lannister

NaN

NaN

NaN

Baratheon

NaN

NaN

NaN

win

ambush

1

0

NaN

120

Gregor Clegane

Beric Dondarrion

1

Mummer’s Ford

The Riverlands

NaN

2

Battle of Riverrun

298

3

Joffrey/Tommen Baratheon

Robb Stark

Lannister

NaN

NaN

NaN

Tully

NaN

NaN

NaN

win

pitched battle

0

1

15000

10000

Jaime Lannister, Andros Brax

Edmure Tully, Tytos Blackwood

1

Riverrun

The Riverlands

NaN

3

Battle of the Green Fork

298

4

Robb Stark

Joffrey/Tommen Baratheon

Stark

NaN

NaN

NaN

Lannister

NaN

NaN

NaN

loss

pitched battle

1

1

18000

20000

Roose Bolton, Wylis Manderly, Medger Cerwyn, H…

Tywin Lannister, Gregor Clegane, Kevan Lannist…

1

Green Fork

The Riverlands

NaN

4

Battle of the Whispering Wood

298

5

Robb Stark

Joffrey/Tommen Baratheon

Stark

Tully

NaN

NaN

Lannister

NaN

NaN

NaN

win

ambush

1

1

1875

6000

Robb Stark, Brynden Tully

Jaime Lannister

1

Whispering Wood

The Riverlands

NaN

代码语言:javascript复制
# 制作攻击方和防守方大小的两个变量
# 但是当有超过 10000 个攻击方时将其排除在外
data1 = df['attacker_size'][df['attacker_size'] < 90000]
data2 = df['defender_size'][df['attacker_size'] < 90000]

# 创建 2000 个桶
bins = np.arange(data1.min(), data2.max(), 2000) # 固定桶的大小

# 绘制攻击方大小的直方图
plt.hist(data1, 
         bins=bins, 
         alpha=0.5, 
         color='#EDD834',
         label='Attacker')

# 绘制防守方大小的直方图
plt.hist(data2, 
         bins=bins, 
         alpha=0.5, 
         color='#887E43',
         label='Defender')

# 设置图形的 x 和 y 边界
plt.ylim([0, 10])

# 设置标题和标签
plt.title('Histogram of Attacker and Defender Size')
plt.xlabel('Number of troops')
plt.ylabel('Number of battles')
plt.legend(loc='upper right')

plt.show()

代码语言:javascript复制
# 制作攻击方和防守方大小的两个变量
# 但是当有超过 10000 个攻击方时将其排除在外
data1 = df['attacker_size'][df['attacker_size'] < 90000]
data2 = df['defender_size'][df['attacker_size'] < 90000]

# 创建 10 个桶,最小值为 
# data1 和 data2 的最小值
bins = np.linspace(min(data1   data2), 
                   # 最大值为它们的最大值
                   max(data1   data2),
                   # 并分为 10 个桶
                   10)

# 绘制攻击方大小的直方图
plt.hist(data1, 
         # 使用定义好的桶
         bins=bins, 
         # 透明度
         alpha=0.5, 
         # 颜色
         color='#EDD834',
         # 攻击方的标签
         label='Attacker')

# 绘制防守方大小的直方图
plt.hist(data2, 
         # 使用定义好的桶
         bins=bins, 
         # 透明度
         alpha=0.5, 
         # 颜色
         color='#887E43',
         # 防守方的标签
         label='Defender')

# 设置图形的 x 和 y 边界
plt.ylim([0, 10])

# 设置标题和标签
plt.title('Histogram of Attacker and Defender Size')
plt.xlabel('Number of troops')
plt.ylabel('Number of battles')
plt.legend(loc='upper right')

plt.show()

从 Pandas 数据帧生成 MatPlotLib 散点图

代码语言:javascript复制
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], 
        'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'], 
        'female': [0, 1, 1, 0, 1],
        'age': [42, 52, 36, 24, 73], 
        'preTestScore': [4, 24, 31, 2, 3],
        'postTestScore': [25, 94, 57, 62, 70]}
df = pd.DataFrame(raw_data, columns = ['first_name', 'last_name', 'age', 'female', 'preTestScore', 'postTestScore'])
df

first_name

last_name

age

female

preTestScore

postTestScore

0

Jason

Miller

42

0

4

25

1

Molly

Jacobson

52

1

24

94

2

Tina

Ali

36

1

31

57

3

Jake

Milner

24

0

2

62

4

Amy

Cooze

73

1

3

70

代码语言:javascript复制
# preTestScore 和 postTestScore 的散点图
# 每个点的大小取决于年龄
plt.scatter(df.preTestScore, df.postTestScore
, s=df.age)

# <matplotlib.collections.PathCollection at 0x10ca42b00> 

代码语言:javascript复制
# preTestScore 和 postTestScore 的散点图
# 大小为 300,颜色取决于性别
plt.scatter(df.preTestScore, df.postTestScore, s=300, c=df.female)

# <matplotlib.collections.PathCollection at 0x10cb90a90> 

Matplotlib 的简单示例

代码语言:javascript复制
# 让 Jupyter 加载 matplotlib 
# 并内联创建所有绘图(也就是在页面上)
%matplotlib inline

import matplotlib.pyplot as pyplot

pyplot.plot([1.6, 2.7])

# [<matplotlib.lines.Line2D at 0x10c4e7978>] 

MatPlotLib 中的饼图

代码语言:javascript复制
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt

raw_data = {'officer_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
        'jan_arrests': [4, 24, 31, 2, 3],
        'feb_arrests': [25, 94, 57, 62, 70],
        'march_arrests': [5, 43, 23, 23, 51]}
df = pd.DataFrame(raw_data, columns = ['officer_name', 'jan_arrests', 'feb_arrests', 'march_arrests'])
df

officer_name

jan_arrests

feb_arrests

march_arrests

0

Jason

4

25

5

1

Molly

24

94

43

2

Tina

31

57

23

3

Jake

2

62

23

4

Amy

3

70

51

代码语言:javascript复制
# 创建一列,其中包含每个官员的总逮捕数
df['total_arrests'] = df['jan_arrests']   df['feb_arrests']   df['march_arrests']
df

officer_name

jan_arrests

feb_arrests

march_arrests

total_arrests

0

Jason

4

25

5

34

1

Molly

24

94

43

161

2

Tina

31

57

23

111

3

Jake

2

62

23

87

4

Amy

3

70

51

124

代码语言:javascript复制
# (从 iWantHue)创建一列颜色
colors = ["#E13F29", "#D69A80", "#D63B59", "#AE5552", "#CB5C3B", "#EB8076", "#96624E"]

# 创建饼图
plt.pie(
    # 使用数据 total_arrests
    df['total_arrests'],
    # 标签为官员名称
    labels=df['officer_name'],
    # 没有阴影
    shadow=False,
    # 颜色
    colors=colors,
    # 将一块扇形移出去
    explode=(0, 0, 0, 0, 0.15),
    # 起始角度为 90 度
    startangle=90,
    # 将百分比列为分数
    autopct='%1.1f%%',
    )

# 使饼状图为正圆
plt.axis('equal')

# 查看绘图
plt.tight_layout()
plt.show()

MatPlotLib 中的散点图

代码语言:javascript复制
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# 展示 ipython 的最大行数
pd.set_option('display.max_row', 1000)

# 将 ipython 的最大列宽设为 50
pd.set_option('display.max_columns', 50)

df = pd.read_csv('https://raw.githubusercontent.com/chrisalbon/war_of_the_five_kings_dataset/master/5kings_battles_v1.csv')
df.head()

name

year

battle_number

attacker_king

defender_king

attacker_1

attacker_2

attacker_3

attacker_4

defender_1

defender_2

defender_3

defender_4

attacker_outcome

battle_type

major_death

major_capture

attacker_size

defender_size

attacker_commander

defender_commander

summer

location

region

note

0

Battle of the Golden Tooth

298

1

Joffrey/Tommen Baratheon

Robb Stark

Lannister

NaN

NaN

NaN

Tully

NaN

NaN

NaN

win

pitched battle

1.0

0.0

15000.0

4000.0

Jaime Lannister

Clement Piper, Vance

1.0

Golden Tooth

The Westerlands

NaN

1

Battle at the Mummer’s Ford

298

2

Joffrey/Tommen Baratheon

Robb Stark

Lannister

NaN

NaN

NaN

Baratheon

NaN

NaN

NaN

win

ambush

1.0

0.0

NaN

120.0

Gregor Clegane

Beric Dondarrion

1.0

Mummer’s Ford

The Riverlands

NaN

2

Battle of Riverrun

298

3

Joffrey/Tommen Baratheon

Robb Stark

Lannister

NaN

NaN

NaN

Tully

NaN

NaN

NaN

win

pitched battle

0.0

1.0

15000.0

10000.0

Jaime Lannister, Andros Brax

Edmure Tully, Tytos Blackwood

1.0

Riverrun

The Riverlands

NaN

3

Battle of the Green Fork

298

4

Robb Stark

Joffrey/Tommen Baratheon

Stark

NaN

NaN

NaN

Lannister

NaN

NaN

NaN

loss

pitched battle

1.0

1.0

18000.0

20000.0

Roose Bolton, Wylis Manderly, Medger Cerwyn, H…

Tywin Lannister, Gregor Clegane, Kevan Lannist…

1.0

Green Fork

The Riverlands

NaN

4

Battle of the Whispering Wood

298

5

Robb Stark

Joffrey/Tommen Baratheon

Stark

Tully

NaN

NaN

Lannister

NaN

NaN

NaN

win

ambush

1.0

1.0

1875.0

6000.0

Robb Stark, Brynden Tully

Jaime Lannister

1.0

Whispering Wood

The Riverlands

NaN

代码语言:javascript复制
# 创建图形
plt.figure(figsize=(10,8))

# 创建散点图
            # 298 年的攻击方大小为 x 轴
plt.scatter(df['attacker_size'][df['year'] == 298], 
            # 298 年的防守方大小为 y 轴
            df['defender_size'][df['year'] == 298], 
            # 标记
            marker='x', 
            # 颜色
            color='b',
            # 透明度
            alpha=0.7,
            # 大小
            s = 124,
            # 标签
            label='Year 298')

            # 299 年的攻击方大小为 x 轴
plt.scatter(df['attacker_size'][df['year'] == 299], 
            # 299 年的防守方大小为 y 轴
            df['defender_size'][df['year'] == 299], 
            # 标记
            marker='o', 
            # 颜色
            color='r', 
            # 透明度
            alpha=0.7,
            # 大小
            s = 124,
            # 标签
            label='Year 299')

            # 300 年的攻击方大小为 x 轴
plt.scatter(df['attacker_size'][df['year'] == 300], 
            # 300 年的防守方大小为 x 轴
            df['defender_size'][df['year'] == 300], 
            # 标记
            marker='^', 
            # 颜色
            color='g', 
            # 透明度
            alpha=0.7, 
            # 大小
            s = 124,
            # 标签
            label='Year 300')

# 标题
plt.title('Battles Of The War Of The Five Kings')

# y 标签
plt.ylabel('Defender Size')

# x 标签
plt.xlabel('Attacker Size')

# 图例
plt.legend(loc='upper right')

# 设置图形边界
plt.xlim([min(df['attacker_size'])-1000, max(df['attacker_size']) 1000])
plt.ylim([min(df['defender_size'])-1000, max(df['defender_size']) 1000])

plt.show()

MatPlotLib 中的栈式百分比条形图

代码语言:javascript复制
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt

raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
        'pre_score': [4, 24, 31, 2, 3],
        'mid_score': [25, 94, 57, 62, 70],
        'post_score': [5, 43, 23, 23, 51]}
df = pd.DataFrame(raw_data, columns = ['first_name', 'pre_score', 'mid_score', 'post_score'])
df

first_name

pre_score

mid_score

post_score

0

Jason

4

25

5

1

Molly

24

94

43

2

Tina

31

57

23

3

Jake

2

62

23

4

Amy

3

70

51

代码语言:javascript复制
# 创建带有一个子图的图形
f, ax = plt.subplots(1, figsize=(10,5))

# 将条宽设为 1
bar_width = 1

# 条形左边界的位置
bar_l = [i for i in range(len(df['pre_score']))] 

# x 轴刻度的位置(条形的中心是条形标签)
tick_pos = [i (bar_width/2) for i in bar_l] 

# 创建每个参与者的总得分
totals = [i j k for i,j,k in zip(df['pre_score'], df['mid_score'], df['post_score'])]

# 创建每个参与者的 pre_score 和总得分的百分比
pre_rel = [i / j * 100 for  i,j in zip(df['pre_score'], totals)]

# 创建每个参与者的 mid_score 和总得分的百分比
mid_rel = [i / j * 100 for  i,j in zip(df['mid_score'], totals)]

# 创建每个参与者的 post_score 和总得分的百分比
post_rel = [i / j * 100 for  i,j in zip(df['post_score'], totals)]

# 在位置 bar_1 创建条形图
ax.bar(bar_l, 
       # 使用数据 pre_rel
       pre_rel, 
       # 标签 
       label='Pre Score', 
       # 透明度
       alpha=0.9, 
       # 颜色
       color='#019600',
       # 条形宽度
       width=bar_width,
       # 边框颜色
       edgecolor='white'
       )

# 在位置 bar_1 创建条形图
ax.bar(bar_l, 
       # 使用数据 mid_rel
       mid_rel, 
       # 底部为 pre_rel
       bottom=pre_rel, 
       # 标签
       label='Mid Score', 
       # 透明度
       alpha=0.9, 
       # 颜色
       color='#3C5F5A', 
       # 条形宽度
       width=bar_width,
       # 边框颜色
       edgecolor='white'
       )

# Create a bar chart in position bar_1
ax.bar(bar_l, 
       # 使用数据 post_rel
       post_rel, 
       # 底部为 pre_rel 和 mid_rel
       bottom=[i j for i,j in zip(pre_rel, mid_rel)], 
       # 标签
       label='Post Score',
       # 透明度
       alpha=0.9, 
       # 颜色
       color='#219AD8', 
       # 条形宽度
       width=bar_width,
       # 边框颜色
       edgecolor='white'
       )

# 将刻度设为 first_name
plt.xticks(tick_pos, df['first_name'])
ax.set_ylabel("Percentage")
ax.set_xlabel("")

# 设置图形边界
plt.xlim([min(tick_pos)-bar_width, max(tick_pos) bar_width])
plt.ylim(-10, 110)

# 旋转轴标签
plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right')

# 展示绘图
plt.show()

0 人点赞