代码语言:javascript复制前提要对数据信息之间的关系要了解
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
代码语言:javascript复制# 读入excel文件
df = pd.read_excel('/Users/bennyrhys/Desktop/数据分析可视化-数据集/homework/sales-funnel.xlsx')
df
Account | Name | Rep | Manager | Product | Quantity | Price | Status | |
---|---|---|---|---|---|---|---|---|
0 | 714466 | Trantow-Barrows | Craig Booker | Debra Henley | CPU | 1 | 30000 | presented |
1 | 714466 | Trantow-Barrows | Craig Booker | Debra Henley | Software | 1 | 10000 | presented |
2 | 714466 | Trantow-Barrows | Craig Booker | Debra Henley | Maintenance | 2 | 5000 | pending |
3 | 737550 | Fritsch, Russel and Anderson | Craig Booker | Debra Henley | CPU | 1 | 35000 | declined |
4 | 146832 | Kiehn-Spinka | Daniel Hilton | Debra Henley | CPU | 2 | 65000 | won |
5 | 218895 | Kulas Inc | Daniel Hilton | Debra Henley | CPU | 2 | 40000 | pending |
6 | 218895 | Kulas Inc | Daniel Hilton | Debra Henley | Software | 1 | 10000 | presented |
7 | 412290 | Jerde-Hilpert | John Smith | Debra Henley | Maintenance | 2 | 5000 | pending |
8 | 740150 | Barton LLC | John Smith | Debra Henley | CPU | 1 | 35000 | declined |
9 | 141962 | Herman LLC | Cedric Moss | Fred Anderson | CPU | 2 | 65000 | won |
10 | 163416 | Purdy-Kunde | Cedric Moss | Fred Anderson | CPU | 1 | 30000 | presented |
11 | 239344 | Stokes LLC | Cedric Moss | Fred Anderson | Maintenance | 1 | 5000 | pending |
12 | 239344 | Stokes LLC | Cedric Moss | Fred Anderson | Software | 1 | 10000 | presented |
13 | 307599 | Kassulke, Ondricka and Metz | Wendy Yule | Fred Anderson | Maintenance | 3 | 7000 | won |
14 | 688981 | Keeling LLC | Wendy Yule | Fred Anderson | CPU | 5 | 100000 | won |
15 | 729833 | Koepp Ltd | Wendy Yule | Fred Anderson | CPU | 2 | 65000 | declined |
16 | 729833 | Koepp Ltd | Wendy Yule | Fred Anderson | Monitor | 2 | 5000 | presented |
# 透视表-去重复(聚合默认 agg平均值)
pd.pivot_table(df, index=['Name'])
Account | Price | Quantity | |
---|---|---|---|
Name | |||
Barton LLC | 740150 | 35000 | 1.000000 |
Fritsch, Russel and Anderson | 737550 | 35000 | 1.000000 |
Herman LLC | 141962 | 65000 | 2.000000 |
Jerde-Hilpert | 412290 | 5000 | 2.000000 |
Kassulke, Ondricka and Metz | 307599 | 7000 | 3.000000 |
Keeling LLC | 688981 | 100000 | 5.000000 |
Kiehn-Spinka | 146832 | 65000 | 2.000000 |
Koepp Ltd | 729833 | 35000 | 2.000000 |
Kulas Inc | 218895 | 25000 | 1.500000 |
Purdy-Kunde | 163416 | 30000 | 1.000000 |
Stokes LLC | 239344 | 7500 | 1.000000 |
Trantow-Barrows | 714466 | 15000 | 1.333333 |
# 透视表-去重复(聚合默认 agg-sum)
pd.pivot_table(df, index=['Name'], aggfunc='sum')
Account | Price | Quantity | |
---|---|---|---|
Name | |||
Barton LLC | 740150 | 35000 | 1 |
Fritsch, Russel and Anderson | 737550 | 35000 | 1 |
Herman LLC | 141962 | 65000 | 2 |
Jerde-Hilpert | 412290 | 5000 | 2 |
Kassulke, Ondricka and Metz | 307599 | 7000 | 3 |
Keeling LLC | 688981 | 100000 | 5 |
Kiehn-Spinka | 146832 | 65000 | 2 |
Koepp Ltd | 1459666 | 70000 | 4 |
Kulas Inc | 437790 | 50000 | 3 |
Purdy-Kunde | 163416 | 30000 | 1 |
Stokes LLC | 478688 | 15000 | 2 |
Trantow-Barrows | 2143398 | 45000 | 4 |
# 透视表-多列索引(基本不变)
pd.pivot_table(df, index=['Name','Rep','Manager'])
Account | Price | Quantity | |||
---|---|---|---|---|---|
Name | Rep | Manager | |||
Barton LLC | John Smith | Debra Henley | 740150 | 35000 | 1.000000 |
Fritsch, Russel and Anderson | Craig Booker | Debra Henley | 737550 | 35000 | 1.000000 |
Herman LLC | Cedric Moss | Fred Anderson | 141962 | 65000 | 2.000000 |
Jerde-Hilpert | John Smith | Debra Henley | 412290 | 5000 | 2.000000 |
Kassulke, Ondricka and Metz | Wendy Yule | Fred Anderson | 307599 | 7000 | 3.000000 |
Keeling LLC | Wendy Yule | Fred Anderson | 688981 | 100000 | 5.000000 |
Kiehn-Spinka | Daniel Hilton | Debra Henley | 146832 | 65000 | 2.000000 |
Koepp Ltd | Wendy Yule | Fred Anderson | 729833 | 35000 | 2.000000 |
Kulas Inc | Daniel Hilton | Debra Henley | 218895 | 25000 | 1.500000 |
Purdy-Kunde | Cedric Moss | Fred Anderson | 163416 | 30000 | 1.000000 |
Stokes LLC | Cedric Moss | Fred Anderson | 239344 | 7500 | 1.000000 |
Trantow-Barrows | Craig Booker | Debra Henley | 714466 | 15000 | 1.333333 |
# 透视表-包含关系
pd.pivot_table(df, index=['Manager','Rep'])
Account | Price | Quantity | ||
---|---|---|---|---|
Manager | Rep | |||
Debra Henley | Craig Booker | 720237.0 | 20000.000000 | 1.250000 |
Daniel Hilton | 194874.0 | 38333.333333 | 1.666667 | |
John Smith | 576220.0 | 20000.000000 | 1.500000 | |
Fred Anderson | Cedric Moss | 196016.5 | 27500.000000 | 1.250000 |
Wendy Yule | 614061.5 | 44250.000000 | 3.000000 |
# 透视表指定 values默认是None
pd.pivot_table(df, index=['Manager','Rep'], values=['Price','Quantity'])
Price | Quantity | ||
---|---|---|---|
Manager | Rep | ||
Debra Henley | Craig Booker | 20000.000000 | 1.250000 |
Daniel Hilton | 38333.333333 | 1.666667 | |
John Smith | 20000.000000 | 1.500000 | |
Fred Anderson | Cedric Moss | 27500.000000 | 1.250000 |
Wendy Yule | 44250.000000 | 3.000000 |
# 透视表-col
pd.pivot_table(df, index=['Manager','Rep'], values=['Price','Quantity'], columns=['Product'])
Price | Quantity | ||||||||
---|---|---|---|---|---|---|---|---|---|
Product | CPU | Maintenance | Monitor | Software | CPU | Maintenance | Monitor | Software | |
Manager | Rep | ||||||||
Debra Henley | Craig Booker | 32500.0 | 5000.0 | NaN | 10000.0 | 1.0 | 2.0 | NaN | 1.0 |
Daniel Hilton | 52500.0 | NaN | NaN | 10000.0 | 2.0 | NaN | NaN | 1.0 | |
John Smith | 35000.0 | 5000.0 | NaN | NaN | 1.0 | 2.0 | NaN | NaN | |
Fred Anderson | Cedric Moss | 47500.0 | 5000.0 | NaN | 10000.0 | 1.5 | 1.0 | NaN | 1.0 |
Wendy Yule | 82500.0 | 7000.0 | 5000.0 | NaN | 3.5 | 3.0 | 2.0 | NaN |
# 透视表-填充nan
pd.pivot_table(df, index=['Manager','Rep'], values=['Price','Quantity'], columns=['Product'],fill_value=0)
Price | Quantity | ||||||||
---|---|---|---|---|---|---|---|---|---|
Product | CPU | Maintenance | Monitor | Software | CPU | Maintenance | Monitor | Software | |
Manager | Rep | ||||||||
Debra Henley | Craig Booker | 32500 | 5000 | 0 | 10000 | 1.0 | 2 | 0 | 1 |
Daniel Hilton | 52500 | 0 | 0 | 10000 | 2.0 | 0 | 0 | 1 | |
John Smith | 35000 | 5000 | 0 | 0 | 1.0 | 2 | 0 | 0 | |
Fred Anderson | Cedric Moss | 47500 | 5000 | 0 | 10000 | 1.5 | 1 | 0 | 1 |
Wendy Yule | 82500 | 7000 | 5000 | 0 | 3.5 | 3 | 2 | 0 |