【数据分析可视化】透视表

2020-07-07 19:50:58 浏览数 (1)

前提要对数据信息之间的关系要了解

代码语言:javascript复制
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
代码语言:javascript复制
# 读入excel文件
df = pd.read_excel('/Users/bennyrhys/Desktop/数据分析可视化-数据集/homework/sales-funnel.xlsx')
df

Account

Name

Rep

Manager

Product

Quantity

Price

Status

0

714466

Trantow-Barrows

Craig Booker

Debra Henley

CPU

1

30000

presented

1

714466

Trantow-Barrows

Craig Booker

Debra Henley

Software

1

10000

presented

2

714466

Trantow-Barrows

Craig Booker

Debra Henley

Maintenance

2

5000

pending

3

737550

Fritsch, Russel and Anderson

Craig Booker

Debra Henley

CPU

1

35000

declined

4

146832

Kiehn-Spinka

Daniel Hilton

Debra Henley

CPU

2

65000

won

5

218895

Kulas Inc

Daniel Hilton

Debra Henley

CPU

2

40000

pending

6

218895

Kulas Inc

Daniel Hilton

Debra Henley

Software

1

10000

presented

7

412290

Jerde-Hilpert

John Smith

Debra Henley

Maintenance

2

5000

pending

8

740150

Barton LLC

John Smith

Debra Henley

CPU

1

35000

declined

9

141962

Herman LLC

Cedric Moss

Fred Anderson

CPU

2

65000

won

10

163416

Purdy-Kunde

Cedric Moss

Fred Anderson

CPU

1

30000

presented

11

239344

Stokes LLC

Cedric Moss

Fred Anderson

Maintenance

1

5000

pending

12

239344

Stokes LLC

Cedric Moss

Fred Anderson

Software

1

10000

presented

13

307599

Kassulke, Ondricka and Metz

Wendy Yule

Fred Anderson

Maintenance

3

7000

won

14

688981

Keeling LLC

Wendy Yule

Fred Anderson

CPU

5

100000

won

15

729833

Koepp Ltd

Wendy Yule

Fred Anderson

CPU

2

65000

declined

16

729833

Koepp Ltd

Wendy Yule

Fred Anderson

Monitor

2

5000

presented

代码语言:javascript复制
# 透视表-去重复(聚合默认 agg平均值)
pd.pivot_table(df, index=['Name'])

Account

Price

Quantity

Name

Barton LLC

740150

35000

1.000000

Fritsch, Russel and Anderson

737550

35000

1.000000

Herman LLC

141962

65000

2.000000

Jerde-Hilpert

412290

5000

2.000000

Kassulke, Ondricka and Metz

307599

7000

3.000000

Keeling LLC

688981

100000

5.000000

Kiehn-Spinka

146832

65000

2.000000

Koepp Ltd

729833

35000

2.000000

Kulas Inc

218895

25000

1.500000

Purdy-Kunde

163416

30000

1.000000

Stokes LLC

239344

7500

1.000000

Trantow-Barrows

714466

15000

1.333333

代码语言:javascript复制
# 透视表-去重复(聚合默认 agg-sum)
pd.pivot_table(df, index=['Name'], aggfunc='sum')

Account

Price

Quantity

Name

Barton LLC

740150

35000

1

Fritsch, Russel and Anderson

737550

35000

1

Herman LLC

141962

65000

2

Jerde-Hilpert

412290

5000

2

Kassulke, Ondricka and Metz

307599

7000

3

Keeling LLC

688981

100000

5

Kiehn-Spinka

146832

65000

2

Koepp Ltd

1459666

70000

4

Kulas Inc

437790

50000

3

Purdy-Kunde

163416

30000

1

Stokes LLC

478688

15000

2

Trantow-Barrows

2143398

45000

4

代码语言:javascript复制
# 透视表-多列索引(基本不变)
pd.pivot_table(df, index=['Name','Rep','Manager'])

Account

Price

Quantity

Name

Rep

Manager

Barton LLC

John Smith

Debra Henley

740150

35000

1.000000

Fritsch, Russel and Anderson

Craig Booker

Debra Henley

737550

35000

1.000000

Herman LLC

Cedric Moss

Fred Anderson

141962

65000

2.000000

Jerde-Hilpert

John Smith

Debra Henley

412290

5000

2.000000

Kassulke, Ondricka and Metz

Wendy Yule

Fred Anderson

307599

7000

3.000000

Keeling LLC

Wendy Yule

Fred Anderson

688981

100000

5.000000

Kiehn-Spinka

Daniel Hilton

Debra Henley

146832

65000

2.000000

Koepp Ltd

Wendy Yule

Fred Anderson

729833

35000

2.000000

Kulas Inc

Daniel Hilton

Debra Henley

218895

25000

1.500000

Purdy-Kunde

Cedric Moss

Fred Anderson

163416

30000

1.000000

Stokes LLC

Cedric Moss

Fred Anderson

239344

7500

1.000000

Trantow-Barrows

Craig Booker

Debra Henley

714466

15000

1.333333

代码语言:javascript复制
# 透视表-包含关系
pd.pivot_table(df, index=['Manager','Rep'])

Account

Price

Quantity

Manager

Rep

Debra Henley

Craig Booker

720237.0

20000.000000

1.250000

Daniel Hilton

194874.0

38333.333333

1.666667

John Smith

576220.0

20000.000000

1.500000

Fred Anderson

Cedric Moss

196016.5

27500.000000

1.250000

Wendy Yule

614061.5

44250.000000

3.000000

代码语言:javascript复制
# 透视表指定 values默认是None
pd.pivot_table(df, index=['Manager','Rep'], values=['Price','Quantity'])

Price

Quantity

Manager

Rep

Debra Henley

Craig Booker

20000.000000

1.250000

Daniel Hilton

38333.333333

1.666667

John Smith

20000.000000

1.500000

Fred Anderson

Cedric Moss

27500.000000

1.250000

Wendy Yule

44250.000000

3.000000

代码语言:javascript复制
# 透视表-col
pd.pivot_table(df, index=['Manager','Rep'], values=['Price','Quantity'], columns=['Product'])

Price

Quantity

Product

CPU

Maintenance

Monitor

Software

CPU

Maintenance

Monitor

Software

Manager

Rep

Debra Henley

Craig Booker

32500.0

5000.0

NaN

10000.0

1.0

2.0

NaN

1.0

Daniel Hilton

52500.0

NaN

NaN

10000.0

2.0

NaN

NaN

1.0

John Smith

35000.0

5000.0

NaN

NaN

1.0

2.0

NaN

NaN

Fred Anderson

Cedric Moss

47500.0

5000.0

NaN

10000.0

1.5

1.0

NaN

1.0

Wendy Yule

82500.0

7000.0

5000.0

NaN

3.5

3.0

2.0

NaN

代码语言:javascript复制
# 透视表-填充nan
pd.pivot_table(df, index=['Manager','Rep'], values=['Price','Quantity'], columns=['Product'],fill_value=0)

Price

Quantity

Product

CPU

Maintenance

Monitor

Software

CPU

Maintenance

Monitor

Software

Manager

Rep

Debra Henley

Craig Booker

32500

5000

0

10000

1.0

2

0

1

Daniel Hilton

52500

0

0

10000

2.0

0

0

1

John Smith

35000

5000

0

0

1.0

2

0

0

Fred Anderson

Cedric Moss

47500

5000

0

10000

1.5

1

0

1

Wendy Yule

82500

7000

5000

0

3.5

3

2

0

nan

0 人点赞