【数据分析可视化】Series和DataFrame的Reindexing

2020-07-07 20:01:30 浏览数 (1)

代码语言:javascript复制
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

Series reindex

代码语言:javascript复制
s1 =  Series([1,2,3,4], index=['A','B','C','D'])
s1
代码语言:javascript复制
A    1
B    2
C    3
D    4
dtype: int64
代码语言:javascript复制
# 用reindex为Series指定新的索引;原索引中不存在的标签(如E)对应的值为NaN(查看参数的快捷键:Shift+Tab)
s1.reindex(index=['A','B','C','D','E'])
代码语言:javascript复制
A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
dtype: float64
代码语言:javascript复制
# reindex时用fill_value为新增的索引指定填充值(代替默认的NaN)
s1.reindex(index=['A','B','C','D','E'],fill_value=10)
代码语言:javascript复制
A     1
B     2
C     3
D     4
E    10
dtype: int64
代码语言:javascript复制
# 测试reindex的赋值方式
s2 = Series(['A','B','C'],index=[1,5,10])
s2
代码语言:javascript复制
1     A
5     B
10    C
dtype: object
代码语言:javascript复制
# 重新索引
s2.reindex(index=range(15))
代码语言:javascript复制
0     NaN
1       A
2     NaN
3     NaN
4     NaN
5       B
6     NaN
7     NaN
8     NaN
9     NaN
10      C
11    NaN
12    NaN
13    NaN
14    NaN
dtype: object
代码语言:javascript复制
# 用method参数填充值(ffill为前向填充:新增索引按照区间范围沿用其前面最近一个原有索引的值;索引0之前没有可用的值,因此仍为NaN)
s2.reindex(index=range(15),method='ffill')
代码语言:javascript复制
0     NaN
1       A
2       A
3       A
4       A
5       B
6       B
7       B
8       B
9       B
10      C
11      C
12      C
13      C
14      C
dtype: object

DataFrame reindex

代码语言:javascript复制
# 故意漏掉C
df1 = DataFrame(np.random.rand(25).reshape([5,5]), index=['A','B','D','E','F'], columns=['c1','c2','c3','c4','c5'])
df1

         c1        c2        c3        c4        c5
A  0.916976  0.277428  0.487468  0.392332  0.906246
B  0.112718  0.000009  0.958650  0.890877  0.640683
D  0.715841  0.857899  0.834954  0.134856  0.982175
E  0.375207  0.925308  0.734072  0.583107  0.677676
F  0.627784  0.818094  0.636362  0.417960  0.063043

代码语言:javascript复制
# reindex恢复C-重新索引行
df1.reindex(['A','B','C','D','E','F'])

         c1        c2        c3        c4        c5
A  0.916976  0.277428  0.487468  0.392332  0.906246
B  0.112718  0.000009  0.958650  0.890877  0.640683
C       NaN       NaN       NaN       NaN       NaN
D  0.715841  0.857899  0.834954  0.134856  0.982175
E  0.375207  0.925308  0.734072  0.583107  0.677676
F  0.627784  0.818094  0.636362  0.417960  0.063043

代码语言:javascript复制
# reindex-重新索引列
df1.reindex(columns=['c1','c2','c3','c4','c5','c6'])

         c1        c2        c3        c4        c5   c6
A  0.916976  0.277428  0.487468  0.392332  0.906246  NaN
B  0.112718  0.000009  0.958650  0.890877  0.640683  NaN
D  0.715841  0.857899  0.834954  0.134856  0.982175  NaN
E  0.375207  0.925308  0.734072  0.583107  0.677676  NaN
F  0.627784  0.818094  0.636362  0.417960  0.063043  NaN

代码语言:javascript复制
# reindex-行列同时重排(新增行C和列c6,新增位置的值均为NaN)
df1.reindex(index=['A','B','C','D','E','F'] ,columns=['c1','c2','c3','c4','c5','c6'])

         c1        c2        c3        c4        c5   c6
A  0.916976  0.277428  0.487468  0.392332  0.906246  NaN
B  0.112718  0.000009  0.958650  0.890877  0.640683  NaN
C       NaN       NaN       NaN       NaN       NaN  NaN
D  0.715841  0.857899  0.834954  0.134856  0.982175  NaN
E  0.375207  0.925308  0.734072  0.583107  0.677676  NaN
F  0.627784  0.818094  0.636362  0.417960  0.063043  NaN

代码语言:javascript复制
# Series(减少)
s1
代码语言:javascript复制
A    1
B    2
C    3
D    4
dtype: int64
代码语言:javascript复制
s1.reindex(index=['A','B'])
代码语言:javascript复制
A    1
B    2
dtype: int64
代码语言:javascript复制
# DataFrame(减少):先查看原始的df1,随后用reindex只保留其中一部分行
df1

         c1        c2        c3        c4        c5
A  0.916976  0.277428  0.487468  0.392332  0.906246
B  0.112718  0.000009  0.958650  0.890877  0.640683
D  0.715841  0.857899  0.834954  0.134856  0.982175
E  0.375207  0.925308  0.734072  0.583107  0.677676
F  0.627784  0.818094  0.636362  0.417960  0.063043

代码语言:javascript复制
df1.reindex(['A','B'])

         c1        c2        c3        c4        c5
A  0.916976  0.277428  0.487468  0.392332  0.906246
B  0.112718  0.000009  0.958650  0.890877  0.640683

代码语言:javascript复制
# 用drop方法删除指定的索引标签
s1.drop('A')
代码语言:javascript复制
B    2
C    3
D    4
dtype: int64
代码语言:javascript复制
# axis=0表示删除行,axis=1表示删除列
df1.drop('A',axis=0)

         c1        c2        c3        c4        c5
B  0.112718  0.000009  0.958650  0.890877  0.640683
D  0.715841  0.857899  0.834954  0.134856  0.982175
E  0.375207  0.925308  0.734072  0.583107  0.677676
F  0.627784  0.818094  0.636362  0.417960  0.063043

代码语言:javascript复制
df1.drop('c1',axis=1)

         c2        c3        c4        c5
A  0.277428  0.487468  0.392332  0.906246
B  0.000009  0.958650  0.890877  0.640683
D  0.857899  0.834954  0.134856  0.982175
E  0.925308  0.734072  0.583107  0.677676
F  0.818094  0.636362  0.417960  0.063043

nan

0 人点赞