【数据分析与可视化】DataFrame的Selecting和indexing

2020-07-07 20:06:36 浏览数 (1)

代码语言:python
import numpy as np
import pandas as pd
代码语言:javascript复制
!pwd
代码语言:javascript复制
/Users/bennyrhys/opt/anaconda3/bin
代码语言:javascript复制
!ls /Users/bennyrhys/Desktop/数据分析可视化-数据集/homework
代码语言:javascript复制
AMZN.csv           apply_demo.csv     iris.csv           top5.csv
BABA.csv           city_weather.csv   movie_metadata.csv train.csv
Pokemon.csv        demo_duplicate.csv sales-funnel.xlsx  usa_flights.csv
代码语言:python
# 读取电影csv文件
imdb = pd.read_csv('/Users/bennyrhys/Desktop/数据分析可视化-数据集/homework/movie_metadata.csv')
imdb

color

director_name

num_critic_for_reviews

duration

director_facebook_likes

actor_3_facebook_likes

actor_2_name

actor_1_facebook_likes

gross

genres

...

num_user_for_reviews

language

country

content_rating

budget

title_year

actor_2_facebook_likes

imdb_score

aspect_ratio

movie_facebook_likes

0

Color

James Cameron

723.0

178.0

0.0

855.0

Joel David Moore

1000.0

760505847.0

Action|Adventure|Fantasy|Sci-Fi

...

3054.0

English

USA

PG-13

237000000.0

2009.0

936.0

7.9

1.78

33000

1

Color

Gore Verbinski

302.0

169.0

563.0

1000.0

Orlando Bloom

40000.0

309404152.0

Action|Adventure|Fantasy

...

1238.0

English

USA

PG-13

300000000.0

2007.0

5000.0

7.1

2.35

0

2

Color

Sam Mendes

602.0

148.0

0.0

161.0

Rory Kinnear

11000.0

200074175.0

Action|Adventure|Thriller

...

994.0

English

UK

PG-13

245000000.0

2015.0

393.0

6.8

2.35

85000

3

Color

Christopher Nolan

813.0

164.0

22000.0

23000.0

Christian Bale

27000.0

448130642.0

Action|Thriller

...

2701.0

English

USA

PG-13

250000000.0

2012.0

23000.0

8.5

2.35

164000

4

NaN

Doug Walker

NaN

NaN

131.0

NaN

Rob Walker

131.0

NaN

Documentary

...

NaN

NaN

NaN

NaN

NaN

NaN

12.0

7.1

NaN

0

...

...

...

...

...

...

...

...

...

...

...

...

...

...

...

...

...

...

...

...

...

...

5038

Color

Scott Smith

1.0

87.0

2.0

318.0

Daphne Zuniga

637.0

NaN

Comedy|Drama

...

6.0

English

Canada

NaN

NaN

2013.0

470.0

7.7

NaN

84

5039

Color

NaN

43.0

43.0

NaN

319.0

Valorie Curry

841.0

NaN

Crime|Drama|Mystery|Thriller

...

359.0

English

USA

TV-14

NaN

NaN

593.0

7.5

16.00

32000

5040

Color

Benjamin Roberds

13.0

76.0

0.0

0.0

Maxwell Moody

0.0

NaN

Drama|Horror|Thriller

...

3.0

English

USA

NaN

1400.0

2013.0

0.0

6.3

NaN

16

5041

Color

Daniel Hsia

14.0

100.0

0.0

489.0

Daniel Henney

946.0

10443.0

Comedy|Drama|Romance

...

9.0

English

USA

PG-13

NaN

2012.0

719.0

6.3

2.35

660

5042

Color

Jon Gunn

43.0

90.0

16.0

16.0

Brian Herzlinger

86.0

85222.0

Documentary

...

84.0

English

USA

PG

1100.0

2004.0

23.0

6.6

1.85

456

5043 rows × 28 columns

代码语言:python
# 数据量较大,先用 shape 查看行数和列数(返回 (行数, 列数))
imdb.shape
代码语言:javascript复制
(5043, 28)
代码语言:python
# 只显示部分数据:head() 默认返回前 5 行
imdb.head()

color

director_name

num_critic_for_reviews

duration

director_facebook_likes

actor_3_facebook_likes

actor_2_name

actor_1_facebook_likes

gross

genres

...

num_user_for_reviews

language

country

content_rating

budget

title_year

actor_2_facebook_likes

imdb_score

aspect_ratio

movie_facebook_likes

0

Color

James Cameron

723.0

178.0

0.0

855.0

Joel David Moore

1000.0

760505847.0

Action|Adventure|Fantasy|Sci-Fi

...

3054.0

English

USA

PG-13

237000000.0

2009.0

936.0

7.9

1.78

33000

1

Color

Gore Verbinski

302.0

169.0

563.0

1000.0

Orlando Bloom

40000.0

309404152.0

Action|Adventure|Fantasy

...

1238.0

English

USA

PG-13

300000000.0

2007.0

5000.0

7.1

2.35

0

2

Color

Sam Mendes

602.0

148.0

0.0

161.0

Rory Kinnear

11000.0

200074175.0

Action|Adventure|Thriller

...

994.0

English

UK

PG-13

245000000.0

2015.0

393.0

6.8

2.35

85000

3

Color

Christopher Nolan

813.0

164.0

22000.0

23000.0

Christian Bale

27000.0

448130642.0

Action|Thriller

...

2701.0

English

USA

PG-13

250000000.0

2012.0

23000.0

8.5

2.35

164000

4

NaN

Doug Walker

NaN

NaN

131.0

NaN

Rob Walker

131.0

NaN

Documentary

...

NaN

NaN

NaN

NaN

NaN

NaN

12.0

7.1

NaN

0

5 rows × 28 columns

代码语言:python
# 只显示部分数据:tail() 默认返回后 5 行
imdb.tail()

color

director_name

num_critic_for_reviews

duration

director_facebook_likes

actor_3_facebook_likes

actor_2_name

actor_1_facebook_likes

gross

genres

...

num_user_for_reviews

language

country

content_rating

budget

title_year

actor_2_facebook_likes

imdb_score

aspect_ratio

movie_facebook_likes

5038

Color

Scott Smith

1.0

87.0

2.0

318.0

Daphne Zuniga

637.0

NaN

Comedy|Drama

...

6.0

English

Canada

NaN

NaN

2013.0

470.0

7.7

NaN

84

5039

Color

NaN

43.0

43.0

NaN

319.0

Valorie Curry

841.0

NaN

Crime|Drama|Mystery|Thriller

...

359.0

English

USA

TV-14

NaN

NaN

593.0

7.5

16.00

32000

5040

Color

Benjamin Roberds

13.0

76.0

0.0

0.0

Maxwell Moody

0.0

NaN

Drama|Horror|Thriller

...

3.0

English

USA

NaN

1400.0

2013.0

0.0

6.3

NaN

16

5041

Color

Daniel Hsia

14.0

100.0

0.0

489.0

Daniel Henney

946.0

10443.0

Comedy|Drama|Romance

...

9.0

English

USA

PG-13

NaN

2012.0

719.0

6.3

2.35

660

5042

Color

Jon Gunn

43.0

90.0

16.0

16.0

Brian Herzlinger

86.0

85222.0

Documentary

...

84.0

English

USA

PG

1100.0

2004.0

23.0

6.6

1.85

456

5 rows × 28 columns

代码语言:python
# 只显示部分数据:head(n) 返回前 n 行,这里 n=10
imdb.head(10)

color

director_name

num_critic_for_reviews

duration

director_facebook_likes

actor_3_facebook_likes

actor_2_name

actor_1_facebook_likes

gross

genres

...

num_user_for_reviews

language

country

content_rating

budget

title_year

actor_2_facebook_likes

imdb_score

aspect_ratio

movie_facebook_likes

0

Color

James Cameron

723.0

178.0

0.0

855.0

Joel David Moore

1000.0

760505847.0

Action|Adventure|Fantasy|Sci-Fi

...

3054.0

English

USA

PG-13

237000000.0

2009.0

936.0

7.9

1.78

33000

1

Color

Gore Verbinski

302.0

169.0

563.0

1000.0

Orlando Bloom

40000.0

309404152.0

Action|Adventure|Fantasy

...

1238.0

English

USA

PG-13

300000000.0

2007.0

5000.0

7.1

2.35

0

2

Color

Sam Mendes

602.0

148.0

0.0

161.0

Rory Kinnear

11000.0

200074175.0

Action|Adventure|Thriller

...

994.0

English

UK

PG-13

245000000.0

2015.0

393.0

6.8

2.35

85000

3

Color

Christopher Nolan

813.0

164.0

22000.0

23000.0

Christian Bale

27000.0

448130642.0

Action|Thriller

...

2701.0

English

USA

PG-13

250000000.0

2012.0

23000.0

8.5

2.35

164000

4

NaN

Doug Walker

NaN

NaN

131.0

NaN

Rob Walker

131.0

NaN

Documentary

...

NaN

NaN

NaN

NaN

NaN

NaN

12.0

7.1

NaN

0

5

Color

Andrew Stanton

462.0

132.0

475.0

530.0

Samantha Morton

640.0

73058679.0

Action|Adventure|Sci-Fi

...

738.0

English

USA

PG-13

263700000.0

2012.0

632.0

6.6

2.35

24000

6

Color

Sam Raimi

392.0

156.0

0.0

4000.0

James Franco

24000.0

336530303.0

Action|Adventure|Romance

...

1902.0

English

USA

PG-13

258000000.0

2007.0

11000.0

6.2

2.35

0

7

Color

Nathan Greno

324.0

100.0

15.0

284.0

Donna Murphy

799.0

200807262.0

Adventure|Animation|Comedy|Family|Fantasy|Musi...

...

387.0

English

USA

PG

260000000.0

2010.0

553.0

7.8

1.85

29000

8

Color

Joss Whedon

635.0

141.0

0.0

19000.0

Robert Downey Jr.

26000.0

458991599.0

Action|Adventure|Sci-Fi

...

1117.0

English

USA

PG-13

250000000.0

2015.0

21000.0

7.5

2.35

118000

9

Color

David Yates

375.0

153.0

282.0

10000.0

Daniel Radcliffe

25000.0

301956980.0

Adventure|Family|Fantasy|Mystery

...

973.0

English

UK

PG

250000000.0

2009.0

11000.0

7.5

2.35

10000

10 rows × 28 columns

代码语言:python
# 用列名列表选取多列,返回新的 DataFrame(若只传单个列名,如 imdb['color'],则返回 Series)
imdb[['color','director_name']]

color

director_name

0

Color

James Cameron

1

Color

Gore Verbinski

2

Color

Sam Mendes

3

Color

Christopher Nolan

4

NaN

Doug Walker

...

...

...

5038

Color

Scott Smith

5039

Color

NaN

5040

Color

Benjamin Roberds

5041

Color

Daniel Hsia

5042

Color

Jon Gunn

5043 rows × 2 columns

代码语言:python
# 先选取三列生成新的 DataFrame;head() 只能取前 n 行,无法取任意的行范围
row_bf = imdb[['director_name','movie_title','imdb_score']]
row_bf.head()

director_name

movie_title

imdb_score

0

James Cameron

Avatar

7.9

1

Gore Verbinski

Pirates of the Caribbean: At World's End

7.1

2

Sam Mendes

Spectre

6.8

3

Christopher Nolan

The Dark Knight Rises

8.5

4

Doug Walker

Star Wars: Episode VII - The Force Awakens ...

7.1

代码语言:python
# 用 iloc 按位置对行、列切片:取位置 10~19 的行(不含 20)、所有列
row_bf.iloc[10:20,:]

director_name

movie_title

imdb_score

10

Zack Snyder

Batman v Superman: Dawn of Justice

6.9

11

Bryan Singer

Superman Returns

6.1

12

Marc Forster

Quantum of Solace

6.7

13

Gore Verbinski

Pirates of the Caribbean: Dead Man's Chest

7.3

14

Gore Verbinski

The Lone Ranger

6.5

15

Zack Snyder

Man of Steel

7.2

16

Andrew Adamson

The Chronicles of Narnia: Prince Caspian

6.6

17

Joss Whedon

The Avengers

8.1

18

Rob Marshall

Pirates of the Caribbean: On Stranger Tides

6.7

19

Barry Sonnenfeld

Men in Black 3

6.8

代码语言:python
# 同时限制列的范围:取位置 10~19 的行、前两列(位置 0 和 1,不含 2)
tmp_df = row_bf.iloc[10:20,0:2]
tmp_df

director_name

movie_title

10

Zack Snyder

Batman v Superman: Dawn of Justice

11

Bryan Singer

Superman Returns

12

Marc Forster

Quantum of Solace

13

Gore Verbinski

Pirates of the Caribbean: Dead Man's Chest

14

Gore Verbinski

The Lone Ranger

15

Zack Snyder

Man of Steel

16

Andrew Adamson

The Chronicles of Narnia: Prince Caspian

17

Joss Whedon

The Avengers

18

Rob Marshall

Pirates of the Caribbean: On Stranger Tides

19

Barry Sonnenfeld

Men in Black 3

代码语言:python
# iloc 按当前 DataFrame 中的位置选取,与索引标签(label)无关:位置 2:4 对应的是标签为 12、13 的两行
tmp_df.iloc[2:4,:]

director_name

movie_title

12

Marc Forster

Quantum of Solace

13

Gore Verbinski

Pirates of the Caribbean: Dead Man's Chest

代码语言:python
# 想按索引标签(label)定位要用 loc(没有前面的 i);loc 的切片包含结束标签,所以 15:17 返回标签为 15、16、17 的三行
tmp_df.loc[15:17,:]

director_name

movie_title

15

Zack Snyder

Man of Steel

16

Andrew Adamson

The Chronicles of Narnia: Prince Caspian

17

Joss Whedon

The Avengers

代码语言:python
# 列同样可以按标签切片::'director_name' 表示从第一列到 director_name(包含该列)
tmp_df.loc[15:17,:'director_name']

director_name

15

Zack Snyder

16

Andrew Adamson

17

Joss Whedon

0 人点赞