import numpy as np
import pandas as pd
!pwd
/Users/bennyrhys/opt/anaconda3/bin
!ls /Users/bennyrhys/Desktop/数据分析可视化-数据集/homework
AMZN.csv apply_demo.csv iris.csv top5.csv
BABA.csv city_weather.csv movie_metadata.csv train.csv
Pokemon.csv demo_duplicate.csv sales-funnel.xlsx usa_flights.csv
# Read the movie metadata CSV file into a DataFrame.
imdb = pd.read_csv('/Users/bennyrhys/Desktop/数据分析可视化-数据集/homework/movie_metadata.csv')
# Display the DataFrame as the notebook cell output.
imdb
| | color | director_name | num_critic_for_reviews | duration | director_facebook_likes | actor_3_facebook_likes | actor_2_name | actor_1_facebook_likes | gross | genres | ... | num_user_for_reviews | language | country | content_rating | budget | title_year | actor_2_facebook_likes | imdb_score | aspect_ratio | movie_facebook_likes |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Color | James Cameron | 723.0 | 178.0 | 0.0 | 855.0 | Joel David Moore | 1000.0 | 760505847.0 | Action|Adventure|Fantasy|Sci-Fi | ... | 3054.0 | English | USA | PG-13 | 237000000.0 | 2009.0 | 936.0 | 7.9 | 1.78 | 33000 |
1 | Color | Gore Verbinski | 302.0 | 169.0 | 563.0 | 1000.0 | Orlando Bloom | 40000.0 | 309404152.0 | Action|Adventure|Fantasy | ... | 1238.0 | English | USA | PG-13 | 300000000.0 | 2007.0 | 5000.0 | 7.1 | 2.35 | 0 |
2 | Color | Sam Mendes | 602.0 | 148.0 | 0.0 | 161.0 | Rory Kinnear | 11000.0 | 200074175.0 | Action|Adventure|Thriller | ... | 994.0 | English | UK | PG-13 | 245000000.0 | 2015.0 | 393.0 | 6.8 | 2.35 | 85000 |
3 | Color | Christopher Nolan | 813.0 | 164.0 | 22000.0 | 23000.0 | Christian Bale | 27000.0 | 448130642.0 | Action|Thriller | ... | 2701.0 | English | USA | PG-13 | 250000000.0 | 2012.0 | 23000.0 | 8.5 | 2.35 | 164000 |
4 | NaN | Doug Walker | NaN | NaN | 131.0 | NaN | Rob Walker | 131.0 | NaN | Documentary | ... | NaN | NaN | NaN | NaN | NaN | NaN | 12.0 | 7.1 | NaN | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5038 | Color | Scott Smith | 1.0 | 87.0 | 2.0 | 318.0 | Daphne Zuniga | 637.0 | NaN | Comedy|Drama | ... | 6.0 | English | Canada | NaN | NaN | 2013.0 | 470.0 | 7.7 | NaN | 84 |
5039 | Color | NaN | 43.0 | 43.0 | NaN | 319.0 | Valorie Curry | 841.0 | NaN | Crime|Drama|Mystery|Thriller | ... | 359.0 | English | USA | TV-14 | NaN | NaN | 593.0 | 7.5 | 16.00 | 32000 |
5040 | Color | Benjamin Roberds | 13.0 | 76.0 | 0.0 | 0.0 | Maxwell Moody | 0.0 | NaN | Drama|Horror|Thriller | ... | 3.0 | English | USA | NaN | 1400.0 | 2013.0 | 0.0 | 6.3 | NaN | 16 |
5041 | Color | Daniel Hsia | 14.0 | 100.0 | 0.0 | 489.0 | Daniel Henney | 946.0 | 10443.0 | Comedy|Drama|Romance | ... | 9.0 | English | USA | PG-13 | NaN | 2012.0 | 719.0 | 6.3 | 2.35 | 660 |
5042 | Color | Jon Gunn | 43.0 | 90.0 | 16.0 | 16.0 | Brian Herzlinger | 86.0 | 85222.0 | Documentary | ... | 84.0 | English | USA | PG | 1100.0 | 2004.0 | 23.0 | 6.6 | 1.85 | 456 |
5043 rows × 28 columns
# The dataset is large, so check how many rows and columns it has:
# .shape gives a (rows, columns) tuple.
imdb.shape
(5043, 28)
# Show only part of the data (head() returns the first five rows by default).
imdb.head()
| | color | director_name | num_critic_for_reviews | duration | director_facebook_likes | actor_3_facebook_likes | actor_2_name | actor_1_facebook_likes | gross | genres | ... | num_user_for_reviews | language | country | content_rating | budget | title_year | actor_2_facebook_likes | imdb_score | aspect_ratio | movie_facebook_likes |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Color | James Cameron | 723.0 | 178.0 | 0.0 | 855.0 | Joel David Moore | 1000.0 | 760505847.0 | Action|Adventure|Fantasy|Sci-Fi | ... | 3054.0 | English | USA | PG-13 | 237000000.0 | 2009.0 | 936.0 | 7.9 | 1.78 | 33000 |
1 | Color | Gore Verbinski | 302.0 | 169.0 | 563.0 | 1000.0 | Orlando Bloom | 40000.0 | 309404152.0 | Action|Adventure|Fantasy | ... | 1238.0 | English | USA | PG-13 | 300000000.0 | 2007.0 | 5000.0 | 7.1 | 2.35 | 0 |
2 | Color | Sam Mendes | 602.0 | 148.0 | 0.0 | 161.0 | Rory Kinnear | 11000.0 | 200074175.0 | Action|Adventure|Thriller | ... | 994.0 | English | UK | PG-13 | 245000000.0 | 2015.0 | 393.0 | 6.8 | 2.35 | 85000 |
3 | Color | Christopher Nolan | 813.0 | 164.0 | 22000.0 | 23000.0 | Christian Bale | 27000.0 | 448130642.0 | Action|Thriller | ... | 2701.0 | English | USA | PG-13 | 250000000.0 | 2012.0 | 23000.0 | 8.5 | 2.35 | 164000 |
4 | NaN | Doug Walker | NaN | NaN | 131.0 | NaN | Rob Walker | 131.0 | NaN | Documentary | ... | NaN | NaN | NaN | NaN | NaN | NaN | 12.0 | 7.1 | NaN | 0 |
5 rows × 28 columns
# Show only part of the data (tail() returns the last five rows by default).
imdb.tail()
| | color | director_name | num_critic_for_reviews | duration | director_facebook_likes | actor_3_facebook_likes | actor_2_name | actor_1_facebook_likes | gross | genres | ... | num_user_for_reviews | language | country | content_rating | budget | title_year | actor_2_facebook_likes | imdb_score | aspect_ratio | movie_facebook_likes |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
5038 | Color | Scott Smith | 1.0 | 87.0 | 2.0 | 318.0 | Daphne Zuniga | 637.0 | NaN | Comedy|Drama | ... | 6.0 | English | Canada | NaN | NaN | 2013.0 | 470.0 | 7.7 | NaN | 84 |
5039 | Color | NaN | 43.0 | 43.0 | NaN | 319.0 | Valorie Curry | 841.0 | NaN | Crime|Drama|Mystery|Thriller | ... | 359.0 | English | USA | TV-14 | NaN | NaN | 593.0 | 7.5 | 16.00 | 32000 |
5040 | Color | Benjamin Roberds | 13.0 | 76.0 | 0.0 | 0.0 | Maxwell Moody | 0.0 | NaN | Drama|Horror|Thriller | ... | 3.0 | English | USA | NaN | 1400.0 | 2013.0 | 0.0 | 6.3 | NaN | 16 |
5041 | Color | Daniel Hsia | 14.0 | 100.0 | 0.0 | 489.0 | Daniel Henney | 946.0 | 10443.0 | Comedy|Drama|Romance | ... | 9.0 | English | USA | PG-13 | NaN | 2012.0 | 719.0 | 6.3 | 2.35 | 660 |
5042 | Color | Jon Gunn | 43.0 | 90.0 | 16.0 | 16.0 | Brian Herzlinger | 86.0 | 85222.0 | Documentary | ... | 84.0 | English | USA | PG | 1100.0 | 2004.0 | 23.0 | 6.6 | 1.85 | 456 |
5 rows × 28 columns
# Show only part of the data (pass the number of rows to return).
imdb.head(10)
| | color | director_name | num_critic_for_reviews | duration | director_facebook_likes | actor_3_facebook_likes | actor_2_name | actor_1_facebook_likes | gross | genres | ... | num_user_for_reviews | language | country | content_rating | budget | title_year | actor_2_facebook_likes | imdb_score | aspect_ratio | movie_facebook_likes |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Color | James Cameron | 723.0 | 178.0 | 0.0 | 855.0 | Joel David Moore | 1000.0 | 760505847.0 | Action|Adventure|Fantasy|Sci-Fi | ... | 3054.0 | English | USA | PG-13 | 237000000.0 | 2009.0 | 936.0 | 7.9 | 1.78 | 33000 |
1 | Color | Gore Verbinski | 302.0 | 169.0 | 563.0 | 1000.0 | Orlando Bloom | 40000.0 | 309404152.0 | Action|Adventure|Fantasy | ... | 1238.0 | English | USA | PG-13 | 300000000.0 | 2007.0 | 5000.0 | 7.1 | 2.35 | 0 |
2 | Color | Sam Mendes | 602.0 | 148.0 | 0.0 | 161.0 | Rory Kinnear | 11000.0 | 200074175.0 | Action|Adventure|Thriller | ... | 994.0 | English | UK | PG-13 | 245000000.0 | 2015.0 | 393.0 | 6.8 | 2.35 | 85000 |
3 | Color | Christopher Nolan | 813.0 | 164.0 | 22000.0 | 23000.0 | Christian Bale | 27000.0 | 448130642.0 | Action|Thriller | ... | 2701.0 | English | USA | PG-13 | 250000000.0 | 2012.0 | 23000.0 | 8.5 | 2.35 | 164000 |
4 | NaN | Doug Walker | NaN | NaN | 131.0 | NaN | Rob Walker | 131.0 | NaN | Documentary | ... | NaN | NaN | NaN | NaN | NaN | NaN | 12.0 | 7.1 | NaN | 0 |
5 | Color | Andrew Stanton | 462.0 | 132.0 | 475.0 | 530.0 | Samantha Morton | 640.0 | 73058679.0 | Action|Adventure|Sci-Fi | ... | 738.0 | English | USA | PG-13 | 263700000.0 | 2012.0 | 632.0 | 6.6 | 2.35 | 24000 |
6 | Color | Sam Raimi | 392.0 | 156.0 | 0.0 | 4000.0 | James Franco | 24000.0 | 336530303.0 | Action|Adventure|Romance | ... | 1902.0 | English | USA | PG-13 | 258000000.0 | 2007.0 | 11000.0 | 6.2 | 2.35 | 0 |
7 | Color | Nathan Greno | 324.0 | 100.0 | 15.0 | 284.0 | Donna Murphy | 799.0 | 200807262.0 | Adventure|Animation|Comedy|Family|Fantasy|Musi... | ... | 387.0 | English | USA | PG | 260000000.0 | 2010.0 | 553.0 | 7.8 | 1.85 | 29000 |
8 | Color | Joss Whedon | 635.0 | 141.0 | 0.0 | 19000.0 | Robert Downey Jr. | 26000.0 | 458991599.0 | Action|Adventure|Sci-Fi | ... | 1117.0 | English | USA | PG-13 | 250000000.0 | 2015.0 | 21000.0 | 7.5 | 2.35 | 118000 |
9 | Color | David Yates | 375.0 | 153.0 | 282.0 | 10000.0 | Daniel Radcliffe | 25000.0 | 301956980.0 | Adventure|Family|Fantasy|Mystery | ... | 973.0 | English | UK | PG | 250000000.0 | 2009.0 | 11000.0 | 7.5 | 2.35 | 10000 |
10 rows × 28 columns
# Select columns by passing a list of column names: the result is a new
# DataFrame (a single name, e.g. imdb['color'], would return a Series instead).
imdb[['color','director_name']]
| | color | director_name |
---|---|---|
0 | Color | James Cameron |
1 | Color | Gore Verbinski |
2 | Color | Sam Mendes |
3 | Color | Christopher Nolan |
4 | NaN | Doug Walker |
... | ... | ... |
5038 | Color | Scott Smith |
5039 | Color | NaN |
5040 | Color | Benjamin Roberds |
5041 | Color | Daniel Hsia |
5042 | Color | Jon Gunn |
5043 rows × 2 columns
# head() cannot pick an arbitrary row range; it only returns the first n rows.
# Keep three columns of interest in a smaller DataFrame and preview it.
row_bf = imdb[['director_name','movie_title','imdb_score']]
row_bf.head()
| | director_name | movie_title | imdb_score |
---|---|---|---|
0 | James Cameron | Avatar | 7.9 |
1 | Gore Verbinski | Pirates of the Caribbean: At World's End | 7.1 |
2 | Sam Mendes | Spectre | 6.8 |
3 | Christopher Nolan | The Dark Knight Rises | 8.5 |
4 | Doug Walker | Star Wars: Episode VII - The Force Awakens ... | 7.1 |
# Slice by row and column ranges with iloc (here rows 10-19, all columns).
row_bf.iloc[10:20,:]
| | director_name | movie_title | imdb_score |
---|---|---|---|
10 | Zack Snyder | Batman v Superman: Dawn of Justice | 6.9 |
11 | Bryan Singer | Superman Returns | 6.1 |
12 | Marc Forster | Quantum of Solace | 6.7 |
13 | Gore Verbinski | Pirates of the Caribbean: Dead Man's Chest | 7.3 |
14 | Gore Verbinski | The Lone Ranger | 6.5 |
15 | Zack Snyder | Man of Steel | 7.2 |
16 | Andrew Adamson | The Chronicles of Narnia: Prince Caspian | 6.6 |
17 | Joss Whedon | The Avengers | 8.1 |
18 | Rob Marshall | Pirates of the Caribbean: On Stranger Tides | 6.7 |
19 | Barry Sonnenfeld | Men in Black 3 | 6.8 |
# Restrict the columns in the slice as well (rows 10-19, first two columns).
tmp_df = row_bf.iloc[10:20,0:2]
tmp_df
| | director_name | movie_title |
---|---|---|
10 | Zack Snyder | Batman v Superman: Dawn of Justice |
11 | Bryan Singer | Superman Returns |
12 | Marc Forster | Quantum of Solace |
13 | Gore Verbinski | Pirates of the Caribbean: Dead Man's Chest |
14 | Gore Verbinski | The Lone Ranger |
15 | Zack Snyder | Man of Steel |
16 | Andrew Adamson | The Chronicles of Narnia: Prince Caspian |
17 | Joss Whedon | The Avengers |
18 | Rob Marshall | Pirates of the Caribbean: On Stranger Tides |
19 | Barry Sonnenfeld | Men in Black 3 |
# iloc selects by position within the current DataFrame, regardless of the
# index labels: positions 2-3 of tmp_df are the rows labelled 12 and 13.
tmp_df.iloc[2:4,:]
| | director_name | movie_title |
---|---|---|
12 | Marc Forster | Quantum of Solace |
13 | Gore Verbinski | Pirates of the Caribbean: Dead Man's Chest |
# To locate rows by their label instead, use loc (no leading "i"): the lookup
# goes by label rather than by position, and both ends of the slice are
# included (labels 15 through 17).
tmp_df.loc[15:17,:]
| | director_name | movie_title |
---|---|---|
15 | Zack Snyder | Man of Steel |
16 | Andrew Adamson | The Chronicles of Narnia: Prince Caspian |
17 | Joss Whedon | The Avengers |
# Columns can be sliced by label too (here every column up to and including
# 'director_name').
tmp_df.loc[15:17,:'director_name']
| | director_name |
---|---|
15 | Zack Snyder |
16 | Andrew Adamson |
17 | Joss Whedon |