【数据分析与可视化】Pandas Dataframe

2020-07-07 20:12:15 浏览数 (1)

代码语言:javascript复制
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
代码语言:javascript复制
# 引入 webbrowser 模块,用于在浏览器中打开网页
import webbrowser
link = 'https://www.tiobe.com/tiobe-index/'
代码语言:javascript复制
webbrowser.open(link)
代码语言:javascript复制
True
代码语言:javascript复制
# 先在网页中复制表格,再从剪贴板读取数据生成 DataFrame
df = pd.read_clipboard()
df

Year

Winner

2019

medal

C

2018

medal

Python

2017

medal

C

2016

medal

Go

2015

medal

Java

2014

medal

JavaScript

2013

medal

Transact-SQL

2012

medal

Objective-C

2011

medal

Objective-C

2010

medal

Python

2009

medal

Go

2008

medal

C

2007

medal

Python

2006

medal

Ruby

2005

medal

Java

2004

medal

PHP

2003

medal

C

代码语言:javascript复制
# 查看类型-DataFrame
type(df)
代码语言:javascript复制
pandas.core.frame.DataFrame
代码语言:javascript复制
# 返回列
df.columns
代码语言:javascript复制
Index(['Year', 'Winner'], dtype='object')
代码语言:javascript复制
# 以属性方式获取某一列的值(返回 Series)
df.Winner
代码语言:javascript复制
2019               C
2018          Python
2017               C
2016              Go
2015            Java
2014      JavaScript
2013    Transact-SQL
2012     Objective-C
2011     Objective-C
2010          Python
2009              Go
2008               C
2007          Python
2006            Ruby
2005            Java
2004             PHP
2003               C
Name: Winner, dtype: object
代码语言:javascript复制
# 从已有 DataFrame 中提取指定列,生成新的(过滤后的)DataFrame
df_new = DataFrame(df, columns=['Year'])
df_new

Year

2019

medal

2018

medal

2017

medal

2016

medal

2015

medal

2014

medal

2013

medal

2012

medal

2011

medal

2010

medal

2009

medal

2008

medal

2007

medal

2006

medal

2005

medal

2004

medal

2003

medal

代码语言:javascript复制
# 以方括号方式读取某列;列名含空格时只能用此方法(无法使用 df.列名 的属性写法)
df_new['Year']
代码语言:javascript复制
2019    medal
2018    medal
2017    medal
2016    medal
2015    medal
2014    medal
2013    medal
2012    medal
2011    medal
2010    medal
2009    medal
2008    medal
2007    medal
2006    medal
2005    medal
2004    medal
2003    medal
Name: Year, dtype: object
代码语言:javascript复制
# 其中某列的类型 Series
type(df_new['Year'])
代码语言:javascript复制
pandas.core.series.Series
代码语言:javascript复制
# 从已有 DataFrame 中提取指定列,生成新的 DataFrame
# 如果指定的列名在原数据中不存在,该列的值默认为 NaN
df_new = DataFrame(df, columns=['Year','Age'])
df_new

Year

Age

2019

medal

NaN

2018

medal

NaN

2017

medal

NaN

2016

medal

NaN

2015

medal

NaN

2014

medal

NaN

2013

medal

NaN

2012

medal

NaN

2011

medal

NaN

2010

medal

NaN

2009

medal

NaN

2008

medal

NaN

2007

medal

NaN

2006

medal

NaN

2005

medal

NaN

2004

medal

NaN

2003

medal

NaN

代码语言:javascript复制
# 通过类似字典的键赋值方式给整列赋值(序列长度必须与行数一致,这里共 17 行)
df_new['Age'] = range(0,17)
df_new

Year

Age

2019

medal

0

2018

medal

1

2017

medal

2

2016

medal

3

2015

medal

4

2014

medal

5

2013

medal

6

2012

medal

7

2011

medal

8

2010

medal

9

2009

medal

10

2008

medal

11

2007

medal

12

2006

medal

13

2005

medal

14

2004

medal

15

2003

medal

16

代码语言:javascript复制
df_new['Age'] = np.arange(0,17)
df_new

Year

Age

2019

medal

0

2018

medal

1

2017

medal

2

2016

medal

3

2015

medal

4

2014

medal

5

2013

medal

6

2012

medal

7

2011

medal

8

2010

medal

9

2009

medal

10

2008

medal

11

2007

medal

12

2006

medal

13

2005

medal

14

2004

medal

15

2003

medal

16

代码语言:javascript复制
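# 利用 DataFrame 某列是 Series 类型的特点,用 Series 给整列赋值
# 注意:Series 赋值会按索引对齐;这里 Series 的默认索引为 0~16,与 df_new 的索引(2019~2003)不匹配,因此 Age 全部为 NaN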
df_new['Age'] = pd.Series(np.arange(0,17))
df_new

Year

Age

2019

NaN

NaN

2018

NaN

NaN

2017

NaN

NaN

2016

NaN

NaN

2015

NaN

NaN

2014

NaN

NaN

2013

NaN

NaN

2012

NaN

NaN

2011

NaN

NaN

2010

NaN

NaN

2009

NaN

NaN

2008

NaN

NaN

2007

NaN

NaN

2006

NaN

NaN

2005

NaN

NaN

2004

NaN

NaN

2003

NaN

NaN

代码语言:javascript复制
# 用带有指定 index 的 Series 给整列重新赋值:只有 index 匹配的行(2019、2018)被赋值,其余行为 NaN
df_new['Age'] = pd.Series([18, 17], index=[2019, 2018])
df_new

Year

Age

2019

NaN

18.0

2018

NaN

17.0

2017

NaN

NaN

2016

NaN

NaN

2015

NaN

NaN

2014

NaN

NaN

2013

NaN

NaN

2012

NaN

NaN

2011

NaN

NaN

2010

NaN

NaN

2009

NaN

NaN

2008

NaN

NaN

2007

NaN

NaN

2006

NaN

NaN

2005

NaN

NaN

2004

NaN

NaN

2003

NaN

NaN

0 人点赞