import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# 引入网页
import webbrowser
# URL of the TIOBE index page that is opened in the browser below.
link = 'https://www.tiobe.com/tiobe-index/'
webbrowser.open(link)
True
# 获取剪贴板数据
# Parse the current clipboard contents into a DataFrame, then display it.
df = pd.read_clipboard()
df
| | Year | Winner |
---|---|---|
2019 | medal | C |
2018 | medal | Python |
2017 | medal | C |
2016 | medal | Go |
2015 | medal | Java |
2014 | medal | JavaScript |
2013 | medal | Transact-SQL |
2012 | medal | Objective-C |
2011 | medal | Objective-C |
2010 | medal | Python |
2009 | medal | Go |
2008 | medal | C |
2007 | medal | Python |
2006 | medal | Ruby |
2005 | medal | Java |
2004 | medal | PHP |
2003 | medal | C |
# Check the object's type: it is a DataFrame.
type(df)
pandas.core.frame.DataFrame
# 返回列
df.columns
Index(['Year', 'Winner'], dtype='object')
# 获取某列的value
df.Winner
2019 C
2018 Python
2017 C
2016 Go
2015 Java
2014 JavaScript
2013 Transact-SQL
2012 Objective-C
2011 Objective-C
2010 Python
2009 Go
2008 C
2007 Python
2006 Ruby
2005 Java
2004 PHP
2003 C
Name: Winner, dtype: object
# 提取数据生成新 过滤DataFrame
# Build a new DataFrame from df that keeps only the 'Year' column.
df_new = DataFrame(df, columns=['Year'])
df_new
| | Year |
---|---|
2019 | medal |
2018 | medal |
2017 | medal |
2016 | medal |
2015 | medal |
2014 | medal |
2013 | medal |
2012 | medal |
2011 | medal |
2010 | medal |
2009 | medal |
2008 | medal |
2007 | medal |
2006 | medal |
2005 | medal |
2004 | medal |
2003 | medal |
# If a column name contains spaces, read the column with bracket indexing
# like this.
df_new['Year']
2019 medal
2018 medal
2017 medal
2016 medal
2015 medal
2014 medal
2013 medal
2012 medal
2011 medal
2010 medal
2009 medal
2008 medal
2007 medal
2006 medal
2005 medal
2004 medal
2003 medal
Name: Year, dtype: object
# 其中某列的类型 Series
type(df_new['Year'])
pandas.core.series.Series
# 提取数据生成新 过滤DataFrame
# If a requested column name does not exist in the source frame ('Age' here),
# its values default to NaN.
df_new = DataFrame(df, columns=['Year','Age'])
df_new
| | Year | Age |
---|---|---|
2019 | medal | NaN |
2018 | medal | NaN |
2017 | medal | NaN |
2016 | medal | NaN |
2015 | medal | NaN |
2014 | medal | NaN |
2013 | medal | NaN |
2012 | medal | NaN |
2011 | medal | NaN |
2010 | medal | NaN |
2009 | medal | NaN |
2008 | medal | NaN |
2007 | medal | NaN |
2006 | medal | NaN |
2005 | medal | NaN |
2004 | medal | NaN |
2003 | medal | NaN |
# Fill the empty 'Age' column using dict-style assignment; the value must
# supply an entry for every row of the column (17 rows here).
df_new['Age'] = range(0,17)
df_new
| | Year | Age |
---|---|---|
2019 | medal | 0 |
2018 | medal | 1 |
2017 | medal | 2 |
2016 | medal | 3 |
2015 | medal | 4 |
2014 | medal | 5 |
2013 | medal | 6 |
2012 | medal | 7 |
2011 | medal | 8 |
2010 | medal | 9 |
2009 | medal | 10 |
2008 | medal | 11 |
2007 | medal | 12 |
2006 | medal | 13 |
2005 | medal | 14 |
2004 | medal | 15 |
2003 | medal | 16 |
# Same full-column assignment, this time from a NumPy array of 17 values.
df_new['Age'] = np.arange(0,17)
df_new
| | Year | Age |
---|---|---|
2019 | medal | 0 |
2018 | medal | 1 |
2017 | medal | 2 |
2016 | medal | 3 |
2015 | medal | 4 |
2014 | medal | 5 |
2013 | medal | 6 |
2012 | medal | 7 |
2011 | medal | 8 |
2010 | medal | 9 |
2009 | medal | 10 |
2008 | medal | 11 |
2007 | medal | 12 |
2006 | medal | 13 |
2005 | medal | 14 |
2004 | medal | 15 |
2003 | medal | 16 |
# Take advantage of the fact that a DataFrame column is a Series: assign a
# Series to the column.
# NOTE: this Series has the default 0..16 index while df_new is indexed by
# year (2019..2003). Column assignment from a Series aligns on index labels,
# not position, so no labels match and the output below shows Age as NaN.
df_new['Age'] = pd.Series(np.arange(0,17))
df_new
| | Year | Age |
---|---|---|
2019 | NaN | NaN |
2018 | NaN | NaN |
2017 | NaN | NaN |
2016 | NaN | NaN |
2015 | NaN | NaN |
2014 | NaN | NaN |
2013 | NaN | NaN |
2012 | NaN | NaN |
2011 | NaN | NaN |
2010 | NaN | NaN |
2009 | NaN | NaN |
2008 | NaN | NaN |
2007 | NaN | NaN |
2006 | NaN | NaN |
2005 | NaN | NaN |
2004 | NaN | NaN |
2003 | NaN | NaN |
# Set only some of the values: the Series index (2019, 2018) picks the rows
# that receive a value; rows with no matching label come out as NaN, which is
# why the output shows the assigned values as floats (18.0, 17.0).
df_new['Age'] = pd.Series([18, 17], index=[2019, 2018])
df_new
| | Year | Age |
---|---|---|
2019 | NaN | 18.0 |
2018 | NaN | 17.0 |
2017 | NaN | NaN |
2016 | NaN | NaN |
2015 | NaN | NaN |
2014 | NaN | NaN |
2013 | NaN | NaN |
2012 | NaN | NaN |
2011 | NaN | NaN |
2010 | NaN | NaN |
2009 | NaN | NaN |
2008 | NaN | NaN |
2007 | NaN | NaN |
2006 | NaN | NaN |
2005 | NaN | NaN |
2004 | NaN | NaN |
2003 | NaN | NaN |