pandas数据分析练习
代码语言:python
# coding=utf-8
"""
@Project :pachong-master
@File :list_series.py
@Author :gaojs
@Date :2022/6/5 22:06
@Blogs : https://www.gaojs.com.cn
"""
import pandas as pd
import numpy as np
class Pandas:
    """
    Collection of small pandas practice exercises.

    Every exercise builds a small example object, prints it (the original
    behaviour) and additionally returns it so callers can inspect or reuse
    the result.
    """

    # File written by saveCSV() and read back by csvLoadDataframe().
    _CSV_PATH = '分布数据前50.csv'

    @staticmethod
    def _grades_series() -> pd.Series:
        """Return the course -> grade example Series used by several exercises."""
        return pd.Series(data={"语文": 80, "数学": 90, "英语": 100, "计算机": 95})

    @staticmethod
    def _people_frame() -> pd.DataFrame:
        """Return the name / gender / age example DataFrame."""
        return pd.DataFrame(
            {
                "姓名": ["张三", "李四", "王麻子", "高先生"],
                "性别": ["男", "女", "男", "女"],
                "年龄": [18, 19, 20, 21],
            }
        )

    @staticmethod
    def _random_frame(periods: int = 1000) -> pd.DataFrame:
        """
        Return a daily-indexed DataFrame of random samples starting 2022-01-01.

        Columns are drawn in the order normal, uniform, binomial so that a
        seeded global NumPy RNG yields the same values as the inline code
        this helper replaces.  The 'unifom' spelling is kept on purpose: it
        is the column name that ends up in the CSV written by saveCSV().
        """
        index = pd.date_range(start='2022-1-01', periods=periods, freq='D')
        columns = {
            'norm': np.random.normal(loc=0, scale=1, size=periods),
            'unifom': np.random.uniform(low=0, high=1, size=periods),
            'binomial': np.random.binomial(n=1, p=0.2, size=periods),
        }
        return pd.DataFrame(data=columns, index=index)

    def list_series(self) -> pd.Series:
        """
        Convert a list to a Series; the index is the default 0..n-1 integers.
        :return: the Series that was printed
        """
        course = ["语文", "数学", "英语", "计算机"]
        data = pd.Series(data=course)
        print(data)
        return data

    def dict_series(self) -> pd.Series:
        """
        Convert a dict to a Series; the dict keys become the index.
        :return: the Series that was printed
        """
        data = self._grades_series()
        print(data)
        return data

    def series_list(self) -> list:
        """
        Convert a Series to a plain Python list of its values.
        :return: the list that was printed
        """
        members = self._grades_series().to_list()
        print(members)
        return members

    def series_dataframe(self) -> pd.DataFrame:
        """
        Convert a Series to a single-column DataFrame named 'grade'.
        :return: the DataFrame that was printed
        """
        df = self._grades_series().to_frame(name='grade')
        print(df)
        return df

    def numpy_create_series(self) -> pd.Series:
        """
        Create a Series from NumPy ranges.
        :return: the Series that was printed
        """
        s = pd.Series(
            # values: 10..90 in steps of 10
            np.arange(10, 100, 10),
            # index: 101..109 in steps of 1
            index=np.arange(101, 110),
            # stored as float64
            dtype='float',
        )
        print(s)
        return s

    def series_datatype(self) -> pd.Series:
        """
        Convert the element type of a Series from str to int.
        :return: the converted Series that was printed
        """
        s = pd.Series(
            data=["001", "002", "003", "004"],
            index=list("abcd"),
        )
        # s.astype(int) gives the same result; here the builtin int is
        # applied element-wise through map().
        s = s.map(int)
        print(s)
        return s

    def series_add_ele(self) -> pd.Series:
        """
        Add new elements to a Series.
        :return: the extended Series that was printed
        """
        data = self._grades_series()
        extra = pd.Series({
            "物理": 99,
            "高数": 88,
        })
        # Series.append() was removed in pandas 2.0; pd.concat() is the
        # replacement and works on older versions as well.
        data = pd.concat([data, extra])
        print(data)
        return data

    def series_to_dataframe(self) -> pd.DataFrame:
        """
        Convert a Series to a two-column DataFrame (index and values).
        :return: the DataFrame that was printed
        """
        df = self._grades_series().reset_index()
        df.columns = ['course', 'grade']
        print(df)
        return df

    def dict_create_dataframe(self) -> pd.DataFrame:
        """
        Create a DataFrame from a dict of columns.
        :return: the DataFrame that was printed
        """
        df = self._people_frame()
        print(df)
        return df

    def set_dataframe_index(self) -> pd.DataFrame:
        """
        Use the "姓名" column of a DataFrame as its index.
        :return: the re-indexed DataFrame that was printed
        """
        df = self._people_frame().set_index("姓名")
        print(df)
        return df

    def dataMonth(self) -> pd.DatetimeIndex:
        """
        Build every date of May 2022.
        :return: the DatetimeIndex that was printed
        """
        # Equivalent: pd.date_range(start='2022-05-01', end='2022-05-31')
        date_range = pd.date_range(start='2022-05-01', periods=31)
        print(date_range)
        return date_range

    def mondayDate(self) -> pd.DatetimeIndex:
        """
        Build the 52 Mondays that follow 2022-01-01.
        :return: the DatetimeIndex that was printed
        """
        # Canonical upper-case anchor ('W-MON') is accepted by every pandas
        # version, unlike mixed/lower-case spellings.
        data_range = pd.date_range(start='2022-01-01', periods=52, freq='W-MON')
        print(data_range)
        return data_range

    def dayHourDate(self) -> pd.DatetimeIndex:
        """
        Build the 24 hourly timestamps of 2022-01-01 (00:00 through 23:00).
        :return: the DatetimeIndex that was printed
        """
        # The previous form used end='2022-01-02' with closed='left', but the
        # `closed` argument was removed in pandas 2.0 and the 'H' alias is
        # deprecated in 2.2.  An explicit Hour offset with periods=24 gives
        # the same timestamps on every version.
        data_range = pd.date_range(
            start='2022-01-01', periods=24, freq=pd.offsets.Hour()
        )
        print(data_range)
        return data_range

    def dateToDataframe(self) -> pd.DataFrame:
        """
        Build a DataFrame from a date range and add the day-of-year column.
        :return: the DataFrame that was printed
        """
        data_range = pd.date_range(start='2022-5-01', periods=31, freq='D')
        df = pd.DataFrame(data=data_range, columns=['day'])
        df['day_of_year'] = df['day'].dt.dayofyear
        print(df)
        return df

    def dateToRandomDataFrame(self) -> pd.DataFrame:
        """
        Build a date-indexed DataFrame of random samples drawn from normal,
        uniform and binomial distributions.
        :return: the DataFrame that was printed
        """
        df = self._random_frame()
        print(df)
        return df

    def logHeadLine(self) -> tuple:
        """
        Print the first 10 and the last 5 rows of a random DataFrame.
        :return: ``(head, tail)`` - the two slices that were printed
        """
        df = self._random_frame()
        head, tail = df.head(10), df.tail(5)
        print(head)
        print(tail)
        return head, tail

    def catDataFrame(self) -> pd.DataFrame:
        """
        Show basic information about a random DataFrame.
        :return: the ``describe()`` summary that was printed
        """
        df = self._random_frame()
        # info() writes its report itself and returns None, so wrapping it
        # in print() only added a stray "None" line.
        df.info()
        summary = df.describe()
        print(summary)
        return summary

    def countDataAppear(self) -> pd.Series:
        """
        Count how often each value occurs in the 'binomial' column.
        :return: the value counts that were printed
        """
        counts = self._random_frame()['binomial'].value_counts()
        print(counts)
        return counts

    def saveCSV(self, path: str = _CSV_PATH, rows: int = 10) -> pd.DataFrame:
        """
        Save the first ``rows`` rows of a random DataFrame to a CSV file.

        NOTE: the default file name mentions 50 rows while only 10 are
        written by default; both defaults are kept for backward
        compatibility.  Pass ``rows`` explicitly to write a different count.
        :param path: destination CSV file
        :param rows: number of leading rows to write
        :return: the slice that was written
        """
        subset = self._random_frame().head(rows)
        subset.to_csv(path)
        return subset

    def csvLoadDataframe(self, path: str = _CSV_PATH) -> pd.DataFrame:
        """
        Load a CSV file into a DataFrame, using its first column as index.
        :param path: CSV file to read (defaults to the file saveCSV() writes)
        :return: the loaded DataFrame
        """
        df = pd.read_csv(path, index_col=0)
        # info() prints directly; see catDataFrame().
        df.info()
        print(df.head())
        return df
# test = Pandas()
# test.csvLoadDataframe()
未完待续