pandas数据分析练习记录

2022-08-24 12:29:38 浏览数 (1)

pandas数据分析练习

代码语言:python
# coding=utf-8
"""
    @Project :pachong-master 
    @File    :list_series.py
    @Author  :gaojs
    @Date    :2022/6/5 22:06
    @Blogs   : https://www.gaojs.com.cn
"""
import pandas as pd
import numpy as np


class Pandas:
    """
    Small pandas practice snippets.

    Every public method builds a tiny example object, prints it to stdout
    and returns ``None``.
    """

    # Default file written by saveCSV and read back by csvLoadDataframe.
    _CSV_PATH = '分布数据前50.csv'

    def __init__(self):
        pass

    @staticmethod
    def _grade_series():
        """
        Build the course -> grade example Series shared by several demos.

        :return: a fresh Series indexed by course name
        """
        grades = {"语文": 80, "数学": 90, "英语": 100, "计算机": 95}
        return pd.Series(data=grades)

    @staticmethod
    def _random_frame(periods=1000):
        """
        Build a daily-indexed DataFrame of random samples starting 2022-01-01.

        :param periods: number of consecutive days (rows) to generate
        :return: DataFrame with columns 'norm', 'unifom' and 'binomial'
        """
        days = pd.date_range(start='2022-1-01', periods=periods, freq='D')
        samples = {
            'norm': np.random.normal(loc=0, scale=1, size=periods),
            # NOTE: 'unifom' is a misspelling of 'uniform'; the key is kept
            # unchanged so that already written CSV files keep the same header.
            'unifom': np.random.uniform(low=0, high=1, size=periods),
            'binomial': np.random.binomial(n=1, p=0.2, size=periods)
        }
        return pd.DataFrame(data=samples, index=days)

    def list_series(self):
        """
        Convert a list to a Series; the index is the default integer index.

        :return: None
        """
        course = ["语文", "数学", "英语", "计算机"]
        data = pd.Series(data=course)
        print(data)

    def dict_series(self):
        """
        Convert a dict to a Series; the dict keys become the index.

        :return: None
        """
        print(self._grade_series())

    def series_list(self):
        """
        Convert a Series to a plain Python list of its values.

        :return: None
        """
        members = self._grade_series().to_list()
        print(members)

    def series_dataframe(self):
        """
        Convert a Series to a one-column DataFrame named 'grade'.

        :return: None
        """
        # to_frame(name=...) keeps the values whatever the Series name is.
        df = self._grade_series().to_frame(name='grade')
        print(df)

    def numpy_create_series(self):
        """
        Create a Series from numpy ranges.

        :return: None
        """
        s = pd.Series(
            # values: 10..90, step 10
            np.arange(10, 100, 10),
            # index: 101..109, step 1
            index=np.arange(101, 110),
            # stored as float64
            dtype='float'
        )
        print(s)

    def series_datatype(self):
        """
        Convert the element type of a Series from str to int.

        :return: None
        """
        s = pd.Series(
            data=["001", "002", "003", "004"],
            index=list("abcd")
        )
        # s.astype(int) gives the same result; here ``int`` is passed to
        # map as the conversion function applied to each element.
        s = s.map(int)
        print(s)

    def series_add_ele(self):
        """
        Append new elements to a Series.

        :return: None
        """
        extra = pd.Series({
            "物理": 99,
            "高数": 88
        })
        # Series.append was removed in pandas 2.0; pd.concat is the
        # replacement and also works on older versions.
        data = pd.concat([self._grade_series(), extra])
        print(data)

    def series_to_dataframe(self):
        """
        Convert a Series to a two-column DataFrame (index and values).

        :return: None
        """
        df = self._grade_series().reset_index()
        df.columns = ['course', 'grade']
        print(df)

    def dict_create_dataframe(self):
        """
        Create a DataFrame from a dict of equal-length lists.

        :return: None
        """
        df = pd.DataFrame(
            {
                "姓名": ["张三", "李四", "王麻子", "高先生"],
                "性别": ["男", "女", "男", "女"],
                "年龄": [18, 19, 20, 21]
            }
        )
        print(df)

    def set_dataframe_index(self):
        """
        Use an existing column ("姓名") as the DataFrame index.

        :return: None
        """
        df = pd.DataFrame(
            {
                "姓名": ["张三", "李四", "王麻子", "高先生"],
                "性别": ["男", "女", "男", "女"],
                "年龄": [18, 19, 20, 21]
            }
        )
        df.set_index("姓名", inplace=True)
        print(df)

    def dataMonth(self):
        """
        Print every date of May 2022.

        :return: None
        """
        # Equivalent: pd.date_range(start='2022-05-01', end='2022-05-31')
        date_range = pd.date_range(start='2022-05-01', periods=31)
        print(date_range)

    def mondayDate(self, year=2022):
        """
        Print the dates of all Mondays in a year.

        :param year: calendar year to enumerate (default 2022)
        :return: None
        """
        # A start/end range is used instead of a fixed periods=52 because
        # some years contain 53 Mondays.
        data_range = pd.date_range(
            start='{}-01-01'.format(year),
            end='{}-12-31'.format(year),
            freq='W-MON'
        )
        print(data_range)

    def dayHourDate(self):
        """
        Print the 24 hourly timestamps of 2022-01-01.

        :return: None
        """
        # The ``closed`` argument of date_range was removed in pandas 2.0 and
        # the 'H' alias is deprecated; 24 periods with an Hour offset yields
        # the same left-closed day on every pandas version.
        data_range = pd.date_range(
            start='2022-01-01', periods=24, freq=pd.offsets.Hour()
        )
        print(data_range)

    def dateToDataframe(self):
        """
        Build a DataFrame from a date range and add the day-of-year column.

        :return: None
        """
        data_range = pd.date_range(start='2022-5-01', periods=31, freq='D')
        df = pd.DataFrame(data=data_range, columns=['day'])
        df['day_of_year'] = df['day'].dt.dayofyear
        print(df)

    def dateToRandomDataFrame(self):
        """
        Build a date-indexed DataFrame of random samples drawn from normal,
        uniform and binomial distributions.

        :return: None
        """
        print(self._random_frame())

    def logHeadLine(self):
        """
        Print the first 10 rows and the last 5 rows of the random DataFrame.

        :return: None
        """
        df = self._random_frame()
        print(df.head(10))
        print(df.tail(5))

    def catDataFrame(self):
        """
        Show basic information and summary statistics of the random DataFrame.

        :return: None
        """
        df = self._random_frame()
        # info() writes to stdout itself and returns None, so it must not be
        # wrapped in print() (that would emit a stray "None").
        df.info()
        print(df.describe())

    def countDataAppear(self):
        """
        Count how often each value occurs in the 'binomial' column.

        :return: None
        """
        df = self._random_frame()
        print(df['binomial'].value_counts())

    def saveCSV(self, rows=50, path=_CSV_PATH):
        """
        Save the first rows of the random DataFrame to a CSV file.

        The default row count matches the default file name ("前50" = first
        50); previously the code wrote 10 rows while the docstring said 100.

        :param rows: number of leading rows to write
        :param path: destination CSV path
        :return: None
        """
        self._random_frame().head(rows).to_csv(path)

    def csvLoadDataframe(self, path=_CSV_PATH):
        """
        Load a CSV file into a DataFrame, using its first column as the index,
        and print its info and first rows.

        :param path: CSV path to read (by default the file saveCSV writes)
        :return: None
        """
        df = pd.read_csv(path, index_col=0)
        # info() prints by itself; see catDataFrame.
        df.info()
        print(df.head())

# test = Pandas()
# test.csvLoadDataframe()

未完待续

0 人点赞