import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# 生成以时间为索引的Series序列
# Daily timestamps from 2019-01-01 through 2019-12-29, both ends inclusive.
t = pd.date_range(start='2019-01-01', end='2019-12-29', freq='D')
t
DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04',
'2019-01-05', '2019-01-06', '2019-01-07', '2019-01-08',
'2019-01-09', '2019-01-10',
...
'2019-12-20', '2019-12-21', '2019-12-22', '2019-12-23',
'2019-12-24', '2019-12-25', '2019-12-26', '2019-12-27',
'2019-12-28', '2019-12-29'],
dtype='datetime64[ns]', length=363, freq='D')
s1 = Series(np.random.randn(len(t)), index=t)
s1
2019-01-01 -0.951227
2019-01-02 0.761531
2019-01-03 0.146341
2019-01-04 0.249194
2019-01-05 -0.437687
...
2019-12-25 0.169545
2019-12-26 3.220068
2019-12-27 1.515246
2019-12-28 -0.622776
2019-12-29 0.609221
Freq: D, Length: 363, dtype: float64
# 采样月份数据
# Select a single month: a partial date string matches every daily entry
# that falls inside January 2019.
s1.loc['2019-01']
2019-01-01 -0.951227
2019-01-02 0.761531
2019-01-03 0.146341
2019-01-04 0.249194
2019-01-05 -0.437687
2019-01-06 1.186244
2019-01-07 0.974844
2019-01-08 0.521287
2019-01-09 1.715429
2019-01-10 2.260809
2019-01-11 0.758895
2019-01-12 -1.564395
2019-01-13 -0.505064
2019-01-14 -0.585892
2019-01-15 0.055110
2019-01-16 -0.610403
2019-01-17 0.525841
2019-01-18 -1.189281
2019-01-19 -2.111323
2019-01-20 0.326681
2019-01-21 0.157808
2019-01-22 -1.162134
2019-01-23 0.230476
2019-01-24 1.347033
2019-01-25 0.909771
2019-01-26 -0.033264
2019-01-27 -0.307241
2019-01-28 -1.847770
2019-01-29 -0.222650
2019-01-30 1.248396
2019-01-31 -0.051844
Freq: D, dtype: float64
# 单个月份的平均值(可以逐月计算后把结果放进新的Series,但pandas提供了更方便的方法,见下文的resample)
# Average of all daily values that fall in January 2019.
s1.loc['2019-01'].mean()
0.05791979036590383
# pandas的resample实现了时间重采样(天数据 -> 月数据),这里对每个月取平均值
# Downsample the daily series to one value per calendar month by averaging
# each month's observations; the result is labelled with month-end dates.
# NOTE(review): pandas 2.2+ reportedly deprecates the 'M' alias in favour of
# 'ME' (month end) — confirm against the installed pandas version.
s_m1 = s1.resample('M').mean()
s_m1
2019-01-31 0.057920
2019-02-28 0.146369
2019-03-31 0.010041
2019-04-30 0.000835
2019-05-31 -0.125909
2019-06-30 0.159881
2019-07-31 0.189943
2019-08-31 -0.337287
2019-09-30 0.005125
2019-10-31 -0.132957
2019-11-30 0.076836
2019-12-31 0.203451
Freq: M, dtype: float64
# 升采样(天 -> 小时)时,resample提供了几种填充数据的方式(ffill为前向填充:每个小时沿用它之前最近一天的值)
# Upsample the daily series to hourly frequency; every new hourly slot takes
# the most recent earlier daily value (forward fill).
# The lowercase 'h' alias is used because recent pandas releases deprecate
# the uppercase 'H' spelling.
s1.resample('h').ffill()
2019-01-01 00:00:00 -0.951227
2019-01-01 01:00:00 -0.951227
2019-01-01 02:00:00 -0.951227
2019-01-01 03:00:00 -0.951227
2019-01-01 04:00:00 -0.951227
...
2019-12-28 20:00:00 -0.622776
2019-12-28 21:00:00 -0.622776
2019-12-28 22:00:00 -0.622776
2019-12-28 23:00:00 -0.622776
2019-12-29 00:00:00 0.609221
Freq: H, Length: 8689, dtype: float64
# bfill为后向填充:每个小时取它之后最近一天的值(例如 2019-01-01 01:00 的数据来自 2019-01-02)
# Upsample the daily series to hourly frequency; every new hourly slot takes
# the next available daily value (backward fill).
# The lowercase 'h' alias is used because recent pandas releases deprecate
# the uppercase 'H' spelling.
s1.resample('h').bfill()
2019-01-01 00:00:00 -0.951227
2019-01-01 01:00:00 0.761531
2019-01-01 02:00:00 0.761531
2019-01-01 03:00:00 0.761531
2019-01-01 04:00:00 0.761531
...
2019-12-28 20:00:00 0.609221
2019-12-28 21:00:00 0.609221
2019-12-28 22:00:00 0.609221
2019-12-28 23:00:00 0.609221
2019-12-29 00:00:00 0.609221
Freq: H, Length: 8689, dtype: float64
模拟构建时间序列图
# 一年按小时生成数据存入DataFrame
# Hourly timestamps from 2019-01-01 00:00 through 2019-12-29 00:00, both
# ends inclusive.
# The lowercase 'h' alias is used because recent pandas releases deprecate
# the uppercase 'H' spelling.
t2 = pd.date_range(start='2019-01-01', end='2019-12-29', freq='h')
t2
DatetimeIndex(['2019-01-01 00:00:00', '2019-01-01 01:00:00',
'2019-01-01 02:00:00', '2019-01-01 03:00:00',
'2019-01-01 04:00:00', '2019-01-01 05:00:00',
'2019-01-01 06:00:00', '2019-01-01 07:00:00',
'2019-01-01 08:00:00', '2019-01-01 09:00:00',
...
'2019-12-28 15:00:00', '2019-12-28 16:00:00',
'2019-12-28 17:00:00', '2019-12-28 18:00:00',
'2019-12-28 19:00:00', '2019-12-28 20:00:00',
'2019-12-28 21:00:00', '2019-12-28 22:00:00',
'2019-12-28 23:00:00', '2019-12-29 00:00:00'],
dtype='datetime64[ns]', length=8689, freq='H')
df = DataFrame(index=t2)
df
2019-01-01 00:00:00 |
2019-01-01 01:00:00 |
2019-01-01 02:00:00 |
2019-01-01 03:00:00 |
2019-01-01 04:00:00 |
... |
2019-12-28 20:00:00 |
2019-12-28 21:00:00 |
2019-12-28 22:00:00 |
2019-12-28 23:00:00 |
2019-12-29 00:00:00 |
8689 rows × 0 columns
# 插入xx公司股票数据
# Fill the 'AL' column with one random integer price per hourly timestamp.
# randint's upper bound is exclusive, so values range from 80 to 159.
df['AL'] = np.random.randint(low=80, high=160, size=len(t2))
df
AL | |
---|---|
2019-01-01 00:00:00 | 116 |
2019-01-01 01:00:00 | 102 |
2019-01-01 02:00:00 | 124 |
2019-01-01 03:00:00 | 81 |
2019-01-01 04:00:00 | 152 |
... | ... |
2019-12-28 20:00:00 | 114 |
2019-12-28 21:00:00 | 91 |
2019-12-28 22:00:00 | 89 |
2019-12-28 23:00:00 | 159 |
2019-12-29 00:00:00 | 133 |
8689 rows × 1 columns
df['TC'] = np.random.randint(30,50, size=len(t2))
df
AL | TC | |
---|---|---|
2019-01-01 00:00:00 | 116 | 40 |
2019-01-01 01:00:00 | 102 | 43 |
2019-01-01 02:00:00 | 124 | 33 |
2019-01-01 03:00:00 | 81 | 46 |
2019-01-01 04:00:00 | 152 | 49 |
... | ... | ... |
2019-12-28 20:00:00 | 114 | 44 |
2019-12-28 21:00:00 | 91 | 33 |
2019-12-28 22:00:00 | 89 | 44 |
2019-12-28 23:00:00 | 159 | 35 |
2019-12-29 00:00:00 | 133 | 36 |
8689 rows × 2 columns
df.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x128151110>
# 画图
# Display the figure produced by the earlier df.plot() call.
from matplotlib import pyplot as plt
plt.show()
# 改成每周采样
# Downsample both price columns to weekly means in a single resample pass,
# rather than creating an empty frame and filling it one column at a time.
# The result keeps the 'AL' and 'TC' columns, indexed by week-ending dates.
week_df = df[['AL', 'TC']].resample('W').mean()
week_df
AL | TC | |
---|---|---|
2019-01-06 | 119.333333 | 39.958333 |
2019-01-13 | 121.142857 | 39.988095 |
2019-01-20 | 119.053571 | 38.898810 |
2019-01-27 | 120.613095 | 38.339286 |
2019-02-03 | 118.833333 | 39.625000 |
2019-02-10 | 115.517857 | 39.392857 |
2019-02-17 | 120.738095 | 38.755952 |
2019-02-24 | 119.440476 | 38.809524 |
2019-03-03 | 122.345238 | 39.470238 |
2019-03-10 | 121.827381 | 39.738095 |
2019-03-17 | 120.660714 | 39.226190 |
2019-03-24 | 118.303571 | 39.011905 |
2019-03-31 | 119.047619 | 39.672619 |
2019-04-07 | 121.892857 | 38.654762 |
2019-04-14 | 120.613095 | 38.761905 |
2019-04-21 | 118.327381 | 38.791667 |
2019-04-28 | 119.023810 | 40.261905 |
2019-05-05 | 121.297619 | 39.392857 |
2019-05-12 | 120.130952 | 39.238095 |
2019-05-19 | 121.577381 | 39.696429 |
2019-05-26 | 120.148810 | 40.107143 |
2019-06-02 | 118.940476 | 39.458333 |
2019-06-09 | 119.821429 | 39.607143 |
2019-06-16 | 116.351190 | 39.386905 |
2019-06-23 | 118.755952 | 39.619048 |
2019-06-30 | 117.404762 | 39.142857 |
2019-07-07 | 119.898810 | 39.934524 |
2019-07-14 | 118.125000 | 39.559524 |
2019-07-21 | 117.690476 | 39.255952 |
2019-07-28 | 119.113095 | 39.279762 |
2019-08-04 | 118.696429 | 39.357143 |
2019-08-11 | 119.642857 | 39.208333 |
2019-08-18 | 121.511905 | 39.863095 |
2019-08-25 | 117.261905 | 40.125000 |
2019-09-01 | 123.285714 | 40.404762 |
2019-09-08 | 118.470238 | 39.940476 |
2019-09-15 | 116.636905 | 39.107143 |
2019-09-22 | 116.702381 | 39.226190 |
2019-09-29 | 121.327381 | 40.297619 |
2019-10-06 | 120.833333 | 39.410714 |
2019-10-13 | 119.505952 | 38.982143 |
2019-10-20 | 119.946429 | 41.017857 |
2019-10-27 | 118.988095 | 39.482143 |
2019-11-03 | 117.994048 | 39.440476 |
2019-11-10 | 115.077381 | 39.803571 |
2019-11-17 | 119.732143 | 40.238095 |
2019-11-24 | 116.035714 | 38.815476 |
2019-12-01 | 118.250000 | 39.910714 |
2019-12-08 | 120.005952 | 39.434524 |
2019-12-15 | 121.797619 | 40.261905 |
2019-12-22 | 120.553571 | 39.702381 |
2019-12-29 | 121.703448 | 39.724138 |
# Plot the weekly averages of both columns on one set of axes.
week_df.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x127e82f90>