import datetime as dt
import numpy as np
import pandas as pd
from create_time_feature import create_time_feature as ctf
# Build the timestamp axis: `size` points ending at the current moment, spaced
# a little more than one day apart so the time of day drifts from row to row.
today = dt.datetime.now()
size = 600
full_time = pd.date_range(
    end=today,
    periods=size,
    freq='1D 45min 13s 451521us',
)

# Draw the transaction amounts; seeding with `size` makes the draw repeatable.
np.random.seed(size)
consume_num = np.random.uniform(low=0, high=1000, size=size)

# Reduce every timestamp to its calendar date: format it as a "%Y-%m-%d"
# string, then parse the strings back into a date index
# (pd.to_datetime and pd.DatetimeIndex both work for the parsing step).
consume_date = pd.to_datetime(full_time.strftime("%Y-%m-%d"))

# Assemble the data frame: amounts and full timestamps, indexed by date.
dict_ = dict(Amount=consume_num, Time=full_time)
sales = pd.DataFrame(data=dict_, index=consume_date)
sales.head()
Amount | Time | |
---|---|---|
2017-07-11 | 32.367275 | 2017-07-11 17:30:14.852224 |
2017-07-12 | 542.409663 | 2017-07-12 18:15:28.303745 |
2017-07-13 | 802.919610 | 2017-07-13 19:00:41.755266 |
2017-07-14 | 55.159403 | 2017-07-14 19:45:55.206787 |
2017-07-15 | 382.264775 | 2017-07-15 20:31:08.658308 |
# Run the project helper `create_time_feature` on `sales` and its "Time" column,
# then preview the last five rows of the result.
# NOTE(review): later cells read columns such as "Season" and "Time_Range" from
# `sales`, so the helper presumably adds its features to `sales` in place — confirm.
# NOTE(review): the meaning of the third argument (3) is not visible here — confirm.
ctf(sales, sales['Time'], 3).tail(5)
Amount | Is_Month_Start_End | Weekday | Is_Weekend | Week_Order | Season | Hour_of_Day | Time_Range | Day_Order | |
---|---|---|---|---|---|---|---|---|---|
2019-03-17 | 167.892404 | 0 | Sun | 1 | 10 | Spring | 09 | AM | 076 |
2019-03-18 | 728.618652 | 0 | Mon | 0 | 11 | Spring | 10 | AM | 077 |
2019-03-19 | 976.788669 | 0 | Tue | 0 | 11 | Spring | 11 | AM | 078 |
2019-03-20 | 458.933563 | 0 | Wed | 0 | 11 | Spring | 12 | PM | 079 |
2019-03-21 | 421.315669 | 0 | Thu | 0 | 11 | Spring | 12 | PM | 080 |
# Total Amount for May 2018: the "2018-05" label is a partial date string that
# selects every row of the date index falling in that month.
print(sales["Amount"].loc["2018-05"].sum())
13259.086647654212
# Sum and mean of every numeric column over 3-month bins of the date index.
# The numeric/boolean columns are selected explicitly: pandas >= 2.0 no longer
# drops non-numeric columns (such as Weekday and Season) silently and raises
# TypeError instead. String aggregator names are used because passing
# np.sum / np.mean as aggregation callables is deprecated in pandas >= 2.1;
# the resulting column labels ("sum", "mean") are the same.
sales.select_dtypes(include=["number", "bool"]).resample("3M").agg(["sum", "mean"])
Amount | Is_Month_Start_End | Is_Weekend | ||||
---|---|---|---|---|---|---|
sum | mean | sum | mean | sum | mean | |
2017-07-31 | 7964.874424 | 398.243721 | 3 | 0.150000 | 6 | 0.300000 |
2017-10-31 | 42624.193038 | 478.923517 | 18 | 0.202247 | 25 | 0.280899 |
2018-01-31 | 52071.987789 | 578.577642 | 16 | 0.177778 | 25 | 0.277778 |
2018-04-30 | 38017.128956 | 442.059639 | 17 | 0.197674 | 26 | 0.302326 |
2018-07-31 | 45222.353345 | 508.116330 | 18 | 0.202247 | 25 | 0.280899 |
2018-10-31 | 44577.379553 | 500.869433 | 18 | 0.202247 | 25 | 0.280899 |
2019-01-31 | 39211.077433 | 440.573904 | 17 | 0.191011 | 25 | 0.280899 |
2019-04-30 | 25348.311909 | 528.089831 | 8 | 0.166667 | 13 | 0.270833 |
# Cross-tabulate Amount by Season (rows) and Time_Range (columns), reporting
# both the total and the average per cell.
# The aggregators are given as strings because passing np.sum / np.mean as
# aggregation callables is deprecated in pandas >= 2.1; the top-level column
# labels ("sum", "mean") are unchanged.
sales.pivot_table(
    index=["Season"],
    columns=["Time_Range"],
    values=["Amount"],
    aggfunc=["sum", "mean"],
)
sum | mean | |||||||
---|---|---|---|---|---|---|---|---|
Amount | Amount | |||||||
Time_Range | AM | Mid Night | Night | PM | AM | Mid Night | Night | PM |
Season | ||||||||
Autumn | 17030.478111 | 28295.772910 | 13636.219565 | 25423.063901 | 500.896415 | 533.882508 | 413.218775 | 529.647165 |
Spring | 20941.083065 | 24287.649816 | 13111.780454 | 25098.884251 | 436.272564 | 495.666323 | 624.370498 | 522.893422 |
Summer | 9287.479980 | 15292.584821 | 6982.025141 | 9745.677805 | 386.978332 | 546.163744 | 436.376571 | 487.283890 |
Winter | 21182.473400 | 22908.261242 | 15040.550491 | 26773.321497 | 460.488552 | 498.005679 | 485.179048 | 486.787664 |
# One-hot encode the time-of-day label: one indicator column per distinct
# Time_Range value.
time_sub_dummies = pd.get_dummies(data=sales["Time_Range"])
# The indicators are kept in their own frame here; `sales.join(time_sub_dummies)`
# would attach them to `sales` if needed.
# Number of rows carrying each Time_Range label, most frequent first.
sales["Time_Range"].value_counts(sort=True, ascending=False)
Mid Night 176 PM 171 AM 152 Night 101 Name: Time_Range, dtype: int64