Use DatetimeIndex
%matplotlib inline
import seaborn as sns
import datetime
import numpy as np
import pandas as pd
# set display number of lines
pd.options.display.max_rows = 10
Generate date range by pandas.date_range
date_range = pd.date_range('2015-1-1', '2016-12-31', freq='B')
date_range
DatetimeIndex(['2015-01-01', '2015-01-02', '2015-01-05', '2015-01-06', '2015-01-07', '2015-01-08', '2015-01-09', '2015-01-12', '2015-01-13', '2015-01-14', ... '2016-12-19', '2016-12-20', '2016-12-21', '2016-12-22', '2016-12-23', '2016-12-26', '2016-12-27', '2016-12-28', '2016-12-29', '2016-12-30'], dtype='datetime64[ns]', length=522, freq='B')
Frequency strings
DateOffset objects
http://pandas.pydata.org/pandas-docs/stable/timeseries.html#dateoffset-objects
文字 | オフセットクラス | 説明 |
---|---|---|
D | Day | 暦通りの日次 |
B | BusinessDay | 毎営業日 |
H | Hour | 毎時 |
Tまたはmin | Minute | 毎分 |
S | Second | 毎秒 |
Lまたはms | Milli | 毎ミリ秒 |
U | Micro | 毎マイクロ秒 |
M | MonthEnd | 暦通りの月末毎 |
BM | BusinessMonthEnd | 月の最終営業日毎 |
MS | MonthBegin | 暦通りの月初毎 |
BMS | BusinessMonthBegin | 月の営業開始日毎 |
W-MON,W-TUE,... | Week | 毎週指定した曜日毎 |
WOM-1MON,WOM-2MON,... | WeekOfMonth | 月の1~4週目の指定した曜日毎 |
Q-JAN,Q-FEB,... | QuarterEnd | 指定した月に年度が変わる前提で、四半期の暦通りの月末毎 |
BQ-JAN,BQ-FEB,... | BusinessQuarterEnd | 指定した月に年度が変わる前提で、四半期の最終営業日毎 |
QS-JAN,QS-FEB,... | QuarterBegin | 指定した月に年度が変わる前提で、四半期の暦通りの月初毎 |
BQS-JAN,BQS-FEB,... | QuarterBegin | 指定した月に年度が変わる前提で、四半期の営業開始日毎 |
A-JAN,A-FEB,... | YearEnd | 1年に1度、指定した月の暦通りの月末毎 |
BA-JAN,BA-FEB,... | BusinessYearEnd | 1年に1度、指定した月の最終営業日毎 |
AS-JAN,AS-FEB,... | YearBegin | 1年に1度、指定した月の暦通りの月初毎 |
BAS-JAN,BAS-FEB,... | YearBegin | 1年に1度、指定した月の営業開始日毎 |
def generate_stock_price(S0, t, r, sigma, path, steps):
dt = float(t) / steps
w = np.cumsum(np.reshape(np.random.standard_normal(path * steps), (path, steps)), 1) * (np.sqrt(dt))
t = np.cumsum(np.ones((path, steps)), 1) * dt
return S0 * np.exp((r - 0.5 * np.power(sigma, 2)) * t + sigma * w)
days = len(date_range)
stock_price = generate_stock_price(1000, days / 365.0, 0.001, 0.2, 3, days)
df = pd.DataFrame(stock_price.T, index=date_range, columns=['price1', 'price2', 'price3'])
df.head()
price1 | price2 | price3 | |
---|---|---|---|
2015-01-01 | 989.740629 | 983.883285 | 1013.366341 |
2015-01-02 | 978.695980 | 985.032079 | 1012.314720 |
2015-01-05 | 994.142925 | 987.058421 | 1035.949139 |
2015-01-06 | 993.850903 | 979.988999 | 1030.669402 |
2015-01-07 | 986.860088 | 991.971306 | 1042.060216 |
df.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f8680460cc0>
select data in datetime.date
df.loc[datetime.date(2016, 1, 1)]
price1 923.276412 price2 1005.628056 price3 1120.888874 Name: 2016-01-01 00:00:00, dtype: float64
select data in datetime.date
df.loc[datetime.datetime(2016, 1, 1, 0, 0)]
price1 923.276412 price2 1005.628056 price3 1120.888874 Name: 2016-01-01 00:00:00, dtype: float64
select data in str
df.loc['2016/1/1']
price1 923.276412 price2 1005.628056 price3 1120.888874 Name: 2016-01-01 00:00:00, dtype: float64
df.loc['20160101']
price1 923.276412 price2 1005.628056 price3 1120.888874 Name: 2016-01-01 00:00:00, dtype: float64
df.loc['Jan 01, 2016']
price1 923.276412 price2 1005.628056 price3 1120.888874 Name: 2016-01-01 00:00:00, dtype: float64
slice data by year
df.loc['2016':].head()
price1 | price2 | price3 | |
---|---|---|---|
2016-01-01 | 923.276412 | 1005.628056 | 1120.888874 |
2016-01-04 | 940.646484 | 998.060468 | 1138.470680 |
2016-01-05 | 933.050665 | 987.750545 | 1149.391673 |
2016-01-06 | 938.822515 | 989.134029 | 1133.833442 |
2016-01-07 | 933.242641 | 981.338873 | 1122.444465 |
slice data by month
df.loc['2015-5'].head()
price1 | price2 | price3 | |
---|---|---|---|
2015-05-01 | 1057.487572 | 912.645588 | 1112.760240 |
2015-05-04 | 1052.539710 | 911.484485 | 1127.899948 |
2015-05-05 | 1065.361094 | 913.593538 | 1121.501779 |
2015-05-06 | 1064.154526 | 917.135357 | 1125.727790 |
2015-05-07 | 1049.255429 | 922.545973 | 1135.515160 |
df.loc['2015-12':'2016-01']
price1 | price2 | price3 | |
---|---|---|---|
2015-12-01 | 901.023452 | 904.539104 | 1129.333413 |
2015-12-02 | 895.479921 | 895.717168 | 1135.919600 |
2015-12-03 | 906.573250 | 905.508623 | 1129.993463 |
2015-12-04 | 910.097565 | 910.095176 | 1135.237073 |
2015-12-07 | 903.014103 | 926.729217 | 1125.134069 |
... | ... | ... | ... |
2016-01-25 | 884.219952 | 978.723119 | 1123.653912 |
2016-01-26 | 896.079473 | 962.906120 | 1115.194525 |
2016-01-27 | 899.893775 | 956.377275 | 1118.549986 |
2016-01-28 | 902.807967 | 966.632343 | 1105.583831 |
2016-01-29 | 909.536833 | 959.223724 | 1101.142623 |
44 rows × 3 columns
possible to include timestampe out of index
df.loc['2016-01-02':'2016-01-10']
price1 | price2 | price3 | |
---|---|---|---|
2016-01-04 | 940.646484 | 998.060468 | 1138.470680 |
2016-01-05 | 933.050665 | 987.750545 | 1149.391673 |
2016-01-06 | 938.822515 | 989.134029 | 1133.833442 |
2016-01-07 | 933.242641 | 981.338873 | 1122.444465 |
2016-01-08 | 942.086665 | 987.300053 | 1124.170540 |
select data in datetime.time type
# Generate 100 hourly data
ser = pd.Series(generate_stock_price(100, 0.2, 0, 0.2, 100, 1).T[0],
index=pd.date_range('2016-01-01', periods=100, freq='H'))
ser.head()
2016-01-01 00:00:00 87.630594 2016-01-01 01:00:00 94.592101 2016-01-01 02:00:00 109.935888 2016-01-01 03:00:00 113.025606 2016-01-01 04:00:00 90.536927 Freq: H, dtype: float64
Select specified time data by datetime.time
ser[datetime.time(9, 0)]
2016-01-01 09:00:00 111.702307 2016-01-02 09:00:00 82.899518 2016-01-03 09:00:00 103.516002 2016-01-04 09:00:00 97.888412 Freq: 24H, dtype: float64
In inside .at_time() is working
ser.at_time(datetime.time(9, 0))
2016-01-01 09:00:00 111.702307 2016-01-02 09:00:00 82.899518 2016-01-03 09:00:00 103.516002 2016-01-04 09:00:00 97.888412 Freq: 24H, dtype: float64
To specify time range using .between_time()
ser.between_time(datetime.time(9, 0), datetime.time(12, 0))
2016-01-01 09:00:00 111.702307 2016-01-01 10:00:00 108.773511 2016-01-01 11:00:00 107.222576 2016-01-01 12:00:00 105.180062 2016-01-02 09:00:00 82.899518 ... 2016-01-03 12:00:00 95.556950 2016-01-04 09:00:00 97.888412 2016-01-04 10:00:00 96.704581 2016-01-04 11:00:00 120.614416 2016-01-04 12:00:00 98.740763 dtype: float64
Create OHLC data and covert time range
Convert daily data into monthly data using .resample()
df['price1'].resample('M').ohlc()
open | high | low | close | |
---|---|---|---|---|
2015-01-31 | 989.740629 | 1041.371901 | 959.080269 | 1041.371901 |
2015-02-28 | 1032.892215 | 1053.838209 | 1002.733878 | 1041.320584 |
2015-03-31 | 1035.319210 | 1056.030027 | 1008.716984 | 1041.224075 |
2015-04-30 | 1026.601846 | 1067.136814 | 1000.930056 | 1057.973435 |
2015-05-31 | 1057.487572 | 1065.361094 | 962.684445 | 962.684445 |
... | ... | ... | ... | ... |
2016-08-31 | 944.495570 | 1038.776056 | 923.672432 | 1038.776056 |
2016-09-30 | 1021.796815 | 1021.796815 | 975.163187 | 975.163187 |
2016-10-31 | 975.251349 | 1032.668315 | 975.251349 | 1032.668315 |
2016-11-30 | 1030.241984 | 1091.790552 | 1009.095186 | 1009.095186 |
2016-12-31 | 994.344228 | 1071.370257 | 994.344228 | 1058.799528 |
24 rows × 4 columns
convert an OHLC data into another OHLC data
extract data from Yahoo Finance into a DataFrame
from pandas_datareader import data
try:
gspc = data.DataReader("^GSPC", 'yahoo', datetime.datetime(2006, 1, 1), datetime.datetime(2015, 12, 31))
except Exception:
gspc = pd.read_msgpack('data/^GSPC.mpack')
gspc.head()
Open | High | Low | Close | Volume | Adj Close | |
---|---|---|---|---|---|---|
Date | ||||||
2006-01-03 | 1248.290039 | 1270.219971 | 1245.739990 | 1268.800049 | 2554570000 | 1268.800049 |
2006-01-04 | 1268.800049 | 1275.369995 | 1267.739990 | 1273.459961 | 2515330000 | 1273.459961 |
2006-01-05 | 1273.459961 | 1276.910034 | 1270.300049 | 1273.479980 | 2433340000 | 1273.479980 |
2006-01-06 | 1273.479980 | 1286.089966 | 1273.479980 | 1285.449951 | 2446560000 | 1285.449951 |
2006-01-09 | 1285.449951 | 1290.780029 | 1284.819946 | 1290.150024 | 2301490000 | 1290.150024 |
Run resample().ohlc()
gspc.resample('W').ohlc().head()
Open | High | Low | ... | Close | Volume | Adj Close | |||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
open | high | low | close | open | high | low | close | open | high | ... | low | close | open | high | low | close | open | high | low | close | |
Date | |||||||||||||||||||||
2006-01-08 | 1248.290039 | 1273.479980 | 1248.290039 | 1273.479980 | 1270.219971 | 1286.089966 | 1270.219971 | 1286.089966 | 1245.739990 | 1273.479980 | ... | 1268.800049 | 1285.449951 | 2554570000 | 2554570000 | 2433340000 | 2446560000 | 1268.800049 | 1285.449951 | 1268.800049 | 1285.449951 |
2006-01-15 | 1285.449951 | 1294.180054 | 1285.449951 | 1286.060059 | 1290.780029 | 1294.900024 | 1288.959961 | 1288.959961 | 1284.819946 | 1288.119995 | ... | 1286.060059 | 1287.609985 | 2301490000 | 2406130000 | 2206510000 | 2206510000 | 1290.150024 | 1294.180054 | 1286.060059 | 1287.609985 |
2006-01-22 | 1287.609985 | 1287.609985 | 1277.930054 | 1285.040039 | 1287.609985 | 1287.790039 | 1282.930054 | 1285.040039 | 1278.609985 | 1278.609985 | ... | 1261.489990 | 1261.489990 | 2179970000 | 2845810000 | 2179970000 | 2845810000 | 1282.930054 | 1285.040039 | 1261.489990 | 1261.489990 |
2006-01-29 | 1261.489990 | 1273.829956 | 1261.489990 | 1273.829956 | 1268.189941 | 1286.380005 | 1268.189941 | 1286.380005 | 1261.489990 | 1273.829956 | ... | 1263.819946 | 1283.719971 | 2256070000 | 2856780000 | 2256070000 | 2623620000 | 1263.819946 | 1283.719971 | 1263.819946 | 1283.719971 |
2006-02-05 | 1283.719971 | 1285.199951 | 1270.839966 | 1270.839966 | 1287.939941 | 1287.939941 | 1270.869995 | 1270.869995 | 1283.510010 | 1283.510010 | ... | 1264.030029 | 1264.030029 | 2282730000 | 2708310000 | 2282210000 | 2282210000 | 1285.189941 | 1285.189941 | 1264.030029 | 1264.030029 |
5 rows × 24 columns
Select applicable data from above results
gspc_weekly_ohlc = gspc.resample('W').ohlc()[[('Open', 'open'), ('High', 'high'), ('Low', 'low'), ('Close', 'close')]]
gspc_weekly_ohlc.columns = gspc_weekly_ohlc.columns.droplevel()
gspc_weekly_ohlc.head()
open | high | low | close | |
---|---|---|---|---|
Date | ||||
2006-01-08 | 1248.290039 | 1286.089966 | 1245.739990 | 1285.449951 |
2006-01-15 | 1285.449951 | 1294.900024 | 1282.780029 | 1287.609985 |
2006-01-22 | 1287.609985 | 1287.790039 | 1260.920044 | 1261.489990 |
2006-01-29 | 1261.489990 | 1286.380005 | 1259.420044 | 1283.719971 |
2006-02-05 | 1283.719971 | 1287.939941 | 1261.020020 | 1264.030029 |
Appendix: visualize candle stick chart by HighCharts
python-highchartsはPython3に対応していないので、forkしたリポジトリからインストール
For Python3
Install python-highcharts from forked repository
git clone -b issue37-Python3 https://github.com/JrtPec/python-highcharts.git
cd python-highcharts
python setup.py install
import charts
gspc['Time'] = gspc.index.astype(np.int64)//1000000
series = [{'type':'candlestick',
'name':'SP500',
'data':gspc[['Time', 'Open', 'High', 'Low', 'Close']].values}]
options = {'rangeSelector':{'selected':0},
'xAxis':{'gridLineWidth':1, 'gridLineDashStyle':'Dot'},
'yAxis':{'gridLineWidth':1, 'gridLineDashStyle':'Dot'}}
charts.plot(series, stock=True, show='inline', options=options, display=['SP500'])
Server running in the folder /notebooks at 127.0.0.1:42122