import numpy as np
import pandas as pd
from highcharts import Highstock
データの入手元: ヒストリカルデータ取得 :: デューカスコピー・ジャパン | 自動売買も裁量もJForex!
$ head USDJPY_1\ Min_Bid_2000.01.01_2018.01.01.csv
Time (UTC),Open,High,Low,Close,Volume
2003.05.04 21:00:00,118.94,118.952,118.94,118.952,253
2003.05.04 21:01:00,118.961,118.967,118.958,118.967,154.6
2003.05.04 21:02:00,118.972,118.972,118.955,118.955,219.7
2003.05.04 21:03:00,118.953,118.961,118.949,118.949,309.9
2003.05.04 21:04:00,118.953,118.953,118.946,118.946,229.4
2003.05.04 21:05:00,118.952,118.954,118.944,118.944,112.2
2003.05.04 21:06:00,118.95,118.952,118.945,118.945,170.2
2003.05.04 21:07:00,118.947,118.956,118.947,118.947,124.5
2003.05.04 21:08:00,118.946,118.954,118.934,118.934,355
$ wc -l USDJPY_1\ Min_Bid_2000.01.01_2018.01.01.csv
5509561 USDJPY_1 Min_Bid_2000.01.01_2018.01.01.csv
filepath1 = 'USDJPY_1 Min_Bid_2000.01.01_2018.01.01.csv'
dtype1 = { 'time': str, 'open': float, 'high': float, 'low': float, 'close': float, 'volume': float }
names1 = ['time', 'open', 'high', 'low', 'close', 'volume']
%time df = pd.read_csv(filepath_or_buffer=filepath1, dtype=dtype1, header=0, index_col='time', names=names1, parse_dates=['time'])
# CPU times: user 15.5 s, sys: 3.77 s, total: 19.2 s
# Wall time: 20.5 s
df.head()
CPU times: user 15.4 s, sys: 3.3 s, total: 18.7 s Wall time: 18.9 s
open | high | low | close | volume | |
---|---|---|---|---|---|
time | |||||
2003-05-04 21:00:00 | 118.940 | 118.952 | 118.940 | 118.952 | 253.0 |
2003-05-04 21:01:00 | 118.961 | 118.967 | 118.958 | 118.967 | 154.6 |
2003-05-04 21:02:00 | 118.972 | 118.972 | 118.955 | 118.955 | 219.7 |
2003-05-04 21:03:00 | 118.953 | 118.961 | 118.949 | 118.949 | 309.9 |
2003-05-04 21:04:00 | 118.953 | 118.953 | 118.946 | 118.946 | 229.4 |
タイムスタンプを UTC から UTC+3 に変換する
#%time pd.DataFrame({ 'open': df['open'], 'high': df['high'], 'low': df['low'], 'close': df['close'], 'volume': df['volume'] }, index=df.index + pd.DateOffset(hours=3))
#%time pd.DataFrame({ 'open': df['open'].values, 'high': df['high'].values, 'low': df['low'].values, 'close': df['close'].values, 'volume': df['volume'].values }, index=df.index + pd.DateOffset(hours=3))
# index を変える場合、 Series だと DataFrame を生成するのに 6 秒程度、 ndarray だと DataFrame を生成するのに 1 秒程度
data2 = { 'open': df['open'].values, 'high': df['high'].values, 'low': df['low'].values, 'close': df['close'].values, 'volume': df['volume'].values }
columns2 = ['open', 'high', 'low', 'close', 'volume']
index2 = df.index + pd.DateOffset(hours=3)
%time df2 = pd.DataFrame(data=data2, columns=columns2, index=index2)
# CPU times: user 62.5 ms, sys: 453 ms, total: 516 ms
# Wall time: 524 ms
df2.head()
CPU times: user 78.1 ms, sys: 578 ms, total: 656 ms Wall time: 662 ms
open | high | low | close | volume | |
---|---|---|---|---|---|
time | |||||
2003-05-05 00:00:00 | 118.940 | 118.952 | 118.940 | 118.952 | 253.0 |
2003-05-05 00:01:00 | 118.961 | 118.967 | 118.958 | 118.967 | 154.6 |
2003-05-05 00:02:00 | 118.972 | 118.972 | 118.955 | 118.955 | 219.7 |
2003-05-05 00:03:00 | 118.953 | 118.961 | 118.949 | 118.949 | 309.9 |
2003-05-05 00:04:00 | 118.953 | 118.953 | 118.946 | 118.946 | 229.4 |
# 日足の始値、高値、安値、終値を計算する
# Series ごとに始値、高値、安値、終値の計算結果が得られる
# 始値を参照する場合は df['open']['open'] のように記述する
%time df3 = df2.resample(rule='D').ohlc()
# CPU times: user 250 ms, sys: 78.1 ms, total: 328 ms
# Wall time: 348 ms
# 日足の出来高を計算する
%time df4 = df2.resample(rule='D').sum()
# CPU times: user 156 ms, sys: 93.8 ms, total: 250 ms
# Wall time: 241 ms
# 日足の DataFrame
# 土日も計算結果に含まれるため dropna で取り除く
data5 = { 'open': df3['open']['open'].values, 'high': df3['high']['high'].values, 'low': df3['low']['low'].values, 'close': df3['close']['close'].values, 'volume': df4['volume'].values }
columns5 = ['open', 'high', 'low', 'close', 'volume']
%time df5 = pd.DataFrame(data=data5, columns=columns5, index=df3.index).dropna()
# CPU times: user 0 ns, sys: 0 ns, total: 0 ns
# Wall time: 2.91 ms
df5.head()
CPU times: user 250 ms, sys: 125 ms, total: 375 ms Wall time: 410 ms CPU times: user 203 ms, sys: 78.1 ms, total: 281 ms Wall time: 309 ms CPU times: user 15.6 ms, sys: 0 ns, total: 15.6 ms Wall time: 3.83 ms
open | high | low | close | volume | |
---|---|---|---|---|---|
time | |||||
2003-05-05 | 118.940 | 119.046 | 118.461 | 118.603 | 592866.9 |
2003-05-06 | 118.591 | 118.751 | 117.290 | 117.500 | 581707.0 |
2003-05-07 | 117.456 | 117.830 | 116.052 | 116.303 | 584496.2 |
2003-05-08 | 116.311 | 116.969 | 115.940 | 116.823 | 588236.7 |
2003-05-09 | 116.835 | 117.612 | 116.794 | 117.151 | 583132.9 |
# high = pd.rolling_max(df2, 20)
# /path/to/dir/lib/python3.5/site-packages/ipykernel_launcher.py:1: FutureWarning: pd.rolling_max is deprecated for DataFrame and will be removed in a future version, replace with
# DataFrame.rolling(window=20,center=False).max()
# """Entry point for launching an IPython kernel.
# pd.rolling_max と pd.rolling_min は deprecated になったみたい
period = 20
# %time df5.rolling(window=period).max()
# CPU times: user 0 ns, sys: 0 ns, total: 0 ns
# Wall time: 48.3 ms
%time high = df5['high'].rolling(window=period).max()
%time low = df5['low'].rolling(window=period).min()
%time pd.DataFrame(data={ 'high': high, 'low': low }).tail()
CPU times: user 0 ns, sys: 0 ns, total: 0 ns Wall time: 4.02 ms CPU times: user 0 ns, sys: 0 ns, total: 0 ns Wall time: 1.34 ms CPU times: user 15.6 ms, sys: 15.6 ms, total: 31.2 ms Wall time: 2.93 ms
high | low | |
---|---|---|
time | ||
2017-12-28 | 113.75 | 111.993 |
2017-12-29 | 113.75 | 112.030 |
2017-12-30 | 113.75 | 112.030 |
2018-01-01 | 113.75 | 112.030 |
2018-01-02 | 113.75 | 112.030 |
# Convert the daily index to integer Unix-epoch milliseconds; these values
# become the 'time' column of every frame handed to the chart below.
# The cast goes through datetime64[ms] with plain NumPy instead of
# floor-dividing a datetime64 array by pd.Timedelta('1ms'): NumPy defines no
# datetime64 // timedelta operation and newer pandas releases raise
# TypeError for it, whereas this cast yields the same integers.
time6 = df5.index.values.astype('datetime64[ms]').astype(np.int64)

# Candlestick input: one row per day as [time, open, high, low, close].
columns6 = ['time', 'open', 'high', 'low', 'close']
data6 = { 'time': time6, 'open': df5['open'].values, 'high': df5['high'].values, 'low': df5['low'].values, 'close': df5['close'].values }
df6 = pd.DataFrame(data=data6, columns=columns6)

# Rolling high as [time, hlhigh] rows.
columns7 = ['time', 'hlhigh']
data7 = { 'time': time6, 'hlhigh': high.values }
df7 = pd.DataFrame(data=data7, columns=columns7)

# Rolling low as [time, hllow] rows.
columns8 = ['time', 'hllow']
data8 = { 'time': time6, 'hllow': low.values }
df8 = pd.DataFrame(data=data8, columns=columns8)

# Preview the three frames joined on their shared 'time' column.
df6.merge(df7, on='time').merge(df8, on='time').tail()
time | open | high | low | close | hlhigh | hllow | |
---|---|---|---|---|---|---|---|
4098 | 1514419200000 | 113.279 | 113.351 | 112.664 | 112.895 | 113.75 | 111.993 |
4099 | 1514505600000 | 112.894 | 112.969 | 112.472 | 112.678 | 113.75 | 112.030 |
4100 | 1514592000000 | 112.680 | 112.714 | 112.647 | 112.658 | 113.75 | 112.030 |
4101 | 1514764800000 | 112.658 | 112.658 | 112.658 | 112.658 | 113.75 | 112.030 |
4102 | 1514851200000 | 112.658 | 112.789 | 112.570 | 112.773 | 113.75 | 112.030 |
# Chart the daily candles together with the rolling high and low lines.
H = Highstock()
H.add_data_set(df6.values.tolist(), 'candlestick', '米ドル/円')
for frame, label in ((df7, 'High'), (df8, 'Low')):
    H.add_data_set(frame.values.tolist(), 'line', label)

options = {
    'plotOptions': {
        'line': {
            'lineWidth': 1,
        },
    },
    'rangeSelector': {
        'selected': 1,
    },
    'title': {
        'text': '米ドル/円 daily',
    },
}
H.set_dict_options(options)
# Bare expression so the notebook displays the chart.
H
highcharts パッケージ (Highstock) は chart type の sma に対応していない。highstock_types.py の PLOT_OPTION_ALLOWED_ARGS を見ると sma がない。
# Second chart: candlesticks only, registered under the id 'USDJPY'.
H2 = Highstock()
H2.add_data_set(df6.values.tolist(), 'candlestick', '米ドル/円', id='USDJPY')

# Attempt to add an 'sma' series linked to the candlesticks, left disabled
# because it failed with the errors recorded below:
#H2.add_data_set(linkedTo='USDJPY', params={ 'index': 3, 'period': 20 }, type='sma')
#KeyError: 'sma'
#TypeError: add_data_set() missing 1 required positional argument: 'data'

options2 = {
    # Configuring 'sma' through plotOptions is disabled as well; it failed with
    # OptionTypeError: Not An Accepted Option Type: sma
    # 'plotOptions': {
    #     'sma': {
    #         'linkedTo': 'USDJPY',
    #         'lineWidth': 1
    #     }
    # },
    'rangeSelector': {
        'selected': 1,
    },
    'title': {
        'text': '米ドル/円 daily',
    },
}
H2.set_dict_options(options2)
# Bare expression so the notebook displays the chart.
H2