In [1]:
# Environment setup for the example notebook.
import os
import sys

# Put the parent directory at the front of the import path before anything
# else is imported (presumably so the local `ibstract` checkout is the one
# picked up by the later import -- confirm against the repo layout).
sys.path.insert(0, os.path.abspath('..'))

from io import StringIO

import pandas as pd
from IPython.core.interactiveshell import InteractiveShell

# Echo the value of every top-level expression in a cell, not only the last.
InteractiveShell.ast_node_interactivity = "all"
In [2]:
# Build the example inputs as in-memory CSV "files" and load each one into a
# DataFrame. The five sources deliberately use different layouts.

# GS 5-minute bars, timestamps with a -07:00 offset, close price only.
gs1_csv = StringIO("""
symbol, barsize, date, close
GS, 5 min, 2016-07-12 10:35:00-07:00, 140.05
GS, 5 min, 2016-07-12 11:20:00-07:00, 141.34
""")

# GS again, but with differently spelled headers and an extra volume column.
gs2_csv = StringIO("""
symbol, barSize, datetime, close, volume
GS, 5 min, 2016-07-12 10:35:00-07:00, 140.05, 344428
""")

# FB bars spaced 5 minutes apart: no symbol or bar-size column, naive timestamps.
fb5min_csv = StringIO("""
time, c, vol
2016-07-21 09:30:00, 120.05, 234242
2016-07-21 09:35:00, 120.32, 410842
""")

# FB bars spaced 1 minute apart, same layout as the 5-minute file.
fb1min_csv = StringIO("""
time, c, vol
2016-07-25 09:40:00, 120.47, 579638
2016-07-25 09:41:00, 120.82, 192476
""")

# AMZN daily bars with abbreviated headers and date-only timestamps.
amzn_csv = StringIO("""
symb, bar, date, close, volume
AMZN, 1 day, 2016-07-21, 749.22, 27917
AMZN, 1 day, 2016-07-22, 738.87, 36662
AMZN, 1 day, 2016-07-23, 727.23, 8766
""")

# Parse every buffer with pandas' default CSV options. Note that with the
# defaults the space following each comma stays part of the column names and
# of the string fields (e.g. ' barsize', ' 5 min').
df_gs1, df_gs2, df_fb5m, df_fb1m, df_amzn = map(
    pd.read_csv,
    (gs1_csv, gs2_csv, fb5min_csv, fb1min_csv, amzn_csv),
)

Example starts here


Input DataFrames

In [3]:
# Dump the five raw input frames as plain text, one after another.
print(df_gs1, df_gs2, df_fb5m, df_fb1m, df_amzn, sep="\n")
  symbol  barsize                        date   close
0     GS    5 min   2016-07-12 10:35:00-07:00  140.05
1     GS    5 min   2016-07-12 11:20:00-07:00  141.34
  symbol  barSize                    datetime   close   volume
0     GS    5 min   2016-07-12 10:35:00-07:00  140.05   344428
                  time       c     vol
0  2016-07-21 09:30:00  120.05  234242
1  2016-07-21 09:35:00  120.32  410842
                  time       c     vol
0  2016-07-25 09:40:00  120.47  579638
1  2016-07-25 09:41:00  120.82  192476
   symb     bar         date   close   volume
0  AMZN   1 day   2016-07-21  749.22    27917
1  AMZN   1 day   2016-07-22  738.87    36662
2  AMZN   1 day   2016-07-23  727.23     8766

Create a MarketDataBlock instance from pandas.DataFrame

The data are stored in self.df, i.e. MarketDataBlock wraps a DataFrame (composition) rather than subclassing it. The column and index names of the DataFrame are standardized, and a pandas.MultiIndex is created.

In [4]:
import pytz
from ibstract import MarketDataBlock

# Wrap the first GS frame in a MarketDataBlock, labelling it as TRADES data
# and passing US/Eastern as the time zone, then print the resulting block.
blk = MarketDataBlock(
    df_gs1,
    datatype='TRADES',
    tz=pytz.timezone('US/Eastern'),
)
print(blk)
                                                   closing
Symbol DataType BarSize TickerTime                        
GS     TRADES   5m      2016-07-12 13:35:00-04:00   140.05
                        2016-07-12 14:20:00-04:00   141.34

Date/Time index and time zone

Date/time columns given as strings are converted to a pandas.DatetimeIndex. When a MarketDataBlock instance is created, naive timestamps are localized, and timestamps with a fixed-offset time zone are converted to a region-based pytz.timezone.

In [5]:
# Show the type of the fourth index level (TickerTime in the block printed
# above) ...
print(
    "\nTickerTime type:",
    type(blk.df.index.levels[3]),
)
# ... and the block's time zone together with its concrete class.
print(
    "Time zone:",
    blk.tzinfo,
    type(blk.tzinfo),
)
TickerTime type: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
Time zone: US/Eastern <class 'pytz.tzfile.US/Eastern'>

Update MarketDataBlock from a pandas.DataFrame

Updating from a pandas.DataFrame combines the columns of both data sets. Missing (N/A) values in integer columns are filled with -1.

In [6]:
# Fold the second GS frame (the one carrying a volume column) into the
# existing block, then display the block.
blk.update(
    df_gs2,
    datatype='TRADES',
    tz=pytz.timezone('US/Eastern'),
)
blk
Out[6]:
                                                   closing  volume
Symbol DataType BarSize TickerTime                                
GS     TRADES   5m      2016-07-12 13:35:00-04:00   140.05  344428
                        2016-07-12 14:20:00-04:00   141.34      -1

The update() method can handle a DataFrame that has naive timestamps, rows with a different BarSize, or no BarSize column at all.

In [7]:
# The FB frames carry neither a symbol nor a bar-size column, so both are
# supplied explicitly as keyword arguments.
blk.update(
    df_fb5m,
    symbol='FB',
    datatype='TRADES',
    barsize='5m',
    tz=pytz.timezone('US/Eastern'),
)
blk.update(
    df_fb1m,
    symbol='FB',
    datatype='TRADES',
    barsize='1m',
    tz=pytz.timezone('US/Eastern'),
)
blk
Out[7]:
                                                   closing  volume
Symbol DataType BarSize TickerTime                                
FB     TRADES   1m      2016-07-25 09:40:00-04:00   120.47  579638
                        2016-07-25 09:41:00-04:00   120.82  192476
                5m      2016-07-21 09:30:00-04:00   120.05  234242
                        2016-07-21 09:35:00-04:00   120.32  410842
GS     TRADES   5m      2016-07-12 13:35:00-04:00   140.05  344428
                        2016-07-12 14:20:00-04:00   141.34      -1

Combining with another MarketDataBlock instance is easier than updating from a DataFrame.

In [8]:
# Build a separate block from the AMZN daily bars, merge it into the main
# block, and display the combined result.
blk_amzn = MarketDataBlock(
    df_amzn,
    datatype='TRADES',
    tz=pytz.timezone('US/Eastern'),
)
blk.combine(blk_amzn)
blk
Out[8]:
                                                   closing  volume
Symbol DataType BarSize TickerTime                                
AMZN   TRADES   1d      2016-07-21 00:00:00-04:00   749.22   27917
                        2016-07-22 00:00:00-04:00   738.87   36662
                        2016-07-23 00:00:00-04:00   727.23    8766
FB     TRADES   1m      2016-07-25 09:40:00-04:00   120.47  579638
                        2016-07-25 09:41:00-04:00   120.82  192476
                5m      2016-07-21 09:30:00-04:00   120.05  234242
                        2016-07-21 09:35:00-04:00   120.32  410842
GS     TRADES   5m      2016-07-12 13:35:00-04:00   140.05  344428
                        2016-07-12 14:20:00-04:00   141.34      -1