#!/usr/bin/env python
# coding: utf-8

# Walk-through of MarketDataBlock: build a block from a DataFrame, update it
# from further DataFrames, and combine it with another block.
# Exported from a notebook; the "# In[n]:" markers are the original cells.

# In[1]:

# Prepare environment
import os
import sys

# Make the package in the parent directory importable when this example is
# run from its own folder.
sys.path.insert(0, os.path.abspath('..'))

from io import StringIO

import pandas as pd
from IPython.core.interactiveshell import InteractiveShell

# Only affects interactive IPython sessions: echo every expression in a cell,
# not just the last one.
InteractiveShell.ast_node_interactivity = "all"


# In[2]:

# Prepare input data files
# NOTE(review): the sample rows have a space after each comma and read_csv is
# called with its defaults (skipinitialspace=False), so headers and string
# values keep that leading space -- presumably MarketDataBlock normalizes
# them; confirm before relying on the raw column names.
gs1_csv = StringIO("""
symbol, barsize, date, close
GS, 5 min, 2016-07-12 10:35:00-07:00, 140.05
GS, 5 min, 2016-07-12 11:20:00-07:00, 141.34
""")

gs2_csv = StringIO("""
symbol, barSize, datetime, close, volume
GS, 5 min, 2016-07-12 10:35:00-07:00, 140.05, 344428
""")

fb5min_csv = StringIO("""
time, c, vol
2016-07-21 09:30:00, 120.05, 234242
2016-07-21 09:35:00, 120.32, 410842
""")

fb1min_csv = StringIO("""
time, c, vol
2016-07-25 09:40:00, 120.47, 579638
2016-07-25 09:41:00, 120.82, 192476
""")

amzn_csv = StringIO("""
symb, bar, date, close, volume
AMZN, 1 day, 2016-07-21, 749.22, 27917
AMZN, 1 day, 2016-07-22, 738.87, 36662
AMZN, 1 day, 2016-07-23, 727.23, 8766
""")

df_gs1, df_gs2, df_fb5m, df_fb1m, df_amzn = [
    pd.read_csv(f)
    for f in (gs1_csv, gs2_csv, fb5min_csv, fb1min_csv, amzn_csv)
]


# ## Example starts here
# ------

# ### Input DataFrames

# In[3]:

for frame in (df_gs1, df_gs2, df_fb5m, df_fb1m, df_amzn):
    print(frame)


# ### Create a MarketDataBlock instance from pandas.DataFrame
# **Data are stored in `self.df` (a composition design). Column and index
# names are standardized for the DataFrame, and a pandas.MultiIndex is
# created.**

# In[4]:

import pytz
from ibstract import MarketDataBlock

# One shared time zone object for every call below instead of rebuilding it
# at each call site.
EASTERN = pytz.timezone('US/Eastern')

blk = MarketDataBlock(df_gs1, datatype='TRADES', tz=EASTERN)
print(blk)


# ### Date/Time index and time zone
# **Date/time columns given as strings are converted to a
# ``pandas.DatetimeIndex``.
# When creating a MarketDataBlock instance, naive time stamps are localized,
# and a fixed-offset time zone is converted to a region-based
# ``pytz.timezone``.**

# In[5]:

print("\nTickerTime type:", type(blk.df.index.levels[3]))
print("Time zone:", blk.tzinfo, type(blk.tzinfo))


# ### Update MarketDataBlock from a `pandas.DataFrame`
# **Updating from a `pandas.DataFrame` combines columns. N/A data in integer
# columns are converted to -1.**

# In[6]:

blk.update(df_gs2, datatype='TRADES', tz=EASTERN)
# A bare `blk` expression only renders inside a notebook; print it so the
# result is also visible when this file is run as a plain script.
print(blk)


# **update() can deal with a `DataFrame` that has naive time stamps, rows
# with different BarSize values, or no BarSize column at all.**

# In[7]:

blk.update(df_fb5m, symbol='FB', datatype='TRADES', barsize='5m', tz=EASTERN)
blk.update(df_fb1m, symbol='FB', datatype='TRADES', barsize='1m', tz=EASTERN)
print(blk)


# **Combining with another MarketDataBlock instance is easier than updating
# from a DataFrame.**

# In[8]:

blk_amzn = MarketDataBlock(df_amzn, datatype='TRADES', tz=EASTERN)
blk.combine(blk_amzn)
print(blk)