%%bash
echo "Installing requirements .."
pip install pandas==0.22.0 quandl pandas_datareader alpha_vantage matplotlib plotly sklearn scipy fix_yahoo_finance statsmodels beautifulsoup4 > /dev/null 2>&1
# NOTE: we use pandas 0.22 for now since pandas_datareader don't support 0.23 yet
echo "Done"
# (cell output) Installing requirements .. Done
import os
import datetime
import numbers
import subprocess
import uuid
import string
import json
import requests
from io import StringIO
import re
import pandas as pd
import numpy as np
import sklearn as sk
from sklearn import linear_model
import quandl
import pandas_datareader
from pandas_datareader import data as pdr
import fix_yahoo_finance as yf
yf.pdr_override() # <== that's all it takes :-)
import alpha_vantage
from alpha_vantage.timeseries import TimeSeries
from alpha_vantage.cryptocurrencies import CryptoCurrencies
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (20.0, 10.0) # Make plots bigger
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode()
from pathlib import Path
from bs4 import BeautifulSoup
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
pd.set_option('display.float_format', lambda x: '{:,.2f}'.format(x))
pd.set_option('display.max_rows', 5000)
pd.set_option('display.max_columns', 500)
def pd_from_dict(d):
    """Build a DataFrame with one column per dict key, rows sorted by index."""
    frame = pd.DataFrame.from_dict(d, orient='index')
    return frame.T.sort_index()
# (hack) Global configs
# When non-None, conf_cache globally overrides each call's per-symbol cache flag
# (consulted in DataSource.get).
conf_cache = None
class GetConf:
    """Bundle of fetch options: split/dividend adjustment, caching, fallback source."""
    def __init__(self, splitAdj, divAdj, cache, secondary):
        self.splitAdj, self.divAdj = splitAdj, divAdj
        self.cache, self.secondary = cache, secondary
# Module-level fetch cache, created only if missing so re-running this cell
# in the notebook keeps previously fetched entries.
if "fetchCache" not in globals():  # idiomatic "not in" (was: not "x" in ...)
    fetchCache = {}
class Symbol:
    """Parsed symbol spec of the form NAME[@SOURCE][!CURRENCY].

    `fullname` keeps the original string; `name`, `source` and `currency`
    hold the parsed components ("" when absent).
    """
    def __init__(self, fullname):
        self.fullname = fullname
        pieces = fullname.split("!")
        if len(pieces) == 2:
            fullname, self.currency = pieces
        else:
            self.currency = ""
        pieces = fullname.split("@")
        self.name = pieces[0]
        self.source = pieces[1] if len(pieces) == 2 else ""
    def __str__(self):
        return self.fullname
import scipy.optimize
from datetime import datetime as dt
def xnpv(rate, values, dates):
    '''Equivalent of Excel's XNPV function.
    >>> from datetime import date
    >>> dates = [date(2010, 12, 29), date(2012, 1, 25), date(2012, 3, 8)]
    >>> values = [-10000, 20, 10100]
    >>> xnpv(0.1, values, dates)
    -966.4345...
    '''
    if rate <= -1.0:
        return float('inf')
    t0 = dates[0]  # discount relative to the first cash-flow date
    total = 0
    for amount, when in zip(values, dates):
        total += amount / (1.0 + rate) ** ((when - t0).days / 365.0)
    return total
def xirr(values, dates):
    '''Equivalent of Excel's XIRR function.
    >>> from datetime import date
    >>> dates = [date(2010, 12, 29), date(2012, 1, 25), date(2012, 3, 8)]
    >>> values = [-10000, 20, 10100]
    >>> xirr(values, dates)
    0.0100612...
    '''
    npv_at = lambda r: xnpv(r, values, dates)
    # we prefer to try brentq first as newton keeps outputting tolerance warnings
    try:
        return scipy.optimize.brentq(npv_at, -1.0, 1e10)
    except RuntimeError:  # Failed to converge?
        return scipy.optimize.newton(npv_at, 0.0, tol=0.0002)
def curr_price(symbol):
    """Latest available price for *symbol*; 0 for ignored assets."""
    # NOTE(review): relies on a module-level `ignoredAssets` collection being
    # defined by the notebook user before this is called — confirm.
    if symbol in ignoredAssets: return 0
    return get(symbol)[-1]
#def getForex(fromCur, toCur):
# if fromCur == toCur: return 1
# if toCur == "USD":
# return get(fromCur + "=X", "Y")
# if fromCur == "USD":
# return get(toCur + "=X", "Y").map(lambda x: 1.0/x)
def getForex(fromCur, toCur):
    """Daily exchange-rate series converting fromCur to toCur (scalar 1 when equal)."""
    if fromCur == toCur:
        return 1
    rates = get(fromCur + toCur + "@CUR").s
    # fill calendar gaps (weekends/holidays) by carrying the last rate forward
    full_range = pd.date_range(start=rates.index[0], end=rates.index[-1])
    rates = rates.reindex(full_range)
    return rates.fillna(method="ffill")
def convert(value, fromCur, toCur):
    """Convert *value* between currencies using the most recent exchange rate."""
    if fromCur == toCur:
        return value
    latest_rate = getForex(fromCur, toCur)[-1]
    return value * latest_rate
def toSymbol(sym):
    """Coerce *sym* (str or Symbol) into a Symbol instance.

    Raises TypeError for any other type. (Previously used `assert False`,
    which silently disappears under `python -O`.)
    """
    if isinstance(sym, Symbol):
        return sym
    if isinstance(sym, str):
        return Symbol(sym)
    raise TypeError("invalid type for Symbol: " + str(type(sym)) + ", " + str(sym))
class DataSource:
    """Base class for price fetchers with disk caching and failure throttling."""
    def __init__(self, source):
        self.source = source  # short source code, e.g. "Y" for Yahoo
    def fetch(self, symbol, conf):
        # Override: fetch raw data for *symbol*; return a DataFrame or None on failure.
        pass
    def process(self, symbol, df, conf):
        # Override: reduce the raw DataFrame to a price Series.
        pass
    def get(self, symbol, conf):
        """Fetch *symbol* (honoring caches), persist it, and return the processed series sorted by date."""
        global conf_cache
        df = None
        # a "._FAIL_" marker file throttles retries of symbols that recently failed
        failpath = cache_file(symbol, self.source) + "._FAIL_"
        if os.path.isfile(failpath):
            mtime = datetime.datetime.fromtimestamp(os.path.getmtime(failpath))
            diff = datetime.datetime.now() - mtime
            if diff.total_seconds() <= 24 * 3600:  # back off for 24h after a failure
                raise Exception("Fetching has previously failed for {0}, will try again later".format(symbol))
        useCache = conf.cache
        if conf_cache != None:  # global override beats the per-call setting
            useCache = conf_cache
        if useCache:
            df = cache_get(symbol, self.source)
        try:
            # Attempt to actually fetch the symbol
            if df is None:
                print("Fetching %s from %s .. " % (symbol, self.source), end="")
                df = self.fetch(symbol, conf)
                print("DONE")
            if df is None:
                print("FAILED")
                raise Exception("Failed to fetch symbol: " + str(symbol) + " from " + self.source)
            if len(df) == 0:
                print("FAILED")
                raise Exception("Symbol fetched but is empty: " + str(symbol) + " from " + self.source)
        except Exception as e:
            # save a note that we failed
            Path(failpath).touch()
            raise Exception from e
        # NOTE(review): cache_set also runs when df came straight from cache,
        # rewriting an identical file — harmless but redundant; confirm intent.
        cache_set(symbol, self.source, df)
        res = self.process(symbol, df, conf)
        return res.sort_index()
fred_forex_codes = """
AUD DEXUSAL
BRL DEXBZUS
GBP DEXUSUK
CAD DEXCAUS
CNY DEXCHUS
DKK DEXDNUS
EUR DEXUSEU
HKD DEXHKUS
INR DEXINUS
JPY DEXJPUS
MYR DEXMAUS
MXN DEXMXUS
TWD DEXTAUS
NOK DEXNOUS
SGD DEXSIUS
ZAR DEXSFUS
KRW DEXKOUS
LKR DEXSLUS
SEK DEXSDUS
CHF DEXSZUS
VEF DEXVZUS
"""
boe_forex_codes = """
AUD XUDLADD
CAD XUDLCDD
CNY XUDLBK73
CZK XUDLBK27
DKK XUDLDKD
HKD XUDLHDD
HUF XUDLBK35
INR XUDLBK64
NIS XUDLBK65
JPY XUDLJYD
LTL XUDLBK38
MYR XUDLBK66
NZD XUDLNDD
NOK XUDLNKD
PLN XUDLBK49
GBP XUDLGBD
RUB XUDLBK69
SAR XUDLSRD
SGD XUDLSGD
ZAR XUDLZRD
KRW XUDLBK74
SEK XUDLSKD
CHF XUDLSFD
TWD XUDLTWD
THB XUDLBK72
TRY XUDLBK75
"""
# https://blog.quandl.com/api-for-currency-data
class ForexDataSource(DataSource):
    """USD forex rates via Quandl: FRED tables first, Bank of England as fallback."""
    def __init__(self, source):
        # parse the tab-separated "CUR<TAB>CODE" tables defined above into dicts
        self.fred_code_map = dict([s.split("\t") for s in fred_forex_codes.split("\n")[1:-1]])
        self.boe_code_map = dict([s.split("\t") for s in boe_forex_codes.split("\n")[1:-1]])
        self.boe_code_map["ILS"] = self.boe_code_map["NIS"]  # same currency, two common codes
        super().__init__(source)
    def fetch(self, symbol, conf):
        # symbol.name is a 6-letter pair where one side must be USD, e.g. "EURUSD"
        assert len(symbol.name) == 6
        _from = symbol.name[:3]
        _to = symbol.name[3:]
        if _to != "USD" and _from != "USD":
            raise Exception("Can only convert to/from USD")
        invert = _from == "USD"
        curr = _to if invert else _from
        div100 = 1
        if curr == "GBC":  # GBC: British pence, quoted at 1/100 of GBP
            div100 = 100
            curr = "GBP"
        if curr in self.fred_code_map:
            code = self.fred_code_map[curr]
            df = quandl.get("FRED/" + code)
            if code.endswith("US") != invert: # some of the FRED currencies are inverted vs the US dollar, argh..
                df = df.apply(lambda x: 1.0/x)
            return df / div100
        if curr in self.boe_code_map:
            code = self.boe_code_map[curr]
            df = quandl.get("BOE/" + code)
            if not invert: # not sure if some of BEO currencies are NOT inverted vs USD, checked a few and they weren't
                df = df.apply(lambda x: 1.0/x)
            return df / div100
        raise Exception("Currency pair is not supported: " + symbol.name)
    def process(self, symbol, df, conf):
        # the first column holds the rate series
        return df.iloc[:, 0]
# https://github.com/ranaroussi/fix-yahoo-finance
class YahooDataSource(DataSource):
    """Yahoo Finance daily prices via pandas_datareader (patched by fix_yahoo_finance)."""
    def fetch(self, symbol, conf):
        return pdr.get_data_yahoo(symbol.name, progress=False, actions=True)
    def process(self, symbol, df, conf):
        if not conf.splitAdj:
            # dividend-adjusted-but-not-split-adjusted is not supported
            assert not conf.divAdj
            # Yahoo "Close" data is split adjusted.
            # We find the unadjusted data using the splits data:
            # reverse cumulative product of split ratios, shifted so the
            # period before each split gets the multiplier.
            splitMul = df["Stock Splits"][::-1].cumprod().shift().fillna(method="bfill")
            return df["Close"] / splitMul
        assert conf.splitAdj and conf.divAdj
        return df["Adj Close"]
class QuandlDataSource(DataSource):
    """Generic Quandl dataset: prefers a "Close" column, else the first column."""
    def fetch(self, symbol, conf):
        return quandl.get(symbol.name)
    def process(self, symbol, df, conf):
        if "Close" not in df.columns:
            return df.iloc[:, 0]
        return df["Close"]
class GoogleDataSource(DataSource):
    """Google Finance daily prices via pandas_datareader."""
    def fetch(self, symbol, conf):
        # pdr is `pandas_datareader.data`; same DataReader entry point
        return pdr.DataReader(symbol.name, 'google')
    def process(self, symbol, df, conf):
        return df["Close"]
AV_API_KEY = 'BB18'  # NOTE(review): hardcoded API key — consider moving to an env var
class AlphaVantageDataSource(DataSource):
    """Alpha Vantage adjusted daily equity prices."""
    def fetch(self, symbol, conf):
        ts = TimeSeries(key=AV_API_KEY, output_format='pandas')
        df, meta_data = ts.get_daily_adjusted(symbol.name, outputsize="full")
        # the index arrives as date strings; normalize to a DatetimeIndex
        df.index = pd.to_datetime(df.index, format="%Y-%m-%d")
        return df
    def process(self, symbol, df, conf):
        return df["5. adjusted close"]  # column name as returned by the AV API
class AlphaVantageCryptoDataSource(DataSource):
    """Alpha Vantage daily cryptocurrency prices, quoted in USD."""
    def fetch(self, symbol, conf):
        client = CryptoCurrencies(key=AV_API_KEY, output_format='pandas')
        df, _meta = client.get_digital_currency_daily(symbol=symbol.name, market='USD')
        df.index = pd.to_datetime(df.index, format="%Y-%m-%d")
        return df
    def process(self, symbol, df, conf):
        return df['4a. close (USD)']
class CryptoCompareDataSource(DataSource):
    """Daily close prices from the CryptoCompare public API (CCCAGG index, USD)."""
    def fetch(self, symbol, conf):
        url = "https://min-api.cryptocompare.com/data/histoday?fsym=__sym__&tsym=USD&limit=600000&aggregate=1&e=CCCAGG"
        payload = json.loads(requests.get(url.replace("__sym__", symbol.name)).text)
        df = pd.DataFrame(payload["Data"])
        if len(df) == 0:
            return None
        # timestamps arrive as unix epoch seconds
        df["time"] = pd.to_datetime(df.time, unit="s")
        return df.set_index("time")
    def process(self, symbol, df, conf):
        return df.close
# NOTE: data is SPLIT adjusted, but has no dividends and is NOT DIVIDEND adjusted
# NOTE: it has data all the way to the start, but returned result is capped in length for ~20 years
#       and results are trimmed from the END, not from the start. TBD to handle this properly.
#       for now we start at 1.1.2000
class InvestingComDataSource(DataSource):
    """Scrapes historical daily prices from investing.com.

    Flow: search endpoint resolves a ticker to its page link (getUrl),
    the page's embedded ids are extracted (getCodes), and the history
    AJAX endpoint returns an HTML table (getHtml) parsed in fetch().
    """
    def getUrl(self, symbol):
        # resolve the ticker to its investing.com page link via the search service
        symbol = symbol.name
        data = {
            'search_text': symbol,
            'term': symbol,
            'country_id': '0',
            'tab_id': 'All'
        }
        # browser-like headers; the endpoint rejects requests without them
        headers = {
            'Origin': 'https://www.investing.com',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.9,he;q=0.8',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Referer': 'https://www.investing.com/search?q=' + symbol,
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive'
        }
        r = requests.post("https://www.investing.com/search/service/search", data=data, headers=headers)
        res = r.text
        res = json.loads(res)
        # take the first search hit
        return res["All"][0]["link"]
    def getCodes(self, url):
        # scrape the pairId/smlId identifiers embedded in the historical-data page's JS
        url = "https://www.investing.com" + url + "-historical-data"
        headers = {
            'Origin': 'https://www.investing.com',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.9,he;q=0.8',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Referer': 'https://www.investing.com/',
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive'
        }
        r = requests.get(url,headers=headers)
        text = r.text
        # NOTE(review): patterns should be raw strings (r"smlId:\s+(\d+)") to avoid
        # invalid-escape warnings on newer Pythons; behavior is unchanged here.
        m = re.search("smlId:\s+(\d+)", text)
        smlId = m.group(1)
        m = re.search("pairId:\s+(\d+)", text)
        pairId = m.group(1)
        return pairId, smlId
    def getHtml(self, pairId, smlId):
        # request the full daily history (2000-2100, sorted desc) as an HTML table
        data = [
            'curr_id=' + pairId,
            'smlID=' + smlId,
            'header=',
            'st_date=01%2F01%2F2000',
            'end_date=01%2F01%2F2100',
            'interval_sec=Daily',
            'sort_col=date',
            'sort_ord=DESC',
            'action=historical_data'
        ]
        data = "&".join(data)
        headers = {
            'Origin': 'https://www.investing.com',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.9,he;q=0.8',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'text/plain, */*; q=0.01',
            'Referer': 'https://www.investing.com/',
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive'
        }
        r = requests.post("https://www.investing.com/instruments/HistoricalDataAjax", data=data, headers=headers)
        return r.text
    def fetch(self, symbol, conf):
        symbolUrl = self.getUrl(symbol)
        pairId, smlId = self.getCodes(symbolUrl)
        html = self.getHtml(pairId, smlId)
        #print(html)
        parsed_html = BeautifulSoup(html, "lxml")
        df = pd.DataFrame(columns=["date", "price"])
        for i, tr in enumerate(parsed_html.find_all("tr")[1:]): # skip header
            # each cell carries the raw value in a data-real-value attribute
            data = [x.get("data-real-value") for x in tr.find_all("td")]
            if len(data) == 0 or data[0] is None:
                continue
            date = datetime.datetime.utcfromtimestamp(int(data[0]))
            close = float(data[1].replace(",", ""))
            #open = data[2]
            #high = data[3]
            #low = data[4]
            #volume = data[5]
            df.loc[i] = [date, close]
        df = df.set_index("date")
        return df
    def process(self, symbol, df, conf):
        return df['price']
# fetching data
if not "Wrapper" in locals():
    class Wrapper(object):
        """Transparent proxy around a pandas Series.

        Delegates attribute access to the wrapped series (re-wrapping Series
        results), and overloads arithmetic so results carry a descriptive
        name built from the operand names ("A / B", "A * B", ...).
        Defined behind a locals() guard so re-running the cell does not
        create a new class object (isinstance checks elsewhere stay valid).
        """
        def __init__(self, s):
            #self.s = s
            # bypass our own __setattr__, which forwards writes to the series
            object.__setattr__(self, "s", s)
        def __getattr__(self, name):
            attr = self.s.__getattribute__(name)
            if hasattr(attr, '__call__'):
                # wrap bound methods so Series results come back wrapped too
                def newfunc(*args, **kwargs):
                    result = attr(*args, **kwargs)
                    if type(result) is pd.Series:
                        result = Wrapper(result)
                    return result
                return newfunc
            if type(attr) is pd.Series:
                attr = Wrapper(attr)
            return attr
        def __setattr__(self, name, value):
            # all attribute writes (e.g. .name) land on the underlying series
            self.s.__setattr__(name, value)
        def __getitem__(self, item):
            return wrap(self.s.__getitem__(item), self.s.name)
        # def __truediv__(self, other):
        #     divisor = other
        #     if type(other) is Wrapper:
        #         divisor = other.s
        #     series = self.s / divisor
        #     name = self.name
        #     if type(other) is Wrapper:
        #         name = self.s.name + " / " + other.s.name
        #     return wrap(series, name)
        def __truediv__(self, other):
            return Wrapper.doop(self, other, "/", lambda x, y: x / y)
        def __rtruediv__(self, other):
            return Wrapper.doop(self, other, "/", lambda x, y: x / y, right=True)
        def doop(self, other, opname, opLambda, right=False):
            # generic binary-op helper: unwrap the operand, apply, name the result
            divisor = other
            if type(other) is Wrapper:
                divisor = other.s
            if right:
                series = opLambda(divisor, self.s)
            else:
                series = opLambda(self.s, divisor)
            name = self.name
            if type(other) is Wrapper:
                if right:
                    name = other.s.name + " " + opname + " " + self.s.name
                else:
                    name = self.s.name + " " + opname + " " + other.s.name
            return wrap(series, name)
        def __sub__(self, other):
            return Wrapper.doop(self, other, "-", lambda x, y: x - y)
        #def __rsub__(self, other):
        #    return Wrapper.doop(self, other, "-", lambda x, y: x - y, right=True)
        def __mul__(self, other):
            return Wrapper.doop(self, other, "*", lambda x, y: x * y)
        def __rmul__(self, other):
            return Wrapper.doop(self, other, "*", lambda x, y: x * y, right=True)
def wrap(s, name=""):
    """Wrap a pandas Series in a Wrapper and tag it with *name* (defaults to s.name)."""
    label = name or s.name
    if not label:
        raise Exception("no name")
    if isinstance(s, pd.Series):
        s = Wrapper(s)
    s.name = label
    return s
# registry of available data sources, keyed by the @SOURCE suffix of a symbol
data_sources = {
    "Y": YahooDataSource("Y"),
    "IC": InvestingComDataSource("IC"),
    "Q": QuandlDataSource("Q"),
    "AV": AlphaVantageDataSource("AV"),
    "CC": CryptoCompareDataSource("CC"),
    "CCAV": AlphaVantageCryptoDataSource("CCAV"),
    "CUR": ForexDataSource("CUR"),
    "G": GoogleDataSource("G")
}
def getFrom(symbol, conf):
    """Resolve *symbol* to a price series from its data source (default "Y").

    Six-letter names with no explicit source and a USD side (e.g. "EURUSD")
    are routed through getForex. When conf.secondary is set, it is used as a
    fallback source if the primary fetch raises.
    """
    # special handling for forex
    # if a match, it will recurse and return here with XXXUSD@CUR
    if len(symbol.name) == 6 and not symbol.source:
        parts = symbol.name[:3], symbol.name[3:]
        if parts[0] == "USD" or parts[1] == "USD":
            return wrap(getForex(parts[0], parts[1]), symbol.name)
    source = symbol.source or "Y"
    if source not in data_sources:
        raise Exception("Unsupported source: " + source)
    if not conf.secondary:
        return data_sources[source].get(symbol, conf)
    try:
        return data_sources[source].get(symbol, conf)
    except Exception:  # BUGFIX: was a bare except — don't swallow KeyboardInterrupt/SystemExit
        print("Failed to fetch {0} from {1}, trying from {2} .. ".format(symbol, source, conf.secondary), end="")
        res = data_sources[conf.secondary].get(symbol, conf)
        print("DONE")
        return res
def format_filename(s):
    """Sanitize *s* into a safe filename: drop disallowed chars, spaces -> underscores."""
    allowed = set("-_.() %s%s" % (string.ascii_letters, string.digits))
    kept = [c for c in s if c in allowed]
    return ''.join(kept).replace(' ', '_')
def cache_file(symbol, source):
    """Path of the on-disk cache file under symbols/<source>/, creating the directory."""
    filepath = os.path.join("symbols", source, format_filename(symbol.name))
    # exist_ok avoids the check-then-create race of the old exists()/makedirs() pair
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    return filepath
# in-process cache of already-loaded symbol frames
symbols_mem_cache = {}
def cache_get(symbol, source):
    """Load a cached symbol frame (memory first, then disk CSV); None when absent."""
    # BUGFIX: key includes the source — the same ticker fetched from two
    # different sources is different data and must not collide.
    key = (symbol.name, source)
    if key in symbols_mem_cache:
        return symbols_mem_cache[key]
    filepath = cache_file(symbol, source)
    if os.path.exists(filepath):
        res = pd.read_csv(filepath, squeeze=False, index_col="date")
        res.index = pd.to_datetime(res.index, format="%Y-%m-%d")
        symbols_mem_cache[key] = res
        return res
    return None
def cache_set(symbol, source, s):
    """Persist a symbol's data to its on-disk CSV cache."""
    path = cache_file(symbol, source)
    s.to_csv(path, date_format="%Y-%m-%d", index_label="date")
def get_port(d, name=None):
    """Build a composite portfolio series from {symbol: percent} weights.

    *d* is either a dict or a definition string like "SPY:60|TLT:40".
    Returns a Wrapper over the recomposed price series, named *name*.
    """
    if isinstance(d, str):
        res = parse_portfolio_def(d)
        if not res:
            raise Exception("Invalid portfolio definition: " + d)
        d = res
    if not isinstance(d, dict):
        # BUGFIX: concatenating the type object itself raised TypeError
        # instead of this message; wrap it in str().
        raise Exception("Portfolio definition must be str or dict, was: " + str(type(d)))
    # weighted sum of log-returns over the common date range, recomposed to prices
    df = pd.DataFrame(logret(get(k).s)*v/100 for k,v in d.items()).T.dropna()
    res = Wrapper(i_logret(df.sum(axis=1)))
    res.name = name
    return res
def parse_portfolio_def(s):
    """Parse "SYM:PCT|SYM:PCT" into {SYM: float(PCT)}; None when malformed."""
    result = {}
    for part in s.split("|"):
        fields = part.split(":")
        if len(fields) != 2:
            return None
        sym, pct = fields
        result[sym] = float(pct)
    return result
def get(symbol, cache=True, splitAdj=True, divAdj=True, adj=None, secondary="AV"):
    """Main entry point: resolve *symbol* to a wrapped price series.

    Accepts an existing Wrapper/Series (passed through), a portfolio
    definition string ("SPY:60|TLT:40"), or a symbol string/Symbol.
    """
    global conf_cache
    if isinstance(symbol, Wrapper) or isinstance(symbol, pd.Series):
        return symbol  # already materialized data — pass through
    if "ignoredAssets" in globals() and ignoredAssets and symbol in ignoredAssets:
        return wrap(pd.Series(), "<empty>")
    # special handling for composite portfolios
    port = parse_portfolio_def(symbol)
    if port:
        return get_port(port, symbol)
    symbol = toSymbol(symbol)
    if adj == False:  # adj=False is shorthand for disabling both adjustments
        splitAdj = False
        divAdj = False
    s = getFrom(symbol, GetConf(splitAdj, divAdj, cache, secondary))
    return wrap(s, symbol.fullname)
# def __getattribute__(self,name):
# s = object.__getattribute__(self, "s")
# if name == "s":
# return s
# attr = s.__getattribute__(name)
# if hasattr(attr, '__call__'):
# def newfunc(*args, **kwargs):
# result = attr(*args, **kwargs)
# if type(result) is pd.Series:
# result = Wrapper(result)
# return result
# return newfunc
# if type(attr) is pd.Series:
# attr = Wrapper(attr)
# return attr
# plotting
from plotly.graph_objs import *
def createVerticalLine(xval):
    """Plotly shape dict: a thin red vertical line at x=xval spanning the plot height."""
    return {
        'type': 'line',
        'xref': 'x',      # x pinned to data coordinates
        'x0': xval,
        'x1': xval,
        'yref': 'paper',  # y spans the whole plotting area (0..1)
        'y0': 0,
        'y1': 1,
        'opacity': 1,
        'line': {'width': 1, 'color': 'red'}
    }
def createHorizontalLine(yval):
    """Plotly shape dict: a thin red horizontal line at y=yval spanning the plot width."""
    return {
        'type': 'line',
        'xref': 'paper',  # x spans the whole plotting area (0..1)
        'x0': 0,
        'x1': 1,
        'yref': 'y',      # BUGFIX: was 'x', which is not a valid yref; 'y' = data coords
        'y0': yval,
        'y1': yval,
        'opacity': 1,
        'line': {'width': 1, 'color': 'red'}
    }
def plot(*arr, log=True, title=None):
    """Plot mixed inputs with plotly: series become lines, datetimes become
    vertical marker lines, and plain numbers become horizontal marker lines.
    Uses a log y-axis unless any series contains non-positive values."""
    data = []
    shapes = []
    for val in arr:
        if isinstance(val, Wrapper) or isinstance(val, pd.Series):
            data.append(go.Scatter(x=val.index, y=val, name=val.name))
        elif isinstance(val, datetime.datetime):
            shapes.append(createVerticalLine(val))
        elif isinstance(val, np.datetime64):
            shapes.append(createVerticalLine(val.astype(datetime.datetime)))
        elif isinstance(val, numbers.Real):
            shapes.append(createHorizontalLine(val))
        else:
            raise Exception("unsupported value type: " + str(type(val)))
    # a log scale breaks on values <= 0: fall back to linear in that case
    for d in data:
        d = d.y
        if isinstance(d, Wrapper):
            d = d.s
        if np.any(d <= 0):
            log = False
    mar = 30
    margin=go.Margin(
        l=mar,
        r=mar,
        b=mar,
        t=mar,
        pad=0
    )
    # semi-transparent legend pinned to the top-left corner
    legend=dict(x=0,y=1,traceorder='normal',
        bgcolor='#FFFFFFBB',bordercolor='#888888',borderwidth=1,
        font=dict(family='sans-serif',size=12,color='#000'),
    )
    yaxisScale = "log" if log else None
    layout = go.Layout(legend=legend, margin=margin, yaxis=dict(type=yaxisScale, autorange=True), shapes=shapes, title=title)
    fig = go.Figure(data=data, layout=layout)
    py.iplot(fig)
# show a stacked area chart normalized to 100% of multiple time series
def plotly_area(df, title=None):
    """Stacked, 100%-normalized area chart of the columns of *df* (plotly)."""
    tt = df.div(df.sum(axis=1), axis=0)*100 # normalize each row to sum to 100
    tt = tt.reindex(tt.mean().sort_values(ascending=False).index, axis=1) # sort columns by mean value
    tt = tt.sort_index()
    tt2 = tt.cumsum(axis=1) # calc cum-sum: each trace stacks on the previous
    data = []
    for col in tt2:
        s = tt2[col]
        trace = go.Scatter(
            name=col,
            # NOTE(review): Index.to_datetime() is pandas-0.x API (removed later);
            # fine on the 0.22 pin used by this notebook.
            x=s.index.to_datetime(),
            y=s.values,
            text=["{:.1f}%".format(v) for v in tt[col].values], # use text as non-cumsum values
            hoverinfo='name+x+text',
            mode='lines',
            fill='tonexty'
        )
        data.append(trace)
    mar = 30
    margin=go.Margin(l=mar,r=mar,b=mar,t=mar,pad=0)
    legend=dict(x=0,y=1,traceorder='reversed',
        bgcolor='#FFFFFFBB',bordercolor='#888888',borderwidth=1,
        font=dict(family='sans-serif',size=12,color='#000'),
    )
    layout = go.Layout(margin=margin, legend=legend, title=title,
        #showlegend=True,
        xaxis=dict(
            type='date',
        ),
        yaxis=dict(
            type='linear',
            range=[1, 100],
            dtick=20,
            ticksuffix='%'
        )
    )
    fig = go.Figure(data=data, layout=layout)
    py.iplot(fig, filename='stacked-area-plot')
# data processing
def _start(s):
return s.index[0]
def _end(s):
return s.index[-1]
def getCommonDate(data, alldata=False):
    """Latest common start date across the series in *data* (None if none qualify).

    By default only Wrapper items are considered; alldata=True also counts
    plain pandas Series.
    """
    if alldata:
        starts = [_start(s) for s in data if isinstance(s, (Wrapper, pd.Series))]
    else:
        starts = [_start(s) for s in data if isinstance(s, Wrapper)]
    return max(starts) if starts else None
def doTrim(data, alldata=False):
    """Trim every (wrapped) series in *data* to start at their common start date."""
    common = getCommonDate(data, alldata=alldata)
    if common is None:
        return data
    trimmed = []
    for item in data:
        if isinstance(item, Wrapper) or (alldata and isinstance(item, pd.Series)):
            item = item[common:]
        trimmed.append(item)
    return trimmed
def doAlign(data):
    """Rescale each wrapped series to 1.0 at the common start date (for comparison)."""
    common = getCommonDate(data)
    if common is None:
        return data
    return [item / item[common] if isinstance(item, Wrapper) else item
            for item in data]
def doClean(data):
    """Drop NaNs from every wrapped series; other items pass through unchanged."""
    cleaned = []
    for item in data:
        cleaned.append(item.dropna() if isinstance(item, Wrapper) else item)
    return cleaned
def show(*data, trim=True, align=True, ta=True, **plotArgs):
    """Fetch, clean, optionally trim/align, and plot symbols, series and markers.

    ta=False disables both trimming and aligning in one switch; a "cache"
    kwarg is intercepted and forwarded to get() instead of plot().
    """
    items = []
    # intercept "cache" argument
    cache = plotArgs.get("cache", None)
    if cache != None:
        del plotArgs["cache"]
    for x in data:
        if isinstance(x, pd.DataFrame):
            items += [x[c] for c in x]  # expand a frame into its columns
        elif isinstance(x, datetime.datetime) or isinstance(x, np.datetime64):
            items.append(x)  # becomes a vertical marker line
        elif isinstance(x, numbers.Real):
            items.append(x)  # becomes a horizontal marker line
        else:
            x = get(x, cache) if cache != None else get(x)
            items.append(x)
    data = items
    #data = [get(s) for s in data] # converts string to symbols
    data = doClean(data)
    if not ta:
        trim = False
        align = False
    if trim: data = doTrim(data)
    if align: data = doAlign(data)
    plot(*data, **plotArgs)
def ma(s, n):
    """n-period rolling (moving) average."""
    window = s.rolling(n)
    return window.mean()
def mm(s, n):
    """n-period rolling (moving) median."""
    window = s.rolling(n)
    return window.median()
def ret(s):
    """Simple (percent-change) returns series."""
    changes = s.pct_change()
    return changes
def logret(s):
    """Log-return series of *s*, labeled "logret(<original name>)"."""
    res = np.log(s).diff()  # diff of logs == log(s) - log(s.shift(1))
    res.name = "logret(" + s.name + ")"
    return res
def i_logret(s):
    """Inverse of logret: recompose a price index via exp of the cumulative sum."""
    cumulative = np.cumsum(s)
    return np.exp(cumulative)
def lrret(regressors, target, sum1=False):
    """Regress the target's log-returns on the regressors' log-returns.

    Prints the fitted coefficients and r^2, plots target vs the fitted
    series, and returns the fitted (wrapped) price series. The sum1 path
    (weights constrained to sum to 1) is marked unfinished by the author.
    """
    regressors = [get(x) for x in regressors]
    target = get(target)
    all = [logret(x).s for x in (regressors + [target])]
    # based on: https://stats.stackexchange.com/questions/21565/how-do-i-fit-a-constrained-regression-in-r-so-that-coefficients-total-1?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
    # NOTE: note finished, not working
    if sum1:
        # regress differences vs the last regressor so implied weights sum to 1
        allOrig = all
        last = all[-2]
        all = [r - last for r in (all[:-2] + [all[-1]])]
    data = pd.DataFrame(all).T
    data = data.dropna()
    y = data.iloc[:, -1]   # target log-returns
    X = data.iloc[:, :-1]  # regressor log-returns
    regr = linear_model.LinearRegression(fit_intercept=False)
    regr.fit(X, y)
    if sum1:
        # recover the implied last weight, then refit on the original data for reporting
        weights = np.append(regr.coef_, 1-np.sum(regr.coef_))
        all = allOrig
        data = pd.DataFrame(all).T
        data = data.dropna()
        y = data.iloc[:, -1]
        X = data.iloc[:, :-1]
        regr = linear_model.LinearRegression(fit_intercept=False)
        regr.fit(X, y)
        regr.coef_ = weights
    y_pred = regr.predict(X)
    print('Regressors:', [s.name for s in regressors])
    print('Coefficients:', regr.coef_)
    #print('Coefficients*:', list(regr.coef_) + [1-np.sum(regr.coef_)])
    #print("Mean squared error: %.2f" % mean_squared_error(diabetes_y_test, diabetes_y_pred))
    print('Variance score r^2: %.3f' % sk.metrics.r2_score(y, y_pred))
    # recompose predicted log-returns into a price series for plotting
    y_pred = i_logret(pd.Series(y_pred, X.index))
    y_pred.name = target.name + " fit"
    #y_pred = "fit"
    y_pred = Wrapper(y_pred)
    show(target , y_pred)
    return y_pred
def dd(x):
    """Drawdown series in percent: distance below the running maximum."""
    if isinstance(x, Wrapper): # not sure why Wrapper doesn't work
        x = x.s
    running_max = np.maximum.accumulate(x)
    return (x / running_max - 1) * 100
from IPython.core.display import Javascript
import time, os, stat
def publish(name=None):
    """Save the current notebook and publish it via ./publish.sh under *name*
    (a random hex id when omitted); prints the resulting nbviewer URL."""
    def file_age_in_seconds(pathname):
        # seconds since the file was last modified
        return time.time() - os.stat(pathname)[stat.ST_MTIME]
    # the most recently modified .ipynb in the cwd is assumed to be this notebook
    filename = !ls -t *.ipynb | grep -v /$ | head -1
    filename = filename[0]
    age = file_age_in_seconds(filename)
    min_age = 5
    if age > min_age:
        print(filename + " file age is " + str(age) + " seconds, auto saving current notebook ..")
        # NOTE(review): the Javascript(...) return value is discarded — without
        # display() this may never execute in the browser; confirm.
        Javascript('console.log(document.querySelector("div#save-notbook button").click())')
        print("save requested, sleeping to ensure execution ..")
        time.sleep(15)
        print("done")
        filename = !ls -t *.ipynb | grep -v /$ | head -1
        filename = filename[0]
    if not name:
        name = str(uuid.uuid4().hex.upper())
    save()
    print("Publishing " + filename + " ..")
    res = subprocess.call(['bash', './publish.sh', name])
    if res == 0:
        print("published successfuly!")
        print("https://nbviewer.jupyter.org/github/ertpload/test/blob/master/__name__.ipynb".replace("__name__", name))
    else:
        print("Failed!")
from IPython.display import display,Javascript
def save():
    """Trigger a Jupyter checkpoint save from Python via injected Javascript."""
    js = Javascript('IPython.notebook.save_checkpoint();')
    display(js)
# make the plotly graphs look wider on mobile
from IPython.core.display import display, HTML
s = """
<style>
div.rendered_html {
max-width: 10000px;
}
</style>
"""
display(HTML(s))
# interception to auto-fetch hardcoded symbols e.g:
# show(SPY)
# this should run last in the framework code, or it attempts to download unrelated symbols :)
from IPython.core.inputtransformer import *
# Install an input transformer that auto-fetches bare ALL-CAPS names as symbols,
# so e.g. `show(SPY)` works without calling get("SPY") first.
intercept = True
if intercept and not "my_transformer_tokens_instance" in locals():
    #print("transformation hook init")
    attempted_implied_fetches = set()
    ip = get_ipython()
    @StatelessInputTransformer.wrap
    def my_transformer(line):
        # (unused example transformer kept for reference)
        if line.startswith("x"):
            return "specialcommand(" + repr(line) + ")"
        return line
    @TokenInputTransformer.wrap
    def my_transformer_tokens(tokens):
        for i, x in enumerate(tokens):
            if x.type == 1 and x.string.isupper() and x.string.isalpha(): ## type=1 is NAME token
                # NOTE(review): token type 53 presumably matches OP ("=") on the
                # Python version this was written for — verify on newer Pythons.
                if i < len(tokens)-1 and tokens[i+1].type == 53 and tokens[i+1].string == "=":
                    # it's an assignment target, not a symbol reference
                    attempted_implied_fetches.add(x.string)
                    continue
                if x.string in attempted_implied_fetches or x.string in ip.user_ns:
                    continue
                try:
                    ip.user_ns[x.string] = get(x.string)
                except:
                    print("Failed to fetch implied symbol: " + x.string)
                attempted_implied_fetches.add(x.string)
        return tokens
    my_transformer_tokens_instance = my_transformer_tokens()
    ip.input_splitter.logical_line_transforms.append(my_transformer_tokens_instance)
    ip.input_transformer_manager.logical_line_transforms.append(my_transformer_tokens_instance)
def date(s):
    """Parse a 'YYYY-MM-DD' string into a pandas Timestamp."""
    parsed = pd.to_datetime(s, format="%Y-%m-%d")
    return parsed
# Another option for interception:
class VarWatcher(object):
    """Experimental IPython pre/post-execute hooks (alternative interception idea)."""
    def __init__(self, ip):
        self.shell = ip     # the interactive shell this watcher is attached to
        self.last_x = None  # kept for the commented-out change-detection below
    def pre_execute(self):
        if False:  # debug dump of shell attributes, disabled
            for k in dir(self.shell):
                print(k, ":", getattr(self.shell, k))
            print()
        #print("\n".join(dir(self.shell)))
        if "content" in self.shell.parent_header:
            code = self.shell.parent_header['content']['code']
            # NOTE(review): stores 42 under the raw cell source as a user_ns key —
            # looks like leftover experimentation; confirm before relying on it.
            self.shell.user_ns[code] = 42
        #print(self.shell.user_ns.get('ASDF', None))
    def post_execute(self):
        pass
        #if self.shell.user_ns.get('x', None) != self.last_x:
        #    print("x changed!")
def load_ipython_extension(ip):
    """Register VarWatcher pre/post-execute hooks on the given IPython shell."""
    watcher = VarWatcher(ip)
    ip.events.register('pre_execute', watcher.pre_execute)
    ip.events.register('post_execute', watcher.post_execute)
# hook the watcher into the currently running IPython shell
ip = get_ipython()
load_ipython_extension(ip)