%%bash
echo "Installing requirements .."
pip install pandas==0.22.0 quandl pandas_datareader alpha_vantage matplotlib plotly sklearn scipy fix_yahoo_finance statsmodels beautifulsoup4 > /dev/null 2>&1
# NOTE: we use pandas 0.22 for now since pandas_datareader don't support 0.23 yet
echo "Done"
# (cell output) Installing requirements .. Done
import os
import datetime
import numbers
import subprocess
import uuid
import string
import json
import requests
from io import StringIO
import re
import pandas as pd
import numpy as np
import sklearn as sk
from sklearn import linear_model
import quandl
import pandas_datareader
from pandas_datareader import data as pdr
import fix_yahoo_finance as yf
yf.pdr_override() # <== that's all it takes :-)
import alpha_vantage
from alpha_vantage.timeseries import TimeSeries
from alpha_vantage.cryptocurrencies import CryptoCurrencies
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (20.0, 10.0) # Make plots bigger
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode()
from pathlib import Path
from bs4 import BeautifulSoup
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
pd.set_option('display.float_format', lambda x: '{:,.2f}'.format(x))
pd.set_option('display.max_rows', 5000)
pd.set_option('display.max_columns', 500)
def pd_from_dict(d):
    """Build a DataFrame with one column per dict key, rows sorted by index."""
    frame = pd.DataFrame.from_dict(d, orient='index')
    return frame.T.sort_index()
# (hack) Global configs
# When non-None, conf_cache globally overrides each call's per-symbol cache flag
# (consulted in DataSource.get).
conf_cache = None
class GetConf:
    """Bundle of fetch options: split/dividend adjustment, caching, fallback source."""
    def __init__(self, splitAdj, divAdj, cache, secondary):
        self.splitAdj, self.divAdj = splitAdj, divAdj
        self.cache, self.secondary = cache, secondary
# Module-level fetch cache, created only if missing so re-running this cell
# in the notebook keeps previously fetched entries.
if "fetchCache" not in globals():  # idiomatic "not in" (was: not "x" in ...)
    fetchCache = {}
class Symbol:
    """Parsed symbol spec of the form NAME[@SOURCE][!CURRENCY].

    `fullname` keeps the original string; `name`, `source` and `currency`
    hold the parsed components ("" when absent).
    """
    def __init__(self, fullname):
        self.fullname = fullname
        pieces = fullname.split("!")
        if len(pieces) == 2:
            fullname, self.currency = pieces
        else:
            self.currency = ""
        pieces = fullname.split("@")
        self.name = pieces[0]
        self.source = pieces[1] if len(pieces) == 2 else ""
    def __str__(self):
        return self.fullname
import scipy.optimize
from datetime import datetime as dt
def xnpv(rate, values, dates):
    '''Equivalent of Excel's XNPV function.
    >>> from datetime import date
    >>> dates = [date(2010, 12, 29), date(2012, 1, 25), date(2012, 3, 8)]
    >>> values = [-10000, 20, 10100]
    >>> xnpv(0.1, values, dates)
    -966.4345...
    '''
    if rate <= -1.0:
        return float('inf')
    t0 = dates[0]  # discount relative to the first cash-flow date
    total = 0
    for amount, when in zip(values, dates):
        total += amount / (1.0 + rate) ** ((when - t0).days / 365.0)
    return total
def xirr(values, dates):
    '''Equivalent of Excel's XIRR function.
    >>> from datetime import date
    >>> dates = [date(2010, 12, 29), date(2012, 1, 25), date(2012, 3, 8)]
    >>> values = [-10000, 20, 10100]
    >>> xirr(values, dates)
    0.0100612...
    '''
    npv_at = lambda r: xnpv(r, values, dates)
    # we prefer to try brentq first as newton keeps outputting tolerance warnings
    try:
        return scipy.optimize.brentq(npv_at, -1.0, 1e10)
    except RuntimeError:  # Failed to converge?
        return scipy.optimize.newton(npv_at, 0.0, tol=0.0002)
def curr_price(symbol):
    """Latest available price for *symbol*; 0 for ignored assets."""
    # NOTE(review): relies on a module-level `ignoredAssets` collection being
    # defined by the notebook user before this is called — confirm.
    if symbol in ignoredAssets: return 0
    return get(symbol)[-1]
#def getForex(fromCur, toCur):
# if fromCur == toCur: return 1
# if toCur == "USD":
# return get(fromCur + "=X", "Y")
# if fromCur == "USD":
# return get(toCur + "=X", "Y").map(lambda x: 1.0/x)
def getForex(fromCur, toCur):
    """Daily exchange-rate series converting fromCur to toCur (scalar 1 when equal)."""
    if fromCur == toCur:
        return 1
    rates = get(fromCur + toCur + "@CUR").s
    # fill calendar gaps (weekends/holidays) by carrying the last rate forward
    full_range = pd.date_range(start=rates.index[0], end=rates.index[-1])
    rates = rates.reindex(full_range)
    return rates.fillna(method="ffill")
def convert(value, fromCur, toCur):
    """Convert *value* between currencies using the most recent exchange rate."""
    if fromCur == toCur:
        return value
    latest_rate = getForex(fromCur, toCur)[-1]
    return value * latest_rate
def toSymbol(sym):
    """Coerce *sym* (str or Symbol) into a Symbol instance.

    Raises TypeError for any other type. (Previously used `assert False`,
    which silently disappears under `python -O`.)
    """
    if isinstance(sym, Symbol):
        return sym
    if isinstance(sym, str):
        return Symbol(sym)
    raise TypeError("invalid type for Symbol: " + str(type(sym)) + ", " + str(sym))
class DataSource:
    """Base class for price fetchers with disk caching and failure throttling."""
    def __init__(self, source):
        self.source = source  # short source code, e.g. "Y" for Yahoo
    def fetch(self, symbol, conf):
        # Override: fetch raw data for *symbol*; return a DataFrame or None on failure.
        pass
    def process(self, symbol, df, conf):
        # Override: reduce the raw DataFrame to a price Series.
        pass
    def get(self, symbol, conf):
        """Fetch *symbol* (honoring caches), persist it, and return the processed series sorted by date."""
        global conf_cache
        df = None
        # a "._FAIL_" marker file throttles retries of symbols that recently failed
        failpath = cache_file(symbol, self.source) + "._FAIL_"
        if os.path.isfile(failpath):
            mtime = datetime.datetime.fromtimestamp(os.path.getmtime(failpath))
            diff = datetime.datetime.now() - mtime
            if diff.total_seconds() <= 24 * 3600:  # back off for 24h after a failure
                raise Exception("Fetching has previously failed for {0}, will try again later".format(symbol))
        useCache = conf.cache
        if conf_cache != None:  # global override beats the per-call setting
            useCache = conf_cache
        if useCache:
            df = cache_get(symbol, self.source)
        try:
            # Attempt to actually fetch the symbol
            if df is None:
                print("Fetching %s from %s .. " % (symbol, self.source), end="")
                df = self.fetch(symbol, conf)
                print("DONE")
            if df is None:
                print("FAILED")
                raise Exception("Failed to fetch symbol: " + str(symbol) + " from " + self.source)
            if len(df) == 0:
                print("FAILED")
                raise Exception("Symbol fetched but is empty: " + str(symbol) + " from " + self.source)
        except Exception as e:
            # save a note that we failed
            Path(failpath).touch()
            raise Exception from e
        # NOTE(review): cache_set also runs when df came straight from cache,
        # rewriting an identical file — harmless but redundant; confirm intent.
        cache_set(symbol, self.source, df)
        res = self.process(symbol, df, conf)
        return res.sort_index()
fred_forex_codes = """
AUD DEXUSAL
BRL DEXBZUS
GBP DEXUSUK
CAD DEXCAUS
CNY DEXCHUS
DKK DEXDNUS
EUR DEXUSEU
HKD DEXHKUS
INR DEXINUS
JPY DEXJPUS
MYR DEXMAUS
MXN DEXMXUS
TWD DEXTAUS
NOK DEXNOUS
SGD DEXSIUS
ZAR DEXSFUS
KRW DEXKOUS
LKR DEXSLUS
SEK DEXSDUS
CHF DEXSZUS
VEF DEXVZUS
"""
boe_forex_codes = """
AUD XUDLADD
CAD XUDLCDD
CNY XUDLBK73
CZK XUDLBK27
DKK XUDLDKD
HKD XUDLHDD
HUF XUDLBK35
INR XUDLBK64
NIS XUDLBK65
JPY XUDLJYD
LTL XUDLBK38
MYR XUDLBK66
NZD XUDLNDD
NOK XUDLNKD
PLN XUDLBK49
GBP XUDLGBD
RUB XUDLBK69
SAR XUDLSRD
SGD XUDLSGD
ZAR XUDLZRD
KRW XUDLBK74
SEK XUDLSKD
CHF XUDLSFD
TWD XUDLTWD
THB XUDLBK72
TRY XUDLBK75
"""
# https://blog.quandl.com/api-for-currency-data
class ForexDataSource(DataSource):
    """USD forex rates via Quandl: FRED tables first, Bank of England as fallback."""
    def __init__(self, source):
        # parse the tab-separated "CUR<TAB>CODE" tables defined above into dicts
        self.fred_code_map = dict([s.split("\t") for s in fred_forex_codes.split("\n")[1:-1]])
        self.boe_code_map = dict([s.split("\t") for s in boe_forex_codes.split("\n")[1:-1]])
        self.boe_code_map["ILS"] = self.boe_code_map["NIS"]  # same currency, two common codes
        super().__init__(source)
    def fetch(self, symbol, conf):
        # symbol.name is a 6-letter pair where one side must be USD, e.g. "EURUSD"
        assert len(symbol.name) == 6
        _from = symbol.name[:3]
        _to = symbol.name[3:]
        if _to != "USD" and _from != "USD":
            raise Exception("Can only convert to/from USD")
        invert = _from == "USD"
        curr = _to if invert else _from
        div100 = 1
        if curr == "GBC":  # GBC: British pence, quoted at 1/100 of GBP
            div100 = 100
            curr = "GBP"
        if curr in self.fred_code_map:
            code = self.fred_code_map[curr]
            df = quandl.get("FRED/" + code)
            if code.endswith("US") != invert: # some of the FRED currencies are inverted vs the US dollar, argh..
                df = df.apply(lambda x: 1.0/x)
            return df / div100
        if curr in self.boe_code_map:
            code = self.boe_code_map[curr]
            df = quandl.get("BOE/" + code)
            if not invert: # not sure if some of BEO currencies are NOT inverted vs USD, checked a few and they weren't
                df = df.apply(lambda x: 1.0/x)
            return df / div100
        raise Exception("Currency pair is not supported: " + symbol.name)
    def process(self, symbol, df, conf):
        # the first column holds the rate series
        return df.iloc[:, 0]
# https://github.com/ranaroussi/fix-yahoo-finance
class YahooDataSource(DataSource):
    """Yahoo Finance daily prices via pandas_datareader (patched by fix_yahoo_finance)."""
    def fetch(self, symbol, conf):
        return pdr.get_data_yahoo(symbol.name, progress=False, actions=True)
    def process(self, symbol, df, conf):
        if not conf.splitAdj:
            # dividend-adjusted-but-not-split-adjusted is not supported
            assert not conf.divAdj
            # Yahoo "Close" data is split adjusted.
            # We find the unadjusted data using the splits data:
            # reverse cumulative product of split ratios, shifted so the
            # period before each split gets the multiplier.
            splitMul = df["Stock Splits"][::-1].cumprod().shift().fillna(method="bfill")
            return df["Close"] / splitMul
        assert conf.splitAdj and conf.divAdj
        return df["Adj Close"]
class QuandlDataSource(DataSource):
    """Generic Quandl dataset: prefers a "Close" column, else the first column."""
    def fetch(self, symbol, conf):
        return quandl.get(symbol.name)
    def process(self, symbol, df, conf):
        if "Close" not in df.columns:
            return df.iloc[:, 0]
        return df["Close"]
class GoogleDataSource(DataSource):
    """Google Finance daily prices via pandas_datareader."""
    def fetch(self, symbol, conf):
        # pdr is `pandas_datareader.data`; same DataReader entry point
        return pdr.DataReader(symbol.name, 'google')
    def process(self, symbol, df, conf):
        return df["Close"]
AV_API_KEY = 'BB18'  # NOTE(review): hardcoded API key — consider moving to an env var
class AlphaVantageDataSource(DataSource):
    """Alpha Vantage adjusted daily equity prices."""
    def fetch(self, symbol, conf):
        ts = TimeSeries(key=AV_API_KEY, output_format='pandas')
        df, meta_data = ts.get_daily_adjusted(symbol.name, outputsize="full")
        # the index arrives as date strings; normalize to a DatetimeIndex
        df.index = pd.to_datetime(df.index, format="%Y-%m-%d")
        return df
    def process(self, symbol, df, conf):
        return df["5. adjusted close"]  # column name as returned by the AV API
class AlphaVantageCryptoDataSource(DataSource):
    """Alpha Vantage daily cryptocurrency prices, quoted in USD."""
    def fetch(self, symbol, conf):
        client = CryptoCurrencies(key=AV_API_KEY, output_format='pandas')
        df, _meta = client.get_digital_currency_daily(symbol=symbol.name, market='USD')
        df.index = pd.to_datetime(df.index, format="%Y-%m-%d")
        return df
    def process(self, symbol, df, conf):
        return df['4a. close (USD)']
class CryptoCompareDataSource(DataSource):
    """Daily close prices from the CryptoCompare public API (CCCAGG index, USD)."""
    def fetch(self, symbol, conf):
        url = "https://min-api.cryptocompare.com/data/histoday?fsym=__sym__&tsym=USD&limit=600000&aggregate=1&e=CCCAGG"
        payload = json.loads(requests.get(url.replace("__sym__", symbol.name)).text)
        df = pd.DataFrame(payload["Data"])
        if len(df) == 0:
            return None
        # timestamps arrive as unix epoch seconds
        df["time"] = pd.to_datetime(df.time, unit="s")
        return df.set_index("time")
    def process(self, symbol, df, conf):
        return df.close
# NOTE: data is SPLIT adjusted, but has no dividends and is NOT DIVIDEND adjusted
# NOTE: it has data all the way to the start, but returned result is capped in length for ~20 years
#       and results are trimmed from the END, not from the start. TBD to handle this properly.
#       for now we start at 1.1.2000
class InvestingComDataSource(DataSource):
    """Scrapes historical daily prices from investing.com.

    Flow: search endpoint resolves a ticker to its page link (getUrl),
    the page's embedded ids are extracted (getCodes), and the history
    AJAX endpoint returns an HTML table (getHtml) parsed in fetch().
    """
    def getUrl(self, symbol):
        # resolve the ticker to its investing.com page link via the search service
        symbol = symbol.name
        data = {
            'search_text': symbol,
            'term': symbol,
            'country_id': '0',
            'tab_id': 'All'
        }
        # browser-like headers; the endpoint rejects requests without them
        headers = {
            'Origin': 'https://www.investing.com',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.9,he;q=0.8',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Referer': 'https://www.investing.com/search?q=' + symbol,
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive'
        }
        r = requests.post("https://www.investing.com/search/service/search", data=data, headers=headers)
        res = r.text
        res = json.loads(res)
        # take the first search hit
        return res["All"][0]["link"]
    def getCodes(self, url):
        # scrape the pairId/smlId identifiers embedded in the historical-data page's JS
        url = "https://www.investing.com" + url + "-historical-data"
        headers = {
            'Origin': 'https://www.investing.com',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.9,he;q=0.8',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Referer': 'https://www.investing.com/',
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive'
        }
        r = requests.get(url,headers=headers)
        text = r.text
        # NOTE(review): patterns should be raw strings (r"smlId:\s+(\d+)") to avoid
        # invalid-escape warnings on newer Pythons; behavior is unchanged here.
        m = re.search("smlId:\s+(\d+)", text)
        smlId = m.group(1)
        m = re.search("pairId:\s+(\d+)", text)
        pairId = m.group(1)
        return pairId, smlId
    def getHtml(self, pairId, smlId):
        # request the full daily history (2000-2100, sorted desc) as an HTML table
        data = [
            'curr_id=' + pairId,
            'smlID=' + smlId,
            'header=',
            'st_date=01%2F01%2F2000',
            'end_date=01%2F01%2F2100',
            'interval_sec=Daily',
            'sort_col=date',
            'sort_ord=DESC',
            'action=historical_data'
        ]
        data = "&".join(data)
        headers = {
            'Origin': 'https://www.investing.com',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.9,he;q=0.8',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'text/plain, */*; q=0.01',
            'Referer': 'https://www.investing.com/',
            'X-Requested-With': 'XMLHttpRequest',
            'Connection': 'keep-alive'
        }
        r = requests.post("https://www.investing.com/instruments/HistoricalDataAjax", data=data, headers=headers)
        return r.text
    def fetch(self, symbol, conf):
        symbolUrl = self.getUrl(symbol)
        pairId, smlId = self.getCodes(symbolUrl)
        html = self.getHtml(pairId, smlId)
        #print(html)
        parsed_html = BeautifulSoup(html, "lxml")
        df = pd.DataFrame(columns=["date", "price"])
        for i, tr in enumerate(parsed_html.find_all("tr")[1:]): # skip header
            # each cell carries the raw value in a data-real-value attribute
            data = [x.get("data-real-value") for x in tr.find_all("td")]
            if len(data) == 0 or data[0] is None:
                continue
            date = datetime.datetime.utcfromtimestamp(int(data[0]))
            close = float(data[1].replace(",", ""))
            #open = data[2]
            #high = data[3]
            #low = data[4]
            #volume = data[5]
            df.loc[i] = [date, close]
        df = df.set_index("date")
        return df
    def process(self, symbol, df, conf):
        return df['price']
# fetching data
if not "Wrapper" in locals():
    class Wrapper(object):
        """Transparent proxy around a pandas Series.

        Delegates attribute access to the wrapped series (re-wrapping Series
        results), and overloads arithmetic so results carry a descriptive
        name built from the operand names ("A / B", "A * B", ...).
        Defined behind a locals() guard so re-running the cell does not
        create a new class object (isinstance checks elsewhere stay valid).
        """
        def __init__(self, s):
            #self.s = s
            # bypass our own __setattr__, which forwards writes to the series
            object.__setattr__(self, "s", s)
        def __getattr__(self, name):
            attr = self.s.__getattribute__(name)
            if hasattr(attr, '__call__'):
                # wrap bound methods so Series results come back wrapped too
                def newfunc(*args, **kwargs):
                    result = attr(*args, **kwargs)
                    if type(result) is pd.Series:
                        result = Wrapper(result)
                    return result
                return newfunc
            if type(attr) is pd.Series:
                attr = Wrapper(attr)
            return attr
        def __setattr__(self, name, value):
            # all attribute writes (e.g. .name) land on the underlying series
            self.s.__setattr__(name, value)
        def __getitem__(self, item):
            return wrap(self.s.__getitem__(item), self.s.name)
        # def __truediv__(self, other):
        #     divisor = other
        #     if type(other) is Wrapper:
        #         divisor = other.s
        #     series = self.s / divisor
        #     name = self.name
        #     if type(other) is Wrapper:
        #         name = self.s.name + " / " + other.s.name
        #     return wrap(series, name)
        def __truediv__(self, other):
            return Wrapper.doop(self, other, "/", lambda x, y: x / y)
        def __rtruediv__(self, other):
            return Wrapper.doop(self, other, "/", lambda x, y: x / y, right=True)
        def doop(self, other, opname, opLambda, right=False):
            # generic binary-op helper: unwrap the operand, apply, name the result
            divisor = other
            if type(other) is Wrapper:
                divisor = other.s
            if right:
                series = opLambda(divisor, self.s)
            else:
                series = opLambda(self.s, divisor)
            name = self.name
            if type(other) is Wrapper:
                if right:
                    name = other.s.name + " " + opname + " " + self.s.name
                else:
                    name = self.s.name + " " + opname + " " + other.s.name
            return wrap(series, name)
        def __sub__(self, other):
            return Wrapper.doop(self, other, "-", lambda x, y: x - y)
        #def __rsub__(self, other):
        #    return Wrapper.doop(self, other, "-", lambda x, y: x - y, right=True)
        def __mul__(self, other):
            return Wrapper.doop(self, other, "*", lambda x, y: x * y)
        def __rmul__(self, other):
            return Wrapper.doop(self, other, "*", lambda x, y: x * y, right=True)
def wrap(s, name=""):
    """Wrap a pandas Series in a Wrapper and tag it with *name* (defaults to s.name)."""
    label = name or s.name
    if not label:
        raise Exception("no name")
    if isinstance(s, pd.Series):
        s = Wrapper(s)
    s.name = label
    return s
# registry of available data sources, keyed by the @SOURCE suffix of a symbol
data_sources = {
    "Y": YahooDataSource("Y"),
    "IC": InvestingComDataSource("IC"),
    "Q": QuandlDataSource("Q"),
    "AV": AlphaVantageDataSource("AV"),
    "CC": CryptoCompareDataSource("CC"),
    "CCAV": AlphaVantageCryptoDataSource("CCAV"),
    "CUR": ForexDataSource("CUR"),
    "G": GoogleDataSource("G")
}
def getFrom(symbol, conf):
    """Resolve *symbol* to a price series from its data source (default "Y").

    Six-letter names with no explicit source and a USD side (e.g. "EURUSD")
    are routed through getForex. When conf.secondary is set, it is used as a
    fallback source if the primary fetch raises.
    """
    # special handling for forex
    # if a match, it will recurse and return here with XXXUSD@CUR
    if len(symbol.name) == 6 and not symbol.source:
        parts = symbol.name[:3], symbol.name[3:]
        if parts[0] == "USD" or parts[1] == "USD":
            return wrap(getForex(parts[0], parts[1]), symbol.name)
    source = symbol.source or "Y"
    if source not in data_sources:
        raise Exception("Unsupported source: " + source)
    if not conf.secondary:
        return data_sources[source].get(symbol, conf)
    try:
        return data_sources[source].get(symbol, conf)
    except Exception:  # BUGFIX: was a bare except — don't swallow KeyboardInterrupt/SystemExit
        print("Failed to fetch {0} from {1}, trying from {2} .. ".format(symbol, source, conf.secondary), end="")
        res = data_sources[conf.secondary].get(symbol, conf)
        print("DONE")
        return res
def format_filename(s):
    """Sanitize *s* into a safe filename: drop disallowed chars, spaces -> underscores."""
    allowed = set("-_.() %s%s" % (string.ascii_letters, string.digits))
    kept = [c for c in s if c in allowed]
    return ''.join(kept).replace(' ', '_')
def cache_file(symbol, source):
    """Path of the on-disk cache file under symbols/<source>/, creating the directory."""
    filepath = os.path.join("symbols", source, format_filename(symbol.name))
    # exist_ok avoids the check-then-create race of the old exists()/makedirs() pair
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    return filepath
# in-process cache of already-loaded symbol frames
symbols_mem_cache = {}
def cache_get(symbol, source):
    """Load a cached symbol frame (memory first, then disk CSV); None when absent."""
    # BUGFIX: key includes the source — the same ticker fetched from two
    # different sources is different data and must not collide.
    key = (symbol.name, source)
    if key in symbols_mem_cache:
        return symbols_mem_cache[key]
    filepath = cache_file(symbol, source)
    if os.path.exists(filepath):
        res = pd.read_csv(filepath, squeeze=False, index_col="date")
        res.index = pd.to_datetime(res.index, format="%Y-%m-%d")
        symbols_mem_cache[key] = res
        return res
    return None
def cache_set(symbol, source, s):
    """Persist a symbol's data to its on-disk CSV cache."""
    path = cache_file(symbol, source)
    s.to_csv(path, date_format="%Y-%m-%d", index_label="date")
def get_port(d, name=None):
    """Build a composite portfolio series from {symbol: percent} weights.

    *d* is either a dict or a definition string like "SPY:60|TLT:40".
    Returns a Wrapper over the recomposed price series, named *name*.
    """
    if isinstance(d, str):
        res = parse_portfolio_def(d)
        if not res:
            raise Exception("Invalid portfolio definition: " + d)
        d = res
    if not isinstance(d, dict):
        # BUGFIX: concatenating the type object itself raised TypeError
        # instead of this message; wrap it in str().
        raise Exception("Portfolio definition must be str or dict, was: " + str(type(d)))
    # weighted sum of log-returns over the common date range, recomposed to prices
    df = pd.DataFrame(logret(get(k).s)*v/100 for k,v in d.items()).T.dropna()
    res = Wrapper(i_logret(df.sum(axis=1)))
    res.name = name
    return res
def parse_portfolio_def(s):
    """Parse "SYM:PCT|SYM:PCT" into {SYM: float(PCT)}; None when malformed."""
    result = {}
    for part in s.split("|"):
        fields = part.split(":")
        if len(fields) != 2:
            return None
        sym, pct = fields
        result[sym] = float(pct)
    return result
def get(symbol, cache=True, splitAdj=True, divAdj=True, adj=None, secondary="AV"):
    """Main entry point: resolve *symbol* to a wrapped price series.

    Accepts an existing Wrapper/Series (passed through), a portfolio
    definition string ("SPY:60|TLT:40"), or a symbol string/Symbol.
    """
    global conf_cache
    if isinstance(symbol, Wrapper) or isinstance(symbol, pd.Series):
        return symbol  # already materialized data — pass through
    if "ignoredAssets" in globals() and ignoredAssets and symbol in ignoredAssets:
        return wrap(pd.Series(), "<empty>")
    # special handling for composite portfolios
    port = parse_portfolio_def(symbol)
    if port:
        return get_port(port, symbol)
    symbol = toSymbol(symbol)
    if adj == False:  # adj=False is shorthand for disabling both adjustments
        splitAdj = False
        divAdj = False
    s = getFrom(symbol, GetConf(splitAdj, divAdj, cache, secondary))
    return wrap(s, symbol.fullname)
# def __getattribute__(self,name):
# s = object.__getattribute__(self, "s")
# if name == "s":
# return s
# attr = s.__getattribute__(name)
# if hasattr(attr, '__call__'):
# def newfunc(*args, **kwargs):
# result = attr(*args, **kwargs)
# if type(result) is pd.Series:
# result = Wrapper(result)
# return result
# return newfunc
# if type(attr) is pd.Series:
# attr = Wrapper(attr)
# return attr
# plotting
from plotly.graph_objs import *
def createVerticalLine(xval):
    """Plotly shape dict: a thin red vertical line at x=xval spanning the plot height."""
    return {
        'type': 'line',
        'xref': 'x',      # x pinned to data coordinates
        'x0': xval,
        'x1': xval,
        'yref': 'paper',  # y spans the whole plotting area (0..1)
        'y0': 0,
        'y1': 1,
        'opacity': 1,
        'line': {'width': 1, 'color': 'red'}
    }
def createHorizontalLine(yval):
    """Plotly shape dict: a thin red horizontal line at y=yval spanning the plot width."""
    return {
        'type': 'line',
        'xref': 'paper',  # x spans the whole plotting area (0..1)
        'x0': 0,
        'x1': 1,
        'yref': 'y',      # BUGFIX: was 'x', which is not a valid yref; 'y' = data coords
        'y0': yval,
        'y1': yval,
        'opacity': 1,
        'line': {'width': 1, 'color': 'red'}
    }
def plot(*arr, log=True, title=None):
    """Plot mixed inputs with plotly: series become lines, datetimes become
    vertical marker lines, and plain numbers become horizontal marker lines.
    Uses a log y-axis unless any series contains non-positive values."""
    data = []
    shapes = []
    for val in arr:
        if isinstance(val, Wrapper) or isinstance(val, pd.Series):
            data.append(go.Scatter(x=val.index, y=val, name=val.name))
        elif isinstance(val, datetime.datetime):
            shapes.append(createVerticalLine(val))
        elif isinstance(val, np.datetime64):
            shapes.append(createVerticalLine(val.astype(datetime.datetime)))
        elif isinstance(val, numbers.Real):
            shapes.append(createHorizontalLine(val))
        else:
            raise Exception("unsupported value type: " + str(type(val)))
    # a log scale breaks on values <= 0: fall back to linear in that case
    for d in data:
        d = d.y
        if isinstance(d, Wrapper):
            d = d.s
        if np.any(d <= 0):
            log = False
    mar = 30
    margin=go.Margin(
        l=mar,
        r=mar,
        b=mar,
        t=mar,
        pad=0
    )
    # semi-transparent legend pinned to the top-left corner
    legend=dict(x=0,y=1,traceorder='normal',
        bgcolor='#FFFFFFBB',bordercolor='#888888',borderwidth=1,
        font=dict(family='sans-serif',size=12,color='#000'),
    )
    yaxisScale = "log" if log else None
    layout = go.Layout(legend=legend, margin=margin, yaxis=dict(type=yaxisScale, autorange=True), shapes=shapes, title=title)
    fig = go.Figure(data=data, layout=layout)
    py.iplot(fig)
# show a stacked area chart normalized to 100% of multiple time series
def plotly_area(df, title=None):
    """Stacked, 100%-normalized area chart of the columns of *df* (plotly)."""
    tt = df.div(df.sum(axis=1), axis=0)*100 # normalize each row to sum to 100
    tt = tt.reindex(tt.mean().sort_values(ascending=False).index, axis=1) # sort columns by mean value
    tt = tt.sort_index()
    tt2 = tt.cumsum(axis=1) # calc cum-sum: each trace stacks on the previous
    data = []
    for col in tt2:
        s = tt2[col]
        trace = go.Scatter(
            name=col,
            # NOTE(review): Index.to_datetime() is pandas-0.x API (removed later);
            # fine on the 0.22 pin used by this notebook.
            x=s.index.to_datetime(),
            y=s.values,
            text=["{:.1f}%".format(v) for v in tt[col].values], # use text as non-cumsum values
            hoverinfo='name+x+text',
            mode='lines',
            fill='tonexty'
        )
        data.append(trace)
    mar = 30
    margin=go.Margin(l=mar,r=mar,b=mar,t=mar,pad=0)
    legend=dict(x=0,y=1,traceorder='reversed',
        bgcolor='#FFFFFFBB',bordercolor='#888888',borderwidth=1,
        font=dict(family='sans-serif',size=12,color='#000'),
    )
    layout = go.Layout(margin=margin, legend=legend, title=title,
        #showlegend=True,
        xaxis=dict(
            type='date',
        ),
        yaxis=dict(
            type='linear',
            range=[1, 100],
            dtick=20,
            ticksuffix='%'
        )
    )
    fig = go.Figure(data=data, layout=layout)
    py.iplot(fig, filename='stacked-area-plot')
# data processing
def _start(s):
return s.index[0]
def _end(s):
return s.index[-1]
def getCommonDate(data, alldata=False):
    """Latest common start date across the series in *data* (None if none qualify).

    By default only Wrapper items are considered; alldata=True also counts
    plain pandas Series.
    """
    if alldata:
        starts = [_start(s) for s in data if isinstance(s, (Wrapper, pd.Series))]
    else:
        starts = [_start(s) for s in data if isinstance(s, Wrapper)]
    return max(starts) if starts else None
def doTrim(data, alldata=False):
    """Trim every (wrapped) series in *data* to start at their common start date."""
    common = getCommonDate(data, alldata=alldata)
    if common is None:
        return data
    trimmed = []
    for item in data:
        if isinstance(item, Wrapper) or (alldata and isinstance(item, pd.Series)):
            item = item[common:]
        trimmed.append(item)
    return trimmed
def doAlign(data):
    """Rescale each wrapped series to 1.0 at the common start date (for comparison)."""
    common = getCommonDate(data)
    if common is None:
        return data
    return [item / item[common] if isinstance(item, Wrapper) else item
            for item in data]
def doClean(data):
    """Drop NaNs from every wrapped series; other items pass through unchanged."""
    cleaned = []
    for item in data:
        cleaned.append(item.dropna() if isinstance(item, Wrapper) else item)
    return cleaned
def show(*data, trim=True, align=True, ta=True, **plotArgs):
    """Fetch, clean, optionally trim/align, and plot symbols, series and markers.

    ta=False disables both trimming and aligning in one switch; a "cache"
    kwarg is intercepted and forwarded to get() instead of plot().
    """
    items = []
    # intercept "cache" argument
    cache = plotArgs.get("cache", None)
    if cache != None:
        del plotArgs["cache"]
    for x in data:
        if isinstance(x, pd.DataFrame):
            items += [x[c] for c in x]  # expand a frame into its columns
        elif isinstance(x, datetime.datetime) or isinstance(x, np.datetime64):
            items.append(x)  # becomes a vertical marker line
        elif isinstance(x, numbers.Real):
            items.append(x)  # becomes a horizontal marker line
        else:
            x = get(x, cache) if cache != None else get(x)
            items.append(x)
    data = items
    #data = [get(s) for s in data] # converts string to symbols
    data = doClean(data)
    if not ta:
        trim = False
        align = False
    if trim: data = doTrim(data)
    if align: data = doAlign(data)
    plot(*data, **plotArgs)
def ma(s, n):
    """n-period rolling (moving) average."""
    window = s.rolling(n)
    return window.mean()
def mm(s, n):
    """n-period rolling (moving) median."""
    window = s.rolling(n)
    return window.median()
def ret(s):
    """Simple (percent-change) returns series."""
    changes = s.pct_change()
    return changes
def logret(s):
    """Log-return series of *s*, labeled "logret(<original name>)"."""
    res = np.log(s).diff()  # diff of logs == log(s) - log(s.shift(1))
    res.name = "logret(" + s.name + ")"
    return res
def i_logret(s):
    """Inverse of logret: recompose a price index via exp of the cumulative sum."""
    cumulative = np.cumsum(s)
    return np.exp(cumulative)
def lrret(regressors, target, sum1=False):
    """Regress the target's log-returns on the regressors' log-returns.

    Prints the fitted coefficients and r^2, plots target vs the fitted
    series, and returns the fitted (wrapped) price series. The sum1 path
    (weights constrained to sum to 1) is marked unfinished by the author.
    """
    regressors = [get(x) for x in regressors]
    target = get(target)
    all = [logret(x).s for x in (regressors + [target])]
    # based on: https://stats.stackexchange.com/questions/21565/how-do-i-fit-a-constrained-regression-in-r-so-that-coefficients-total-1?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
    # NOTE: note finished, not working
    if sum1:
        # regress differences vs the last regressor so implied weights sum to 1
        allOrig = all
        last = all[-2]
        all = [r - last for r in (all[:-2] + [all[-1]])]
    data = pd.DataFrame(all).T
    data = data.dropna()
    y = data.iloc[:, -1]   # target log-returns
    X = data.iloc[:, :-1]  # regressor log-returns
    regr = linear_model.LinearRegression(fit_intercept=False)
    regr.fit(X, y)
    if sum1:
        # recover the implied last weight, then refit on the original data for reporting
        weights = np.append(regr.coef_, 1-np.sum(regr.coef_))
        all = allOrig
        data = pd.DataFrame(all).T
        data = data.dropna()
        y = data.iloc[:, -1]
        X = data.iloc[:, :-1]
        regr = linear_model.LinearRegression(fit_intercept=False)
        regr.fit(X, y)
        regr.coef_ = weights
    y_pred = regr.predict(X)
    print('Regressors:', [s.name for s in regressors])
    print('Coefficients:', regr.coef_)
    #print('Coefficients*:', list(regr.coef_) + [1-np.sum(regr.coef_)])
    #print("Mean squared error: %.2f" % mean_squared_error(diabetes_y_test, diabetes_y_pred))
    print('Variance score r^2: %.3f' % sk.metrics.r2_score(y, y_pred))
    # recompose predicted log-returns into a price series for plotting
    y_pred = i_logret(pd.Series(y_pred, X.index))
    y_pred.name = target.name + " fit"
    #y_pred = "fit"
    y_pred = Wrapper(y_pred)
    show(target , y_pred)
    return y_pred
def dd(x):
    """Drawdown series in percent: distance below the running maximum."""
    if isinstance(x, Wrapper): # not sure why Wrapper doesn't work
        x = x.s
    running_max = np.maximum.accumulate(x)
    return (x / running_max - 1) * 100
from IPython.core.display import Javascript
import time, os, stat
def publish(name=None):
    """Save the current notebook and publish it via ./publish.sh under *name*
    (a random hex id when omitted); prints the resulting nbviewer URL."""
    def file_age_in_seconds(pathname):
        # seconds since the file was last modified
        return time.time() - os.stat(pathname)[stat.ST_MTIME]
    # the most recently modified .ipynb in the cwd is assumed to be this notebook
    filename = !ls -t *.ipynb | grep -v /$ | head -1
    filename = filename[0]
    age = file_age_in_seconds(filename)
    min_age = 5
    if age > min_age:
        print(filename + " file age is " + str(age) + " seconds, auto saving current notebook ..")
        # NOTE(review): the Javascript(...) return value is discarded — without
        # display() this may never execute in the browser; confirm.
        Javascript('console.log(document.querySelector("div#save-notbook button").click())')
        print("save requested, sleeping to ensure execution ..")
        time.sleep(15)
        print("done")
        filename = !ls -t *.ipynb | grep -v /$ | head -1
        filename = filename[0]
    if not name:
        name = str(uuid.uuid4().hex.upper())
    save()
    print("Publishing " + filename + " ..")
    res = subprocess.call(['bash', './publish.sh', name])
    if res == 0:
        print("published successfuly!")
        print("https://nbviewer.jupyter.org/github/ertpload/test/blob/master/__name__.ipynb".replace("__name__", name))
    else:
        print("Failed!")
from IPython.display import display,Javascript
def save():
    """Trigger a Jupyter checkpoint save from Python via injected Javascript."""
    js = Javascript('IPython.notebook.save_checkpoint();')
    display(js)
# make the plotly graphs look wider on mobile
from IPython.core.display import display, HTML
s = """
<style>
div.rendered_html {
max-width: 10000px;
}
</style>
"""
display(HTML(s))
# interception to auto-fetch hardcoded symbols e.g:
# show(SPY)
# this should run last in the framework code, or it attempts to download unrelated symbols :)
from IPython.core.inputtransformer import *
# Install an input transformer that auto-fetches bare ALL-CAPS names as symbols,
# so e.g. `show(SPY)` works without calling get("SPY") first.
intercept = True
if intercept and not "my_transformer_tokens_instance" in locals():
    #print("transformation hook init")
    attempted_implied_fetches = set()
    ip = get_ipython()
    @StatelessInputTransformer.wrap
    def my_transformer(line):
        # (unused example transformer kept for reference)
        if line.startswith("x"):
            return "specialcommand(" + repr(line) + ")"
        return line
    @TokenInputTransformer.wrap
    def my_transformer_tokens(tokens):
        for i, x in enumerate(tokens):
            if x.type == 1 and x.string.isupper() and x.string.isalpha(): ## type=1 is NAME token
                # NOTE(review): token type 53 presumably matches OP ("=") on the
                # Python version this was written for — verify on newer Pythons.
                if i < len(tokens)-1 and tokens[i+1].type == 53 and tokens[i+1].string == "=":
                    # it's an assignment target, not a symbol reference
                    attempted_implied_fetches.add(x.string)
                    continue
                if x.string in attempted_implied_fetches or x.string in ip.user_ns:
                    continue
                try:
                    ip.user_ns[x.string] = get(x.string)
                except:
                    print("Failed to fetch implied symbol: " + x.string)
                attempted_implied_fetches.add(x.string)
        return tokens
    my_transformer_tokens_instance = my_transformer_tokens()
    ip.input_splitter.logical_line_transforms.append(my_transformer_tokens_instance)
    ip.input_transformer_manager.logical_line_transforms.append(my_transformer_tokens_instance)
def date(s):
    """Parse a 'YYYY-MM-DD' string into a pandas Timestamp."""
    parsed = pd.to_datetime(s, format="%Y-%m-%d")
    return parsed
# Another option for interception:
class VarWatcher(object):
    """Experimental IPython pre/post-execute hooks (alternative interception idea)."""
    def __init__(self, ip):
        self.shell = ip     # the interactive shell this watcher is attached to
        self.last_x = None  # kept for the commented-out change-detection below
    def pre_execute(self):
        if False:  # debug dump of shell attributes, disabled
            for k in dir(self.shell):
                print(k, ":", getattr(self.shell, k))
            print()
        #print("\n".join(dir(self.shell)))
        if "content" in self.shell.parent_header:
            code = self.shell.parent_header['content']['code']
            # NOTE(review): stores 42 under the raw cell source as a user_ns key —
            # looks like leftover experimentation; confirm before relying on it.
            self.shell.user_ns[code] = 42
        #print(self.shell.user_ns.get('ASDF', None))
    def post_execute(self):
        pass
        #if self.shell.user_ns.get('x', None) != self.last_x:
        #    print("x changed!")
def load_ipython_extension(ip):
    """Register VarWatcher pre/post-execute hooks on the given IPython shell."""
    watcher = VarWatcher(ip)
    ip.events.register('pre_execute', watcher.pre_execute)
    ip.events.register('post_execute', watcher.post_execute)
# hook the watcher into the currently running IPython shell
ip = get_ipython()
load_ipython_extension(ip)