#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_cell_magic('bash', '', 'echo "Installing requirements .."\npip install pandas==0.22.0 quandl pandas_datareader alpha_vantage matplotlib plotly sklearn scipy fix_yahoo_finance statsmodels beautifulsoup4 > /dev/null 2>&1\n# NOTE: we use pandas 0.22 for now since pandas_datareader don\'t support 0.23 yet\necho "Done"\n') # In[61]: import os import datetime import numbers import subprocess import uuid import string import json import requests from io import StringIO import re import pandas as pd import numpy as np import sklearn as sk from sklearn import linear_model import quandl import pandas_datareader from pandas_datareader import data as pdr import fix_yahoo_finance as yf yf.pdr_override() # <== that's all it takes :-) import alpha_vantage from alpha_vantage.timeseries import TimeSeries from alpha_vantage.cryptocurrencies import CryptoCurrencies import matplotlib import matplotlib.pyplot as plt get_ipython().run_line_magic('matplotlib', 'inline') matplotlib.rcParams['figure.figsize'] = (20.0, 10.0) # Make plots bigger import plotly.offline as py import plotly.graph_objs as go py.init_notebook_mode() from pathlib import Path from bs4 import BeautifulSoup # In[3]: from IPython.core.interactiveshell import InteractiveShell InteractiveShell.ast_node_interactivity = "all" pd.set_option('display.float_format', lambda x: '{:,.2f}'.format(x)) pd.set_option('display.max_rows', 5000) pd.set_option('display.max_columns', 500) # In[4]: def pd_from_dict(d): return pd.DataFrame.from_dict(d, orient='index').T.sort_index() # In[5]: # (hack) Global configs conf_cache = None class GetConf: def __init__(self, splitAdj, divAdj, cache, secondary): self.splitAdj = splitAdj self.divAdj = divAdj self.cache = cache self.secondary = secondary # In[6]: if not "fetchCache" in globals(): fetchCache = {} # In[7]: class Symbol: def __init__(self, fullname): self.fullname = fullname parts = fullname.split("!") if len(parts) == 2: fullname = parts[0] self.currency = parts[1] else: self.currency = "" parts = fullname.split("@") self.name = parts[0] if len(parts) == 2: self.source = parts[1] else: self.source = "" def __str__(self): return self.fullname # In[8]: import scipy.optimize from datetime import datetime as dt def xnpv(rate, values, dates): '''Equivalent of Excel's XNPV function. >>> from datetime import date >>> dates = [date(2010, 12, 29), date(2012, 1, 25), date(2012, 3, 8)] >>> values = [-10000, 20, 10100] >>> xnpv(0.1, values, dates) -966.4345... ''' if rate <= -1.0: return float('inf') d0 = dates[0] # or min(dates) return sum([ vi / (1.0 + rate)**((di - d0).days / 365.0) for vi, di in zip(values, dates)]) def xirr(values, dates): '''Equivalent of Excel's XIRR function. >>> from datetime import date >>> dates = [date(2010, 12, 29), date(2012, 1, 25), date(2012, 3, 8)] >>> values = [-10000, 20, 10100] >>> xirr(values, dates) 0.0100612... ''' # we prefer to try brentq first as newton keeps outputting tolerance warnings try: return scipy.optimize.brentq(lambda r: xnpv(r, values, dates), -1.0, 1e10) #return scipy.optimize.newton(lambda r: xnpv(r, values, dates), 0.0, tol=0.0002) except RuntimeError: # Failed to converge? return scipy.optimize.newton(lambda r: xnpv(r, values, dates), 0.0, tol=0.0002) #return scipy.optimize.brentq(lambda r: xnpv(r, values, dates), -1.0, 1e10) #xirr([-100, 100, 200], [dt(2000, 1, 1), dt(2001, 1, 1), dt(2002, 1, 1)]) # In[9]: def curr_price(symbol): if symbol in ignoredAssets: return 0 return get(symbol)[-1] #def getForex(fromCur, toCur): # if fromCur == toCur: return 1 # if toCur == "USD": # return get(fromCur + "=X", "Y") # if fromCur == "USD": # return get(toCur + "=X", "Y").map(lambda x: 1.0/x) def getForex(fromCur, toCur): if fromCur == toCur: return 1 tmp = get(fromCur + toCur + "@CUR").s tmp = tmp.reindex(pd.date_range(start=tmp.index[0], end=tmp.index[-1])) tmp = tmp.fillna(method="ffill") return tmp #return wrap(tmp, fromCur+toCur) def convert(value, fromCur, toCur): if fromCur == toCur: return value return value * getForex(fromCur, toCur)[-1] # In[98]: def toSymbol(sym): if isinstance(sym, Symbol): return sym if isinstance(sym, str): return Symbol(sym) assert False, "invalid type for Symbol: " + str(type(sym)) + ", " + str(sym) class DataSource: def __init__(self, source): self.source = source def fetch(self, symbol, conf): pass def process(self, symbol, df, conf): pass def get(self, symbol, conf): global conf_cache df = None failpath = cache_file(symbol, self.source) + "._FAIL_" if os.path.isfile(failpath): mtime = datetime.datetime.fromtimestamp(os.path.getmtime(failpath)) diff = datetime.datetime.now() - mtime if diff.total_seconds() <= 24 * 3600: raise Exception("Fetching has previously failed for {0}, will try again later".format(symbol)) useCache = conf.cache if conf_cache != None: useCache = conf_cache if useCache: df = cache_get(symbol, self.source) try: # Attempt to actually fetch the symbol if df is None: print("Fetching %s from %s .. " % (symbol, self.source), end="") df = self.fetch(symbol, conf) print("DONE") if df is None: print("FAILED") raise Exception("Failed to fetch symbol: " + str(symbol) + " from " + self.source) if len(df) == 0: print("FAILED") raise Exception("Symbol fetched but is empty: " + str(symbol) + " from " + self.source) except Exception as e: # save a note that we failed Path(failpath).touch() raise Exception from e cache_set(symbol, self.source, df) res = self.process(symbol, df, conf) return res.sort_index() fred_forex_codes = """ AUD DEXUSAL BRL DEXBZUS GBP DEXUSUK CAD DEXCAUS CNY DEXCHUS DKK DEXDNUS EUR DEXUSEU HKD DEXHKUS INR DEXINUS JPY DEXJPUS MYR DEXMAUS MXN DEXMXUS TWD DEXTAUS NOK DEXNOUS SGD DEXSIUS ZAR DEXSFUS KRW DEXKOUS LKR DEXSLUS SEK DEXSDUS CHF DEXSZUS VEF DEXVZUS """ boe_forex_codes = """ AUD XUDLADD CAD XUDLCDD CNY XUDLBK73 CZK XUDLBK27 DKK XUDLDKD HKD XUDLHDD HUF XUDLBK35 INR XUDLBK64 NIS XUDLBK65 JPY XUDLJYD LTL XUDLBK38 MYR XUDLBK66 NZD XUDLNDD NOK XUDLNKD PLN XUDLBK49 GBP XUDLGBD RUB XUDLBK69 SAR XUDLSRD SGD XUDLSGD ZAR XUDLZRD KRW XUDLBK74 SEK XUDLSKD CHF XUDLSFD TWD XUDLTWD THB XUDLBK72 TRY XUDLBK75 """ # https://blog.quandl.com/api-for-currency-data class ForexDataSource(DataSource): def __init__(self, source): self.fred_code_map = dict([s.split("\t") for s in fred_forex_codes.split("\n")[1:-1]]) self.boe_code_map = dict([s.split("\t") for s in boe_forex_codes.split("\n")[1:-1]]) self.boe_code_map["ILS"] = self.boe_code_map["NIS"] super().__init__(source) def fetch(self, symbol, conf): assert len(symbol.name) == 6 _from = symbol.name[:3] _to = symbol.name[3:] if _to != "USD" and _from != "USD": raise Exception("Can only convert to/from USD") invert = _from == "USD" curr = _to if invert else _from div100 = 1 if curr == "GBC": div100 = 100 curr = "GBP" if curr in self.fred_code_map: code = self.fred_code_map[curr] df = quandl.get("FRED/" + code) if code.endswith("US") != invert: # some of the FRED currencies are inverted vs the US dollar, argh.. df = df.apply(lambda x: 1.0/x) return df / div100 if curr in self.boe_code_map: code = self.boe_code_map[curr] df = quandl.get("BOE/" + code) if not invert: # not sure if some of BEO currencies are NOT inverted vs USD, checked a few and they weren't df = df.apply(lambda x: 1.0/x) return df / div100 raise Exception("Currency pair is not supported: " + symbol.name) def process(self, symbol, df, conf): return df.iloc[:, 0] # https://github.com/ranaroussi/fix-yahoo-finance class YahooDataSource(DataSource): def fetch(self, symbol, conf): return pdr.get_data_yahoo(symbol.name, progress=False, actions=True) def process(self, symbol, df, conf): if not conf.splitAdj: assert not conf.divAdj # Yahoo "Close" data is split adjusted. # We find the unadjusted data using the splits data splitMul = df["Stock Splits"][::-1].cumprod().shift().fillna(method="bfill") return df["Close"] / splitMul assert conf.splitAdj and conf.divAdj return df["Adj Close"] class QuandlDataSource(DataSource): def fetch(self, symbol, conf): return quandl.get(symbol.name) def process(self, symbol, df, conf): if "Close" in df.columns: return df["Close"] return df.iloc[:, 0] class GoogleDataSource(DataSource): def fetch(self, symbol, conf): return pandas_datareader.data.DataReader(symbol.name, 'google') def process(self, symbol, df, conf): return df["Close"] AV_API_KEY = 'BB18' class AlphaVantageDataSource(DataSource): def fetch(self, symbol, conf): ts = TimeSeries(key=AV_API_KEY, output_format='pandas') df, meta_data = ts.get_daily_adjusted(symbol.name, outputsize="full") df.index = pd.to_datetime(df.index, format="%Y-%m-%d") return df def process(self, symbol, df, conf): return df["5. adjusted close"] class AlphaVantageCryptoDataSource(DataSource): def fetch(self, symbol, conf): cc = CryptoCurrencies(key=AV_API_KEY, output_format='pandas') df, meta_data = cc.get_digital_currency_daily(symbol=symbol.name, market='USD') df.index = pd.to_datetime(df.index, format="%Y-%m-%d") return df def process(self, symbol, df, conf): return df['4a. close (USD)'] class CryptoCompareDataSource(DataSource): def fetch(self, symbol, conf): url = "https://min-api.cryptocompare.com/data/histoday?fsym=__sym__&tsym=USD&limit=600000&aggregate=1&e=CCCAGG" d = json.loads(requests.get(url.replace("__sym__", symbol.name)).text) df = pd.DataFrame(d["Data"]) if len(df) == 0: return None df["time"] = pd.to_datetime(df.time, unit="s") df.set_index("time", inplace=True) return df def process(self, symbol, df, conf): return df.close # NOTE: data is SPLIT adjusted, but has no dividends and is NOT DIVIDEND adjusted # NOTE: it has data all the way to the start, but returned result is capped in length for ~20 years # and results are trimmed from the END, not from the start. TBD to handle this properly. # for now we start at 1.1.2000 class InvestingComDataSource(DataSource): def getUrl(self, symbol): symbol = symbol.name data = { 'search_text': symbol, 'term': symbol, 'country_id': '0', 'tab_id': 'All' } headers = { 'Origin': 'https://www.investing.com', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'en-US,en;q=0.9,he;q=0.8', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36', 'Content-Type': 'application/x-www-form-urlencoded', 'Accept': 'application/json, text/javascript, */*; q=0.01', 'Referer': 'https://www.investing.com/search?q=' + symbol, 'X-Requested-With': 'XMLHttpRequest', 'Connection': 'keep-alive' } r = requests.post("https://www.investing.com/search/service/search", data=data, headers=headers) res = r.text res = json.loads(res) return res["All"][0]["link"] def getCodes(self, url): url = "https://www.investing.com" + url + "-historical-data" headers = { 'Origin': 'https://www.investing.com', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'en-US,en;q=0.9,he;q=0.8', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36', 'Content-Type': 'application/x-www-form-urlencoded', 'Accept': 'application/json, text/javascript, */*; q=0.01', 'Referer': 'https://www.investing.com/', 'X-Requested-With': 'XMLHttpRequest', 'Connection': 'keep-alive' } r = requests.get(url,headers=headers) text = r.text m = re.search("smlId:\s+(\d+)", text) smlId = m.group(1) m = re.search("pairId:\s+(\d+)", text) pairId = m.group(1) return pairId, smlId def getHtml(self, pairId, smlId): data = [ 'curr_id=' + pairId, 'smlID=' + smlId, 'header=', 'st_date=01%2F01%2F2000', 'end_date=01%2F01%2F2100', 'interval_sec=Daily', 'sort_col=date', 'sort_ord=DESC', 'action=historical_data' ] data = "&".join(data) headers = { 'Origin': 'https://www.investing.com', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'en-US,en;q=0.9,he;q=0.8', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36', 'Content-Type': 'application/x-www-form-urlencoded', 'Accept': 'text/plain, */*; q=0.01', 'Referer': 'https://www.investing.com/', 'X-Requested-With': 'XMLHttpRequest', 'Connection': 'keep-alive' } r = requests.post("https://www.investing.com/instruments/HistoricalDataAjax", data=data, headers=headers) return r.text def fetch(self, symbol, conf): symbolUrl = self.getUrl(symbol) pairId, smlId = self.getCodes(symbolUrl) html = self.getHtml(pairId, smlId) #print(html) parsed_html = BeautifulSoup(html, "lxml") df = pd.DataFrame(columns=["date", "price"]) for i, tr in enumerate(parsed_html.find_all("tr")[1:]): # skip header data = [x.get("data-real-value") for x in tr.find_all("td")] if len(data) == 0 or data[0] is None: continue date = datetime.datetime.utcfromtimestamp(int(data[0])) close = float(data[1].replace(",", "")) #open = data[2] #high = data[3] #low = data[4] #volume = data[5] df.loc[i] = [date, close] df = df.set_index("date") return df def process(self, symbol, df, conf): return df['price'] # In[ ]: # fetching data if not "Wrapper" in locals(): class Wrapper(object): def __init__(self, s): #self.s = s object.__setattr__(self, "s", s) def __getattr__(self, name): attr = self.s.__getattribute__(name) if hasattr(attr, '__call__'): def newfunc(*args, **kwargs): result = attr(*args, **kwargs) if type(result) is pd.Series: result = Wrapper(result) return result return newfunc if type(attr) is pd.Series: attr = Wrapper(attr) return attr def __setattr__(self, name, value): self.s.__setattr__(name, value) def __getitem__(self, item): return wrap(self.s.__getitem__(item), self.s.name) # def __truediv__(self, other): # divisor = other # if type(other) is Wrapper: # divisor = other.s # series = self.s / divisor # name = self.name # if type(other) is Wrapper: # name = self.s.name + " / " + other.s.name # return wrap(series, name) def __truediv__(self, other): return Wrapper.doop(self, other, "/", lambda x, y: x / y) def __rtruediv__(self, other): return Wrapper.doop(self, other, "/", lambda x, y: x / y, right=True) def doop(self, other, opname, opLambda, right=False): divisor = other if type(other) is Wrapper: divisor = other.s if right: series = opLambda(divisor, self.s) else: series = opLambda(self.s, divisor) name = self.name if type(other) is Wrapper: if right: name = other.s.name + " " + opname + " " + self.s.name else: name = self.s.name + " " + opname + " " + other.s.name return wrap(series, name) def __sub__(self, other): return Wrapper.doop(self, other, "-", lambda x, y: x - y) #def __rsub__(self, other): # return Wrapper.doop(self, other, "-", lambda x, y: x - y, right=True) def __mul__(self, other): return Wrapper.doop(self, other, "*", lambda x, y: x * y) def __rmul__(self, other): return Wrapper.doop(self, other, "*", lambda x, y: x * y, right=True) def wrap(s, name=""): name = name or s.name if not name: raise Exception("no name") if isinstance(s, pd.Series): s = Wrapper(s) s.name = name return s data_sources = { "Y": YahooDataSource("Y"), "IC": InvestingComDataSource("IC"), "Q": QuandlDataSource("Q"), "AV": AlphaVantageDataSource("AV"), "CC": CryptoCompareDataSource("CC"), "CCAV": AlphaVantageCryptoDataSource("CCAV"), "CUR": ForexDataSource("CUR"), "G": GoogleDataSource("G") } def getFrom(symbol, conf): # special handling for forex # if a match, if will recurse and return here with XXXUSD@CUR if len(symbol.name) == 6 and not symbol.source: parts = symbol.name[:3], symbol.name[3:] if parts[0] == "USD" or parts[1] == "USD": return wrap(getForex(parts[0], parts[1]), symbol.name) source = symbol.source or "Y" if not source in data_sources: raise Exception("Unsupported source: " + source) if not conf.secondary: return data_sources[source].get(symbol, conf) try: return data_sources[source].get(symbol, conf) except: print("Failed to fetch {0} from {1}, trying from {2} .. ".format(symbol, source, conf.secondary), end="") res = data_sources[conf.secondary].get(symbol, conf) print("DONE") return res def format_filename(s): valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) filename = ''.join(c for c in s if c in valid_chars) filename = filename.replace(' ','_') return filename def cache_file(symbol, source): filepath = os.path.join("symbols", source, format_filename(symbol.name)) dirpath = os.path.dirname(filepath) if not os.path.exists(dirpath): os.makedirs(dirpath) return filepath symbols_mem_cache = {} def cache_get(symbol, source): if symbol.name in symbols_mem_cache: return symbols_mem_cache[symbol.name] filepath = cache_file(symbol, source) if os.path.exists(filepath): #res = pd.read_csv(filepath, squeeze=True, names=["date", "value"], index_col="date") res = pd.read_csv(filepath, squeeze=False, index_col="date") res.index = pd.to_datetime(res.index, format="%Y-%m-%d") symbols_mem_cache[symbol.name] = res return res return None def cache_set(symbol, source, s): filepath = cache_file(symbol, source) s.to_csv(filepath, date_format="%Y-%m-%d", index_label="date") def get_port(d, name=None): if isinstance(d, str): res = parse_portfolio_def(d) if not res: raise Exception("Invalid portfolio definition: " + d) d = res if not isinstance(d, dict): raise Exception("Portfolio definition must be str or dict, was: " + type(d)) df = pd.DataFrame(logret(get(k).s)*v/100 for k,v in d.items()).T.dropna() res = Wrapper(i_logret(df.sum(axis=1))) res.name = name return res def parse_portfolio_def(s): d = {} parts = s.split("|") for p in parts: parts2 = p.split(":") if len(parts2) != 2: return None d[parts2[0]] = float(parts2[1]) return d def get(symbol, cache=True, splitAdj=True, divAdj=True, adj=None, secondary="AV"): global conf_cache if isinstance(symbol, Wrapper) or isinstance(symbol, pd.Series): return symbol if "ignoredAssets" in globals() and ignoredAssets and symbol in ignoredAssets: return wrap(pd.Series(), "") # special handing for composite portfolios port = parse_portfolio_def(symbol) if port: return get_port(port, symbol) symbol = toSymbol(symbol) if adj == False: splitAdj = False divAdj = False s = getFrom(symbol, GetConf(splitAdj, divAdj, cache, secondary)) return wrap(s, symbol.fullname) # In[ ]: # def __getattribute__(self,name): # s = object.__getattribute__(self, "s") # if name == "s": # return s # attr = s.__getattribute__(name) # if hasattr(attr, '__call__'): # def newfunc(*args, **kwargs): # result = attr(*args, **kwargs) # if type(result) is pd.Series: # result = Wrapper(result) # return result # return newfunc # if type(attr) is pd.Series: # attr = Wrapper(attr) # return attr # In[ ]: # plotting from plotly.graph_objs import * def createVerticalLine(xval): shape = { 'type': 'line', 'xref': 'x', 'x0': xval, 'x1': xval, 'yref': 'paper', 'y0': 0, 'y1': 1, #'fillcolor': 'blue', 'opacity': 1, 'line': { 'width': 1, 'color': 'red' } } return shape def createHorizontalLine(yval): shape = { 'type': 'line', 'xref': 'paper', 'x0': 0, 'x1': 1, 'yref': 'x', 'y0': yval, 'y1': yval, #'fillcolor': 'blue', 'opacity': 1, 'line': { 'width': 1, 'color': 'red' } } return shape def plot(*arr, log=True, title=None): data = [] shapes = [] for val in arr: if isinstance(val, Wrapper) or isinstance(val, pd.Series): data.append(go.Scatter(x=val.index, y=val, name=val.name)) elif isinstance(val, datetime.datetime): shapes.append(createVerticalLine(val)) elif isinstance(val, np.datetime64): shapes.append(createVerticalLine(val.astype(datetime.datetime))) elif isinstance(val, numbers.Real): shapes.append(createHorizontalLine(val)) else: raise Exception("unsupported value type: " + str(type(val))) for d in data: d = d.y if isinstance(d, Wrapper): d = d.s if np.any(d <= 0): log = False mar = 30 margin=go.Margin( l=mar, r=mar, b=mar, t=mar, pad=0 ) legend=dict(x=0,y=1,traceorder='normal', bgcolor='#FFFFFFBB',bordercolor='#888888',borderwidth=1, font=dict(family='sans-serif',size=12,color='#000'), ) yaxisScale = "log" if log else None layout = go.Layout(legend=legend, margin=margin, yaxis=dict(type=yaxisScale, autorange=True), shapes=shapes, title=title) fig = go.Figure(data=data, layout=layout) py.iplot(fig) # show a stacked area chart normalized to 100% of multiple time series def plotly_area(df, title=None): tt = df.div(df.sum(axis=1), axis=0)*100 # normalize to summ 100 tt = tt.reindex(tt.mean().sort_values(ascending=False).index, axis=1) # sort columns by mean value tt = tt.sort_index() tt2 = tt.cumsum(axis=1) # calc cum-sum data = [] for col in tt2: s = tt2[col] trace = go.Scatter( name=col, x=s.index.to_datetime(), y=s.values, text=["{:.1f}%".format(v) for v in tt[col].values], # use text as non-cumsum values hoverinfo='name+x+text', mode='lines', fill='tonexty' ) data.append(trace) mar = 30 margin=go.Margin(l=mar,r=mar,b=mar,t=mar,pad=0) legend=dict(x=0,y=1,traceorder='reversed', bgcolor='#FFFFFFBB',bordercolor='#888888',borderwidth=1, font=dict(family='sans-serif',size=12,color='#000'), ) layout = go.Layout(margin=margin, legend=legend, title=title, #showlegend=True, xaxis=dict( type='date', ), yaxis=dict( type='linear', range=[1, 100], dtick=20, ticksuffix='%' ) ) fig = go.Figure(data=data, layout=layout) py.iplot(fig, filename='stacked-area-plot') # In[ ]: # data processing def _start(s): return s.index[0] def _end(s): return s.index[-1] def getCommonDate(data, alldata=False): if alldata: l = [_start(s) for s in data if isinstance(s, Wrapper) or isinstance(s, pd.Series)] else: l = [_start(s) for s in data if isinstance(s, Wrapper)] if not l: return None return max(l) def doTrim(data, alldata=False): date = getCommonDate(data, alldata=alldata) if date is None: return data newArr = [] for s in data: if isinstance(s, Wrapper) or (alldata and isinstance(s, pd.Series)): s = s[date:] newArr.append(s) return newArr def doAlign(data): date = getCommonDate(data) if date is None: return data newArr = [] for s in data: if isinstance(s, Wrapper): s = s / s[date] newArr.append(s) return newArr def doClean(data): return [s.dropna() if isinstance(s, Wrapper) else s for s in data] def show(*data, trim=True, align=True, ta=True, **plotArgs): items = [] # intercept "cache" arguemnt cache = plotArgs.get("cache", None) if cache != None: del plotArgs["cache"] for x in data: if isinstance(x, pd.DataFrame): items += [x[c] for c in x] elif isinstance(x, datetime.datetime) or isinstance(x, np.datetime64): items.append(x) elif isinstance(x, numbers.Real): items.append(x) else: x = get(x, cache) if cache != None else get(x) items.append(x) data = items #data = [get(s) for s in data] # converts string to symbols data = doClean(data) if not ta: trim = False align = False if trim: data = doTrim(data) if align: data = doAlign(data) plot(*data, **plotArgs) def ma(s, n): return s.rolling(n).mean() def mm(s, n): return s.rolling(n).median() def ret(s): return s.pct_change() def logret(s): res = np.log(s) - np.log(s.shift(1)) res.name = "logret(" + s.name + ")" return res def i_logret(s): return np.exp(np.cumsum(s)) def lrret(regressors, target, sum1=False): regressors = [get(x) for x in regressors] target = get(target) all = [logret(x).s for x in (regressors + [target])] # based on: https://stats.stackexchange.com/questions/21565/how-do-i-fit-a-constrained-regression-in-r-so-that-coefficients-total-1?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa # NOTE: note finished, not working if sum1: allOrig = all last = all[-2] all = [r - last for r in (all[:-2] + [all[-1]])] data = pd.DataFrame(all).T data = data.dropna() y = data.iloc[:, -1] X = data.iloc[:, :-1] regr = linear_model.LinearRegression(fit_intercept=False) regr.fit(X, y) if sum1: weights = np.append(regr.coef_, 1-np.sum(regr.coef_)) all = allOrig data = pd.DataFrame(all).T data = data.dropna() y = data.iloc[:, -1] X = data.iloc[:, :-1] regr = linear_model.LinearRegression(fit_intercept=False) regr.fit(X, y) regr.coef_ = weights y_pred = regr.predict(X) print('Regressors:', [s.name for s in regressors]) print('Coefficients:', regr.coef_) #print('Coefficients*:', list(regr.coef_) + [1-np.sum(regr.coef_)]) #print("Mean squared error: %.2f" % mean_squared_error(diabetes_y_test, diabetes_y_pred)) print('Variance score r^2: %.3f' % sk.metrics.r2_score(y, y_pred)) y_pred = i_logret(pd.Series(y_pred, X.index)) y_pred.name = target.name + " fit" #y_pred = "fit" y_pred = Wrapper(y_pred) show(target , y_pred) return y_pred def dd(x): if isinstance(x, Wrapper): # not sure why Wrapper doesn't work x = x.s res = (x / np.maximum.accumulate(x) - 1) * 100 return res # In[ ]: from IPython.core.display import Javascript import time, os, stat def publish(name=None): def file_age_in_seconds(pathname): return time.time() - os.stat(pathname)[stat.ST_MTIME] filename = get_ipython().getoutput('ls -t *.ipynb | grep -v /$ | head -1') filename = filename[0] age = file_age_in_seconds(filename) min_age = 5 if age > min_age: print(filename + " file age is " + str(age) + " seconds, auto saving current notebook ..") Javascript('console.log(document.querySelector("div#save-notbook button").click())') print("save requested, sleeping to ensure execution ..") time.sleep(15) print("done") filename = get_ipython().getoutput('ls -t *.ipynb | grep -v /$ | head -1') filename = filename[0] if not name: name = str(uuid.uuid4().hex.upper()) save() print("Publishing " + filename + " ..") res = subprocess.call(['bash', './publish.sh', name]) if res == 0: print("published successfuly!") print("https://nbviewer.jupyter.org/github/ertpload/test/blob/master/__name__.ipynb".replace("__name__", name)) else: print("Failed!") # In[ ]: from IPython.display import display,Javascript def save(): display(Javascript('IPython.notebook.save_checkpoint();')) # In[ ]: # make the plotly graphs look wider on mobile from IPython.core.display import display, HTML s = """ """ display(HTML(s)) # In[ ]: # interception to auto-fetch hardcoded symbols e.g: # show(SPY) # this should run last in the framework code, or it attempts to download unrelated symbols :) from IPython.core.inputtransformer import * intercept = True if intercept and not "my_transformer_tokens_instance" in locals(): #print("transformation hook init") attempted_implied_fetches = set() ip = get_ipython() @StatelessInputTransformer.wrap def my_transformer(line): if line.startswith("x"): return "specialcommand(" + repr(line) + ")" return line @TokenInputTransformer.wrap def my_transformer_tokens(tokens): for i, x in enumerate(tokens): if x.type == 1 and x.string.isupper() and x.string.isalpha(): ## type=1 is NAME token if i < len(tokens)-1 and tokens[i+1].type == 53 and tokens[i+1].string == "=": attempted_implied_fetches.add(x.string) continue if x.string in attempted_implied_fetches or x.string in ip.user_ns: continue try: ip.user_ns[x.string] = get(x.string) except: print("Failed to fetch implied symbol: " + x.string) attempted_implied_fetches.add(x.string) return tokens my_transformer_tokens_instance = my_transformer_tokens() ip.input_splitter.logical_line_transforms.append(my_transformer_tokens_instance) ip.input_transformer_manager.logical_line_transforms.append(my_transformer_tokens_instance) # In[ ]: def date(s): return pd.to_datetime(s, format="%Y-%m-%d") # another options for interception: # ```python # class VarWatcher(object): # def __init__(self, ip): # self.shell = ip # self.last_x = None # # def pre_execute(self): # if False: # for k in dir(self.shell): # print(k, ":", getattr(self.shell, k)) # print() # #print("\n".join(dir(self.shell))) # if "content" in self.shell.parent_header: # code = self.shell.parent_header['content']['code'] # self.shell.user_ns[code] = 42 # #print(self.shell.user_ns.get('ASDF', None)) # # def post_execute(self): # pass # #if self.shell.user_ns.get('x', None) != self.last_x: # # print("x changed!") # # def load_ipython_extension(ip): # vw = VarWatcher(ip) # ip.events.register('pre_execute', vw.pre_execute) # ip.events.register('post_execute', vw.post_execute) # # ip = get_ipython() # # load_ipython_extension(ip) # # ```