#!/usr/bin/env python # coding: utf-8 # # Algorithmic Trading Basics # # #
# ## Notebook by [Marco Tavora](https://marcotavora.me/)
#
# ### Table of contents
#
# 1. [Summary](#Summary)
# 2. [Definitions](#Definitions)
# 3. [Modules](#Modules)
# 4. [Time Series](#Time-Series)
# 5. [Strategy](#Strategy)
# 6. [Moving Windows](#Moving-Windows)

# ## Summary
# [[go back to the top]](#Table-of-contents)
#
# I will briefly describe:
# - How to build a [trend trading](https://en.wikipedia.org/wiki/Trend_following) strategy
# - How to backtest it
# - How to optimize it
#
# ### Definitions
# [[go back to the top]](#Table-of-contents)
#
#
# Trend trading can be [defined as](https://en.wikipedia.org/wiki/Trend_following):
#
# > ... a trading strategy according to which one should buy an asset when its price trend goes up, and sell when its trend goes down, expecting price movements to continue.
#
# Again borrowing from [Wikipedia](https://en.wikipedia.org/wiki/Backtesting), backtesting:
# > ... seeks to estimate the performance of a strategy or model if it had been employed during a past period. This requires simulating past conditions with sufficient detail, making one limitation of backtesting the need for detailed historical data.
# ### Modules
# [[go back to the top]](#Table-of-contents)

# In[1]:

# Execute the companion notebook through IPython's `%run` magic.
# NOTE(review): presumably this is what brings `datetime`, `pdr`, `quandl`,
# `plt`, `pd`, `np` and `afa` into scope -- confirm against that notebook.
get_ipython().run_line_magic('run', 'modules_algo_trading_v10.ipynb')

# ## Data from `yahoo` or `Quandl`
# [[go back to the top]](#Table-of-contents)

# In[2]:

# First source: daily AAPL data requested from Yahoo.
start = datetime.datetime(2006, 10, 1)
end = datetime.datetime(2012, 1, 1)
apple = pdr.get_data_yahoo('AAPL', start=start, end=end)
apple.head()

# #### Checking for null values

# In[3]:

# Distinct values of the per-column "contains a null" flag.
apple.isnull().any().unique()

# In[4]:

# Second source: the same ticker requested from Quandl. From here on `apple`
# refers to the Quandl frame; the Yahoo frame above is overwritten.
start = "2006-10-01"
end = "2011-01-01"
apple = quandl.get("WIKI/AAPL", start_date=start, end_date=end)
# Normalise the column labels: lower-case, dots removed, spaces -> underscores.
apple.columns = [
    label.lower().replace('.', '').replace(' ', '_')
    for label in apple.columns
]
apple.head()

# In[5]:

plt.rcParams['figure.figsize'] = (16, 8)
apple['close'].plot(grid=False, rot=90)
plt.show()

# ## Moving Average Crossover Strategy
# [[go back to the top]](#Table-of-contents)
#
#
# In general, a trading strategy involves going into long and/or short
# positions following some plan. One example, maybe the simplest, is the
# **moving average crossover** strategy. Following this strategy, one decides
# to buy or sell when the time series of two moving averages, with different
# lookback periods, cross. More concretely:
# - When the short moving average (SMA) becomes greater than the long moving
#   average (LMA), one enters (i.e. buys, or goes long)
# - When the long moving average (LMA) becomes greater than the short moving
#   average (SMA), one exits
#
# Roughly speaking, the *rationale* behind this strategy is the following.
# Short-term trends are captured using the SMA. When the SMA crosses above the
# LMA, one identifies a short-term upward trend and the stock is purchased.
# When the LMA crosses above the SMA one does the opposite.
#
# An example of the use of moving averages follows.
# Consider the first five rows and a window of size 3:

# In[10]:

# NOTE: despite its name, `ten_rows` holds five rows (the default of `head()`).
ten_rows = apple[['close']].head()
ten_rows

# In[7]:

ten_rows['close'].rolling(window=3, min_periods=1, center=False).mean()

# The third entry is:

# In[12]:

window_size = 3
(ten_rows.iloc[0, :] + ten_rows.iloc[1, :] + ten_rows.iloc[2, :]) / window_size

# Note that a full window needs at least 3 elements (the window size). Because
# `min_periods=1` is used, the rows before a full window is available are
# averaged over the rows seen so far: the first entry equals the first close
# price and the second entry is the mean of the first two close prices.

# In[14]:

# NOTE(review): this override is installed after the Yahoo download earlier in
# the script; presumably the notebook cells were executed out of order --
# confirm whether it should run before the download.
import fix_yahoo_finance as yf
yf.pdr_override()

# The code implementation of this strategy consists of the following steps:
# - First set the sizes of the short moving window `smw` and long moving
#   window `lmw`
# - Create an empty `DataFrame` for signals (called `signal_df` here) and fill
#   its `sma` and `lma` columns with the moving averages of the `close` price
#   column
#
# The close price column is:

# In[15]:

apple[['close']].head()

# The two steps above are:

# In[16]:

smw, lmw = 40, 100
# Column order is sma, lma, signal. The signal starts at 0.0 everywhere and is
# only filled in from row `smw` onwards (see In[18]).
signal_df = pd.DataFrame(index=apple.index)
signal_df['sma'] = apple['close'].rolling(window=smw, min_periods=1, center=False).mean()
signal_df['lma'] = apple['close'].rolling(window=lmw, min_periods=1, center=False).mean()
signal_df['signal'] = 0.0

# - Fill the `signal` column, inserting 1s where the value of column `sma` is
#   larger than `lma`, only for the rows from position `smw` onwards.
#
# For that we use the `np.where` function. A simple example of the latter is:

# In[17]:

lst = np.arange(5,10)
print('lst is:', lst)
print('Insert 1s in the positions of the elements in lst that are smaller than 7 and insert 0s otherwise:')
print(np.where(lst < 7, 1, 0))

# In[18]:

# Assign through a single `.iloc` call. The chained form
# `signal_df['signal'][smw:] = ...` writes into an intermediate object, which
# triggers SettingWithCopyWarning and leaves `signal_df` unchanged when pandas
# Copy-on-Write is enabled.
signal_df.iloc[smw:, signal_df.columns.get_loc('signal')] = np.where(
    signal_df['sma'].iloc[smw:] > signal_df['lma'].iloc[smw:], 1.0, 0.0
)
signal_df.iloc[smw:, :].head(6)

# - Create a column of positions `pos` as the row-to-row difference of
#   `signal`: it is 1 on the row where a long position is opened, -1 on the
#   row where it is closed, and 0 while nothing changes. Notice below e.g.
# that on 2000-02-07, `sma`>`lma` and one changes the position (buys the
# stock). In the following day, one still has `sma`>`lma` so the position is
# kept and the entry in the position column will be 0. The meaning of the
# `.diff` method is illustrated below. Since:
#
#     signal_df.iloc[smw:, :]['sma'].iloc[4] = 103.934
#     signal_df.iloc[smw:, :]['sma'].iloc[3] = 103.206
#
#     -> signal_df.iloc[smw:, :]['sma'].iloc[4] - signal_df.iloc[smw:, :]['sma'].iloc[3] = 0.728

# In[19]:

# Positional access goes through `.iloc`: plain `series[4]` on a date-indexed
# Series relies on a deprecated positional fallback of `Series.__getitem__`.
round(signal_df.iloc[smw:, :]['sma'].diff().iloc[4], 3)
round(signal_df.iloc[smw:, :]['sma'].iloc[4] - signal_df.iloc[smw:, :]['sma'].iloc[3], 3)

# In[20]:

# Row-to-row change of the signal: 1.0 on entry, -1.0 on exit, 0.0 otherwise
# (NaN on the first row, which has no predecessor).
signal_df['pos'] = signal_df['signal'].diff()
signal_df.iloc[smw:, :].head(20)

# In[21]:

apple.head()

# In[22]:

apple.shape

# In[23]:

ylabel, col, cols_ma = 'price', 'close', ['sma', 'lma']
afa.plot_function_new(apple, signal_df, ylabel, col, cols_ma, 0, apple.shape[0])

# A subsection of the plot makes the strategy clearer:

# In[24]:

afa.plot_function_new(apple, signal_df, ylabel, col, cols_ma, 950, 1050)

# ## Backtesting Steps
# [[go back to the top]](#Table-of-contents)

# Steps:
# - We first set the initial capital to 1MM
# - Buy $N$ shares
# - Initialize the portfolio with the value owned initially and store the
#   difference in shares owned
# - Add a `holdings` column containing the number of shares held times the
#   adjusted close price
# - Add `cash`, `tot` and `return` columns to the portfolio
# - Plot the equity curve
# - Plot the "buy" and "sell" trades versus the equity curve

# In[25]:

# Small preview of the holdings computation on a few rows (100 shares per
# signal, the same lot size as `N` below).
aux = pd.concat([signal_df[['signal']].iloc[smw+3:smw+10, :],
                 apple[['adj_close']].iloc[smw+3:smw+10, :]], axis=1)
aux['holdings'] = 100*aux['signal']*aux['adj_close']
aux

# In[26]:

initial_capital, N = 1000000.0, 100
# Shares held per day: N while the signal is on, 0 otherwise. The lot size now
# comes from `N` instead of a second hard-coded 100.
pos = pd.DataFrame(index=signal_df.index)
pos['AAPL'] = N*signal_df['signal']
ptf = pos.multiply(apple['adj_close'], axis=0)
ptf.iloc[smw+3:smw+10, :]

# In[27]:

# Shares bought (+) or sold (-) on each day.
pos_diff = pos.diff()
ptf['holdings'] = pos.multiply(apple['adj_close'], axis=0).sum(axis=1)
# Cash is the initial capital minus the cumulative cost of all trades so far.
ptf['cash'] = initial_capital - pos_diff.multiply(apple['adj_close'], axis=0).sum(axis=1).cumsum()
# Total value expressed in millions, matching the 'portfolio value (MM)' label.
ptf['tot'] = (ptf['cash'] + ptf['holdings'])/1000000
ptf['return'] = ptf['tot'].pct_change()

# In[28]:

ptf

# In[34]:

df, ylabel, col = ptf, 'portfolio value (MM)', 'tot'
afa.plot_function_3(df, signal_df, ylabel, col, pos, 0, -1)

# In[37]:

ptf[['return']].plot()

# ### Sharpe Ratio and Maximum Drawdown
# [[go back to the top]](#Table-of-contents)
#
# The Sharpe ratio reads:
#
# $${S_a} = \frac{{E[{R_a} - {R_b}]}}{{{\sigma _a}}} = \frac{{E[{R_a} - {R_b}]}}{{\sqrt {{\rm{var}}[{R_a} - {R_b}]} }},$$
#
# where $R_{a}$ is the asset return and $R_b$ is the risk free rate. From Wikipedia:
#
# > The Sharpe ratio characterizes how well the return of an asset compensates the investor for the risk taken. When comparing two assets versus a common benchmark, the one with a higher Sharpe ratio provides better return for the same risk (or, equivalently, the same return for lower risk).
#
# Using $R_b=0$ for simplicity:

# In[38]:

returns = ptf['return']
# NOTE(review): the factor sqrt(252) presumably annualises the ratio of daily
# returns using 252 trading days per year -- confirm.
sharpe_ratio = np.sqrt(252)*(returns.mean() / returns.std())
print('sharpe_ratio is:',round(sharpe_ratio, 3))

# The maximum drawdown measures the largest drop in value from a previous
# peak. Below it is computed on the adjusted close price of the asset over a
# rolling window of 252 rows.

# In[52]:

import auxiliar as af

window = 252
rolling_max = apple['adj_close'].rolling(window, min_periods=1).max()
daily_drawdown = apple['adj_close']/rolling_max - 1.0
max_daily_drawdown = daily_drawdown.rolling(window, min_periods=1).min()
daily_drawdown.plot()

# In[53]:

max_daily_drawdown.plot()
plt.show()

# In[54]:

daily_drawdown.plot()
max_daily_drawdown.plot()
plt.show()

# In[51]:

af.s_to_df(max_daily_drawdown, 'daily_drawdown').sort_values(by='daily_drawdown', ascending=True)