#!/usr/bin/env python
# coding: utf-8

# In[2]:

import QuantLib as ql  # QuantLib-Python 1.11
import csv
import datetime
from collections import OrderedDict
from typing import Dict, List

import numpy as np   # numpy 1.11.3
import pandas as pd  # pandas 0.18.1
from timeit import default_timer as timer  # Python 3.5.2

# ## Load the Data
# Quandl or the Fed itself are the places to find US interest rate data. Relevant
# Quandl URL set out below. As for stock data you can still get it from Yahoo if
# you use a Pandas data reader "fix" as per the PyPI reference below. Failing
# which simply download manually from Yahoo. Alphavantage is an interesting new
# free datasource for stocks - unfortunately as far as the S&P 500 is concerned I
# was only able to get prices back to 2000 from that source.

# In[2]:

# https://www.quandl.com/data/FRED/DTB3-3-Month-Treasury-Bill-Secondary-Market-Rate
# Forward slashes are understood by pandas on Windows as well, and keep the
# script runnable on POSIX systems (backslash paths are Windows-only).
tbill = pd.read_csv('../data/Stocks/FRED_DTB3.csv', parse_dates=["Date"])
tbill.Value = tbill.Value / 100  # quoted in percent; convert to a decimal rate
tbill = tbill.rename(columns={'Value': 'TBill'})
tbill.tail(1)

# In[3]:

# https://finance.yahoo.com/quote/%5EGSPC/history?p=%5EGSPC
# https://www.alphavantage.co/
# https://pypi.python.org/pypi/fix-yahoo-finance/0.0.4
SPX = pd.read_csv('../data/Stocks/SPX.csv', parse_dates=["Date"])
SPX.head(1)

# # Drop obsolete columns and calculate the daily returns
# I only use the open and closing prices from which I calculate the daily return
# necessary to calculate volatility

# In[4]:

# Positional columns 2, 3, 5, 6 -- presumably High, Low, Adj Close, Volume on a
# standard Yahoo download; verify against the CSV header if the source changes.
cols = [2, 3, 5, 6]
SPX.drop(SPX.columns[cols], axis=1, inplace=True)
SPX['Return'] = SPX['Close'].pct_change()
# pct_change leaves the first row NaN; back-fill it.  .bfill() is the modern
# spelling of fillna(method='backfill'), which is deprecated in recent pandas.
SPX = SPX.bfill()
SPX.tail(1)

# ## Combine tBill and Stock data into one Dataframe

# In[5]:

# Left-join the T-bill series onto the stock series, forward-filling the rate
# over stock trading days with no T-bill quote.
combineddata = SPX.merge(tbill, how='left', on='Date').ffill()
combineddata.tail(1)

# ## Setting Strike Prices
# My Data ends in 2017. I want to make sure Options can be calculated 24 months
# out (through to end 2019).
# For the process of calculating strike prices I need to ensure I include
# "forecast" index levels for 2018, 2019 and 2020

# In[7]:

# Forecast index levels for 2018, 2019 and 2020 (flat at the 2017 level).
future_calc = pd.DataFrame(
    {'Close': [2581.0, 2581.0, 2581.0]},
    index=pd.Index([2018, 2019, 2020], name='Date'))
future_calc

# ## Record each year's index high
# I base strike prices for a given maturity on the index high for the relevant
# year they start trading. So I need to extract each year's high

# In[8]:

# Record historic index highs for each year in the data (1985-2017 inclusive).
strike_start = 1985
strike_end = 2018
trade_years = combineddata.Date.dt.year
yearly_highs = {n: (yr, combineddata['Close'][trade_years == yr].max())
                for n, yr in enumerate(range(strike_start, strike_end))}
strike_roll = pd.DataFrame.from_dict(yearly_highs, orient='index', dtype=None)
strike_roll.columns = ['Date', 'Close']
strike_roll.set_index(keys='Date', drop=True, append=False, inplace=True,
                      verify_integrity=False)
# Append the forecast index levels for 2018, 2019 and 2020.
strike_roll = pd.concat([strike_roll, future_calc])
strike_roll.tail()
# Here is the code used later on to calculate the strike prices from the then
# current index level:
# "strike = round(strike_percentage*strike_roll.ix[x.year,'Close'])"

# ## Strike Prices
# You may well wish to achieve finer granularity around the index level on a
# given day for ATM options, especially for those with short maturities.
# Adjust "strike_percentage" accordingly.

# In[9]:

# Strike ladder as percentages of the reference index level: coarse far OTM/ITM,
# 2% steps through the central range, coarse again in the deep ITM tail.
strike_percentage = pd.DataFrame({'Strike': [
    0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.32, 0.34, 0.36, 0.38, 0.4, 0.42, 0.44,
    0.46, 0.48, 0.5, 0.52, 0.54, 0.56, 0.58, 0.6, 0.62, 0.64, 0.66, 0.68, 0.7,
    0.72, 0.74, 0.76, 0.78, 0.8, 0.82, 0.84, 0.86, 0.88, 0.9, 0.92, 0.94, 0.96,
    0.98, 1, 1.02, 1.04, 1.06, 1.08, 1.1, 1.12, 1.14, 1.16, 1.18, 1.2, 1.22,
    1.24, 1.26, 1.28, 1.3, 1.32, 1.34, 1.36, 1.38, 1.4, 1.50, 1.60, 1.70, 1.80,
    1.90, 2.0]})
strike_percentage.shape

# ## Dividend Data
# Record historic SPX dividend data (percentage) and add forecasts for 2018,
# 2019 and 2020. These will be used in the Black-Scholes model to calculate
# option prices

# In[12]:

# Annual SPX dividend yields, 1985 through 2020 (last four values are forecasts).
dividend_rates = [
    0.04, 0.0333, 0.0276, 0.0345, 0.0314, 0.0325, 0.03, 0.0289, 0.0268,
    0.027498879, 0.023359532, 0.020047344, 0.01547191, 0.012328052, 0.010463432,
    0.010241579, 0.011514514, 0.015464024, 0.016593709, 0.019454529,
    0.017992297, 0.01910639, 0.017776754, 0.021176745, 0.023578469, 0.022055675,
    0.019228185, 0.022724277, 0.020750464, 0.019436528, 0.020244296,
    0.021584811, 0.0195, 0.0195, 0.0195, 0.0195]
dividend_years = pd.date_range(start='1985', periods=36, freq='A').year
dividend_years

# ## Combine Dividend data with relevant year
# into a dataframe for later use

# In[13]:

div = pd.DataFrame({'Dividends': dividend_rates}, index=dividend_years)
div.tail()

# ## Cut down the data to a specific year
# You can of course calculate options for the entire price series in one go but
# it takes a while and produces a very large csv file!

# In[14]:

start = 2017
end = 2018
in_range = (combineddata.Date.dt.year >= start) & (combineddata.Date.dt.year <= end)
test = combineddata.loc[in_range, ['Date', 'Open', 'Close', 'Return', 'TBill']]
test.tail()

# ## Writing a dataframe for the trade dates and expiries
# For each trading day, up to 24 expiries are listed.
# # Cut this right down if you want to look at shorter term options only: # "(if x >=pd.Timestamp(row.Date) and x<= pd.Timestamp(row.Date) + pd.DateOffset(months=24))" # # For each expiry on each day I calculate a different volatility for use in the Black Scholes formula later on. If the days to expiry is 5 I have used 5 days for the standard deviation calculation, if 252 days until expiry then historic volatility is calculated for 252 days. # # In other words, differing periods of "historic" volatility are used to calculate price. It does not make sense (to me at least) to use 20 day volatility to estimate future volatility over the next 700 days. This still takes no account of the fact that implied volatility is often higher than historic and therefore the option prices calculated here are likely to be too low. I may attempt to estimate the premium of implied volatility over historic in a later draft. # # Note also that no account is taken of the usual "volatility smile" prevalent in the US equity markets post 1987. In other words I have assumed the same volatility for all strikes. In practice, OTM and ITM strikes tend to have a higher implied volatility hence higher prices than shown here. Possibly because of the realisation after Black Monday 1987 that fat tails need to be priced in. 
# In[23]:

starttimer = timer()

data_dict = {}           # one row per (trade date, expiry, strike)
expiries_list_dict = {}  # one row per (trade date, expiry)
calendar = ql.UnitedStates(ql.UnitedStates.NYSE)
a = 0  # running count of option rows written
d = 0  # running count of expiries written

for h, row in enumerate(test.itertuples(), 0):
    # Hoist everything that is constant for this trading day out of the
    # expiry loops.
    trade_date = pd.Timestamp(row.Date)
    window_end = trade_date + pd.DateOffset(months=24)  # list expiries up to 24 months out
    trade_qldate = ql.Date(row.Date.day, row.Date.month, row.Date.year)
    row_label = test.index[h]  # `test` keeps combineddata's labels, shared with SPX
    # .loc replaces .ix, which was removed in pandas 1.0 (identical label lookup here).
    dividend = div.loc[row.Date.year, 'Dividends']

    for year in range(start, end + 2):
        for month in range(1, 13):
            # Standard equity expiry: third Friday of the month (QuantLib
            # weekday 6 == Friday), rolled to the preceding NYSE business day
            # if it falls on a holiday.
            expiry = pd.Timestamp(str(calendar.adjust(
                ql.Date.nthWeekday(3, 6, month, year), ql.Preceding)))
            if trade_date <= expiry <= window_end:
                # Strike ladder: percentage grid applied to the reference index
                # level for the expiry year, rounded to whole index points.
                strike = (strike_percentage
                          * strike_roll.loc[expiry.year, 'Close']).round(0)
                us_busdays = calendar.businessDaysBetween(
                    trade_qldate, ql.Date(expiry.day, expiry.month, expiry.year))
                # Volatility look-back matches the remaining life of the option:
                # annualised std-dev of the last `us_busdays` daily returns.
                vol = SPX.Return.loc[row_label - us_busdays:row_label].std() * np.sqrt(252)
                expiries_list_dict[d] = (row.Date, expiry)
                d = d + 1
                for line in strike.itertuples():
                    data_dict[a] = (row.Date, expiry, row.Open, row.Close,
                                    row.TBill, dividend, vol, line.Strike)
                    a = a + 1

endtimer = timer()
print(endtimer - starttimer, a)

# ## The expiries list
# Simply so you can see what expiries have been listed for each trading day.
# A smaller file than the data file, which includes strikes.
# In[25]:

expiries_list = pd.DataFrame.from_dict(data=expiries_list_dict, orient='index')
expiries_list.to_csv('expiries_list.csv')
expiries_list.tail()

# ## Produce the complete dataframe for Black-Scholes Calculations

# In[26]:

tmp_data = pd.DataFrame.from_dict(data=data_dict, orient='index')
tmp_data.rename(columns={0: 'Date', 1: 'Expiration', 2: 'Open', 3: 'Close',
                         4: 'TBill', 5: 'Dividends', 6: 'Volatility',
                         7: 'Strike'}, inplace=True)
tmp_data['Call'] = np.zeros(len(tmp_data))
tmp_data['Put'] = np.zeros(len(tmp_data))
tmp_data.head()

# ## Option Price Calculations
# Probably at some stage I should think about vectorisation

# ## Calculate Calls

# In[27]:

start = timer()
option_type = ql.Option.Call
day_count = ql.Actual365Fixed()
# NYSE calendar for consistency with the expiry generation above; the bare
# ql.UnitedStates() constructor is deprecated in recent QuantLib releases.
# With a constant vol surface the calendar choice does not affect the NPV.
calendar = ql.UnitedStates(ql.UnitedStates.NYSE)
tmp_price = {}
for h, row in enumerate(tmp_data.itertuples(), 0):
    maturity_date = ql.Date(row.Expiration.day, row.Expiration.month,
                            row.Expiration.year)
    spot_price = row.Close
    strike_price = row.Strike
    volatility = row.Volatility
    dividend_rate = row.Dividends
    risk_free_rate = row.TBill
    calculation_date = ql.Date(row.Date.day, row.Date.month, row.Date.year)
    # QuantLib prices relative to a global evaluation date: set it per row.
    ql.Settings.instance().evaluationDate = calculation_date
    payoff = ql.PlainVanillaPayoff(option_type, strike_price)
    exercise = ql.EuropeanExercise(maturity_date)
    european_option = ql.VanillaOption(payoff, exercise)
    spot_handle = ql.QuoteHandle(ql.SimpleQuote(spot_price))
    flat_ts = ql.YieldTermStructureHandle(
        ql.FlatForward(calculation_date, risk_free_rate, day_count))
    dividend_yield = ql.YieldTermStructureHandle(
        ql.FlatForward(calculation_date, dividend_rate, day_count))
    flat_vol_ts = ql.BlackVolTermStructureHandle(
        ql.BlackConstantVol(calculation_date, calendar, volatility, day_count))
    bsm_process = ql.BlackScholesMertonProcess(spot_handle, dividend_yield,
                                               flat_ts, flat_vol_ts)
    european_option.setPricingEngine(ql.AnalyticEuropeanEngine(bsm_process))
    bs_price = european_option.NPV()
    # The +0.0001 nudge makes ClosestRounding(2) resolve borderline half-cent
    # values consistently upward.
    tmp_price[h] = ql.ClosestRounding(2)(bs_price + 0.0001)
tmp_data['Call'] = pd.Series(tmp_price)
end = timer()
print(end - start)

# ## Calculate Puts

# In[28]:

start = timer()
option_type = ql.Option.Put
day_count = ql.Actual365Fixed()
# NYSE calendar for consistency with the expiry generation; the bare
# ql.UnitedStates() constructor is deprecated in recent QuantLib releases.
# With a constant vol surface the calendar choice does not affect the NPV.
calendar = ql.UnitedStates(ql.UnitedStates.NYSE)
tmp_price = {}
for h, row in enumerate(tmp_data.itertuples(), 0):
    maturity_date = ql.Date(row.Expiration.day, row.Expiration.month,
                            row.Expiration.year)
    spot_price = row.Close
    strike_price = row.Strike
    volatility = row.Volatility
    dividend_rate = row.Dividends
    risk_free_rate = row.TBill
    calculation_date = ql.Date(row.Date.day, row.Date.month, row.Date.year)
    # QuantLib prices relative to a global evaluation date: set it per row.
    ql.Settings.instance().evaluationDate = calculation_date
    payoff = ql.PlainVanillaPayoff(option_type, strike_price)
    exercise = ql.EuropeanExercise(maturity_date)
    european_option = ql.VanillaOption(payoff, exercise)
    spot_handle = ql.QuoteHandle(ql.SimpleQuote(spot_price))
    flat_ts = ql.YieldTermStructureHandle(
        ql.FlatForward(calculation_date, risk_free_rate, day_count))
    dividend_yield = ql.YieldTermStructureHandle(
        ql.FlatForward(calculation_date, dividend_rate, day_count))
    flat_vol_ts = ql.BlackVolTermStructureHandle(
        ql.BlackConstantVol(calculation_date, calendar, volatility, day_count))
    bsm_process = ql.BlackScholesMertonProcess(spot_handle, dividend_yield,
                                               flat_ts, flat_vol_ts)
    european_option.setPricingEngine(ql.AnalyticEuropeanEngine(bsm_process))
    bs_price = european_option.NPV()
    # Same half-cent nudge as the Call loop, for consistent rounding.
    tmp_price[h] = ql.ClosestRounding(2)(bs_price + 0.0001)

tmp_data['Put'] = pd.Series(tmp_price)
tmp_data.tail()
end = timer()
print(end - start)

# In[29]:

tmp_data.shape

# ## Set option prices for the expiration day
# This cannot be done by Black-Scholes (time to expiry is zero), so in-the-money
# expiring options are settled at intrinsic value against the opening price.

# In[30]:

expiring = tmp_data['Date'] == tmp_data['Expiration']
mask = expiring & (tmp_data['Strike'] >= tmp_data['Open'])
tmp_data.loc[mask, 'Put'] = (tmp_data.loc[mask, 'Strike']
                             - tmp_data.loc[mask, 'Open']).round(2)
mask1 = expiring & (tmp_data['Strike'] <= tmp_data['Open'])
tmp_data.loc[mask1, 'Call'] = (tmp_data.loc[mask1, 'Open']
                               - tmp_data.loc[mask1, 'Strike']).round(2)

# ## Final Product
# And here are the results

# In[31]:

tmp_data.to_csv('tmp.csv')

# In[32]:

tmp_data.tail()