In [2]:
import QuantLib as ql 
#QuantLib-Python           1.11
import csv
import datetime
from collections import OrderedDict
from typing import Dict, List
import numpy as np
#numpy                     1.11.3
import pandas as pd
#pandas                    0.18.1 
from timeit import default_timer as timer
#Python 3.5.2

Load the Data

Quandl or the Fed itself are the places to find US interest rate data. Relevant Quandl URL set out below. As for stock data you can still get it from yahoo if you use a Pandas data reader "fix" as per the pipi reference below. Failing which simply download manually from Yahoo. Alphavantage is an interesting new free datasource for stocks - unfortunately as far as the S&P 500 is concerned I was only able to get prices back to 2000 from that source.

In [2]:
#https://www.quandl.com/data/FRED/DTB3-3-Month-Treasury-Bill-Secondary-Market-Rate
tbill = pd.read_csv('..\\data\\Stocks\\FRED_DTB3.csv',parse_dates=["Date"])
tbill.Value = tbill.Value/100
tbill = tbill.rename(columns={'Value': 'TBill'})
tbill.tail(1)
Out[2]:
Date TBill
15939 2017-10-17 0.0107
In [3]:
#https://finance.yahoo.com/quote/%5EGSPC/history?p=%5EGSPC
#https://www.alphavantage.co/
#https://pypi.python.org/pypi/fix-yahoo-finance/0.0.4
SPX = pd.read_csv('..\\data\\Stocks\\SPX.csv',parse_dates=["Date"])
SPX.head(1)
Out[3]:
Date Open High Low Close Adj Close Volume
0 1985-10-30 189.229996 190.089996 189.139999 190.070007 190.070007 120400000

Drop obsolete columns and calculate the daily returns

I only use the open and closing prices from which I calculate the daily return necessary to calculate volatility

In [4]:
cols=[2,3,5,6]
SPX.drop(SPX.columns[cols], axis=1, inplace=True)
SPX['Return'] =SPX['Close'].pct_change()
SPX=SPX.fillna(method = 'backfill')
SPX.tail(1)
Out[4]:
Date Open Close Return
8066 2017-10-27 2570.26001 2581.070068 0.008073

Combine tBill and Stock data into one Dataframe

In [5]:
combineddata = SPX.merge(tbill, how='left', left_on='Date', right_on='Date').fillna( method='ffill')
combineddata.tail(1)
Out[5]:
Date Open Close Return TBill
8066 2017-10-27 2570.26001 2581.070068 0.008073 0.0107

Setting Strike Prices

My Data ends in 2017. I want to make sure Options can be calulated 24 months out (through to end 2019). For the process of calculated strike prices I need to ensure I include "forecast" index levels for 2018, 2019 and 2020

In [7]:
#forecast index levels for 2018,2019 ad 2020
future_calc = OrderedDict([ ('Date', [2018, 2019, 2020]), ('Close', [2581.0, 2581.0, 2581.0])])         
future_calc = pd.DataFrame.from_dict(future_calc)
future_calc.set_index(keys='Date', drop=True, append=False, inplace=True, verify_integrity=False)
future_calc
Out[7]:
Close
Date
2018 2581.0
2019 2581.0
2020 2581.0

Record each year's index high

I base strike prices for a given maturity on the index high for the relevant year they start trading. So I need to extract each year's high

In [8]:
#record historic index highs for each year on my data
strike_start = 1985
strike_end = 2018
strike_calc={}
a=0   
#take the historic high of the closing price for each year
for i in range(strike_start,strike_end): 
    strike_calc[a]=(i,combineddata['Close'][combineddata.Date.dt.year ==i].max()  )    
    a=a+1
strike_roll = pd.DataFrame.from_dict(strike_calc, orient='index', dtype=None)
strike_roll.columns = ['Date', 'Close']
strike_roll.set_index(keys='Date', drop=True, append=False, inplace=True, verify_integrity=False)

#add in my forecast index levels for 2018,2019 and 2020
strike_roll = pd.concat([strike_roll,future_calc])
strike_roll.tail()
Out[8]:
Close
Date
2016 2271.719971
2017 2581.070068
2018 2581.000000
2019 2581.000000
2020 2581.000000

Strike Prices

You may well wish to achieve finer granularity around the index level on a given day for ATM options, especially for those with short maturities. Adjust "strike_percentage" accordingly. Here is the code used later on to calculate the strike prices from the then current index level: "strike = round(strike_percentage*strike_roll.ix[x.year,'Close'])"

In [9]:
strike_percentage = [0.05,0.1,0.15,0.2,0.25,0.3,0.32,0.34,0.36,0.38,0.4,0.42,0.44,0.46,0.48,0.5,
                     0.52,0.54,0.56,0.58,0.6,0.62,0.64,0.66,0.68,0.7,0.72,0.74,
                     0.76,0.78,0.8,0.82,0.84,0.86,0.88,0.9,0.92,0.94,0.96,0.98,1,
                     1.02,1.04,1.06,1.08,1.1,1.12,1.14,1.16,1.18,1.2,1.22,1.24,
                     1.26,1.28,1.3,1.32,1.34,1.36,1.38,1.4,1.50,1.60,1.70,1.80,
                     1.90, 2.0]
strike_percentage = pd.DataFrame({'Strike':strike_percentage})
strike_percentage.shape
Out[9]:
(67, 1)

Dividend Data

Record historic SPX dividend data (percentage) and add forecasts for 2018, 2019 and 2020. These will be used in the Black Scoles model to calculate option prices

In [12]:
divs = [0.04,0.0333,0.0276,0.0345,0.0314,0.0325,0.03,0.0289,0.0268,0.027498879,0.023359532,
        0.020047344,0.01547191,0.012328052,0.010463432,0.010241579,0.011514514,0.015464024,
        0.016593709,0.019454529,0.017992297,0.01910639,0.017776754,0.021176745,0.023578469,
        0.022055675,0.019228185,0.022724277,0.020750464,0.019436528,0.020244296,0.021584811,
        0.0195,0.0195,0.0195,0.0195]
div_year = pd.date_range(start = '1985', periods=36, freq = 'A').year
div_year
Out[12]:
array([1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995,
       1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
       2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017,
       2018, 2019, 2020])

Combine Dividend data with relevant year

into a dataframe for later use

In [13]:
div =pd.DataFrame({'Dividends':divs},index=div_year)
div.tail()
Out[13]:
Dividends
2016 0.021585
2017 0.019500
2018 0.019500
2019 0.019500
2020 0.019500

Cut down the data to a specific year

You can of course calculate options for the entire price series in one go but it take a whie and produces a very large csv file!

In [14]:
start = 2017
end = 2018
test = combineddata[(combineddata.Date.dt.year >=start) & 
                    (combineddata.Date.dt.year <=end)].loc[:,['Date','Open','Close','Return','TBill']]
test.tail()
Out[14]:
Date Open Close Return TBill
8062 2017-10-23 2578.080078 2564.979980 -0.003972 0.0107
8063 2017-10-24 2568.659912 2569.129883 0.001618 0.0107
8064 2017-10-25 2566.520020 2557.149902 -0.004663 0.0107
8065 2017-10-26 2560.080078 2560.399902 0.001271 0.0107
8066 2017-10-27 2570.260010 2581.070068 0.008073 0.0107

Writing a dataframe for the trade dates and expiries

For each trading day, up to 24 expiries are listed.

Cut this right down if you want to look at shorter term options only: "(if x >=pd.Timestamp(row.Date) and x<= pd.Timestamp(row.Date) + pd.DateOffset(months=24))"

For each expiry on each day I calculate a differnt volatility for use in the Black Scholes formula later on. If the days to expiry is 5 I have used 5 days for the standard deviation calculation, if 252 days until expiry then historic volatility is calulated for 252 days.

In otherwords differing periods of "historic" volatility are used to calculate price. It does not make sense (to me at least) to use 20 day volatility to estimate future volatility over the next 700 days. This still takes no account of the fact that implied volatility is often higher than historic and therefore the option prices calculated here are likley to be too low. I may attempt to estimate the premium of implied volatility over historic in a later draft.

Note also that no account is taken of the usual "volatility smile" prevalent in the US equity markets post 1987. In other words I have asumed the same volatility for all strikes. In practice, OTM and ITM strikes tend to have a higher implied volatility hence higher prices than shown here. Possibly because of the realisation after Black Monday 1987 that fat tails need to be priced in.

In [23]:
starttimer = timer()
data_dict = {}
expiries_list_dict={}
calendar = ql.UnitedStates(ql.UnitedStates.NYSE)
a=0
b=0
c=0
d=0
x=0
us_busdays=0

for h,row in enumerate(test.itertuples(),0):   
    for i in range(start, end+2):  
        for j in range(1,13):           
            x=pd.Timestamp(str(calendar.adjust(ql.Date.nthWeekday(3,6,j,i),ql.Preceding )))
            
            if x >=pd.Timestamp(row.Date) and x<= pd.Timestamp(row.Date) + pd.DateOffset(months=24):                             
                strike = round(strike_percentage*strike_roll.ix[x.year,'Close'])
                us_busdays = calendar.businessDaysBetween(ql.Date(row.Date.day, row.Date.month, 
                            row.Date.year),ql.Date(x.day, x.month, x.year) )
                b = test.iloc[[h]].index.values
                c=b[0]-us_busdays
                vol=SPX.Return.loc[c:b[0]].std()*np.sqrt(252)
                expiries_list_dict[d]=(row.Date,x)
                d=d+1
                for k, line in enumerate(strike.itertuples(),0): 
                    data_dict[a]=(row.Date,x, row.Open, row.Close, row.TBill,div.ix[row.Date.year,'Dividends'],vol,line.Strike)
                    a=a+1
endtimer = timer()
print(endtimer - starttimer,a) 
48.64829184252221 335871

The expiries list

Simply so you can see what expiries have been listed for each trading day. A smaller file that data file which inclused strikes.

In [25]:
expiries_list =pd.DataFrame.from_dict(data = expiries_list_dict ,orient ='index') 
expiries_list.to_csv('expiries_list.csv')
expiries_list.tail()
Out[25]:
0 1
5008 2017-10-27 2019-06-21
5009 2017-10-27 2019-07-19
5010 2017-10-27 2019-08-16
5011 2017-10-27 2019-09-20
5012 2017-10-27 2019-10-18

Produce the complete dataframe for Black Scoles Calculations

In [26]:
tmp_data = pd.DataFrame.from_dict(data = data_dict,orient ='index') 
tmp_data.rename(columns={0: 'Date',1:'Expiration', 2:'Open',3:'Close',4:'TBill',5:'Dividends',6: 'Volatility',7:'Strike'},inplace=True)
tmp_data['Call']=np.zeros(len(tmp_data))
tmp_data['Put']=np.zeros(len(tmp_data))

tmp_data.head()
Out[26]:
Date Expiration Open Close TBill Dividends Volatility Strike Call Put
0 2017-01-03 2017-01-20 2251.570068 2257.830078 0.0053 0.0195 0.076404 129.0 0.0 0.0
1 2017-01-03 2017-01-20 2251.570068 2257.830078 0.0053 0.0195 0.076404 258.0 0.0 0.0
2 2017-01-03 2017-01-20 2251.570068 2257.830078 0.0053 0.0195 0.076404 387.0 0.0 0.0
3 2017-01-03 2017-01-20 2251.570068 2257.830078 0.0053 0.0195 0.076404 516.0 0.0 0.0
4 2017-01-03 2017-01-20 2251.570068 2257.830078 0.0053 0.0195 0.076404 645.0 0.0 0.0

Option Price Calculations

Probably at some stage I should think about vectorisation

Calculate Calls

In [27]:
start = timer()

option_type = ql.Option.Call
day_count = ql.Actual365Fixed()
calendar = ql.UnitedStates()
tmp_price={}
for h,row in enumerate(tmp_data.itertuples(),0):  
    maturity_date = ql.Date(row.Expiration.day, row.Expiration.month, row.Expiration.year)
    spot_price = row.Close
    strike_price = row.Strike
    volatility = row.Volatility
    dividend_rate =  row.Dividends
    risk_free_rate = row.TBill
    calculation_date = ql.Date(row.Date.day, row.Date.month, row.Date.year)
    ql.Settings.instance().evaluationDate = calculation_date
    payoff = ql.PlainVanillaPayoff(option_type, strike_price)
    exercise = ql.EuropeanExercise(maturity_date)
    european_option = ql.VanillaOption(payoff, exercise)
    spot_handle = ql.QuoteHandle(ql.SimpleQuote(spot_price))
    flat_ts = ql.YieldTermStructureHandle(ql.FlatForward(calculation_date, risk_free_rate, day_count))
    dividend_yield = ql.YieldTermStructureHandle(ql.FlatForward(calculation_date, dividend_rate, day_count))
    flat_vol_ts = ql.BlackVolTermStructureHandle(ql.BlackConstantVol(calculation_date, calendar, volatility, day_count))
    bsm_process = ql.BlackScholesMertonProcess(spot_handle,dividend_yield,flat_ts,flat_vol_ts)
    european_option.setPricingEngine(ql.AnalyticEuropeanEngine(bsm_process))
    bs_price = european_option.NPV()
    tmp_price[h]=ql.ClosestRounding(2)(bs_price+0.0001)
    #print(tmp_price[h])
#tmp_price
tmp_data['Call']=pd.Series(tmp_price)
#tmp_data.tail()
end = timer()
print(end - start) 
129.2285610853312

Calculate Puts

In [28]:
start = timer()

option_type = ql.Option.Put
day_count = ql.Actual365Fixed()
calendar = ql.UnitedStates()
tmp_price={}
for h,row in enumerate(tmp_data.itertuples(),0):  
    maturity_date = ql.Date(row.Expiration.day, row.Expiration.month, row.Expiration.year)
    spot_price = row.Close
    strike_price = row.Strike
    volatility = row.Volatility
    dividend_rate =  row.Dividends
    risk_free_rate = row.TBill
    calculation_date = ql.Date(row.Date.day, row.Date.month, row.Date.year)
    ql.Settings.instance().evaluationDate = calculation_date
    payoff = ql.PlainVanillaPayoff(option_type, strike_price)
    exercise = ql.EuropeanExercise(maturity_date)
    european_option = ql.VanillaOption(payoff, exercise)
    spot_handle = ql.QuoteHandle(ql.SimpleQuote(spot_price))
    flat_ts = ql.YieldTermStructureHandle(ql.FlatForward(calculation_date, risk_free_rate, day_count))
    dividend_yield = ql.YieldTermStructureHandle(ql.FlatForward(calculation_date, dividend_rate, day_count))
    flat_vol_ts = ql.BlackVolTermStructureHandle(ql.BlackConstantVol(calculation_date, calendar, volatility, day_count))
    bsm_process = ql.BlackScholesMertonProcess(spot_handle,dividend_yield,flat_ts,flat_vol_ts)
    european_option.setPricingEngine(ql.AnalyticEuropeanEngine(bsm_process))
    bs_price = european_option.NPV()
    tmp_price[h]=ql.ClosestRounding(2)(bs_price+0.0001)
    #tmp_price[h]=bs_price+0.001
#tmp_price
tmp_data['Put']=pd.Series(tmp_price)
tmp_data.tail()
end = timer()
print(end - start) 
117.60858093075694
In [29]:
tmp_data.shape
Out[29]:
(335871, 10)

Set option prices for the expiration day

This can not done by Black Scholes

In [30]:
#Set option prices for the expiration day - this is not done by Quanlib / Black Sscholes
mask = (tmp_data['Date']==tmp_data['Expiration']) & (tmp_data['Strike']>=tmp_data['Open'])
tmp_data.loc[mask,'Put']=(tmp_data.loc[mask,'Strike']-tmp_data.loc[mask,'Open']).round(2)
mask1 = (tmp_data['Date']==tmp_data['Expiration']) & (tmp_data['Strike']<=tmp_data['Open'])
tmp_data.loc[mask1,'Call']=(tmp_data.loc[mask1,'Open']-tmp_data.loc[mask1,'Strike']).round(2)

Final Product

And here are the results

In [31]:
tmp_data.to_csv('tmp.csv')
In [32]:
tmp_data.tail()
Out[32]:
Date Expiration Open Close TBill Dividends Volatility Strike Call Put
335866 2017-10-27 2019-10-18 2570.26001 2581.070068 0.0107 0.0195 0.11207 4130.0 0.14 1560.22
335867 2017-10-27 2019-10-18 2570.26001 2581.070068 0.0107 0.0195 0.11207 4388.0 0.03 1812.72
335868 2017-10-27 2019-10-18 2570.26001 2581.070068 0.0107 0.0195 0.11207 4646.0 0.01 2065.30
335869 2017-10-27 2019-10-18 2570.26001 2581.070068 0.0107 0.0195 0.11207 4904.0 0.00 2317.90
335870 2017-10-27 2019-10-18 2570.26001 2581.070068 0.0107 0.0195 0.11207 5162.0 0.00 2570.50