Introduction to Monto Carlo Simulation of a S&P 500-like investment¶

Starting with 10,000 and investing an additional 10,000 annually, what is the probability that you will have at least 1,000,000 after 30 years of investing in the S&P 500 etf?¶

You can download the Anaconda platform from http://continuum.io ¶

In [1]:

# 1. import needed libraries, set plots to display in notebook

import numpy as np
from pandas import Series, DataFrame
%matplotlib inline
import matplotlib.pyplot as plots
# allows currency formatting
import locale
locale.setlocale(locale.LC_ALL, 'en_CA.UTF-8')

Out[1]:

'en_CA.UTF-8'

In [2]:

# 2. A traditional savings calculator approach

pv = 10000
time_horizon = 30
i =.07
additions = 10000

for year in range(time_horizon):
    ending = pv * (1+i) + additions
    print(locale.currency(ending, grouping=True))
    pv = ending

$20,700.00
$32,149.00
$44,399.43
$57,507.39
$71,532.91
$86,540.21
$102,598.03
$119,779.89
$138,164.48
$157,835.99
$178,884.51
$201,406.43
$225,504.88
$251,290.22
$278,880.54
$308,402.17
$339,990.33
$373,789.65
$409,954.92
$448,651.77
$490,057.39
$534,361.41
$581,766.71
$632,490.38
$686,764.70
$744,838.23
$806,976.91
$873,465.29
$944,607.86
$1,020,730.41

In [ ]:

In [7]:

# 3. Generating one possible future value based on market history; I will use 9% expected return with 18% volatility

pv = 10000
expected_return = .09
volatility = .18
time_horizon = 30
annual_addition = 10000

print("\tReturn", "\t\tEnding Value".rjust(18))
for year in range(time_horizon):
    market_return = np.random.normal(expected_return, volatility)
    fv = pv * (1 + market_return) + annual_addition
    print("\t{}".ljust(10).format(round(market_return,4)), "\t{}".rjust(10).format(locale.currency(fv, grouping=True)))
    pv = fv

	Return     		Ending Value
	-0.2048               	$17,952.35
	0.1713               	$31,027.71
	0.025               	$41,803.57
	0.3934               	$68,247.53
	0.1311               	$87,193.78
	0.059               	$102,338.96
	0.0492               	$117,370.59
	0.1776               	$148,211.52
	0.1447               	$179,660.96
	0.3233               	$247,744.05
	0.0131               	$260,984.80
	0.0031               	$271,804.67
	0.101               	$309,265.20
	0.059               	$337,521.01
	0.13               	$391,406.15
	-0.1634               	$337,440.21
	0.1988               	$414,529.00
	0.1478               	$485,804.51
	-0.0644               	$464,501.76
	0.3089               	$617,969.40
	0.1163               	$699,851.96
	0.0967               	$777,561.94
	0.1271               	$886,361.09
	0.2286               	$1,099,006.92
	-0.1747               	$917,010.86
	0.1719               	$1,084,648.77
	0.1578               	$1,265,833.32
	0.1391               	$1,451,968.19
	-0.0102               	$1,447,163.05
	-0.0651               	$1,362,992.89

In [8]:

# 4. Simulate portfolio ending market values 

sim = DataFrame()
iterations = 5000


for x in range(iterations):
    expected_return = .09
    volatility = .18
    time_horizon = 30
    pv = 10000
    annual_investment = 10000
    stream = []
    for i in range(time_horizon):
        end = round(pv * (1 + np.random.normal(expected_return,volatility)) + annual_investment,2)
        
        stream.append(end)

        pv = end

    
    sim[x] = stream

In [ ]:

In [11]:

# 5. Sample first five streams of annual ending values
first_five = list(range(5))
sim[first_five]

Out[11]:

	0	1	2	3	4
0	20608.84	25270.36	22524.66	19888.64	24937.49
1	33740.72	35372.69	32873.72	36562.31	41552.59
2	45225.73	44210.06	45421.19	46184.55	51021.79
3	58379.60	61796.20	80086.09	55155.90	86519.77
4	85781.28	70098.26	68538.91	63000.86	108460.91
5	87940.43	82285.84	60522.85	71575.43	110073.63
6	82747.60	102489.51	96294.97	89848.70	133817.78
7	82756.72	123217.12	109410.25	135515.92	182665.42
8	144639.26	136355.22	142602.06	127311.36	214785.48
9	184432.55	169326.84	148948.71	136936.95	286522.75
10	209482.38	174629.00	183539.10	146726.65	354484.07
11	249300.45	176012.46	230442.52	139957.36	370285.78
12	312886.35	173168.94	284435.34	162090.12	530016.35
13	268715.01	167641.51	327392.49	180188.15	684339.57
14	226320.18	158084.61	338060.78	171399.13	719974.57
15	269760.74	165789.84	435316.26	196710.71	791131.04
16	378558.20	187355.21	609670.59	259472.33	670071.12
17	513870.43	193416.70	631204.26	265128.09	301802.36
18	415726.47	243682.94	640962.19	324803.89	246695.60
19	456953.67	326003.49	848527.30	461958.23	280869.29
20	447794.01	361800.47	810154.49	419162.23	285238.23
21	483183.11	438958.81	905952.35	527859.73	313787.79
22	502885.58	401016.84	784807.42	665936.48	300198.54
23	457012.27	441272.03	1015493.55	759131.46	319280.84
24	518696.43	542515.32	778863.92	821615.70	381654.14
25	525934.55	662417.47	795153.07	797219.66	391283.95
26	648207.61	694768.54	995491.81	632315.42	326099.13
27	592420.63	717087.52	897694.34	721284.01	367282.34
28	699679.32	699155.21	839630.49	626382.54	538488.72
29	840002.22	971588.23	1032396.34	890854.62	564475.89

In [12]:

# 6. Plot first five simulated portfolios
plots.plot(sim[first_five])

Out[12]:

[<matplotlib.lines.Line2D at 0x112161048>,
 <matplotlib.lines.Line2D at 0x112161208>,
 <matplotlib.lines.Line2D at 0x1121613c8>,
 <matplotlib.lines.Line2D at 0x1121615c0>,
 <matplotlib.lines.Line2D at 0x1121617b8>]

In [ ]:

In [13]:

# 7. Generate summary statistics with numpy functions

print("Count:", len(sim.loc[29]))
print("Mean: ", locale.currency(np.mean(sim.loc[29]),grouping=True))
print("SD: ",locale.currency(np.std(sim.loc[29]),grouping=True))
print("Max: ",locale.currency(np.max(sim.loc[29]), grouping=True))
print("Min: ", locale.currency(np.min(sim.loc[29]), grouping=True))

Count: 5000
Mean:  $1,511,783.96
SD:  $1,207,664.05
Max:  $15,360,616.84
Min:  $113,779.44

In [14]:

# 8. Generating more comprehensive summary statistics with pandas describe function
ending_values = sim.loc[29]
ending_values.describe()

Out[14]:

count    5.000000e+03
mean     1.511784e+06
std      1.207785e+06
min      1.137794e+05
25%      7.462566e+05
50%      1.184005e+06
75%      1.867706e+06
max      1.536062e+07
Name: 29, dtype: float64

In [17]:

# 9. Get a visualization of the distribution of ending values

plots.hist(ending_values, bins=100)

Out[17]:

(array([  63.,  229.,  342.,  526.,  495.,  442.,  392.,  403.,  299.,
         261.,  203.,  182.,  150.,  134.,  107.,   97.,   90.,   73.,
          64.,   44.,   65.,   53.,   31.,   25.,   24.,   22.,   20.,
          21.,   15.,   16.,    4.,    7.,   14.,    7.,    7.,    1.,
           8.,    3.,    5.,    3.,    3.,    4.,    4.,    3.,    2.,
           4.,    2.,    1.,    2.,    2.,    2.,    4.,    1.,    1.,
           2.,    1.,    1.,    1.,    2.,    1.,    0.,    0.,    1.,
           1.,    2.,    0.,    0.,    0.,    1.,    1.,    0.,    0.,
           1.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
           0.,    1.,    0.,    0.,    0.,    0.,    1.,    0.,    0.,
           0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    1.]),
 array([   113779.44 ,    266247.814,    418716.188,    571184.562,
           723652.936,    876121.31 ,   1028589.684,   1181058.058,
          1333526.432,   1485994.806,   1638463.18 ,   1790931.554,
          1943399.928,   2095868.302,   2248336.676,   2400805.05 ,
          2553273.424,   2705741.798,   2858210.172,   3010678.546,
          3163146.92 ,   3315615.294,   3468083.668,   3620552.042,
          3773020.416,   3925488.79 ,   4077957.164,   4230425.538,
          4382893.912,   4535362.286,   4687830.66 ,   4840299.034,
          4992767.408,   5145235.782,   5297704.156,   5450172.53 ,
          5602640.904,   5755109.278,   5907577.652,   6060046.026,
          6212514.4  ,   6364982.774,   6517451.148,   6669919.522,
          6822387.896,   6974856.27 ,   7127324.644,   7279793.018,
          7432261.392,   7584729.766,   7737198.14 ,   7889666.514,
          8042134.888,   8194603.262,   8347071.636,   8499540.01 ,
          8652008.384,   8804476.758,   8956945.132,   9109413.506,
          9261881.88 ,   9414350.254,   9566818.628,   9719287.002,
          9871755.376,  10024223.75 ,  10176692.124,  10329160.498,
         10481628.872,  10634097.246,  10786565.62 ,  10939033.994,
         11091502.368,  11243970.742,  11396439.116,  11548907.49 ,
         11701375.864,  11853844.238,  12006312.612,  12158780.986,
         12311249.36 ,  12463717.734,  12616186.108,  12768654.482,
         12921122.856,  13073591.23 ,  13226059.604,  13378527.978,
         13530996.352,  13683464.726,  13835933.1  ,  13988401.474,
         14140869.848,  14293338.222,  14445806.596,  14598274.97 ,
         14750743.344,  14903211.718,  15055680.092,  15208148.466,
         15360616.84 ]),
 <a list of 100 Patch objects>)

In [26]:

# 10. Calculate probability of seeing a specific ending_value or less, 
# for example get close to the 75%ile, or $1,000,000

len(ending_values[ending_values<1000000]) / len(ending_values)

Out[26]:

0.405

In [22]:

# 11. You can't really get a point estimate, but you can get a range ending values

len(ending_values[(ending_values> 800000) & (ending_values< 1100000)]) /len(ending_values)

Out[22]:

0.1776

In [23]:

# 12. You can get a more comprehensive table of percentiles easily using numpy's percentile function

p_tiles = np.percentile(ending_values,[5,10,15,25,75,85,90, 95])
for p in range(len(p_tiles)):
    l = [5,10,15,25,75,85,90,95]
    print( "{}%-ile: ".format(l[p]).rjust(15),"{}".format(locale.currency(p_tiles[p], grouping=True)))

       5%-ile:  $400,786.79
      10%-ile:  $517,436.68
      15%-ile:  $606,444.07
      25%-ile:  $746,256.62
      75%-ile:  $1,867,705.61
      85%-ile:  $2,443,746.84
      90%-ile:  $2,879,132.81
      95%-ile:  $3,657,034.78

In [ ]:

Introduction to Monto Carlo Simulation of a S&P 500-like investment¶

Starting with 10,000 and investing an additional 10,000 annually, what is the probability that you will have at least 1,000,000 after 30 years of investing in the S&P 500 etf?¶

You can download the Anaconda platform from http://continuum.io¶

You can download the Anaconda platform from http://continuum.io ¶