Monte Carlo Simulation with Python

Notebook to accompany article on Practical Business Python

Updated to use numpy instead of Python loops for speed, based on comments here

In [1]:

import pandas as pd
import numpy as np
import seaborn as sns

In [2]:

# Apply seaborn's 'whitegrid' style to the plots drawn later in the notebook
sns.set_style('whitegrid')

In [3]:

# Define the variables for the Percent to target based on historical results
avg = 1                   # mean of the normal distribution sampled below
std_dev = .1              # standard deviation of that distribution
num_reps = 500            # first dimension (rows) of every simulated array
num_simulations = 100000  # second dimension (columns) of every simulated array

# Seed numpy's global random generator so that a fresh "Restart & Run All"
# reproduces the same draws from np.random.normal / np.random.choice.
random_seed = 42
np.random.seed(random_seed)

In [4]:

# Sample the percent to target from a normal distribution centred on `avg`
# with spread `std_dev`; the result has shape (num_reps, num_simulations)
pct_to_target = np.random.normal(
    loc=avg,
    scale=std_dev,
    size=(num_reps, num_simulations),
)

In [5]:

# Preview the first ten rows of the sampled matrix
pct_to_target[:10]

Out[5]:

array([[1.00168889, 0.84322867, 0.89764919, ..., 1.01640703, 1.20373747,
        0.97037204],
       [1.028913  , 0.90536114, 0.924947  , ..., 1.04301131, 1.00578572,
        0.82234308],
       [0.92013836, 0.89966565, 0.9061088 , ..., 0.95344094, 1.04113555,
        1.19014847],
       ...,
       [0.90638344, 0.92727774, 1.06040697, ..., 0.98925006, 0.7699789 ,
        1.13980837],
       [1.05623333, 0.90113527, 1.17673089, ..., 0.88190302, 0.75553421,
        1.01923352],
       [1.0517061 , 1.02548497, 1.06281586, ..., 1.01414038, 1.03138342,
        1.01631576]])

In [6]:

# Draw the sales targets: every entry is picked from a fixed list of values,
# each selected with the probability at the same position in the second list
sales_target_values = [75_000, 100_000, 200_000, 300_000, 400_000, 500_000]
sales_target_prob = [.3, .3, .2, .1, .05, .05]
sales_target = np.random.choice(
    sales_target_values,
    size=(num_reps, num_simulations),
    p=sales_target_prob,
)

In [7]:

# Preview the first ten rows of the sampled sales targets
sales_target[:10]

Out[7]:

array([[ 75000, 400000,  75000, ..., 100000, 400000,  75000],
       [ 75000, 300000, 200000, ..., 200000, 200000, 500000],
       [ 75000,  75000,  75000, ..., 200000, 100000, 300000],
       ...,
       [ 75000, 100000, 200000, ...,  75000, 200000, 200000],
       [ 75000,  75000,  75000, ..., 500000,  75000,  75000],
       [ 75000,  75000,  75000, ...,  75000, 100000, 500000]])

In [8]:

# Map every percent-to-target value to its commission rate.
# With two edges, np.digitize returns 0 for values below 0.9, 1 for values in
# [0.9, 0.99) and 2 for values at or above 0.99 -- exactly the three valid
# indices into the rate array. No upper sentinel edge is needed, so an
# arbitrarily large value can no longer produce an out-of-range index that
# makes np.take raise IndexError.
commission_percentages = np.take(
    np.array([0.02, 0.03, 0.04]),
    np.digitize(pct_to_target, bins=[.9, .99])
)

In [9]:

# Preview the first ten rows of the commission rates
commission_percentages[:10]

Out[9]:

array([[0.04, 0.02, 0.02, ..., 0.04, 0.04, 0.03],
       [0.04, 0.03, 0.03, ..., 0.04, 0.04, 0.02],
       [0.03, 0.02, 0.03, ..., 0.03, 0.04, 0.04],
       ...,
       [0.03, 0.03, 0.04, ..., 0.03, 0.02, 0.04],
       [0.04, 0.03, 0.04, ..., 0.02, 0.02, 0.04],
       [0.04, 0.04, 0.04, ..., 0.04, 0.04, 0.04]])

In [10]:

# Multiply rate by sales target element-wise, then add up along axis 0
# (the num_reps axis) to get one total per column
total_commissions = np.sum(commission_percentages * sales_target, axis=0)

In [11]:

# Population standard deviation (numpy's default, ddof=0) of the totals
np.std(total_commissions)

Out[11]:

96204.21537404737

In [12]:

# Wrap total_commissions in a single-column DataFrame and show the first five rows
df = pd.DataFrame({'Total_Commissions': total_commissions})
df.head(5)

Out[12]:

	Total_Commissions
0	2982500.0
1	3003500.0
2	2950250.0
3	2902000.0
4	2762500.0

In [13]:

# Histogram of the Total_Commissions column
df.plot.hist(title='Commissions Distribution')

Out[13]:

<matplotlib.axes._subplots.AxesSubplot at 0x7f6e5f13d2e8>

In [14]:

# Summary statistics (count, mean, std, min, quartiles, max) for Total_Commissions
df.describe()

Out[14]:

	Total_Commissions
count	1.000000e+05
mean	2.832281e+06
std	9.620470e+04
min	2.447500e+06
25%	2.766750e+06
50%	2.831250e+06
75%	2.896500e+06
max	3.279750e+06

In [ ]: