# Generate a list of values sampled from a Poisson distribution
from scipy.stats import poisson
import numpy as np
def gen_invoices(poisson_mu, num_vendors, random_state=None):
    """Return the total number of invoices arriving in one period.

    Each vendor's invoice count for the period is drawn independently from
    a Poisson distribution, and the per-vendor counts are summed.

    Args:
        poisson_mu: The lambda (rate) parameter of the Poisson distribution
            used for every vendor. Set it so that the resulting total is
            approximately the right number of invoices for the time period
            being modelled.
        num_vendors: The number of vendors Accounts Payable deals with,
            i.e. the number of independent Poisson draws.
        random_state: Optional seed or random generator forwarded to
            ``poisson.rvs`` so that a run can be reproduced. The default
            ``None`` uses SciPy's global random state.

    Returns:
        The sum of all per-vendor invoice counts as a plain ``int``.
    """
    invoices_per_vendor = poisson.rvs(
        poisson_mu, size=num_vendors, random_state=random_state
    )
    # Convert the NumPy scalar to a built-in int so the result can be used
    # directly as a count/size by callers.
    return int(np.sum(invoices_per_vendor))
# Generate the workload over a period of time
# It could be for any period of time -- substitute the appropriate number in period_duration
# This produces the number of invoices that were routed (whether correctly or incorrectly)
# and the number of invoices that couldn't be tackled within that period
from scipy.stats import norm # the Gaussian distribution
import math # for floor and ceil functions
def get_period_workload(num_invoices_input,
                        mean_std_dev,
                        period_duration,
                        routing_success_rate,
                        random_state=None):
    """Simulate routing a batch of invoices within a fixed time budget.

    A routing time is drawn from a Gaussian for every arriving invoice.
    Invoices are considered handled one after another, so an invoice counts
    as routed when the cumulative time up to and including it fits inside
    ``period_duration``. (This is not strictly the right model, but it is a
    good approximation.) Progress is also reported via ``print``.

    Args:
        num_invoices_input: The number of invoices arriving in the period.
            It can be a fixed number or a value produced by ``gen_invoices``.
            Zero is allowed.
        mean_std_dev: A sequence ``[mean, std_dev]`` -- the Gaussian
            parameters for how long it takes to route one invoice.
        period_duration: The total amount of working time available in the
            period, in the same units as ``mean_std_dev``.
        routing_success_rate: The fraction of routed invoices that are
            routed correctly.
        random_state: Optional seed or random generator forwarded to
            ``norm.rvs`` so that a run can be reproduced. The default
            ``None`` uses SciPy's global random state.

    Returns:
        A list of six values, in this order:
            0. ``num_invoices_input`` (as passed in)
            1. number of invoices routed (correctly or not)
            2. number of invoices routed correctly
            3. number of invoices routed incorrectly
            4. number of invoices not handled because time ran out
            5. time left over in the period; a negative value is the extra
               time that would be needed to route every invoice
    """
    mean = mean_std_dev[0]
    std_dev = mean_std_dev[1]

    # Time it takes to route each individual invoice
    time_array = norm.rvs(loc=mean, scale=std_dev, size=num_invoices_input,
                          random_state=random_state)
    # [-5:] (not [-5:-1]) so that the very last invoice is included
    print("Time to route the first few invoices...{} ... and the last few {}"
          .format(time_array[0:5], time_array[-5:]))

    # Running total of the time spent after each invoice
    time_array_cu = np.cumsum(time_array)
    print("Cumulative time to route the first few invoices...{} ... and the last few {}"
          .format(time_array_cu[0:5], time_array_cu[-5:]))

    # Invoices whose cumulative time fits within the period were routed
    num_invoices_routed = int(np.count_nonzero(time_array_cu <= period_duration))
    print("Number of invoices routed = {}".format(num_invoices_routed))

    # Of the invoices routed, the ones routed successfully/correctly
    num_correctly_routed = int(math.floor(routing_success_rate * num_invoices_routed))
    print("Number of invoices correctly routed = {}".format(num_correctly_routed))

    # The routed invoices that were not routed correctly
    num_not_correctly_routed = num_invoices_routed - num_correctly_routed
    print("Number of invoices NOT correctly routed = {}".format(num_not_correctly_routed))

    # Invoices whose cumulative time exceeds the period were never reached
    num_invoices_not_routed = int(np.count_nonzero(time_array_cu > period_duration))
    print("Number of invoices not handled due to lack of time = {}".format(num_invoices_not_routed))

    # How much time is left over in the period (if any)? A negative value
    # means that much more time is needed to route the unhandled invoices.
    # With no invoices at all, no time is consumed; indexing [-1] on the
    # empty array would raise IndexError.
    total_time_needed = time_array_cu[-1] if time_array_cu.size else 0.0
    time_left_in_period = period_duration - total_time_needed
    print("Extra time left over in the period = {}".format(time_left_in_period))

    return [num_invoices_input,
            num_invoices_routed,
            num_correctly_routed,
            num_not_correctly_routed,
            num_invoices_not_routed,
            time_left_in_period
            ]
# Size of the vendor population sending invoices to Accounts Payable
NUM_VENDORS = 2500

# Poisson rate at which a single vendor generates invoices.
# Choose it so that the simulated invoices per day across all vendors
# matches the real numbers seen by the organization.
MU = 0.35

# Headcount of AP Specialists routing invoices
NUM_AP_SPECIALISTS = 4

# Hours a single AP Specialist works on routing per day
NUM_HOURS_WORKED_DAILY_PER_PERSON = 6.5

# Combined daily routing capacity of the team, in hours and then in seconds
NUM_HOURS_WORKED_DAILY = NUM_AP_SPECIALISTS * NUM_HOURS_WORKED_DAILY_PER_PERSON
NUM_SECS_WORKED_DAILY = NUM_HOURS_WORKED_DAILY * 60 * 60
print(
    "Number of seconds available for routing invoices per day = {}".format(
        NUM_SECS_WORKED_DAILY
    )
)

# Business days in a month, and the resulting monthly routing capacity
MONTH_DAYS = 20
NUM_SECS_WORKED_MONTHLY = MONTH_DAYS * NUM_SECS_WORKED_DAILY
print(
    "Number of seconds available for routing invoices per month = {}".format(
        NUM_SECS_WORKED_MONTHLY
    )
)

# Ten sample monthly invoice volumes.
# NOTE(review): each sample is one daily draw scaled by MONTH_DAYS rather
# than the sum of MONTH_DAYS independent daily draws -- confirm intended.
invoices = [MONTH_DAYS * gen_invoices(MU, NUM_VENDORS) for _ in range(10)]
invoices
# Manual routing: the time to route one invoice is modelled as a Gaussian
# with this mean and standard deviation.
MEAN = 80
STD_DEV = 20

# Machine-learning routing: the time for the ML system to route one invoice
# is also modelled as a Gaussian, with this mean and standard deviation.
MEAN_ML = 0.5
STD_DEV_ML = 0.002

# Fraction of routed invoices that end up routed correctly
ROUTING_SUCCESS_RATE = 0.95

# Incorrectly routed invoices take much longer to get sorted out, so the
# Gaussian describing their resolution time uses a far larger mean and
# standard deviation than MEAN and STD_DEV.
LARGE_MEAN = 3 * 60 * 60   # 3 hours = 10,800 seconds
LARGE_STD_DEV = 30 * 60    # 30 minutes = 1,800 seconds

# Assume that re-routing always succeeds
LARGE_ROUTING_SUCCESS_RATE = 1.0

# Alternative [mean, std_dev] pairs for the time to fix a misrouted invoice:
# means of 1, 2, 3 and 4 hours (3600 s each), each paired with a standard
# deviation of 600 s per hour of mean.
TIMES_TO_FIX = [[hours * 3600, hours * 600] for hours in (1, 2, 3, 4)]
# This is the baseline against which to compare the benefits of the machine learning solution.
# Round 1: the AP Specialists route a month's worth of incoming invoices
res1 = get_period_workload(
    num_invoices_input=gen_invoices(MU, NUM_VENDORS) * MONTH_DAYS,
    mean_std_dev=[MEAN, STD_DEV],
    period_duration=NUM_SECS_WORKED_MONTHLY,
    routing_success_rate=ROUTING_SUCCESS_RATE,
)
# Index 3 is the count of incorrectly routed ("high touch") invoices and
# index 5 is the time left over in the period.
num_high_touch, time_remaining = res1[3], res1[5]

# Round 2: fix the high-touch invoices in whatever time remains, once for
# each candidate time-to-fix distribution
res2 = [
    get_period_workload(
        num_invoices_input=num_high_touch,
        mean_std_dev=time_to_fix,
        period_duration=time_remaining,
        routing_success_rate=LARGE_ROUTING_SUCCESS_RATE,
    )
    for time_to_fix in TIMES_TO_FIX
]
res2
# Scenario: the ML system routes 50% of invoices correctly
ML_SYS_ACC_50 = 0.5

# Round 1: the ML system takes the first pass over the month's invoices
res1 = get_period_workload(
    num_invoices_input=gen_invoices(MU, NUM_VENDORS) * MONTH_DAYS,
    mean_std_dev=[MEAN_ML, STD_DEV_ML],
    period_duration=NUM_SECS_WORKED_MONTHLY,
    routing_success_rate=ML_SYS_ACC_50,
)
# time_remaining1 is not used -- AP Specialists have the same time as before
num_high_touch1, time_remaining1 = res1[3], res1[5]

# Round 2: AP Specialists deal with the invoices the ML system got wrong
res2 = get_period_workload(
    num_invoices_input=num_high_touch1,
    mean_std_dev=[MEAN, STD_DEV],
    period_duration=NUM_SECS_WORKED_MONTHLY,
    routing_success_rate=ROUTING_SUCCESS_RATE,
)
num_high_touch2, time_remaining2 = res2[3], res2[5]

# Round 3: fix the remaining high-touch invoices in the time left over,
# once for each candidate time-to-fix distribution
res3 = [
    get_period_workload(
        num_invoices_input=num_high_touch2,
        mean_std_dev=time_to_fix,
        period_duration=time_remaining2,
        routing_success_rate=LARGE_ROUTING_SUCCESS_RATE,
    )
    for time_to_fix in TIMES_TO_FIX
]
res3
# Scenario: the ML system routes 60% of invoices correctly
ML_SYS_ACC_60 = 0.6

# Round 1: the ML system takes the first pass over the month's invoices
res1 = get_period_workload(
    num_invoices_input=gen_invoices(MU, NUM_VENDORS) * MONTH_DAYS,
    mean_std_dev=[MEAN_ML, STD_DEV_ML],
    period_duration=NUM_SECS_WORKED_MONTHLY,
    routing_success_rate=ML_SYS_ACC_60,
)
# time_remaining1 is not used -- AP Specialists have the same time as before
num_high_touch1, time_remaining1 = res1[3], res1[5]

# Round 2: AP Specialists deal with the invoices the ML system got wrong
res2 = get_period_workload(
    num_invoices_input=num_high_touch1,
    mean_std_dev=[MEAN, STD_DEV],
    period_duration=NUM_SECS_WORKED_MONTHLY,
    routing_success_rate=ROUTING_SUCCESS_RATE,
)
num_high_touch2, time_remaining2 = res2[3], res2[5]

# Round 3: fix the remaining high-touch invoices in the time left over,
# once for each candidate time-to-fix distribution
res3 = [
    get_period_workload(
        num_invoices_input=num_high_touch2,
        mean_std_dev=time_to_fix,
        period_duration=time_remaining2,
        routing_success_rate=LARGE_ROUTING_SUCCESS_RATE,
    )
    for time_to_fix in TIMES_TO_FIX
]
res3
# Scenario: the ML system routes 70% of invoices correctly
ML_SYS_ACC_70 = 0.7

# Round 1: the ML system takes the first pass over the month's invoices
res1 = get_period_workload(
    num_invoices_input=gen_invoices(MU, NUM_VENDORS) * MONTH_DAYS,
    mean_std_dev=[MEAN_ML, STD_DEV_ML],
    period_duration=NUM_SECS_WORKED_MONTHLY,
    routing_success_rate=ML_SYS_ACC_70,
)
# time_remaining1 is not used -- AP Specialists have the same time as before
num_high_touch1, time_remaining1 = res1[3], res1[5]

# Round 2: AP Specialists deal with the invoices the ML system got wrong
res2 = get_period_workload(
    num_invoices_input=num_high_touch1,
    mean_std_dev=[MEAN, STD_DEV],
    period_duration=NUM_SECS_WORKED_MONTHLY,
    routing_success_rate=ROUTING_SUCCESS_RATE,
)
num_high_touch2, time_remaining2 = res2[3], res2[5]

# Round 3: fix the remaining high-touch invoices in the time left over,
# once for each candidate time-to-fix distribution
res3 = [
    get_period_workload(
        num_invoices_input=num_high_touch2,
        mean_std_dev=time_to_fix,
        period_duration=time_remaining2,
        routing_success_rate=LARGE_ROUTING_SUCCESS_RATE,
    )
    for time_to_fix in TIMES_TO_FIX
]
res3