%load_ext autoreload
%autoreload 2
import numpy as np
import scipy as sp
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import (
ColumnDataSource, HoverTool
)
from bokeh.palettes import Blues, Reds
from bokeh.tile_providers import get_provider, Vendors
from sklearn.preprocessing import MinMaxScaler
from pyproj import Proj, transform
output_notebook()
On 11 March 2020, the World Health Organization officiallly declared the coronavirus (COVID-19 or SARS-CoV-2) a pandemic. But, fear not, this is not yet another opinionated response on the failing of society and the end of times. This is all about Mathematics!
path = 'COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-{}.csv'
features = ['Confirmed', 'Deaths', 'Recovered']
dataframes = dict()
for feature in features:
dataframes[feature] = pd.read_csv(path.format(feature))
Let's take a took at confirmed cases and prepare the dataset
df = dataframes['Confirmed'].copy()
df['total'] = df[df.columns[-1]]
# in order to easily transform from longitude/latitude (EPSG:4326) into another projection (for example WebMercator EPSG:3857
inProj, outProj = Proj(init="epsg:4326"), Proj("epsg:3857")
df['x'], df['y'] = transform(inProj, outProj, df['Long'].tolist(), df['Lat'].tolist())
#scale within 0 and 1
df['size'] = 150 * MinMaxScaler().fit_transform(df[['total']])
/Users/g4brielvs/.local/share/virtualenvs/COVID-19-xLAGSW6c/lib/python3.8/site-packages/pyproj/crs/crs.py:55: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6 return _prepare_from_string(" ".join(pjargs))
Let's prepare the map using Bokeh,
p = figure(
title="COVID-19: Confirmed Cases",
x_axis_type="mercator",
y_axis_type="mercator",
width=800,
tools='pan,box_zoom,wheel_zoom,save,help')
CARTODBPOSITRON = get_provider(Vendors.CARTODBPOSITRON)
p.add_tile(CARTODBPOSITRON)
source = ColumnDataSource(data=dict(
x=list(df['x']),
y=list(df['y']),
size=list(df['size']),
total=list(df['total']),
region=list(df['Country/Region']),
province=list(df['Province/State'])))
hover = HoverTool(tooltips=[
("Country/Region", "@region"),
("Province/State", "@province"),
("Confirmed Cases", "@total")
])
p.add_tools(hover)
p.circle(x ='x', y='y', source=source,size='size', line_color="#FF0000", fill_color="#FF0000", fill_alpha=0.05)
#p.toolbar.logo = None
p.xaxis.visible = False
p.yaxis.visible = False
show(p)
df = dataframes['Confirmed'].copy()
# unpivot dataframe
df = df.melt(id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], var_name="date", value_name="count")
df = df.groupby(['Country/Region', 'date']).agg(sum)
# set time series
df = df.reset_index()
df['date'] = pd.to_datetime(df['date'])
df = df.set_index('date').sort_index()
Let's take at closer look at China at the beginning of the epidemic,
df = df[df.index < pd.Timestamp('2020-02-15')]
df = df[df['Country/Region'] == 'China']
Let's fit the data to an exponential,
# expoenential
exp = lambda x, a, b : np.exp(a*(x-b))
# fit
y = df['count']
x = np.arange(len(y))
popt, pcov = sp.optimize.curve_fit(exp, x, y)
# put it back
df['fit'] = exp(x, *popt)
Let's prepare the map using Bokeh,
p = figure(title="COVID-19: Confirmed Cases in China and Exponential", x_axis_type="datetime", y_axis_type="linear", plot_width=800, plot_height=400)
source = ColumnDataSource(df)
p.scatter('date', 'count', source=source, legend_label='Confirmed Cases')
p.line('date', 'fit', source=source, line_dash='dotted')
hover = HoverTool(tooltips=[
("Confirmed Cases", "@count")
])
p.add_tools(hover)
p.xaxis.axis_label = 'Date'
p.yaxis.axis_label = 'Cases'
p.legend.location = 'center_left'
show(p)
An epidemic is a textbook example of an exponential growth. Not only an example, but its underlying principle is a centerpiece in the history of Mathematics and its discovery was fundamental for breakthroughs in Probability Theory, Mathematical Analysis, Differential Equations, Physics, Chemistry and Biology and even Pop Culture.
The idea is rather straightforward: it has do with the way a quantity changes. When the rate on which a given quantity changes is proportional to the quantity itself at any given moment, mathematicians called it exponential.
the rate of change is proportional to the quantity at any given time
Probably many in the past observed this behavior on how populations expand, diseases spread, biology decays or even how science progresses, but it was formally brought to life in the work on logarithms. Later, while studying compound interest - more money you have, more money you make - Bernoulli discovered a constant ironically known as Euler's number.
We will have the opportunity to talk about the multiple "origins" of that constant in the future.
Coming back to the virus. In our case, having an exponential growth suggests that the larger the infectious population, quicker we will have new cases and the infectious population will grow. We pratically experience that whenever people get sick around us (or when all of our friends start getting pregnant at the same...no! it is not exponential...or is it?)
Scientists denote ideas using mathematical script for convenience (sometimes laziness). Let us do it then,
the rate of infection is proportional to the infectious population
Well, stating a relation of proportionality does not give us much. We still are not able to answer any questions. Is this virus dangerous? How contagious is it? Is it deadly? Are we all going to die?
We are going to formulate a model using variables and parameters to quantify how proportial that relation is.
Using our intuition for the model's formulation, those are the two parameters we will take into account. The rate on which people are infected should grow the more contact they have to each other and should decay as they recover. In another words, the rate of change is positively proportional to $\beta$ (contact rate) and negatively proportional to $\gamma$ (recovery rate)
Finally, we have our equation,
$$ \begin{equation} \frac{dI}{dt} = \left(\beta - \gamma\right) I \end{equation} $$Now let us take some time to check if we can take some conclusions from it.
If the recovery rate is greater than contact rate, the rate of change will always be negative, so the infection will eventually dissipate and even not outbreak. On the other hand, if contact rate is greater recovery rate, we will have a disaster. Apparently, those paramaters indicate how strong an epidemic can be.
Now, if $\gamma$ is the recovery rate, then $1/\gamma$ is the infectious/recovery period or the period of time during which an infected individual can pass it on, consider the product between the $\beta$ (contact rate) and $1/\gamma$ (infectious period/recovery) and we will have the average number of people an infected patient will pass it on.
$$ \begin{equation} R_{0} = \frac {\beta}{\gamma} \end{equation} $$In fact, that indicator is known as the basic reproduction number. Note that when $R_0 > 1$ the infection will be able to start spreading in a population, but not if $R_0 < 1$.
Probably everybody has that number in their minds at the present. When trying to mitigiate, we will see how important bringing it down is in order to slow down the epidemic.
After the outbreak in China, studies found that COVID-19's basic reproduction number is between 1.4-3.9, which means that, on avarage, a sick patient transmits the infection to 1.4 and 3.9 others. The recovery period is around 10 days.
We solved our equation numerically inputting different parameters to see how it plays out.
# total population
N = 10000000
# initial number of infected and recovered
I0, R0 = 1, 0
# initial susceptible
S0 = N - I0 - R0
# contact rate and recovery rate
beta, gamma = 0.2, 1./10
# time
t = np.linspace(0, 45, 45)
# gradient vector
def gradient(y, t, N, beta, gamma):
I = y
dI = (beta - gamma) * I
return dI
# initial conditions vector
y0 = I0
betas = [0.20, 0.25, 0.30, 0.35, 0.4, 0.41, 0.42]
results = dict()
for beta in betas:
y = sp.integrate.odeint(gradient, y0, t, args=(N, beta, gamma))
I = y.T[0]
results[beta] = I
We wil solve the equation for multiple parameters,
title = "The I Model: Basic Reproductive Number Sensibility"
p = figure(title=title, x_axis_type="linear", y_axis_type="linear", plot_width=750, plot_height=400)
for beta, color in zip(betas, reversed(Reds[len(betas)])):
p.line(t, results[beta]/1000, color=color, legend_label=f'{beta*10}')
p.xaxis.axis_label = 'Days'
p.yaxis.axis_label = 'Infected Population (in thousands)'
p.legend.location = "top_left"
p.legend.click_policy = 'hide'
p.legend.inactive_fill_alpha = 0.6
show(p)
We visually check how small decrements basic reproduction number will have a huge accumulative gain over time.That is reason why we should take agressive action, either by social distancing, avoiding and cancelling events.
We saw that the basic reproduction number depends on contact rate and recovery rate. Since the recovery rate is barely in our control as it is typicallly a biological charateristic of the virus, controlling the contact rate is our best chance to flatten the curve. That does not mean we should not invest in treatments, best case scenario will bring both down.
But here is the plot twist: an epidemic is not purely exponential! Our first model assumed that the infection will spread indefinitely, the population is infinite and there is no immunity or cure.
Of course, that is nonsense. An rather evil way of think is that once the entire population is infected, there is none left to be infected!
Let us address that in a new model by assuming the following,
In COVID's case, it is not clear how long the protection lasts. But we will assume it lasts longer than our simulated timeframe.
We will use a compartmental model. The idea is to divide the population into group (or compartments) and describe how the infection moves from one to another. It was first introduced by Anderson Ogilvy Kermack and Anderson Gray McKendrick.
Let the SIR model be as follows,
S(t): susceptible but not yet infected population
I(t): infectious population
R(t): recovered population
$\beta$: contact rate or how many people an infected person comes into contact with in given time
$\gamma$: recovery rate or how many people recover in given time
N: total population
First, the equation S tell us that as the population is infected, the susceptible population becames smaller.
$$ \begin{equation} \frac{dS}{dt} = - \frac{\beta SI}{N} \end{equation} $$Second, the equation I is similar to our original model's, adding a factor of susceptibility.
$$ \begin{equation} \frac{dI}{dt} = \frac{\beta SI}{N} - \gamma I \end{equation} $$Last, the equation R represents that recovered population is proportinal to the recovery rate.
$$ \begin{equation} \frac{dR}{dt} = \gamma I \end{equation} $$Unfortunately, that system of differential equations non-linear and does not have analytical solution. Fortunately, we solve it numerically with the following initial conditions,
# total population
N = 10000000
# initial number of infected and recovered
I0, R0 = 1, 0
# initial susceptible
S0 = N - I0 - R0
# contact rate and recovery rate
beta, gamma = 0.2, 1./10
# time
t = np.linspace(0, 180, 180)
# gradient vector
def gradient(y, t, N, beta, gamma):
S, I, R = y = y
dS = -beta * S * I / N
dI = beta * S * I / N - gamma * I
dR = gamma * I
return dS, dI, dR
# initial conditions vector
y0 = S0, I0, R0
betas = [0.20, 0.25, 0.30, 0.35, 0.4, 0.41, 0.42]
results = dict()
for beta in betas:
y = sp.integrate.odeint(gradient, y0, t, args=(N, beta, gamma))
S, I, R = y.T
results[beta] = (S, I, R)
title = "The SIR Model: R = 4"
p = figure(title=title, x_axis_type="linear", y_axis_type="linear", plot_width=750, plot_height=400)
p.line(t, results[0.4][0]/1000, color='black', line_alpha=0.5, line_dash='dotted', legend_label=f'Susceptible')
p.line(t, results[0.4][1]/1000, color='red', legend_label=f'Infected')
p.line(t, results[0.4][2]/1000, color='green', line_alpha=0.5, line_dash='dotted', legend_label=f'Recovered')
p.xaxis.axis_label = 'Days'
p.yaxis.axis_label = 'Population (in thousands)'
p.legend.location = "center_left"
p.legend.click_policy = 'hide'
p.legend.inactive_fill_alpha = 0.6
show(p)
title = "The SIR Model: Infected Population"
p = figure(title=title, x_axis_type="linear", y_axis_type="linear", plot_width=750, plot_height=400)
for beta, color in zip(betas, reversed(Reds[len(betas)])):
#p.line(t, results[beta][0]/1000, color=color, legend_label=f'{beta*10}')
p.line(t, results[beta][1]/1000, color=color, legend_label=f'{beta*10}')
#p.line(t, results[beta][2]/1000, color=color, legend_label=f'{beta*10}')
p.xaxis.axis_label = 'Days'
p.yaxis.axis_label = 'Infected Population (in thousands)'
p.legend.location = "top_left"
p.legend.click_policy = 'hide'
p.legend.inactive_fill_alpha = 0.6
show(p)