!date
Sat Feb 15 20:36:55 JST 2020
import pandas as pd
import numpy as np
import scipy as sp
import scipy.stats as ss
import scipy.special as sps
import arviz as az
# Make inline plots raster graphics
from IPython.display import set_matplotlib_formats
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
from termcolor import colored
import seaborn as sns
clrs = sns.color_palette("Spectral", 6)
def set_plot_style(usetex=False):
sns.set_style('white', {'axes.linewidth': 0.5})
sns.set(style='white', font_scale=1.1,#context='paper',
rc={'xtick.major.size': 6, 'ytick.major.size': 6, 'legend.fontsize': 14,
'text.usetex': usetex, 'font.family': 'serif', 'font.serif': ['Verdana'],
'text.latex.preamble': r"\usepackage{type1cm}"})
plt.rcParams['xtick.major.size'] = 6
plt.rcParams['xtick.major.width'] = 1
plt.rcParams['ytick.major.size'] = 6
plt.rcParams['ytick.major.width'] = 1
plt.rcParams['xtick.bottom'] = True
plt.rcParams['ytick.left'] = True
set_plot_style()
import warnings
warnings.filterwarnings('ignore')
func_dict = {"q2.5": lambda x: np.percentile(x, 2.5),
"q25": lambda x: np.percentile(x, 25),
"median": lambda x: np.percentile(x, 50),
"q75": lambda x: np.percentile(x, 75),
"q97.5": lambda x: np.percentile(x, 97.5)}
def get_stats(cmdstan_data):
# include mean and hpd
stats = az.summary(cmdstan_data,credible_interval=0.95).loc[:, ['mean','hpd_2.5%','hpd_97.5%','ess_bulk','ess_tail','r_hat']].reset_index().rename(columns={'index':'var', 'hpd_2.5%':'hpd2.5', 'hpd_97.5%':'hpd97.5'})
stats = az.summary(cmdstan_data,credible_interval=0.50).loc[:, ['hpd_25%','hpd_75%']].reset_index().rename(columns={'index':'var', 'hpd_25%':'hpd25', 'hpd_75%':'hpd75'}).\
merge(stats, left_on='var', right_on='var')
# include percentiles
stats = az.summary(cmdstan_data, stat_funcs=func_dict, extend=False).reset_index().rename(columns={'index': 'var'}).merge(stats, left_on='var', right_on='var')
stats['time'] = stats['var'].apply(lambda st: st[st.find("[")+1:st.find("]")])
stats['time'] = ['NA' if "[" not in y else int(x)+1 for x,y in zip(stats['time'],stats['var'])]
stats['var'] = stats['var'].apply(lambda st: st[:st.find("[")] if "[" in st else st)
return stats.loc[:,['var','time','mean','hpd2.5','hpd25','hpd75','hpd97.5','q2.5','q25','median','q75','q97.5','ess_bulk','ess_tail','r_hat']]
resultsdir = "../../../Hokkaido_Backup/Wuhan Serial Interval 2020/stan-sims-certain_and_probable"
from scipy.optimize import fsolve
import scipy.special as sps
def find_params_Weibull(p,mu,SD):
θ, k = np.exp(p)
return (θ*sps.gamma(1+1/k)-mu, θ*np.sqrt(sps.gamma(1+2/k)-sps.gamma(1+1/k)**2)-SD)
def weibull_pdf(x,θ,k):
return (k / θ) * (x / θ)**(k - 1) * np.exp(-(x / θ)**k)
weibull_cdf = lambda x,θ,k: -np.expm1(-(x/θ)**k)
mean_SARS = 8.4
sd_SARS = 3.8
folder = "lognormal-truncated"
distrib, truncation_type = folder.split("-")
print(colored(folder, 'red'))
posterior_glob = !cd "{resultsdir}/{folder}"; ls trace-*
cmdstan_data = az.from_cmdstan(posterior = [resultsdir+"/"+folder+"/"+x for x in posterior_glob],
log_likelihood="log_likelihood")
param1 = cmdstan_data.posterior.param1.values.ravel()
param2 = cmdstan_data.posterior.param2.values.ravel()
xstep = .01
xmax = 20
ymax = 0.12
x = np.arange(.01,xmax+xstep,xstep)
nsamples = 10000
# a bit faster than to use scipy.stats functions
lognormpdf = lambda x, mu, sigma: (np.exp(-(np.log(x) - mu)**2 / (2 * sigma**2)) / (x * sigma * np.sqrt(2 * np.pi)))
lognormcdf = lambda x, mu, sigma: 0.5 + 0.5*sps.erf((np.log(x) - mu)/np.sqrt(2)/sigma)
yy = np.stack([[lognormcdf(xx+xstep,param1[idx],param2[idx])-lognormcdf(xx,param1[idx],param2[idx]) for xx in x] for idx in range(nsamples)]).T
lognormal-truncated
fig, ax = plt.subplots()
fig.set_size_inches(4.5,3.5);
clrs = ['k','C7']
disease = 'COVID-19'
Ymedian = [np.median(yy[j]) for j in range(len(x)-1)]
Ylower = [np.percentile(yy[j],[2.5])[0] for j in range(len(x))]
Yupper = [np.percentile(yy[j],[97.5])[0] for j in range(len(x))]
ax.plot(x[1:], Ymedian, lw=2, color=clrs[0], label=disease)
ax.plot(x, Yupper, lw=.3, color=clrs[0])
ax.plot(x, Ylower, lw=.3, color=clrs[0])
ax.fill_between(x, Ylower, Yupper, color=clrs[1], alpha=.35)
ax.set_xlabel('Serial interval (days)'); ax.set_ylabel('Probability density')
disease = 'SARS'
res = fsolve(lambda x: find_params_Weibull(x, mean_SARS, sd_SARS), (1,1))
res_ = np.exp(res)
Y_SARS = [weibull_cdf(xx+xstep, res_[0], res_[1])-weibull_cdf(xx, res_[0], res_[1]) for xx in x]
ax.plot(x, Y_SARS, label=disease, lw=2, color=clrs[0], ls='dashed')
ax.legend(frameon=False)
ax.set_ylim(bottom=0, top=.003)
ax.set_xlim(0, 20)
ytks = [0,.001,.002,.003]; ax.set_yticks(ytks);
ax.spines['left'].set_bounds(0,.003)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False);
my_dpi = 300
plt.savefig("../../results/Fig-MAIN.pdf",
format='pdf',
figsize=(3.54331/my_dpi, 3.5/my_dpi), dpi=my_dpi,
bbox_inches='tight');
(for explanatory purposes)
fig, ax = plt.subplots()
fig.set_size_inches(4.5,3.5);
clrs = ['k', 'C7', 'C1']
disease = 'COVID-19'
Ymedian = [np.median(yy[j]) for j in range(len(x)-1)]
Ylower = [np.percentile(yy[j],[2.5])[0] for j in range(len(x))]
Yupper = [np.percentile(yy[j],[97.5])[0] for j in range(len(x))]
ax.plot(x[1:], Ymedian, lw=2, color=clrs[0], label=disease)
ax.plot(x, Yupper, lw=.3, color=clrs[0])
ax.plot(x, Ylower, lw=.3, color=clrs[0])
ax.fill_between(x, Ylower, Yupper, color=clrs[1], alpha=.35)
Ymean = [np.mean(yy[j]) for j in range(len(x)-1)]
Ymax_x = x[1:][np.argmax(Ymean)]
Ymax_y = Ymean[np.argmax(Ymean)]
print(colored("Maximum (x): %.2f"%x[1:][np.argmax(Ymean)],'red'))
ax.scatter([Ymax_x],[Ymax_y],color='red',marker='x',zorder=5)
Ymean_value = np.sum([x*y for x,y in zip(x[1:],Ymean)])
print(colored("Mean (--): %.2f"%Ymean_value,'green'))
ax.axvline(Ymean_value, color='C2', ls='dashed')
ax.set_xlabel('Serial interval (days)'); ax.set_ylabel('Probability density')
ax.legend(frameon=False)
ax.set_ylim(bottom=0, top=.003)
ax.set_xlim(0, 20)
ytks = [0,.001,.002,.003]; ax.set_yticks(ytks);
ax.spines['left'].set_bounds(0,.003)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False);
Maximum (x): 2.93 Mean (--): 4.59
print(colored("Median: %.2f"%np.exp(np.mean(param1)),'blue'))
Median: 3.96
which coincides with the value stated in our paper.