%load_ext watermark
%watermark -a 'cs224' -u -d -v -p numpy,xarray,scipy,pandas,sklearn,matplotlib,seaborn,pymc3,lifelines,rpy2
%matplotlib inline
import numpy as np, scipy, scipy.stats as stats, scipy.special, scipy.misc, pandas as pd, matplotlib.pyplot as plt, seaborn as sns, xarray as xr
import matplotlib as mpl
import pymc3 as pm
import theano as thno
import theano.tensor as T
import datetime, time, math
from dateutil import relativedelta
from collections import OrderedDict
SEED = 41
np.random.seed(SEED)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# pd.set_option('display.float_format', lambda x: '%.2f' % x)
np.set_printoptions(edgeitems=10)
np.set_printoptions(linewidth=1000)
np.set_printoptions(suppress=True)
np.core.arrayprint._line_width = 180
sns.set()
from IPython.display import display, HTML
from IPython.display import display_html
def display_side_by_side(*args):
html_str=''
for df in args:
if type(df) == np.ndarray:
df = pd.DataFrame(df)
html_str+=df.to_html()
html_str = html_str.replace('table','table style="display:inline"')
# print(html_str)
display_html(html_str,raw=True)
CSS = """
.output {
flex-direction: row;
}
"""
def display_graphs_side_by_side(*args):
html_str='<table><tr>'
for g in args:
html_str += '<td>'
html_str += g._repr_svg_()
html_str += '</td>'
html_str += '</tr></table>'
display_html(html_str,raw=True)
display(HTML("<style>.container { width:70% !important; }</style>"))
%load_ext autoreload
%autoreload 1
%aimport covid19
x = np.linspace(0.0,30.0,1000)
y = covid19.gamma_dist.pdf(x)
fig=plt.figure(figsize=(14, 8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
ax.plot(x,y)
covid19.gamma_loc,covid19.gamma_k,covid19.gamme_theta
country_name, first_date, init_add = 'China', None, 0
# cfr_estimate, timeshift = covid19.calculate_delay_between_new_cases_and_death(country_name, first_date=first_date, init_add=init_add)
# print(cfr_estimate, timeshift)
# loc = max(timeshift - (gamma_mean - gamma_loc), 0.0)
china_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add)
china_mortality_analysis.fit()
print(china_mortality_analysis.death_rate())
china_mortality_analysis.plot()
china_mortality_analysis.df.tail()
# china_mortality_analysis.df_lifelines_individual.observed_death.sum()
# china_mortality_analysis.df.head()
# china_mortality_analysis.df.tail()
# china_mortality_analysis.fit()
# china_mortality_analysis.wbf.print_summary()
# expected_life_time = china_mortality_analysis.wbf.lambda_ * scipy.special.gamma(1 + 1 / china_mortality_analysis.wbf.rho_)
# expected_life_time/365
# china_mortality_analysis.death_rate()
# china_mortality_analysis.plot()
alternative_germany_data = covid19.get_rki_df()
alternative_germany_data.tail()
country_name, first_date, init_add = 'Germany', pd.to_datetime('2020-03-09'), 0.0
germany_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add, df=alternative_germany_data)
germany_mortality_analysis.fit()
print(germany_mortality_analysis.print_death_rate())
germany_mortality_analysis.plot()
# germany_mortality_analysis.ll.shift_and_scale_popt
# germany_mortality_analysis.ll.plot_lead_lag()
germany_mortality_analysis.plot_infection_and_death_curves()
germany_mortality_analysis.df.tail()
germany_mortality_analysis.project_death_and_hospitalization()
germany_mortality_analysis.project_death_and_hospitalization()
country_name, first_date, init_add = 'Austria', pd.to_datetime('2020-03-12'), 600
austria_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add)
austria_mortality_analysis.fit()
print(austria_mortality_analysis.print_death_rate())
austria_mortality_analysis.plot()
austria_mortality_analysis.df.tail()
austria_mortality_analysis.project_death_and_hospitalization()
austria_mortality_analysis.project_death_and_hospitalization()
country_name, first_date, init_add = 'Korea, South', None, 0
south_korea_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add)
south_korea_mortality_analysis.fit()
# south_korea_mortality_analysis2 = covid19.MortalityAnalysis(south_korea_name, first_date=pd.to_datetime('2020-02-20'), init_add=900)
# south_korea_mortality_analysis2.fit()
print(south_korea_mortality_analysis.print_death_rate())
# print(south_korea_mortality_analysis2.death_rate())
# print(south_korea_mortality_analysis2.prepend_df['confirmed'].iloc[-1])
south_korea_mortality_analysis.plot()
south_korea_mortality_analysis.df.tail()
# south_korea_mortality_analysis.prepend_df
country_name, first_date, init_add = 'United Kingdom', pd.to_datetime('2020-03-05'), 800
uk_mortality_analysis = covid19.MortalityAnalysis(country_name)
uk_mortality_analysis.fit()
print(uk_mortality_analysis.print_death_rate())
# uk_mortality_analysis2 = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add, mult=4.0)
# uk_mortality_analysis2.fit()
# print(uk_mortality_analysis2.death_rate())
# print(uk_mortality_analysis2.prepend_df['confirmed'].iloc[-1])
uk_mortality_analysis.plot()
uk_mortality_analysis.plot_infection_and_death_curves()
uk_mortality_analysis.df.tail()
uk_mortality_analysis.project_death_and_hospitalization()
uk_mortality_analysis.project_death_and_hospitalization()
# pd.options.mode.chained_assignment = "raise"
country_name, first_date, init_add = 'US', pd.to_datetime('2020-02-29'), 950
us_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add)
us_mortality_analysis.fit()
# us_mortality_analysis2 = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=450, mult=1.5)
# us_mortality_analysis2.fit()
print(us_mortality_analysis.print_death_rate())
# print(us_mortality_analysis2.death_rate())
# print(us_mortality_analysis2.prepend_df['confirmed'].iloc[-1])
us_mortality_analysis.plot()
us_mortality_analysis.plot_infection_and_death_curves()
us_mortality_analysis.df.tail()
us_mortality_analysis.project_death_and_hospitalization()
us_mortality_analysis.project_death_and_hospitalization()
alternative_italy_data = covid19.get_italy_df()
alternative_italy_data.tail()
country_name, first_date, init_add = 'Italy', pd.to_datetime('2020-02-21'), 0
italy_mortality_analysis = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=init_add, df=alternative_italy_data)
italy_mortality_analysis.fit()
print(italy_mortality_analysis.print_death_rate())
# italy_mortality_analysis2 = covid19.MortalityAnalysis(country_name, first_date=first_date, init_add=2000, mult=4.0)
# italy_mortality_analysis2.fit()
# print(italy_mortality_analysis2.death_rate())
# print(italy_mortality_analysis2.prepend_df['confirmed'].iloc[-1])
italy_mortality_analysis.plot()
italy_mortality_analysis.df.tail()
italy_mortality_analysis.project_death_and_hospitalization()
italy_mortality_analysis.project_death_and_hospitalization()
# italy_mortality_analysis2.prepend_df
alternative_spain_data = covid19.get_spain_df()
alternative_spain_data.tail()
spain_mortality_analysis = covid19.MortalityAnalysis('Spain', df=alternative_spain_data)
spain_mortality_analysis.fit()
print(spain_mortality_analysis.print_death_rate())
# spain_mortality_analysis2 = covid19.MortalityAnalysis('Spain', first_date=pd.to_datetime('2020-03-03'), init_add=800, mult=3.0)
# spain_mortality_analysis2.fit()
# print(spain_mortality_analysis2.death_rate())
# print(spain_mortality_analysis2.prepend_df['confirmed'].iloc[-1])
spain_mortality_analysis.plot()
spain_mortality_analysis.df.tail()
spain_mortality_analysis.project_death_and_hospitalization()
spain_mortality_analysis.project_death_and_hospitalization()
alternative_france_data = covid19.get_france_df()
alternative_france_data.tail()
france_mortality_analysis = covid19.MortalityAnalysis('France', first_date=pd.to_datetime('2020-02-15'), df=alternative_france_data)
france_mortality_analysis.fit()
print(france_mortality_analysis.print_death_rate())
# france_mortality_analysis2 = covid19.MortalityAnalysis('France', first_date=pd.to_datetime('2020-02-15'), init_add=500, mult=4)
# france_mortality_analysis2.fit()
# print(france_mortality_analysis2.death_rate())
# print(france_mortality_analysis2.prepend_df['confirmed'].iloc[-1])
france_mortality_analysis.plot()
france_mortality_analysis.plot_infection_and_death_curves()
france_mortality_analysis.df.tail()
date | note |
---|---|
2020-04-20 | expected_death (17296.0) < today_death(19718); death rate via survival analysis (15.36) << death rate via affine transform of curve estimates (18.7) |
2020-04-21 | expected_death (17520.0) < today_death(20265); death rate via survival analysis (15.28) << death rate via affine transform of curve estimates (18.9) |
2020-04-24 | expected_death (19208.0) < today_death(21856); death rate via survival analysis (15.9) << death rate via affine transform of curve estimates (19.2) |
france_mortality_analysis.project_death_and_hospitalization()
france_mortality_analysis.project_death_and_hospitalization()
import rpy2
print(rpy2.__version__)
import rpy2.robjects.packages as rpackages
baseR = rpackages.importr('base')
print(baseR.R_Version().rx('version.string'))
# from rpy2.rinterface import R_VERSION_BUILD
# print(R_VERSION_BUILD)
import IPython.display
import rpy2, rpy2.robjects, rpy2.robjects.pandas2ri, rpy2.rinterface, rpy2.robjects.packages, rpy2.interactive, rpy2.robjects.lib.ggplot2, rpy2.robjects.lib.grdevices
rpy2.robjects.pandas2ri.activate()
from rpy2.robjects.packages import importr
# import R's "base" package
base = importr('base')
# import rpy2's package module
import rpy2.robjects.packages as rpackages
# import R's utility package
utils = rpackages.importr('utils')
# select a mirror for R packages
utils.chooseCRANmirror(ind=1) # select the first mirror in the list
# R package names
packnames = ('LexisPlotR',)
# R vector of strings
from rpy2.robjects.vectors import StrVector
grdevices = rpy2.robjects.packages.importr('grDevices')
# Selectively install what needs to be install.
# We are fancy, just because we can.
names_to_install = [x for x in packnames if not rpackages.isinstalled(x)]
if len(names_to_install) > 0:
utils.install_packages(StrVector(names_to_install))
lexis = importr('LexisPlotR')
lexis
lexis_grid = rpy2.robjects.r['lexis.grid']
lexis_lifeline = rpy2.robjects.r['lexis.lifeline']
def plot_lexis(mortality_analysis_instance):
mylexis = lexis_grid(year_start = 2020, year_end = 2021, age_start = 0, age_end = 1) # lwd = 0.1
alpha = 1.0
ix_present = ~mortality_analysis_instance.df_lifelines_individual.observed_death
ix_lost = mortality_analysis_instance.df_lifelines_individual.observed_death
mylexis = lexis_lifeline(lg = mylexis , entry = mortality_analysis_instance.df_lifelines_individual['start_date'][ix_present], exit = mortality_analysis_instance.df_lifelines_individual['end_date'][ix_present], colour = "orange", alpha = alpha, lwd = 0.4)
mylexis = lexis_lifeline(lg = mylexis , entry = mortality_analysis_instance.df_lifelines_individual['start_date'][ix_lost] , exit = mortality_analysis_instance.df_lifelines_individual['end_date'][ix_lost] , colour = "blue" , alpha = alpha, lwd = 0.4, lineends = True)
with rpy2.robjects.lib.grdevices.render_to_bytesio(grdevices.png, width=1.5*1024, height=1.5*896, res=90) as img:
rpy2.robjects.r.print(mylexis)
IPython.display.display(IPython.display.Image(data=img.getvalue(), format='png', embed=True))
# plot_lexis(italy_mortality_analysis)
# plot_lexis(italy_mortality_analysis2)
plot_lexis(south_korea_mortality_analysis)
plot_lexis(germany_mortality_analysis)
plot_lexis(france_mortality_analysis)