In [1]:
%load_ext watermark
%watermark -a 'cs224' -u -d -v -p numpy,xarray,scipy,pandas,sklearn,matplotlib,seaborn,pymc3
cs224 
last updated: 2020-04-10 

CPython 3.6.10
IPython 7.13.0

numpy 1.18.1
xarray 0.15.0
scipy 1.4.1
pandas 1.0.2
sklearn 0.22.1
matplotlib 3.1.3
seaborn 0.10.0
pymc3 3.8
In [2]:
%matplotlib inline
import numpy as np, scipy, scipy.stats as stats, scipy.special, scipy.misc, pandas as pd, matplotlib.pyplot as plt, seaborn as sns, xarray as xr
import matplotlib as mpl

import pymc3 as pm

import theano as thno
import theano.tensor as T

import sklearn, sklearn.linear_model

import datetime, time, math
from dateutil import relativedelta

from collections import OrderedDict

# Reproducibility: seed NumPy's legacy global random number generator.
SEED = 42
np.random.seed(SEED)

# pandas display: show up to 500 columns and use a 1000-character line width.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# pd.set_option('display.float_format', lambda x: '%.2f' % x)

# NumPy array printing: more edge items, long lines, no scientific notation
# for small values.
np.set_printoptions(edgeitems=10, linewidth=1000, suppress=True)
# NOTE(review): this writes a private attribute of numpy's print module;
# whether it still has any effect on this NumPy version is unconfirmed.
np.core.arrayprint._line_width = 180

# Apply seaborn's default plot styling.
sns.set()
In [3]:
from IPython.display import display, HTML

from IPython.display import display_html
def display_side_by_side(*args):
    """Render several tables next to each other in the notebook output.

    Parameters
    ----------
    *args
        Objects to show. Each is either a ``numpy.ndarray`` (wrapped in a
        ``pandas.DataFrame`` first) or an object providing ``to_html()``,
        such as a ``pandas.DataFrame``.

    Returns
    -------
    None
        The combined HTML is passed to ``display_html(..., raw=True)``.
    """
    fragments = []
    for df in args:
        # isinstance also accepts ndarray subclasses, which an exact type
        # comparison would let fall through to a failing ``to_html`` call.
        if isinstance(df, np.ndarray):
            df = pd.DataFrame(df)
        fragments.append(df.to_html())
    # Style only the opening <table ...> tags. Replacing the bare word
    # 'table' would also rewrite the closing tags (producing invalid
    # '</table style=...>') and any cell text that contains "table".
    html_str = ''.join(fragments).replace('<table', '<table style="display:inline"')
    display_html(html_str, raw=True)

CSS = """
.output {
    flex-direction: row;
}
"""

def display_graphs_side_by_side(*args):
    """Show several SVG-renderable objects in one row of an HTML table.

    Every argument must provide ``_repr_svg_()``; each SVG string is placed
    in its own ``<td>`` cell and the resulting single-row table is handed to
    ``display_html(..., raw=True)``.
    """
    cells = ''.join('<td>' + graph._repr_svg_() + '</td>' for graph in args)
    display_html('<table><tr>' + cells + '</tr></table>', raw=True)
    

display(HTML("<style>.container { width:70% !important; }</style>"))
In [4]:
%load_ext autoreload
%autoreload 1
%aimport covid19

Data Source: CSSEGISandData/COVID-19, the data behind the dashboard: Coronavirus COVID-19 Global Cases by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University (JHU).

It seems that there are currently data changes in progress upstream that cause issues. This mostly affects the US data, so I have commented out the US cells. The US data as used in this notebook is currently wrong.

In [5]:
covid19.time_series_19_covid_confirmed.head()
Out[5]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 1/28/20 1/29/20 1/30/20 1/31/20 2/1/20 2/2/20 2/3/20 2/4/20 2/5/20 2/6/20 2/7/20 2/8/20 2/9/20 2/10/20 2/11/20 2/12/20 2/13/20 2/14/20 2/15/20 2/16/20 2/17/20 2/18/20 2/19/20 2/20/20 2/21/20 2/22/20 2/23/20 2/24/20 2/25/20 2/26/20 2/27/20 2/28/20 2/29/20 3/1/20 3/2/20 3/3/20 3/4/20 3/5/20 3/6/20 3/7/20 3/8/20 3/9/20 3/10/20 3/11/20 3/12/20 3/13/20 3/14/20 3/15/20 3/16/20 3/17/20 3/18/20 3/19/20 3/20/20 3/21/20 3/22/20 3/23/20 3/24/20 3/25/20 3/26/20 3/27/20 3/28/20 3/29/20 3/30/20 3/31/20 4/1/20 4/2/20 4/3/20 4/4/20 4/5/20 4/6/20 4/7/20 4/8/20 4/9/20
0 NaN Afghanistan 33.0000 65.0000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 4 4 5 7 7 7 11 16 21 22 22 22 24 24 40 40 74 84 94 110 110 120 170 174 237 273 281 299 349 367 423 444 484
1 NaN Albania 41.1533 20.1683 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 10 12 23 33 38 42 51 55 59 64 70 76 89 104 123 146 174 186 197 212 223 243 259 277 304 333 361 377 383 400 409
2 NaN Algeria 28.0339 1.6596 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 3 5 12 12 17 17 19 20 20 20 24 26 37 48 54 60 74 87 90 139 201 230 264 302 367 409 454 511 584 716 847 986 1171 1251 1320 1423 1468 1572 1666
3 NaN Andorra 42.5063 1.5218 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 39 39 53 75 88 113 133 164 188 224 267 308 334 370 376 390 428 439 466 501 525 545 564 583
4 NaN Angola -11.2027 17.8739 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 2 3 3 3 4 4 5 7 7 7 8 8 8 10 14 16 17 19 19
In [6]:
covid19.time_series_19_covid_recovered.head()
Out[6]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 1/28/20 1/29/20 1/30/20 1/31/20 2/1/20 2/2/20 2/3/20 2/4/20 2/5/20 2/6/20 2/7/20 2/8/20 2/9/20 2/10/20 2/11/20 2/12/20 2/13/20 2/14/20 2/15/20 2/16/20 2/17/20 2/18/20 2/19/20 2/20/20 2/21/20 2/22/20 2/23/20 2/24/20 2/25/20 2/26/20 2/27/20 2/28/20 2/29/20 3/1/20 3/2/20 3/3/20 3/4/20 3/5/20 3/6/20 3/7/20 3/8/20 3/9/20 3/10/20 3/11/20 3/12/20 3/13/20 3/14/20 3/15/20 3/16/20 3/17/20 3/18/20 3/19/20 3/20/20 3/21/20 3/22/20 3/23/20 3/24/20 3/25/20 3/26/20 3/27/20 3/28/20 3/29/20 3/30/20 3/31/20 4/1/20 4/2/20 4/3/20 4/4/20 4/5/20 4/6/20 4/7/20 4/8/20 4/9/20
0 NaN Afghanistan 33.0000 65.0000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 5 5 10 10 10 15 18 18 29 32
1 NaN Albania 41.1533 20.1683 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 10 17 17 31 31 33 44 52 67 76 89 99 104 116 131 154 165
2 NaN Algeria 28.0339 1.6596 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 8 12 12 12 12 12 32 32 32 65 65 24 65 29 29 31 31 37 46 61 61 62 90 90 90 113 237 347
3 NaN Andorra 42.5063 1.5218 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 10 10 10 10 16 21 26 31 39 52 58
4 NaN Angola -11.2027 17.8739 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 2 2 2 2 2
In [7]:
list(covid19.augment_time_series_from_daily_snapshots_date_range)
Out[7]:
[]
In [8]:
# ldf = covid19.augment_time_series_from_daily_snapshots(covid19.augment_time_series_from_daily_snapshots_date_range)
# ldf
In [9]:
# ldf = covid19.augment_time_series_from_daily_snapshots(covid19.augment_time_series_from_daily_snapshots_date_range)[0][['Province/State', 'Country/Region', '3/15/20']]
# # # ldf[ldf['3/14/20'] < 0]
# ldf[ldf['Country/Region'] == 'Spain']
In [10]:
# ldf_confirmed, ldf_recovered, ldf_death, columns, selector = covid19.get_cases_by_region()
In [11]:
df = covid19.get_cases_by_region()
df.tail()
Out[11]:
confirmed recovered death new_confirmed new_recovered new_death
2020-04-05 95950 26469 1452 4636 325 133
2020-04-06 98945 36081 1578 2995 9612 126
2020-04-07 103036 38287 1814 4091 2206 236
2020-04-08 108193 43656 2070 5157 5369 256
2020-04-09 112638 52407 2312 4445 8751 242
In [12]:
df = covid19.get_cases_by_region(region='China')
df.tail()
Out[12]:
confirmed recovered death new_confirmed new_recovered new_death
2020-04-05 82602 77207 3333 59 261 3
2020-04-06 82665 77310 3335 63 103 2
2020-04-07 82718 77410 3335 53 100 0
2020-04-08 82809 77567 3337 91 157 2
2020-04-09 82883 77679 3339 74 112 2
In [13]:
# df = covid19.get_cases_by_region(region='Switzerland')
# df
In [14]:
# df = covid19.get_cases_by_region(region='US')
# df
In [15]:
cbr_germany = covid19.CasesByRegion('Germany')
cbr_germany.tail()
Out[15]:
confirmed recovered death new_confirmed new_recovered new_death
2020-04-05 95950 26469 1452 4636 325 133
2020-04-06 98945 36081 1578 2995 9612 126
2020-04-07 103036 38287 1814 4091 2206 236
2020-04-08 108193 43656 2070 5157 5369 256
2020-04-09 112638 52407 2312 4445 8751 242
In [16]:
cbr_germany.plot_daily_stats()
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f2072660dd8>
In [17]:
# Fit a straight line to Germany's daily new confirmed cases over the
# calibration window 2020-03-09 .. 2020-03-16; x is the day offset within
# that window.
date_range = pd.date_range(start=pd.to_datetime('2020-03-09'), end=pd.to_datetime('2020-03-16'))
cbr_germany_new_confirmed_df = cbr_germany.df.loc[date_range, ['new_confirmed']]
x = np.arange(float(len(cbr_germany_new_confirmed_df)))

regressor = sklearn.linear_model.LinearRegression()
regressor.fit(
    x.reshape(-1, 1),
    cbr_germany_new_confirmed_df.new_confirmed.values.reshape(-1, 1) * 1.0,
)

# Evaluate the fitted line from the window start up to the last day in the data.
last_day = cbr_germany.df.index[-1]
ldr = pd.date_range(start=pd.to_datetime('2020-03-09'), end=pd.to_datetime(last_day))
x = np.arange(float(len(ldr)))

intercept, slope = regressor.intercept_[0], regressor.coef_[0, 0]
y = intercept + slope * x
cbr_germany_new_confirmed_lr_df = pd.DataFrame({'nc_lr': y}, index=ldr)

# Put observed values and the extrapolated line side by side, then keep only
# the last 20 days before (and including) the final day for plotting.
ldf = pd.concat([cbr_germany.df, cbr_germany_new_confirmed_lr_df], axis=1)
ldf.loc[:] = ldf.values * 1.0
ldf = ldf.loc[last_day - datetime.timedelta(days=20):, ['new_confirmed', 'nc_lr']]
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
ldf.plot(ax=ax)
print(intercept.round(2), slope.round(2))
135.0 184.0

COVID-19-Pandemie#Deutschland:

Am 22. März 2020 einigten sich Bund und Länder auf ein „umfassendes Kontaktverbot“, statt weitergehende Ausgangssperren zu beschließen.

| prediction date | predicted day for reaching 100 threshold | predicted max | growth rate | steady state rate |
|---|---|---|---|---|
| 2020-04-02 | 2020-04-21 | 92845 | 0.057 | - |
| 2020-04-03 | 2020-04-23 | 99774 | 0.054 | - |
| 2020-04-04 | 2020-04-25 | 107193 | 0.051 | - |
| 2020-04-05 | 2020-04-27 | 114511 | 0.049 | - |
| 2020-04-06 | 2020-04-29 | 119618 | 0.045 | - |
| 2020-04-07 | 2020-04-29 | 121354 | 0.039 | - |
| 2020-04-08 | 2020-04-30 | 123011 | 0.035 | - |
| 2020-04-09 | 2020-04-30 | 125833 | 0.031 | - |
| 2020-04-10 | - | - | 0.049 | 4820 |
In [18]:
# Parameters for the Germany fit; restriction_start_date is only used to
# draw the vertical marker in the plot below.
country_name = 'Germany'
first_date = pd.to_datetime('2020-03-09')
init_add = 0
restriction_start_date = datetime.datetime(2020, 3, 22)

ldf, lpopt, lpcov, lsqdiff, lgrowthRate, idx, label = covid19.prepare_country_prediction(
    country_name, first_date=first_date, init_add=init_add
)
# Only a four-parameter fit yields a steady-state rate (second times fourth
# fitted parameter); for any other fit it is reported as 0.0.
steady_state_rate = lpopt[1] * lpopt[3] if len(lpopt) == 4 else 0.0
print(label, ldf.index[-1], lpopt, lgrowthRate, steady_state_rate)

# Plot observed confirmed cases against the fitted curve.
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
ldf[['confirmed', label + '_fit']].plot(ax=ax, marker=mpl.path.Path.unit_circle(), markersize=5);
ax.axvline(restriction_start_date);
sigmoid+linear 2020-05-18 00:00:00 [32756.31569272     0.24542165    -3.72501745 19638.61483769] 0.04482483253939229 4819.741164710418
In [19]:
cbr_germany.plot_daily_stacked()
In [20]:
cbr_austria = covid19.CasesByRegion('Austria')
cbr_austria.tail()
Out[20]:
confirmed recovered death new_confirmed new_recovered new_death
2020-04-05 12227 2998 204 181 0 0
2020-04-06 12494 3463 220 267 465 16
2020-04-07 12812 4046 243 318 583 23
2020-04-08 13025 4512 273 213 466 30
2020-04-09 13248 5240 295 223 728 22

COVID-19-Pandemie#%C3%96sterreich

Nachdem Mitte März zuerst einzelne Gemeinden unter Quarantäne gestellt wurden, verlautbarte Bundeskanzler Sebastian Kurz am 15. März eine landesweite Ausgangsbeschränkung, verbunden mit einer Einschränkung der Versammlungsfreiheit auf bis zu fünf Personen.

2020-03-15

In [21]:
cbr_austria.plot_daily_stats()
Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f20719905f8>
| prediction date | predicted day for reaching 100 threshold | predicted max | growth rate |
|---|---|---|---|
| 2020-04-02 | 2020-04-12 | 13859 | 0.042 |
| 2020-04-03 | 2020-04-12 | 13864 | 0.036 |
| 2020-04-04 | 2020-04-12 | 14082 | 0.030 |
| 2020-04-05 | 2020-04-12 | 13978 | 0.025 |
| 2020-04-06 | 2020-04-11 | 13947 | 0.020 |
| 2020-04-07 | 2020-04-12 | 14033 | 0.017 |
| 2020-04-08 | 2020-04-12 | 14121 | 0.014 |
| 2020-04-09 | 2020-04-12 | 14229 | 0.012 |
| 2020-04-10 | 2020-04-12 | 14350 | 0.010 |
In [22]:
# country_name, first_date, init_add, restriction_start_date = 'Austria', pd.to_datetime('2020-02-19'), 600, datetime.datetime(2020, 3, 15)
# Parameters for the Austria fit; restriction_start_date is only used to
# draw the vertical marker in the plot below.
country_name = 'Austria'
first_date = pd.to_datetime('2020-03-12')
init_add = 600
restriction_start_date = datetime.datetime(2020, 3, 15)

ldf, lpopt, lpcov, lsqdiff, lgrowthRate, idx, label = covid19.prepare_country_prediction(
    country_name, first_date=first_date, init_add=init_add
)
# Only a four-parameter fit yields a steady-state rate (second times fourth
# fitted parameter); for any other fit it is reported as 0.0.
steady_state_rate = lpopt[1] * lpopt[3] if len(lpopt) == 4 else 0.0
print(label, ldf.index[-1], lpopt, lgrowthRate, steady_state_rate)

# Plot observed confirmed cases against the fitted curve.
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
ldf[['confirmed', label + '_fit']].plot(ax=ax, marker=mpl.path.Path.unit_circle(), markersize=5);
ax.axvline(restriction_start_date);
sigmoid 2020-04-12 00:00:00 [14350.32961458     0.20569832    -2.90468878] 0.010223236574044892 0.0
In [23]:
cbr_austria.plot_daily_stacked()

COVID-19-Pandemie#Dänemark

Dänemark hat besonders früh mit strikten Maßnahmen auf die Corona-Krise reagiert, unter anderem sind seit dem 14. März die Grenzen für Ausländer ohne triftigen Einreisegrund geschlossen.

2020-03-14

In [24]:
# Parameters for the Denmark fit; restriction_start_date is only used to
# draw the vertical marker in the plot below.
country_name = 'Denmark'
first_date = pd.to_datetime('2020-03-09')
init_add = 0
restriction_start_date = datetime.datetime(2020, 3, 14)

ldf, lpopt, lpcov, lsqdiff, lgrowthRate, idx, label = covid19.prepare_country_prediction(
    country_name, first_date=first_date, init_add=init_add
)
# Only a four-parameter fit yields a steady-state rate (second times fourth
# fitted parameter); for any other fit it is reported as 0.0.
steady_state_rate = lpopt[1] * lpopt[3] if len(lpopt) == 4 else 0.0
print(label, ldf.index[-1], lpopt, lgrowthRate, steady_state_rate)

# Plot observed confirmed cases against the fitted curve.
fig, ax = plt.subplots(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
ldf[['confirmed', label + '_fit']].plot(ax=ax, marker=mpl.path.Path.unit_circle(), markersize=5);
ax.axvline(restriction_start_date);
sigmoid 2020-05-18 00:00:00 [20208.31681597     0.09299719    -3.85127036] 0.06879214790773136 0.0

There have been several issues in the input data since 2020-03-12/2020-03-13, as pointed out here: Italy has the same number of cases today as they did yesterday.

I am also trying to get the raw numbers that Coronavirus-Monitor is using for Germany from @datentaeterin. These numbers seem to be more up to date than all the other data sources.

In [25]:
# National Italian time series, read directly from the pcm-dpc/COVID-19
# GitHub repository (requires network access).
fname = 'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv'
italy_raw = pd.read_csv(fname)
# The 'data' column holds the timestamps; keep only the calendar date.
dates = pd.to_datetime(italy_raw['data']).dt.date

# Rename the source columns to the confirmed/recovered/death schema used by
# the other frames in this notebook and drop everything else. The raw frame
# is kept under its own name so this cell can be re-run safely.
alternative_italy_data = (
    italy_raw
    .rename(columns={"totale_casi": "confirmed", "deceduti": "death", "dimessi_guariti": "recovered"})
    .loc[:, ['confirmed', 'recovered', 'death']]
    .copy()
)
# Derive daily increments from the cumulative counts. The loop variable is
# deliberately not called `property`: at notebook scope that name would
# shadow the builtin for every later cell.
for column_name in ['confirmed', 'recovered', 'death']:
    cumulative = alternative_italy_data[column_name].values
    # Prepending the first value makes the first increment 0, keeps the
    # length equal to the frame, and also works for an empty frame.
    alternative_italy_data['new_' + column_name] = np.diff(cumulative, prepend=cumulative[:1])
alternative_italy_data.index = dates
alternative_italy_data.tail()
Out[25]:
confirmed recovered death new_confirmed new_recovered new_death
data
2020-04-05 128948 21815 15887 4316 819 525
2020-04-06 132547 22837 16523 3599 1022 636
2020-04-07 135586 24392 17127 3039 1555 604
2020-04-08 139422 26491 17669 3836 2099 542
2020-04-09 143626 28470 18279 4204 1979 610
In [26]:
cbr_italy = covid19.CasesByRegion('Italy', df=alternative_italy_data)
cbr_italy.tail()
Out[26]:
confirmed recovered death new_confirmed new_recovered new_death
data
2020-04-05 128948 21815 15887 4316 819 525
2020-04-06 132547 22837 16523 3599 1022 636
2020-04-07 135586 24392 17127 3039 1555 604
2020-04-08 139422 26491 17669 3836 2099 542
2020-04-09 143626 28470 18279 4204 1979 610
In [27]:
cbr_italy.plot_daily_stats()
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f20716ff278>
In [28]:
cbr_italy.plot_daily_stacked()
In [29]:
cbr_spain = covid19.CasesByRegion('Spain')
cbr_spain.tail()
Out[29]:
confirmed recovered death new_confirmed new_recovered new_death
2020-04-05 131646 38080 12641 5478 3861 694
2020-04-06 136675 40437 13341 5029 2357 700
2020-04-07 141942 43208 14045 5267 2771 704
2020-04-08 148220 48021 14792 6278 4813 747
2020-04-09 153222 52165 15447 5002 4144 655
In [30]:
cbr_spain.plot_daily_stats()
Out[30]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f206776c828>
In [31]:
cbr_spain.plot_daily_stacked()
In [32]:
cbr_france = covid19.CasesByRegion('France')
cbr_france.tail()
Out[32]:
confirmed recovered death new_confirmed new_recovered new_death
2020-04-05 93773 16349 8093 2925 777 519
2020-04-06 98963 17428 8926 5190 1079 833
2020-04-07 110065 19523 10343 11102 2095 1417
2020-04-08 113959 21452 10887 3894 1929 544
2020-04-09 118781 23413 12228 4822 1961 1341
In [33]:
cbr_france.plot_daily_stats()
Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f20674ccac8>
In [34]:
cbr_france.plot_daily_stacked()