#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_line_magic('load_ext', 'watermark') get_ipython().run_line_magic('watermark', "-a 'cs224' -u -d -v -p numpy,xarray,scipy,pandas,sklearn,matplotlib,seaborn,pymc3") # In[2]: get_ipython().run_line_magic('matplotlib', 'inline') import numpy as np, scipy, scipy.stats as stats, scipy.special, scipy.misc, pandas as pd, matplotlib.pyplot as plt, seaborn as sns, xarray as xr import matplotlib as mpl import pymc3 as pm import theano as thno import theano.tensor as T import sklearn, sklearn.linear_model import datetime, time, math from dateutil import relativedelta from collections import OrderedDict SEED = 42 np.random.seed(SEED) pd.set_option('display.max_columns', 500) pd.set_option('display.width', 1000) # pd.set_option('display.float_format', lambda x: '%.2f' % x) np.set_printoptions(edgeitems=10) np.set_printoptions(linewidth=1000) np.set_printoptions(suppress=True) np.core.arrayprint._line_width = 180 sns.set() # sns.set_style("whitegrid") # In[3]: from IPython.display import display, HTML from IPython.display import display_html def display_side_by_side(*args): html_str='' for df in args: if type(df) == np.ndarray: df = pd.DataFrame(df) html_str+=df.to_html() html_str = html_str.replace('table','table style="display:inline"') # print(html_str) display_html(html_str,raw=True) CSS = """ .output { flex-direction: row; } """ def display_graphs_side_by_side(*args): html_str='' for g in args: html_str += '' html_str += '
' html_str += g._repr_svg_() html_str += '
' display_html(html_str,raw=True) display(HTML("")) # In[4]: get_ipython().run_line_magic('load_ext', 'autoreload') get_ipython().run_line_magic('autoreload', '1') get_ipython().run_line_magic('aimport', 'covid19') # * [Corona-Hub von www.npgeo.de](https://npgeo-corona-npgeo-de.hub.arcgis.com/) # * [RKI COVID19](https://npgeo-corona-npgeo-de.hub.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0) # * [CSV](https://opendata.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0.csv) # * [Robert Koch-Institut: COVID-19-Dashboard](https://npgeo-corona-npgeo-de.hub.arcgis.com/app/478220a4c454480e823b17327b2bf1d4) # * [Priesemann-Group/covid19_inference_forecast](https://github.com/Priesemann-Group/covid19_inference_forecast/commit/4d34d342cd8e58c18b5107ce38c537d4e83bc561) # * [data_retrieval.py](https://github.com/Priesemann-Group/covid19_inference_forecast/blob/master/covid19_inference/data_retrieval.py) # * [data source description](https://www.arcgis.com/home/item.html?id=f10774f1c63e40168479a1feb6c7ca74) # * [DRP Austria Covid-19 Hub](https://covid-19-drp-austria.hub.arcgis.com/) # * [Dashboard](https://experience.arcgis.com/experience/fb603473e1f74f0bbae48155ff238565) # * [Daten](https://covid-19-drp-austria.hub.arcgis.com/search?categories=covid-19) # * [COVID19 VERLAUF BUNDESLAND](https://covid-19-drp-austria.hub.arcgis.com/datasets/covid19-verlauf-bundesland?orderBy=genesene&orderByAsc=false) # * [data.gv.at](https://www.data.gv.at/covid-19/) # In[5]: df = covid19.get_rki_df() cbr_germany = covid19.CasesByRegion('Germany', df=df) cbr_germany.tail() # In[6]: cbr_germany.plot_daily_stats() # In[7]: cbr_germany.fit(first_date=pd.to_datetime('2020-03-09')) fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_germany.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22)) # In[8]: # country_name, first_date, init_add, restriction_start_date = 'Germany', pd.to_datetime('2020-03-09'), 0, datetime.datetime(2020, 3, 22) # ldf, lpopt, lpcov, lsqdiff, lgrowthRate, idx, label = covid19.prepare_country_prediction(country_name, in_df=cbr_germany.df, first_date=first_date, init_add=init_add) # if len(lpopt) == 4: # steady_state_rate = lpopt[1] * lpopt[3] # else: # steady_state_rate = 0.0 # print(label, ldf.index[-1], lpopt, lgrowthRate, steady_state_rate) # fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') # ax = plt.subplot(1,1,1) # ldf[['confirmed', label + '_fit']].iloc[1:,:].plot(ax=ax, marker=mpl.path.Path.unit_circle(), markersize=5); # ax.axvline(restriction_start_date); # ax2 = ax.twinx() # ldf[[label + '_fit_diff']].iloc[1:,:].plot(ax=ax2, color=['steelblue']); # lbl = 'confirmed' + '_diff' # ldf[[lbl]].iloc[1:,:].reset_index().plot.scatter(ax=ax2, x = 'index', y = lbl, c='limegreen') # l = len(ax.get_yticks()) # a1 = ax.get_yticks()[0] # e1 = ax.get_yticks()[-1] # a2 = ax2.get_yticks()[0] # e2 = ax2.get_yticks()[-1] # ax.set_yticks(np.linspace(a1, e1, l)); # ax2.set_yticks(np.linspace(a2, e2, l)); # In[9]: cbr_germany.fit_df0[['fit_diff']].apply(['max']) # In[10]: cbr_germany.calculate_R_estimates() cbr_germany.R().round(3) # In[11]: fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_germany.plot_R(ax=ax) # , plot_start_date='2020-03-10' # In[12]: # -------------------------------------------------------------------------------------------------------------------------------- # -------------------------------------------------------------------------------------------------------------------------------- # In[13]: df = covid19.get_rki_df(state='Bayern') cbr_bavaria = covid19.CasesByRegion('Bavaria', df=df) cbr_bavaria.tail() # In[14]: einwohner_deutschland = 83019213.0 einwohner_bayern = 13076721.0 prozent_bayern = einwohner_bayern / einwohner_deutschland bavaria_new_confirmed_threshold = (100.0 * prozent_bayern) // 1 + 1 bavaria_new_confirmed_threshold # In[15]: cbr_bavaria.fit(first_date=pd.to_datetime('2020-03-09'), new_confirmed_threshold=bavaria_new_confirmed_threshold) fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_bavaria.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22)) # In[16]: cbr_bavaria.calculate_R_estimates() cbr_bavaria.R().round(3) # In[17]: fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_bavaria.plot_R(ax=ax) # , plot_start_date='2020-03-10' # In[18]: df = covid19.get_rki_df(state='Nordrhein-Westfalen') cbr_nrw = covid19.CasesByRegion('NRW', df=df) cbr_nrw.tail() # In[19]: einwohner_nrw = 17932651.0 prozent_nrw = einwohner_nrw / einwohner_deutschland nrw_new_confirmed_threshold = (100.0 * prozent_nrw) // 1 + 1 nrw_new_confirmed_threshold # In[20]: cbr_nrw.fit(first_date=pd.to_datetime('2020-03-09'), new_confirmed_threshold=nrw_new_confirmed_threshold) fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_nrw.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22)) # In[21]: cbr_nrw.calculate_R_estimates() cbr_nrw.R().round(3) # In[22]: fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_nrw.plot_R(ax=ax) # , plot_start_date='2020-03-10' # In[23]: covid19.rki_data_df.Bundesland.unique() # In[24]: df = covid19.get_rki_df(state='Baden-W') cbr_bw = covid19.CasesByRegion('BW', df=df) cbr_bw.tail() # In[25]: einwohner_bw = 11069533.0 prozent_bw = einwohner_bw / einwohner_deutschland bw_new_confirmed_threshold = (100.0 * prozent_bw) // 1 + 1 bw_new_confirmed_threshold # In[26]: cbr_bw.fit(first_date=pd.to_datetime('2020-03-09'), new_confirmed_threshold=bw_new_confirmed_threshold) fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_bw.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22)) # In[27]: cbr_bw.calculate_R_estimates() cbr_bw.R().round(3) # In[28]: fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_bw.plot_R(ax=ax) # , plot_start_date='2020-03-10' # In[29]: df = covid19.get_rki_df(county='LK Traunstein') cbr_traunstein = covid19.CasesByRegion('LK Traunstein', df=df) cbr_traunstein.tail() # In[30]: tage_inzidenz = 10 # In[31]: # https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/Administrativ/04-kreise.html einwohner_lk_traunstein = 177089 einwohner_sk_regensburg = 152610 einwohner_lk_regensburg = 193572 # In[32]: tage_inzidenz_lk_traunstein = cbr_traunstein.df['new_confirmed'][-tage_inzidenz:].sum() round(tage_inzidenz_lk_traunstein,1), round(tage_inzidenz_lk_traunstein / einwohner_lk_traunstein / tage_inzidenz * 100000,1), round(tage_inzidenz_lk_traunstein / einwohner_lk_traunstein / tage_inzidenz * 100000 * 7,1) # In[33]: cbr_traunstein.plot_daily_stats() # In[34]: cbr_traunstein.fit(first_date=pd.to_datetime('2020-03-09')) fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_traunstein.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22)) # In[35]: cbr_traunstein.calculate_R_estimates() cbr_traunstein.R().round(3) # In[36]: fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_traunstein.plot_R(ax=ax) # , plot_start_date='2020-03-10' # In[37]: df = covid19.get_rki_df(county='SK Regensburg') cbr_sk_regensburg = covid19.CasesByRegion('SK Regensburg', df=df) cbr_sk_regensburg.tail() # In[38]: tage_inzidenz_sk_regensburg = cbr_sk_regensburg.df['new_confirmed'][-tage_inzidenz:].sum() round(tage_inzidenz_sk_regensburg,1), round(tage_inzidenz_sk_regensburg / einwohner_sk_regensburg / tage_inzidenz * 100000, 1), round(tage_inzidenz_sk_regensburg / einwohner_sk_regensburg / tage_inzidenz * 100000 * 7, 1) # In[39]: cbr_sk_regensburg.plot_daily_stats() # In[40]: df = covid19.get_rki_df(county='LK Regensburg') cbr_lk_regensburg = covid19.CasesByRegion('LK Regensburg', df=df) cbr_lk_regensburg.tail() # In[41]: tage_inzidenz_lk_regensburg = cbr_lk_regensburg.df['new_confirmed'][-tage_inzidenz:].sum() tage_inzidenz_lk_regensburg, round(tage_inzidenz_lk_regensburg / einwohner_lk_regensburg / tage_inzidenz * 100000, 1), round(tage_inzidenz_lk_regensburg / einwohner_lk_regensburg / tage_inzidenz * 100000 * 7, 1) # In[42]: cbr_lk_regensburg.plot_daily_stats() # In[43]: # -------------------------------------------------------------------------------------------------------------------------------- # -------------------------------------------------------------------------------------------------------------------------------- # In[44]: df = covid19.get_rki_df(time_anchor_column_name='Meldedatum') cbr_germany2 = covid19.CasesByRegion('Germany', df=df) cbr_germany2.tail() # In[45]: cbr_germany2.plot_daily_stats(days=60) # In[46]: cbr_germany2.fit(first_date=pd.to_datetime('2020-03-09')) fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_germany2.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22)) # In[47]: cbr_germany2.fit_df0[['fit_diff']].apply(['max']) # In[48]: cbr_germany2.calculate_R_estimates() cbr_germany2.R().round(3) # In[49]: fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_germany2.plot_R(ax=ax) # , plot_start_date='2020-03-10' # In[50]: cbr_germany3 = covid19.CasesByRegion('Germany') cbr_germany3.tail() # In[51]: cbr_germany3.plot_daily_stats(days=60) # In[52]: cbr_germany3.fit(first_date=pd.to_datetime('2020-03-09')) fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_germany3.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22)) # In[53]: cbr_germany3.calculate_R_estimates() cbr_germany3.R().round(3) # In[54]: fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_germany3.plot_R(ax=ax) # , plot_start_date='2020-03-10' # In[55]: import requests # In[56]: # https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Projekte_RKI/Nowcasting.html # rki_nowcasting_data_url = 'https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Projekte_RKI/Nowcasting_Zahlen.xlsx' rki_nowcasting_data_url = 'https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Projekte_RKI/Nowcasting_Zahlen.xlsx?__blob=publicationFile' r = requests.get(rki_nowcasting_data_url, allow_redirects=True) # to get content after redirection # r.url, r.content xd = pd.ExcelFile(r.content) rki_nowcasting_df_ = xd.parse(xd.sheet_names[-1]) rki_nowcasting_df_.to_excel("rki_nowcasting_data.xlsx") rki_nowcasting_df_.tail() # In[57]: rki_nowcasting_df = rki_nowcasting_df_[['Datum des Erkrankungsbeginns', 'Punktschätzer der Anzahl Neuerkrankungen (ohne Glättung)', 'Punktschätzer der Anzahl Neuerkrankungen']].copy() rki_nowcasting_df.columns = ['index', 'new_confirmed', 'new_confirmed_'] rki_nowcasting_df['index'] = pd.to_datetime(rki_nowcasting_df['index']) rki_nowcasting_df = rki_nowcasting_df.set_index('index') last_date = rki_nowcasting_df.index[-1] + pd.DateOffset(days=1) last_value = cbr_germany.df['confirmed'].loc[last_date] # last_date, last_value ldf = pd.DataFrame(index=rki_nowcasting_df.index) ldf['confirmed'] = last_value - rki_nowcasting_df['new_confirmed'].values[::-1].cumsum()[::-1] ldf.loc[last_date] = [last_value] ldf['recovered'] = 0 ldf['death'] = 0 ldf['new_confirmed'] = covid19.discrete_diff(ldf['confirmed']) ldf['new_recovered'] = covid19.discrete_diff(ldf['recovered']) ldf['new_death'] = covid19.discrete_diff(ldf['death']) rki_nowcasting_df = ldf.copy() cbr_germany4 = covid19.CasesByRegion('Germany', df=rki_nowcasting_df) cbr_germany4.tail() # In[58]: cbr_germany4.plot_daily_stats(days=60) # In[59]: cbr_germany4.fit(first_date=pd.to_datetime('2020-03-09')) fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_germany4.plot_with_fits(ax=ax, restriction_start_date=datetime.datetime(2020, 3, 22)) # In[60]: cbr_germany4.calculate_R_estimates() cbr_germany4.R().round(3) # In[61]: fig = plt.figure(figsize=(32,8), dpi=80, facecolor='w', edgecolor='k') ax = plt.subplot(1,1,1) cbr_germany4.plot_R(ax=ax) # , plot_start_date='2020-03-10' # In[62]: ldf = cbr_germany.df.loc[cbr_germany4.df.index, ['new_confirmed']] ldf['new_confirmed_nc'] = cbr_germany4.df['new_confirmed'] ldf['delta'] = ldf['new_confirmed_nc'] - ldf['new_confirmed'] ldf = ldf.astype(np.int) ldf.iloc[-30:] # In[ ]: