#!/usr/bin/env python
# coding: utf-8

# # Loading libraries and data

# In[124]:

import configparser

import altair as alt
import pandas as pd
from github import Github

# In[176]:

# The GitHub API key is read from the [corona] section of secrets.ini.
# ConfigParser.read() silently skips files it cannot open, so check its return
# value to fail with a clear message instead of a later KeyError.
config = configparser.ConfigParser()
if not config.read('secrets.ini'):
    raise FileNotFoundError("secrets.ini not found or not readable")

# In[177]:

g = Github(config['corona']['api_key'])

# In[209]:

# Cumulative counts at which a location's curve starts; also the value every
# curve is rescaled to at its starting point.
START_CASES_INFECTION = 100
START_CASES_DEATH = 10

# In[210]:

URL = "https://covid.ourworldindata.org/data/ecdc/full_data.csv"

# In[211]:

df = pd.read_csv(URL)

# In[212]:

df['date'] = pd.to_datetime(df['date'])

# In[213]:

df = df.sort_values(['location', 'date'], ascending=True)

# In[214]:

df.shape[0]

# In[215]:

df.head()

# In[216]:

df.info()

# # Date of data

# In[217]:

df.date.max()

# # Creating data for graphs

# In[218]:

# Only the cumulative totals are used below.
df = df.drop(columns=['new_cases', 'new_deaths'])

# In[219]:

# Keep, per location, only the rows at or above the starting threshold.
# .copy() so the columns added below do not write into a view of df.
df_infection = df[df.total_cases >= START_CASES_INFECTION].copy()
df_death = df[df.total_deaths >= START_CASES_DEATH].copy()

# In[220]:

df_infection.head()

# In[221]:

df_infection[df_infection.location == 'Austria'].tail(3)

# In[222]:

df_death.head()

# In[223]:

# Rescale each location's curve so that its smallest retained value maps
# exactly to the threshold; the result is truncated to an integer.
infection_baseline = df_infection.groupby("location")['total_cases'].transform('min')
df_infection['total_cases_normalized'] = (
    df_infection['total_cases'] / infection_baseline * START_CASES_INFECTION
).astype("int")

death_baseline = df_death.groupby("location")['total_deaths'].transform('min')
df_death['total_deaths_normalized'] = (
    df_death['total_deaths'] / death_baseline * START_CASES_DEATH
).astype("int")

# In[224]:

# Calendar days since each location's first retained date. Using the date
# difference (rather than a running row count) keeps the value correct when a
# location has no row for some dates; with gap-free data both give the same
# numbers.
infection_first_date = df_infection.groupby("location")['date'].transform('min')
df_infection["days_after"] = (df_infection['date'] - infection_first_date).dt.days

death_first_date = df_death.groupby("location")['date'].transform('min')
df_death["days_after"] = (df_death['date'] - death_first_date).dt.days

# In[225]:

df_infection.head()

# In[226]:

df_death.head()

# In[227]:

# Snapshot of the most recent date in the data, keeping the total case count
# under the column name 'cases'.
df_summary = df[df.date == df.date.max()].drop(columns='date')
df_summary['cases'] = df_summary.total_cases
df_summary = df_summary.drop(columns=['total_cases', 'total_deaths'])

# In[228]:

df_summary.head()

# In[229]:

df_infection.location.unique()

# In[230]:
df_death.location.unique()

# In[231]:

# Locations with the 34 largest total_cases values on the most recent date.
latest_rows = df[df.date == df.date.max()]
COUNTRIES = set(latest_rows.nlargest(columns='total_cases', n=34).location)
# 'World' is presumably the global aggregate row; discard() (unlike remove())
# does not raise if it is not among the selected locations.
COUNTRIES.discard('World')

# # Altair

# ## Storing data locally

# In[232]:

url_infection = 'data_infection.json'
url_death = 'data_death.json'
url_summary = 'data_summary.json'

df_infection[df_infection.location.isin(COUNTRIES)].to_json(url_infection, orient='records')
df_death[df_death.location.isin(COUNTRIES)].to_json(url_death, orient='records')
df_summary[df_summary.location.isin(COUNTRIES)].to_json(url_summary, orient='records')

# ## Uploading data to github

# In[202]:

repo = g.get_repo("Datenspieler/notebooks_for_blog")

# In[203]:

FILELIST = ['data_infection.json', 'data_death.json', 'data_summary.json']

# In[236]:

# The commit message is the same for every file, so build it once.
commit_message = "Update data as of " + df.date.max().strftime('%Y-%m-%d')

for file_for_upload in FILELIST:
    print('Uploading', file_for_upload, end=' - ')
    with open(file_for_upload, encoding='utf-8') as f:
        data = f.read()
    remote_path = "2020-corona/" + file_for_upload
    # update_file needs the sha of the file currently stored in the repo.
    contents = repo.get_contents(remote_path)
    repo.update_file(contents.path, commit_message, data, contents.sha, branch="master")
    # Fetch the file again to report its modification time after the update.
    contents = repo.get_contents(remote_path)
    print(contents.last_modified)

# ## Preparing link to data in github

# In[60]:

# Raw-content URLs of the files uploaded above.
BASEURL = 'https://raw.githubusercontent.com/Datenspieler/notebooks_for_blog/master/2020-corona/'

url_infection = BASEURL + url_infection
url_death = BASEURL + url_death
url_summary = BASEURL + url_summary

# ## Alternatively local version

# In[233]:

# NOTE: when the file runs top to bottom these assignments replace the URLs
# set just above, so the charts are built from the in-memory data frames.
url_infection = df_infection[df_infection.location.isin(COUNTRIES)]
url_death = df_death[df_death.location.isin(COUNTRIES)]
url_summary = df_summary[df_summary.location.isin(COUNTRIES)]

# ## Plotting data

# In[235]:

# Selection shared by all charts: the location nearest to the mouse pointer.
highlight = alt.selection(type='single', on='mouseover',
                          fields=['location'], nearest=True)

base_infection = alt.Chart(url_infection).encode(
    alt.X('days_after', type='quantitative',
          scale=alt.Scale(domain=[0,30], type='ordinal'),
          title='Days since the 100th confirmed infection'),
    alt.Y('total_cases_normalized', type='quantitative',
          scale=alt.Scale(type='log', base=10),
          title='Total confirmed infections of COVID-19, normalized'),
    alt.Color('location:N', title="Country", legend=None),
    alt.Tooltip(['location:N', 'total_cases:Q', 'date:T'])
).properties(
    title='Development of Corona infections',
)

base_death = alt.Chart(url_death).encode(
    alt.X('days_after', type='quantitative',
          scale=alt.Scale(domain=[0,30], type='ordinal'),
          title='Days since the 10th confirmed death'),
    alt.Y('total_deaths_normalized', type='quantitative',
          scale=alt.Scale(type='log', base=10),
          title='Total confirmed deaths of COVID-19, normalized'),
    alt.Color('location:N', title="Country", legend=None),
    alt.Tooltip(['location:N', 'total_deaths:Q', 'date:T'])
).properties(
    title='Development of Corona deaths',
)

# Bar chart of total cases per location; bars outside the current selection
# are drawn grey.
base_summary = alt.Chart(url_summary).mark_bar().encode(
    x = alt.X('cases', type='quantitative', title='Confirmed cases',
              scale=alt.Scale(type='linear')),
    y = alt.Y('location:N', title='Country'),
    color = alt.condition(highlight,
                          alt.Color('location:N', title="Country", legend=None),
                          alt.ColorValue("grey")),
    tooltip = alt.Tooltip(['location:N', 'cases:Q'])
).add_selection(
    highlight
).properties(
    width=200,
    title='Corona cases by country'
)

# Fully transparent points (opacity 0) that carry the selection and the
# zoom/pan interaction for the line charts.
points_infection = base_infection.mark_circle().encode(
    opacity=alt.value(0)
).add_selection(
    highlight
).properties(
    width=600
).interactive(
    bind_y = False
)

points_death = base_death.mark_circle().encode(
    opacity=alt.value(0)
).add_selection(
    highlight
).properties(
    width=600
).interactive(
    bind_y = False
)

# Lines are drawn with size 5 for the selected location and size 1 otherwise.
lines_infection = base_infection.mark_line().encode(
    size=alt.condition(~highlight, alt.value(1), alt.value(5), legend=None)
)

lines_death = base_death.mark_line().encode(
    size=alt.condition(~highlight, alt.value(1), alt.value(5), legend=None)
)

# Infection chart stacked above the death chart.
chart = alt.vconcat(points_infection + lines_infection, points_death + lines_death)
# Put the summary bar chart to the left of the stacked line charts
# (`|` is Altair's operator form of horizontal concatenation).
chart = base_summary | chart

#chart.save('corona.html')
# Write the chart specification to disk as JSON.
chart.save('corona.json')

# Bare expression: displays the chart when run as a notebook cell.
chart

# In[ ]: