import pandas as pd
import altair as alt
from github import Github
import configparser
# Load the local credentials file. ConfigParser.read() silently skips files it
# cannot find, so a missing 'secrets.ini' only shows up as a KeyError on the
# section lookup below.
config = configparser.ConfigParser()
config.read('secrets.ini')

# Authenticated GitHub client; used further down to publish the JSON exports.
g = Github(config['corona']['api_key'])
# Cumulative counts from which a location's curve is tracked (its "day 0").
START_CASES_INFECTION = 100
START_CASES_DEATH = 10

# CSV with one row per location and date (columns used below: date, location,
# new_cases, new_deaths, total_cases, total_deaths).
URL = "https://covid.ourworldindata.org/data/ecdc/full_data.csv"

# Download, parse the date column and order rows by location, then by date.
# The chronological order within each location is relied upon later when the
# rows are numbered with cumcount().
df = (
    pd.read_csv(URL)
    .assign(date=lambda raw: pd.to_datetime(raw.date))
    .sort_values(['location', 'date'], ascending=True)
)

# Notebook-style sanity checks; when run as a plain script only df.info()
# produces output.
df.shape[0]
df.head()
df.info()
df.date.max()

# Only the cumulative totals are used from here on.
df = df.drop(columns=['new_cases', 'new_deaths'])
# Keep only the rows whose cumulative count has reached the respective
# threshold; .copy() makes the results independent frames so that columns can
# be added to them later without touching `df`.
df_infection = df.loc[df.total_cases >= START_CASES_INFECTION].copy()
df_death = df.loc[df.total_deaths >= START_CASES_DEATH].copy()

# Notebook-style spot checks (no effect when run as a script).
df_infection.head()
df_infection.loc[df_infection.location == 'Austria'].tail(3)
df_death.head()
# Rescale each location's curve so that its smallest retained value maps to
# the threshold (START_CASES_INFECTION / START_CASES_DEATH), which makes the
# curves of different locations start from the same level.
#
# The scaling is computed as `value * threshold // minimum` instead of
# `value / minimum * threshold` followed by an int cast: the latter truncates
# floating-point artefacts (e.g. 115 / 100 * 100 == 114.99999999999999 -> 114),
# while multiplying first and floor-dividing yields the exact floored value.
df_infection['total_cases_normalized'] = (
    df_infection.groupby("location")['total_cases']
    .transform(lambda cases: cases * START_CASES_INFECTION // cases.min())
    .astype("int")
)
df_death['total_deaths_normalized'] = (
    df_death.groupby("location")['total_deaths']
    .transform(lambda deaths: deaths * START_CASES_DEATH // deaths.min())
    .astype("int")
)
# Number the rows of each location 0, 1, 2, ... in their current order.
# NOTE(review): this counts rows, not calendar days - it equals "days since the
# threshold" only if the source has exactly one row per location and day.
for frame in (df_infection, df_death):
    frame["days_after"] = frame.groupby("location").cumcount()

# Notebook-style spot checks (no effect when run as a script).
df_infection.head()
df_death.head()
# One row per location for the most recent date in the data, reduced to the
# columns `location` and `cases` (a copy of total_cases under a shorter name).
df_summary = (
    df.loc[df.date == df.date.max()]
    .drop(columns='date')
    .assign(cases=lambda latest: latest.total_cases)
    .drop(columns=['total_cases', 'total_deaths'])
)

# Notebook-style spot checks (no effect when run as a script).
df_summary.head()
df_infection.location.unique()
df_death.location.unique()
# Locations shown in the charts: the 34 rows with the highest total_cases on
# the most recent date in the data, without the 'World' aggregate.
# (No sorting is needed before building a set - a set has no order.)
COUNTRIES = set(
    df[df.date == df.date.max()].nlargest(n=34, columns='total_cases').location
)
# discard() rather than remove(): the script must not die with a KeyError if
# the 'World' row is absent from the selection (e.g. the source drops or
# renames the aggregate, or it has no row for the latest date).
COUNTRIES.discard('World')
# Local file names of the JSON exports. These names are re-used (and rebound)
# further down in the script.
url_infection = 'data_infection.json'
url_death = 'data_death.json'
url_summary = 'data_summary.json'

# Write each frame, restricted to the selected locations, as a JSON array of
# records.
for frame, target in (
    (df_infection, url_infection),
    (df_death, url_death),
    (df_summary, url_summary),
):
    frame[frame.location.isin(COUNTRIES)].to_json(target, orient='records')
# Publish the freshly written JSON exports to the blog repository.
repo = g.get_repo("Datenspieler/notebooks_for_blog")
FILELIST = ['data_infection.json', 'data_death.json', 'data_summary.json']

# The commit message is the same for every file: it names the newest date
# contained in the data.
commit_message = "Update data as of " + df.date.max().strftime('%Y-%m-%d')

for file_for_upload in FILELIST:
    print('Uploading', file_for_upload, end=' - ')
    remote_path = "2020-corona/" + file_for_upload

    with open(file_for_upload) as f:
        data = f.read()

    # update_file() needs the blob SHA of the version being replaced, so the
    # current remote file is fetched first.
    contents = repo.get_contents(remote_path)
    repo.update_file(contents.path, commit_message, data,
                     contents.sha, branch="master")

    # Fetch again to report the modification time after the update.
    contents = repo.get_contents(remote_path)
    print(contents.last_modified)
# Data source for the charts.
# Original author's note: in a future version the files should be pushed to
# git automatically; this time they were uploaded by hand.
# NOTE(review): that note looks outdated - the loop above already uploads the
# files via repo.update_file(). Confirm and remove if so.
#
# Public raw-content location of the exported JSON files.
BASEURL = 'https://raw.githubusercontent.com/Datenspieler/notebooks_for_blog/master/2020-corona/'
# Turn the local file names into remote URLs ...
url_infection = BASEURL + url_infection
url_death = BASEURL + url_death
url_summary = BASEURL + url_summary
# ... and immediately rebind the same names to the filtered DataFrames.
# The URL strings built above are therefore never used: the charts below
# receive the DataFrames, not the URLs.
# NOTE(review): presumably a manual toggle between remote and inline data -
# remove whichever group of three assignments is not wanted.
url_infection = df_infection[df_infection.location.isin(COUNTRIES)]
url_death = df_death[df_death.location.isin(COUNTRIES)]
url_summary = df_summary[df_summary.location.isin(COUNTRIES)]
# Single-item selection driven by mouse-over: picks the `location` of the
# nearest mark. The same selection object is attached to all three charts
# below, so hovering in one highlights that location in the others.
highlight = alt.selection(
    type='single',
    on='mouseover',
    fields=['location'],
    nearest=True,
)
# Shared encoding for the infection chart (no mark yet - the point and line
# layers are derived from this base further down).
# NOTE(review): the x scale asks for type='ordinal' on a quantitative field;
# check whether the renderer honours this or falls back to a default scale.
base_infection = alt.Chart(url_infection).encode(
    x=alt.X(
        'days_after',
        type='quantitative',
        scale=alt.Scale(domain=[0, 30], type='ordinal'),
        title='Days since the 100th confirmed infection',
    ),
    y=alt.Y(
        'total_cases_normalized',
        type='quantitative',
        scale=alt.Scale(type='log', base=10),
        title='Total confirmed infections of COVID-19, normalized',
    ),
    color=alt.Color('location:N', title="Country", legend=None),
    # The tooltip shows the raw total, not the normalized value that is plotted.
    tooltip=alt.Tooltip(['location:N', 'total_cases:Q', 'date:T']),
).properties(
    title='Development of Corona infections',
)
# Shared encoding for the death chart (no mark yet - the point and line layers
# are derived from this base further down).
# NOTE(review): the x scale asks for type='ordinal' on a quantitative field;
# check whether the renderer honours this or falls back to a default scale.
base_death = alt.Chart(url_death).encode(
    x=alt.X(
        'days_after',
        type='quantitative',
        scale=alt.Scale(domain=[0, 30], type='ordinal'),
        title='Days since the 10th confirmed death',
    ),
    y=alt.Y(
        'total_deaths_normalized',
        type='quantitative',
        scale=alt.Scale(type='log', base=10),
        title='Total confirmed deaths of COVID-19, normalized',
    ),
    color=alt.Color('location:N', title="Country", legend=None),
    # The tooltip shows the raw total, not the normalized value that is plotted.
    tooltip=alt.Tooltip(['location:N', 'total_deaths:Q', 'date:T']),
).properties(
    title='Development of Corona deaths',
)
# Horizontal bar chart of the latest confirmed cases per location. Bars are
# coloured by location while they match the `highlight` selection and grey
# otherwise.
base_summary = (
    alt.Chart(url_summary)
    .mark_bar()
    .encode(
        x=alt.X('cases', type='quantitative', title='Confirmed cases',
                scale=alt.Scale(type='linear')),
        y=alt.Y('location:N', title='Country'),
        color=alt.condition(
            highlight,
            alt.Color('location:N', title="Country", legend=None),
            alt.ColorValue("grey"),
        ),
        tooltip=alt.Tooltip(['location:N', 'cases:Q']),
    )
    .add_selection(highlight)
    .properties(width=200, title='Corona cases by country')
)
def _invisible_hover_points(base):
    """Return a fully transparent circle layer of *base* that carries the
    `highlight` selection and horizontal-only pan/zoom.

    The circles are never visible (opacity 0); they only provide targets for
    the nearest-point mouse-over selection.
    """
    return (
        base.mark_circle()
        .encode(opacity=alt.value(0))
        .add_selection(highlight)
        .properties(width=600)
        .interactive(bind_y=False)
    )


points_infection = _invisible_hover_points(base_infection)
points_death = _invisible_hover_points(base_death)
def _highlightable_lines(base):
    """Return a line layer of *base* whose stroke size is 5 for the location
    matched by `highlight` and 1 for all others."""
    return base.mark_line().encode(
        size=alt.condition(~highlight, alt.value(1), alt.value(5), legend=None)
    )


lines_infection = _highlightable_lines(base_infection)
lines_death = _highlightable_lines(base_death)
# Layout: summary bars on the left; on the right the infection chart stacked
# above the death chart, each as hover-points layered with lines.
# (`&` is vertical concatenation, `|` is horizontal concatenation.)
chart = (points_infection + lines_infection) & (points_death + lines_death)
chart = base_summary | chart

# Export the chart specification as JSON.
# (Alternative kept from the original: chart.save('corona.html'))
chart.save('corona.json')

# Notebook display of the final chart (no effect when run as a script).
chart