Cross-Country Epidemic Viz¶

In [1]:

library(LalRUtils)
libreq(data.table, tidyverse, janitor, anytime, lubridate, plotly, zoo, patchwork, ggrepel)
theme_set(lal_plot_theme())
options(repr.plot.width = 20, repr.plot.height=12)

      wants        loaded
 [1,] "data.table" TRUE  
 [2,] "tidyverse"  TRUE  
 [3,] "janitor"    TRUE  
 [4,] "anytime"    TRUE  
 [5,] "lubridate"  TRUE  
 [6,] "plotly"     TRUE  
 [7,] "zoo"        TRUE  
 [8,] "patchwork"  TRUE  
 [9,] "ggrepel"    TRUE

In [2]:

options(ggplot2.discrete.fill = RColorBrewer::brewer.pal(9, "Set1"))
options(ggplot2.discrete.colour = RColorBrewer::brewer.pal(9, "Set1"))
options(ggplot2.continuous.fill = "viridis"); options(ggplot2.continuous.colour = "viridis")
chr = function(...) as.character(...) %>% display_html()

JHU¶

In [3]:

jhu_cases_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
cases = fread(jhu_cases_url)

jhu_deaths_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
deaths = fread(jhu_deaths_url)

In [4]:

name_clean = function(df) setnames(df, str_replace(colnames(df), "/", "_"))

In [5]:

cases_long = melt(cases, id.vars = c("Province/State", "Country/Region", "Lat", "Long"), 
    variable.name = "date", value.name = "cases") 
name_clean(cases_long)

deaths_long = melt(deaths, id.vars = c("Province/State", "Country/Region", "Lat", "Long"), 
    variable.name = "date", value.name = "deaths") 
name_clean(deaths_long)
deaths_long[, c('Lat', 'Long') := NULL]

In [6]:

df = merge(cases_long, deaths_long,
      by = c("Province_State", "Country_Region", "date"), all.x = T)
df[, date := mdy(date)]
df[, day := weekdays(date)]
df[is.na(df)] = 0
df %>% glimpse

Rows: 188,160
Columns: 8
$ Province_State <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "",…
$ Country_Region <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanist…
$ date           <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-0…
$ Lat            <dbl> 33.94, 33.94, 33.94, 33.94, 33.94, 33.94, 33.94, 33.94,…
$ Long           <dbl> 67.71, 67.71, 67.71, 67.71, 67.71, 67.71, 67.71, 67.71,…
$ cases          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ deaths         <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ day            <chr> "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"…

In [7]:

fwrite(df, "data/jhu_panel.csv.gz")

Country Level¶

In [8]:

country_ts = df[, lapply(.SD, sum) , by = .(Country_Region, date), .SDcols = c("cases", "deaths")]

(t10countries = country_ts[date == max(country_ts$date)][order(-cases)][1:10])

A data.table: 10 × 4
Country_Region	date	cases	deaths
<chr>	<date>	<int>	<int>
US	2021-11-23	47980780	773770
India	2021-11-23	34535763	466584
Brazil	2021-11-23	22030182	613066
United Kingdom	2021-11-23	9985879	144579
Russia	2021-11-23	9238330	261526
Turkey	2021-11-23	8626550	75443
France	2021-11-23	7553513	119605
Iran	2021-11-23	6088009	129177
Germany	2021-11-23	5516623	99773
Argentina	2021-11-23	5317633	116415

In [9]:

t10_subset = country_ts[Country_Region %in% t10countries$Country_Region]
setorder(t10_subset, Country_Region, date)
# normalise time from first day with >= 10 cases
t10_subset[cases >= 10,  c_time := seq_len(.N), by = .(Country_Region)]
t10_subset[deaths >= 1, d_time := seq_len(.N), by = .(Country_Region)]

In [10]:

setorder(t10_subset, Country_Region, date)
t10_subset[, new_cases  := cases - shift(cases), by = .(Country_Region)]
t10_subset[, new_deaths := deaths - shift(deaths), by = .(Country_Region)]
t10_subset[, cfr := deaths/cases]
smoothvars = c('cases', 'new_cases', 'deaths', 'new_deaths', 'cfr')
t10_subset[, paste0("rm7_", smoothvars) := lapply(.SD, rollmean, k = 7, fill = NA, na.pad = T), 
   by = .(Country_Region), .SDcols = smoothvars]

t10_subset[, label := ifelse(date == max(date), Country_Region, NA)]

Cumulative and New Cases / Deaths¶

In [11]:

options(warn=-1)

In [12]:

p1 = ggplot(t10_subset,
           aes(x = c_time, y = cases, group = Country_Region, colour = Country_Region)) + 
    geom_point(aes(y = cases), size = 0.5) + geom_line(aes(y = rm7_cases)) + 
    scale_y_log10(limits = c(10, NA)) + 
    scale_colour_brewer(palette = "Spectral") +
    theme(legend.position = "None") +
    geom_text_repel(aes(label = label),
                      nudge_x = 1,
                      na.rm = TRUE) +
    ggtitle('Cumulative Case Counts in Worst-Hit Countries') 

p2 = 
    t10_subset %>% 
    ggplot(aes(x = c_time,y = new_cases, group = Country_Region, colour = Country_Region)) + 
    geom_point(aes(y = new_cases), size = 0.5) + 
#     geom_line(aes(y = rm7_new_cases)) + 
    geom_smooth(se = F) +
    scale_y_log10() + 
    scale_colour_brewer(palette = "Spectral") +
    theme(legend.position = "None") +
    geom_text_repel(aes(label = label),
                      nudge_x = 1,
                      na.rm = TRUE) +
    ggtitle('Growth in Cases in Worst-Hit Countries')

(p1 | p2 )

`geom_smooth()` using method = 'loess' and formula 'y ~ x'

In [13]:

p1 = ggplot(t10_subset,
           aes(x = d_time, y = deaths, group = Country_Region, colour = Country_Region)) + 
    geom_point(aes(y = deaths), size = 0.5) + geom_line(aes(y = rm7_deaths)) + 
    scale_y_log10(limits = c(10, NA)) + 
    scale_colour_brewer(palette = "Spectral") +
    theme(legend.position = "None") +
    geom_text_repel(aes(label = label),
                      nudge_x = 1,
                      na.rm = TRUE) +
    ggtitle('Cumulative Death Counts in Worst-Hit Countries') 

p2 =  t10_subset %>% 
    ggplot(aes(x = d_time, y = new_deaths, group = Country_Region, colour = Country_Region)) + 
    geom_point(aes(y = new_deaths), size = 0.5) + 
    geom_smooth(se = F) +
    scale_y_log10() + 
    scale_colour_brewer(palette = "Spectral") +
    theme(legend.position = "None") +
    geom_text_repel(aes(label = label),
                      nudge_x = 1,
                      na.rm = TRUE) +
    ggtitle('Growth in Deaths in Worst-Hit Countries')

(p1 | p2 )

`geom_smooth()` using method = 'loess' and formula 'y ~ x'

Case Fatality Rate¶

In [14]:

p3 =  t10_subset %>% 
    ggplot(aes(x = d_time, y = cfr, group = Country_Region, colour = Country_Region)) + 
    geom_point(size = 0.5) + 
#     geom_line(aes(y = rm7_cfr)) + 
    geom_smooth(se = F) +
    ylim(c(0, 0.3)) +
    scale_colour_brewer(palette = "Spectral") +
    theme(legend.position = "None") +
    geom_text_repel(aes(label = label),
                      na.rm = TRUE) +
    labs(title = 'Case Fatality Rate in Worst-Hit Countries', subtitle = "What is going on in Russia")
p3

`geom_smooth()` using method = 'loess' and formula 'y ~ x'

Shares¶

In [15]:

t10_subset[, denom_cases := sum(rm7_new_cases), by = date][, 
            denom_deaths := sum(rm7_new_deaths), by = date]
t10_subset[, newcase_share := rm7_new_cases / denom_cases][, 
            newdeath_share := rm7_new_deaths /denom_deaths]

In [20]:

p1 = ggplot(t10_subset[date >="2020-02-15"], aes(x = date, y = newcase_share, fill = Country_Region, colour = Country_Region)) +
    geom_area(position="fill") +
    scale_y_continuous(breaks = seq(0, 1, .1))+ ylim(c(0, 1)) +
    scale_fill_brewer(palette = "Spectral") +
    scale_colour_brewer(palette = "Spectral") +
    ggtitle("New Cases") + theme(legend.position = "None")
p2 = ggplot(t10_subset[date >="2020-02-15"], aes(x = date, y = newdeath_share, fill = Country_Region, colour = Country_Region)) +
    geom_area(position="fill") +
    scale_y_continuous(breaks = seq(0, 1, .1))+ ylim(c(0, 1)) +
    scale_fill_brewer(palette = "Spectral") +
    scale_colour_brewer(palette = "Spectral") +
    ggtitle("New Deaths") 

options(repr.plot.width = 20, repr.plot.height = 16)
(p = (p1 / p2 ) + plot_annotation(title = "Tracking the epidemic's hotspots over time", 
                                  subtitle = "each country's share of global 7 day rolling mean in deaths and cases"))

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

In [21]:

ggsave("carpet_plot_xc.png", p, width = 20, height = 16)

OWID¶

In [22]:

owid = fread("https://covid.ourworldindata.org/data/owid-covid-data.csv")
owid[, d := ymd(date)]
owid %>% glimpse

Rows: 135,370
Columns: 68
$ iso_code                                   <chr> "AFG", "AFG", "AFG", "AFG",…
$ continent                                  <chr> "Asia", "Asia", "Asia", "As…
$ location                                   <chr> "Afghanistan", "Afghanistan…
$ date                                       <date> 2020-02-24, 2020-02-25, 20…
$ total_cases                                <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, …
$ new_cases                                  <dbl> 5, 0, 0, 0, 0, 0, 0, 0, 0, …
$ new_cases_smoothed                         <dbl> NA, NA, NA, NA, NA, 0.714, …
$ total_deaths                               <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ new_deaths                                 <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ new_deaths_smoothed                        <dbl> NA, NA, NA, NA, NA, 0, 0, 0…
$ total_cases_per_million                    <dbl> 0.126, 0.126, 0.126, 0.126,…
$ new_cases_per_million                      <dbl> 0.126, 0.000, 0.000, 0.000,…
$ new_cases_smoothed_per_million             <dbl> NA, NA, NA, NA, NA, 0.018, …
$ total_deaths_per_million                   <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ new_deaths_per_million                     <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ new_deaths_smoothed_per_million            <dbl> NA, NA, NA, NA, NA, 0, 0, 0…
$ reproduction_rate                          <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ icu_patients                               <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ icu_patients_per_million                   <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ hosp_patients                              <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ hosp_patients_per_million                  <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ weekly_icu_admissions                      <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ weekly_icu_admissions_per_million          <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ weekly_hosp_admissions                     <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ weekly_hosp_admissions_per_million         <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ new_tests                                  <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ total_tests                                <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ total_tests_per_thousand                   <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ new_tests_per_thousand                     <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ new_tests_smoothed                         <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ new_tests_smoothed_per_thousand            <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ positive_rate                              <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ tests_per_case                             <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ tests_units                                <chr> "", "", "", "", "", "", "",…
$ total_vaccinations                         <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ people_vaccinated                          <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ people_fully_vaccinated                    <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ total_boosters                             <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ new_vaccinations                           <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ new_vaccinations_smoothed                  <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ total_vaccinations_per_hundred             <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ people_vaccinated_per_hundred              <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ people_fully_vaccinated_per_hundred        <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ total_boosters_per_hundred                 <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ new_vaccinations_smoothed_per_million      <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ new_people_vaccinated_smoothed             <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ new_people_vaccinated_smoothed_per_hundred <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ stringency_index                           <dbl> 8.33, 8.33, 8.33, 8.33, 8.3…
$ population                                 <dbl> 39835428, 39835428, 3983542…
$ population_density                         <dbl> 54.42, 54.42, 54.42, 54.42,…
$ median_age                                 <dbl> 18.6, 18.6, 18.6, 18.6, 18.…
$ aged_65_older                              <dbl> 2.581, 2.581, 2.581, 2.581,…
$ aged_70_older                              <dbl> 1.337, 1.337, 1.337, 1.337,…
$ gdp_per_capita                             <dbl> 1804, 1804, 1804, 1804, 180…
$ extreme_poverty                            <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ cardiovasc_death_rate                      <dbl> 597, 597, 597, 597, 597, 59…
$ diabetes_prevalence                        <dbl> 9.59, 9.59, 9.59, 9.59, 9.5…
$ female_smokers                             <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ male_smokers                               <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ handwashing_facilities                     <dbl> 37.75, 37.75, 37.75, 37.75,…
$ hospital_beds_per_thousand                 <dbl> 0.5, 0.5, 0.5, 0.5, 0.5, 0.…
$ life_expectancy                            <dbl> 64.83, 64.83, 64.83, 64.83,…
$ human_development_index                    <dbl> 0.511, 0.511, 0.511, 0.511,…
$ excess_mortality_cumulative_absolute       <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ excess_mortality_cumulative                <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ excess_mortality                           <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ excess_mortality_cumulative_per_million    <dbl> NA, NA, NA, NA, NA, NA, NA,…
$ d                                          <date> 2020-02-24, 2020-02-25, 20…

In [23]:

fwrite(owid, "data/owid_covid.csv.gz")

In [24]:

all_countries_xs = owid[location!="World"][
    order(-d)][
    , .SD[1], by = .(iso_code)]
(t10 = all_countries_xs[order(-total_cases)][1:10])
#  [order(-total_cases)][1:10])

A data.table: 10 × 68
iso_code	continent	location	date	total_cases	new_cases	new_cases_smoothed	total_deaths	new_deaths	new_deaths_smoothed	⋯	male_smokers	handwashing_facilities	hospital_beds_per_thousand	life_expectancy	human_development_index	excess_mortality_cumulative_absolute	excess_mortality_cumulative	excess_mortality	excess_mortality_cumulative_per_million	d
<chr>	<chr>	<chr>	<date>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	⋯	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>	<date>
OWID_HIC		High income	2021-11-23	112261196	390354	388574	1811688	3345	2736.4	⋯	NA	NA	NA	NA	NA	NA	NA	NA	NA	2021-11-23
OWID_UMC		Upper middle income	2021-11-23	81752399	137862	121382	2173124	2842	2890.6	⋯	NA	NA	NA	NA	NA	NA	NA	NA	NA	2021-11-23
OWID_ASI		Asia	2021-11-23	81502829	119415	90992	1208925	1533	1467.3	⋯	NA	NA	NA	NA	NA	NA	NA	NA	NA	2021-11-23
OWID_EUR		Europe	2021-11-23	72118721	345330	343602	1392865	4361	3907.1	⋯	NA	NA	NA	NA	NA	NA	NA	NA	NA	2021-11-23
OWID_LMC		Lower middle income	2021-11-23	63404088	77295	54872	1144154	1868	1665.6	⋯	NA	NA	NA	NA	NA	NA	NA	NA	NA	2021-11-23
OWID_NAM		North America	2021-11-23	57457648	98592	104178	1160501	1509	1411.7	⋯	NA	NA	NA	NA	NA	NA	NA	NA	NA	2021-11-23
USA	North America	United States	2021-11-23	47980780	92609	95778	773770	1426	1137.0	⋯	24.6	NA	2.77	78.86	0.926	NA	NA	NA	NA	2021-11-23
OWID_EUN		European Union	2021-11-23	44884187	240338	233786	833289	2053	1706.9	⋯	NA	NA	NA	NA	NA	NA	NA	NA	NA	2021-11-23
OWID_SAM		South America	2021-11-23	38804721	19241	19024	1178684	439	378.4	⋯	NA	NA	NA	NA	NA	NA	NA	NA	NA	2021-11-23
IND	Asia	India	2021-11-23	34535763	9283	9881	466584	437	347.3	⋯	20.6	59.55	0.53	69.66	0.645	NA	NA	NA	NA	2021-11-23

In [25]:

owid_t10_subset = owid[location %in% t10$location]

vars = c('new_cases', 'new_deaths', 'new_tests')
owid_t10_subset[, paste0('denom_', vars):= lapply(.SD, sum), by = date, .SDcols = vars]

owid_t10_subset[, `:=`(
            newcase_share  = new_cases / denom_new_cases,
            newdeath_share = new_deaths / denom_new_deaths,
            newtest_share  = new_tests / denom_new_tests
    )][ newcase_share < 0, newcase_share := 0]

In [28]:

p1 = ggplot(owid_t10_subset[d >="2020-02-15"], aes(x = d, y = newcase_share, fill = location, colour = location)) +
    geom_area(position="fill") +
    scale_y_continuous(breaks = seq(0, 1, .1))+
    scale_fill_brewer(palette = "Spectral") +
    scale_colour_brewer(palette = "Spectral") +
    ggtitle("New Cases")+ theme(legend.pos = "None")
p2 = ggplot(owid_t10_subset[d >="2020-02-15"], aes(x = d, y = newdeath_share, fill = location, colour = location)) +
    geom_area(position="fill") +
    scale_y_continuous(breaks = seq(0, 1, .1))+
    scale_fill_brewer(palette = "Spectral") +
    scale_colour_brewer(palette = "Spectral") + ylim(c(0, 1)) +
    ggtitle("New Deaths") 
options(repr.plot.width = 20, repr.plot.height = 12)
(p1 / p2 ) + plot_annotation(title = "Shares of Cases and Deaths over time")

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Country Profiles¶

In [29]:

country_plot = function(country){
    npl = owid[location == country][order(-date)]
    p1 =  npl[d>="2020-03-15"] %>% 
        ggplot(aes(x = d, y = total_cases_per_million)) + scale_y_log10() + 
        geom_point(size = 0.5) +  geom_smooth(se = F) +
        ylim(c(0, NA)) + 
        ggtitle('Total Cases per Million') 
    
    p2 =  npl[d>="2020-03-15"] %>% 
        ggplot(aes(x = d, y = new_cases_per_million)) +
        geom_point(size = 0.5) +  geom_smooth(se = F) +
         ylim(c(0, NA)) + 
        ggtitle('New Cases per Million')
    
    p3 =  npl[d>="2020-03-15"] %>% 
        ggplot(aes(x = d, y = total_deaths_per_million)) +
        ylim(c(0, NA)) + 
        geom_point(size = 0.5) +  geom_smooth(se = F) +
        ggtitle('Total Deaths per Million')
    
    p4 =  npl[d>="2020-03-15"] %>% 
        ggplot(aes(x = d, y = new_deaths_per_million)) +
        geom_point(size = 0.5) +  geom_smooth(se = F) +
         ylim(c(0, NA)) + 
        ggtitle('New Deaths per Million')

    p5 =  npl[d>="2020-03-15"] %>% 
        ggplot(aes(x = d, y = total_tests_per_thousand)) + scale_y_log10() + 
        geom_point(size = 0.5) +  geom_smooth(se = F) +
         ylim(c(0, NA)) + 
        ggtitle('Total Tests per thousand')

    p6 =  npl[d>="2020-03-15"] %>% 
        ggplot(aes(x = d, y = new_tests_per_thousand)) +
        geom_point(size = 0.5) +  geom_smooth(se = F) +
        ylim(c(0, NA)) + 
        ggtitle('New Tests per thousand')
    p7 = npl[d>="2020-12-01"] %>% 
        ggplot(aes(x = d, y = total_vaccinations_per_hundred)) + scale_y_log10() + 
        geom_point(size = 0.5) +  geom_smooth(se = F) +
         ylim(c(0, NA)) + 
        ggtitle('Total Vaccines per 100')

    p8 =  npl[d>="2020-12-01"] %>% 
        ggplot(aes(x = d, y = people_vaccinated_per_hundred)) +
        geom_point(size = 0.5) +  geom_smooth(se = F) +
        ylim(c(0, NA)) + 
        ggtitle('New Vaccines per 100')
    suppressMessages(suppressWarnings(print(
        (p5 | p6) /(p7 | p8) / (p1 | p2) / (p3 | p4) + plot_annotation(title = paste0("Covid Profile : ", country))
    )))
}

South Asia¶

In [30]:

sa_data = owid[location %in% c("Nepal", "India", "Bangladesh", "Sri Lanka", "Pakistan")][, .(date, location, total_vaccinations_per_hundred)]
sa_data %>% head

A data.table: 6 × 3
date	location	total_vaccinations_per_hundred
<date>	<chr>	<dbl>
2020-03-03	Bangladesh	NA
2020-03-04	Bangladesh	NA
2020-03-05	Bangladesh	NA
2020-03-06	Bangladesh	NA
2020-03-07	Bangladesh	NA
2020-03-08	Bangladesh	NA

In [31]:

options(repr.plot.width = 10, repr.plot.height=10)

In [32]:

suppressMessages(suppressWarnings(print(
    sa_data[date >= "2021-01-01"] %>% 
        ggplot(aes(x = date, y = total_vaccinations_per_hundred, colour = as.factor(location))) +
        geom_point() + geom_smooth(alpha = 0.5, se = F) +
        labs(title = "total vaccinations per 100 in South Asia", colour = "")
)))

In [34]:

options(repr.plot.width = 20, repr.plot.height=20)

In [35]:

country_plot("Nepal") 
country_plot("India")
country_plot("Bangladesh")
country_plot("Pakistan")

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Sub-Saharan Africa¶

In [36]:

country_plot("Ghana")
country_plot("Nigeria")
country_plot("Kenya")
country_plot("Uganda")

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Eastern Europe¶

In [30]:

country_plot("Russia")
country_plot("Poland")
country_plot("Belarus")

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

North America¶

In [31]:

country_plot("United States")
country_plot("Canada")
country_plot("Mexico")

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Europe¶

In [32]:

country_plot("United Kingdom")
country_plot("France")
country_plot("Germany")
country_plot("Spain")
country_plot("Italy")

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Scale for 'y' is already present. Adding another scale for 'y', which will
replace the existing scale.

Excess Mortality¶

In [33]:

exmort_nyt = fread("https://raw.githubusercontent.com/nytimes/covid-19-data/master/excess-deaths/deaths.csv")
exmort_nyt[, `:=`(
             start_date = ymd(start_date),
             end_date   = ymd(start_date),
             year = as.numeric(year)
           )
           ]
exmort_nyt %>% glimpse

Rows: 7,258
Columns: 12
$ country         <chr> "Austria", "Austria", "Austria", "Austria", "Austria",…
$ placename       <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", ""…
$ frequency       <chr> "weekly", "weekly", "weekly", "weekly", "weekly", "wee…
$ start_date      <date> 2020-01-06, 2020-01-13, 2020-01-20, 2020-01-27, 2020-…
$ end_date        <date> 2020-01-06, 2020-01-13, 2020-01-20, 2020-01-27, 2020-…
$ year            <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, …
$ month           <int> 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, …
$ week            <int> 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
$ deaths          <int> 1702, 1797, 1779, 1947, 1681, 1721, 1718, 1768, 1744, …
$ expected_deaths <int> 1806, 1819, 1831, 1837, 1837, 1829, 1812, 1786, 1753, …
$ excess_deaths   <int> -104, -22, -52, 110, -156, -108, -94, -18, -9, 1, 158,…
$ baseline        <chr> "2015-2019 historical data", "2015-2019 historical dat…

In [34]:

exmort_nyt[, .N, by = .(country, placename, frequency)]

A data.table: 35 × 4
country	placename	frequency	N
<chr>	<chr>	<chr>	<int>
Austria		weekly	304
Belgium		weekly	300
Bolivia		monthly	59
Brazil		weekly	97
Chile		weekly	305
Colombia		weekly	298
Czech Republic		weekly	302
Denmark		weekly	304
Ecuador		monthly	40
Finland		weekly	203
France		weekly	551
Germany		weekly	252
Hungary		weekly	302
Ireland		weekly	292
Israel		weekly	303
Turkey	Istanbul	weekly	153
Italy		monthly	22
Indonesia	Jakarta	monthly	132
Mexico		weekly	299
Russia	Moscow	monthly	71
India	Mumbai	monthly	19
Netherlands		weekly	254
Norway		weekly	305
Peru		monthly	60
Poland		monthly	59
Portugal		weekly	304
South Africa		weekly	102
South Korea		monthly	70
Spain		weekly	141
Sweden		weekly	305
Switzerland		weekly	255
Thailand		monthly	64
Japan		monthly	70
United Kingdom		weekly	560
United States		weekly	101

In [35]:

wksamp = exmort_nyt[frequency == "weekly" & year == 2020]

In [39]:

eur = exmort_nyt[country %in% c(
    'Austria', 'Belgium', 'Denmark', 'Finland', 'France', 'Germany', 'Italy', 'Netherlands', 
    'Norway', 'Portugal', 'Spain', 'Sweden', 'United Kingdom', "Switzerland"
    ) & year >= 2020 & placename == ""]
eur[, .SD[1], by = country]

A data.table: 14 × 12
country	placename	frequency	start_date	end_date	year	month	week	deaths	expected_deaths	excess_deaths	baseline
<chr>	<chr>	<chr>	<date>	<date>	<dbl>	<int>	<int>	<int>	<int>	<int>	<chr>
Austria		weekly	2020-01-06	2020-01-06	2020	1	2	1702	1806	-104	2015-2019 historical data
Belgium		weekly	2020-01-06	2020-01-06	2020	1	2	2381	2386	-5	2016-2019 historical data
Denmark		weekly	2020-01-06	2020-01-06	2020	1	2	1149	1120	29	2015-2019 historical data
Finland		weekly	2020-01-06	2020-01-06	2020	1	2	1068	1155	-87	2017-2019 historical data
France		weekly	2020-01-06	2020-01-06	2020	1	2	13404	13675	-271	2010-2019 historical data
Germany		weekly	2020-01-06	2020-01-06	2020	1	2	19430	19635	-205	2016-2019 historical data
Italy		monthly	NA	NA	2020	1	NA	61101	64713	-3612	2015-2019 historical data
Netherlands		weekly	2020-01-06	2020-01-06	2020	1	2	3364	3329	35	2016-2019 historical data
Norway		weekly	2020-01-06	2020-01-06	2020	1	2	951	910	41	2015-2019 historical data
Portugal		weekly	2020-01-06	2020-01-06	2020	1	2	2654	2900	-246	2015-2019 historical data
Spain		weekly	2020-01-06	2020-01-06	2020	1	2	8936	9210	-274	2018-2019 historical data
Sweden		weekly	2020-01-06	2020-01-06	2020	1	2	1889	1884	5	2015-2019 historical data
Switzerland		weekly	2020-01-06	2020-01-06	2020	1	2	1353	1475	-122	2016-2019 historical data
United Kingdom		weekly	2020-01-04	2020-01-04	2020	1	2	16020	15465	555	2010-2019 historical data

In [40]:

eur[is.na(start_date), start_date := ymd(paste0(year, '-', month, "-01"))]
eur[, excess := fifelse(excess_deaths > 0, 1, 0)]

In [41]:

options(repr.plot.width = 25, repr.plot.height=20)
ggplot(eur, aes(x = start_date, y = expected_deaths)) +
    geom_point(alpha = 0.6) + geom_line(linetype = 'dotted', alpha = 0.8) +
    geom_point(aes(y = deaths), colour = 'red') + geom_line(aes(y = deaths), colour = 'red') +
    facet_wrap(~ country, scales = 'free') +
    labs(title = "Excess Mortality in Europe", subtitle = "Expected in gray; actual in red")