library(LalRUtils)
libreq(data.table, tidyverse, janitor, anytime, lubridate, plotly, zoo, patchwork, ggrepel)
theme_set(lal_plot_theme())
options(repr.plot.width = 20, repr.plot.height=12)
wants loaded [1,] "data.table" TRUE [2,] "tidyverse" TRUE [3,] "janitor" TRUE [4,] "anytime" TRUE [5,] "lubridate" TRUE [6,] "plotly" TRUE [7,] "zoo" TRUE [8,] "patchwork" TRUE [9,] "ggrepel" TRUE
options(ggplot2.discrete.fill = RColorBrewer::brewer.pal(9, "Set1"))
options(ggplot2.discrete.colour = RColorBrewer::brewer.pal(9, "Set1"))
options(ggplot2.continuous.fill = "viridis"); options(ggplot2.continuous.colour = "viridis")
chr = function(...) as.character(...) %>% display_html()
jhu_cases_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
cases = fread(jhu_cases_url)
jhu_deaths_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
deaths = fread(jhu_deaths_url)
name_clean = function(df) setnames(df, str_replace(colnames(df), "/", "_"))
cases_long = melt(cases, id.vars = c("Province/State", "Country/Region", "Lat", "Long"),
variable.name = "date", value.name = "cases")
name_clean(cases_long)
deaths_long = melt(deaths, id.vars = c("Province/State", "Country/Region", "Lat", "Long"),
variable.name = "date", value.name = "deaths")
name_clean(deaths_long)
deaths_long[, c('Lat', 'Long') := NULL]
df = merge(cases_long, deaths_long,
by = c("Province_State", "Country_Region", "date"), all.x = T)
df[, date := mdy(date)]
df[, day := weekdays(date)]
df[is.na(df)] = 0
df %>% glimpse
Rows: 188,160 Columns: 8 $ Province_State <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", "",… $ Country_Region <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanist… $ date <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-0… $ Lat <dbl> 33.94, 33.94, 33.94, 33.94, 33.94, 33.94, 33.94, 33.94,… $ Long <dbl> 67.71, 67.71, 67.71, 67.71, 67.71, 67.71, 67.71, 67.71,… $ cases <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… $ deaths <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… $ day <chr> "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"…
fwrite(df, "data/jhu_panel.csv.gz")
country_ts = df[, lapply(.SD, sum) , by = .(Country_Region, date), .SDcols = c("cases", "deaths")]
(t10countries = country_ts[date == max(country_ts$date)][order(-cases)][1:10])
Country_Region | date | cases | deaths |
---|---|---|---|
<chr> | <date> | <int> | <int> |
US | 2021-11-23 | 47980780 | 773770 |
India | 2021-11-23 | 34535763 | 466584 |
Brazil | 2021-11-23 | 22030182 | 613066 |
United Kingdom | 2021-11-23 | 9985879 | 144579 |
Russia | 2021-11-23 | 9238330 | 261526 |
Turkey | 2021-11-23 | 8626550 | 75443 |
France | 2021-11-23 | 7553513 | 119605 |
Iran | 2021-11-23 | 6088009 | 129177 |
Germany | 2021-11-23 | 5516623 | 99773 |
Argentina | 2021-11-23 | 5317633 | 116415 |
t10_subset = country_ts[Country_Region %in% t10countries$Country_Region]
setorder(t10_subset, Country_Region, date)
# normalise time from first day with >= 10 cases
t10_subset[cases >= 10, c_time := seq_len(.N), by = .(Country_Region)]
t10_subset[deaths >= 1, d_time := seq_len(.N), by = .(Country_Region)]
setorder(t10_subset, Country_Region, date)
t10_subset[, new_cases := cases - shift(cases), by = .(Country_Region)]
t10_subset[, new_deaths := deaths - shift(deaths), by = .(Country_Region)]
t10_subset[, cfr := deaths/cases]
smoothvars = c('cases', 'new_cases', 'deaths', 'new_deaths', 'cfr')
t10_subset[, paste0("rm7_", smoothvars) := lapply(.SD, rollmean, k = 7, fill = NA, na.pad = T),
by = .(Country_Region), .SDcols = smoothvars]
t10_subset[, label := ifelse(date == max(date), Country_Region, NA)]
options(warn=-1)
p1 = ggplot(t10_subset,
aes(x = c_time, y = cases, group = Country_Region, colour = Country_Region)) +
geom_point(aes(y = cases), size = 0.5) + geom_line(aes(y = rm7_cases)) +
scale_y_log10(limits = c(10, NA)) +
scale_colour_brewer(palette = "Spectral") +
theme(legend.position = "None") +
geom_text_repel(aes(label = label),
nudge_x = 1,
na.rm = TRUE) +
ggtitle('Cumulative Case Counts in Worst-Hit Countries')
p2 =
t10_subset %>%
ggplot(aes(x = c_time,y = new_cases, group = Country_Region, colour = Country_Region)) +
geom_point(aes(y = new_cases), size = 0.5) +
# geom_line(aes(y = rm7_new_cases)) +
geom_smooth(se = F) +
scale_y_log10() +
scale_colour_brewer(palette = "Spectral") +
theme(legend.position = "None") +
geom_text_repel(aes(label = label),
nudge_x = 1,
na.rm = TRUE) +
ggtitle('Growth in Cases in Worst-Hit Countries')
(p1 | p2 )
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
p1 = ggplot(t10_subset,
aes(x = d_time, y = deaths, group = Country_Region, colour = Country_Region)) +
geom_point(aes(y = deaths), size = 0.5) + geom_line(aes(y = rm7_deaths)) +
scale_y_log10(limits = c(10, NA)) +
scale_colour_brewer(palette = "Spectral") +
theme(legend.position = "None") +
geom_text_repel(aes(label = label),
nudge_x = 1,
na.rm = TRUE) +
ggtitle('Cumulative Death Counts in Worst-Hit Countries')
p2 = t10_subset %>%
ggplot(aes(x = d_time, y = new_deaths, group = Country_Region, colour = Country_Region)) +
geom_point(aes(y = new_deaths), size = 0.5) +
geom_smooth(se = F) +
scale_y_log10() +
scale_colour_brewer(palette = "Spectral") +
theme(legend.position = "None") +
geom_text_repel(aes(label = label),
nudge_x = 1,
na.rm = TRUE) +
ggtitle('Growth in Deaths in Worst-Hit Countries')
(p1 | p2 )
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
p3 = t10_subset %>%
ggplot(aes(x = d_time, y = cfr, group = Country_Region, colour = Country_Region)) +
geom_point(size = 0.5) +
# geom_line(aes(y = rm7_cfr)) +
geom_smooth(se = F) +
ylim(c(0, 0.3)) +
scale_colour_brewer(palette = "Spectral") +
theme(legend.position = "None") +
geom_text_repel(aes(label = label),
na.rm = TRUE) +
labs(title = 'Case Fatality Rate in Worst-Hit Countries', subtitle = "What is going on in Russia")
p3
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
t10_subset[, denom_cases := sum(rm7_new_cases), by = date][,
denom_deaths := sum(rm7_new_deaths), by = date]
t10_subset[, newcase_share := rm7_new_cases / denom_cases][,
newdeath_share := rm7_new_deaths /denom_deaths]
p1 = ggplot(t10_subset[date >="2020-02-15"], aes(x = date, y = newcase_share, fill = Country_Region, colour = Country_Region)) +
geom_area(position="fill") +
scale_y_continuous(breaks = seq(0, 1, .1))+ ylim(c(0, 1)) +
scale_fill_brewer(palette = "Spectral") +
scale_colour_brewer(palette = "Spectral") +
ggtitle("New Cases") + theme(legend.position = "None")
p2 = ggplot(t10_subset[date >="2020-02-15"], aes(x = date, y = newdeath_share, fill = Country_Region, colour = Country_Region)) +
geom_area(position="fill") +
scale_y_continuous(breaks = seq(0, 1, .1))+ ylim(c(0, 1)) +
scale_fill_brewer(palette = "Spectral") +
scale_colour_brewer(palette = "Spectral") +
ggtitle("New Deaths")
options(repr.plot.width = 20, repr.plot.height = 16)
(p = (p1 / p2 ) + plot_annotation(title = "Tracking the epidemic's hotspots over time",
subtitle = "each country's share of global 7 day rolling mean in deaths and cases"))
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
ggsave("carpet_plot_xc.png", p, width = 20, height = 16)
owid = fread("https://covid.ourworldindata.org/data/owid-covid-data.csv")
owid[, d := ymd(date)]
owid %>% glimpse
Rows: 135,370 Columns: 68 $ iso_code <chr> "AFG", "AFG", "AFG", "AFG",… $ continent <chr> "Asia", "Asia", "Asia", "As… $ location <chr> "Afghanistan", "Afghanistan… $ date <date> 2020-02-24, 2020-02-25, 20… $ total_cases <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, … $ new_cases <dbl> 5, 0, 0, 0, 0, 0, 0, 0, 0, … $ new_cases_smoothed <dbl> NA, NA, NA, NA, NA, 0.714, … $ total_deaths <dbl> NA, NA, NA, NA, NA, NA, NA,… $ new_deaths <dbl> NA, NA, NA, NA, NA, NA, NA,… $ new_deaths_smoothed <dbl> NA, NA, NA, NA, NA, 0, 0, 0… $ total_cases_per_million <dbl> 0.126, 0.126, 0.126, 0.126,… $ new_cases_per_million <dbl> 0.126, 0.000, 0.000, 0.000,… $ new_cases_smoothed_per_million <dbl> NA, NA, NA, NA, NA, 0.018, … $ total_deaths_per_million <dbl> NA, NA, NA, NA, NA, NA, NA,… $ new_deaths_per_million <dbl> NA, NA, NA, NA, NA, NA, NA,… $ new_deaths_smoothed_per_million <dbl> NA, NA, NA, NA, NA, 0, 0, 0… $ reproduction_rate <dbl> NA, NA, NA, NA, NA, NA, NA,… $ icu_patients <dbl> NA, NA, NA, NA, NA, NA, NA,… $ icu_patients_per_million <dbl> NA, NA, NA, NA, NA, NA, NA,… $ hosp_patients <dbl> NA, NA, NA, NA, NA, NA, NA,… $ hosp_patients_per_million <dbl> NA, NA, NA, NA, NA, NA, NA,… $ weekly_icu_admissions <dbl> NA, NA, NA, NA, NA, NA, NA,… $ weekly_icu_admissions_per_million <dbl> NA, NA, NA, NA, NA, NA, NA,… $ weekly_hosp_admissions <dbl> NA, NA, NA, NA, NA, NA, NA,… $ weekly_hosp_admissions_per_million <dbl> NA, NA, NA, NA, NA, NA, NA,… $ new_tests <dbl> NA, NA, NA, NA, NA, NA, NA,… $ total_tests <dbl> NA, NA, NA, NA, NA, NA, NA,… $ total_tests_per_thousand <dbl> NA, NA, NA, NA, NA, NA, NA,… $ new_tests_per_thousand <dbl> NA, NA, NA, NA, NA, NA, NA,… $ new_tests_smoothed <dbl> NA, NA, NA, NA, NA, NA, NA,… $ new_tests_smoothed_per_thousand <dbl> NA, NA, NA, NA, NA, NA, NA,… $ positive_rate <dbl> NA, NA, NA, NA, NA, NA, NA,… $ tests_per_case <dbl> NA, NA, NA, NA, NA, NA, NA,… $ tests_units <chr> "", "", "", "", "", "", "",… $ total_vaccinations <dbl> NA, NA, NA, NA, NA, NA, NA,… $ people_vaccinated <dbl> NA, NA, NA, NA, NA, NA, NA,… $ people_fully_vaccinated <dbl> NA, NA, NA, NA, NA, NA, NA,… $ total_boosters <dbl> NA, NA, NA, NA, NA, NA, NA,… $ new_vaccinations <dbl> NA, NA, NA, NA, NA, NA, NA,… $ new_vaccinations_smoothed <dbl> NA, NA, NA, NA, NA, NA, NA,… $ total_vaccinations_per_hundred <dbl> NA, NA, NA, NA, NA, NA, NA,… $ people_vaccinated_per_hundred <dbl> NA, NA, NA, NA, NA, NA, NA,… $ people_fully_vaccinated_per_hundred <dbl> NA, NA, NA, NA, NA, NA, NA,… $ total_boosters_per_hundred <dbl> NA, NA, NA, NA, NA, NA, NA,… $ new_vaccinations_smoothed_per_million <dbl> NA, NA, NA, NA, NA, NA, NA,… $ new_people_vaccinated_smoothed <dbl> NA, NA, NA, NA, NA, NA, NA,… $ new_people_vaccinated_smoothed_per_hundred <dbl> NA, NA, NA, NA, NA, NA, NA,… $ stringency_index <dbl> 8.33, 8.33, 8.33, 8.33, 8.3… $ population <dbl> 39835428, 39835428, 3983542… $ population_density <dbl> 54.42, 54.42, 54.42, 54.42,… $ median_age <dbl> 18.6, 18.6, 18.6, 18.6, 18.… $ aged_65_older <dbl> 2.581, 2.581, 2.581, 2.581,… $ aged_70_older <dbl> 1.337, 1.337, 1.337, 1.337,… $ gdp_per_capita <dbl> 1804, 1804, 1804, 1804, 180… $ extreme_poverty <dbl> NA, NA, NA, NA, NA, NA, NA,… $ cardiovasc_death_rate <dbl> 597, 597, 597, 597, 597, 59… $ diabetes_prevalence <dbl> 9.59, 9.59, 9.59, 9.59, 9.5… $ female_smokers <dbl> NA, NA, NA, NA, NA, NA, NA,… $ male_smokers <dbl> NA, NA, NA, NA, NA, NA, NA,… $ handwashing_facilities <dbl> 37.75, 37.75, 37.75, 37.75,… $ hospital_beds_per_thousand <dbl> 0.5, 0.5, 0.5, 0.5, 0.5, 0.… $ life_expectancy <dbl> 64.83, 64.83, 64.83, 64.83,… $ human_development_index <dbl> 0.511, 0.511, 0.511, 0.511,… $ excess_mortality_cumulative_absolute <dbl> NA, NA, NA, NA, NA, NA, NA,… $ excess_mortality_cumulative <dbl> NA, NA, NA, NA, NA, NA, NA,… $ excess_mortality <dbl> NA, NA, NA, NA, NA, NA, NA,… $ excess_mortality_cumulative_per_million <dbl> NA, NA, NA, NA, NA, NA, NA,… $ d <date> 2020-02-24, 2020-02-25, 20…
fwrite(owid, "data/owid_covid.csv.gz")
all_countries_xs = owid[location!="World"][
order(-d)][
, .SD[1], by = .(iso_code)]
(t10 = all_countries_xs[order(-total_cases)][1:10])
# [order(-total_cases)][1:10])
iso_code | continent | location | date | total_cases | new_cases | new_cases_smoothed | total_deaths | new_deaths | new_deaths_smoothed | ⋯ | male_smokers | handwashing_facilities | hospital_beds_per_thousand | life_expectancy | human_development_index | excess_mortality_cumulative_absolute | excess_mortality_cumulative | excess_mortality | excess_mortality_cumulative_per_million | d |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <chr> | <chr> | <date> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <date> |
OWID_HIC | High income | 2021-11-23 | 112261196 | 390354 | 388574 | 1811688 | 3345 | 2736.4 | ⋯ | NA | NA | NA | NA | NA | NA | NA | NA | NA | 2021-11-23 | |
OWID_UMC | Upper middle income | 2021-11-23 | 81752399 | 137862 | 121382 | 2173124 | 2842 | 2890.6 | ⋯ | NA | NA | NA | NA | NA | NA | NA | NA | NA | 2021-11-23 | |
OWID_ASI | Asia | 2021-11-23 | 81502829 | 119415 | 90992 | 1208925 | 1533 | 1467.3 | ⋯ | NA | NA | NA | NA | NA | NA | NA | NA | NA | 2021-11-23 | |
OWID_EUR | Europe | 2021-11-23 | 72118721 | 345330 | 343602 | 1392865 | 4361 | 3907.1 | ⋯ | NA | NA | NA | NA | NA | NA | NA | NA | NA | 2021-11-23 | |
OWID_LMC | Lower middle income | 2021-11-23 | 63404088 | 77295 | 54872 | 1144154 | 1868 | 1665.6 | ⋯ | NA | NA | NA | NA | NA | NA | NA | NA | NA | 2021-11-23 | |
OWID_NAM | North America | 2021-11-23 | 57457648 | 98592 | 104178 | 1160501 | 1509 | 1411.7 | ⋯ | NA | NA | NA | NA | NA | NA | NA | NA | NA | 2021-11-23 | |
USA | North America | United States | 2021-11-23 | 47980780 | 92609 | 95778 | 773770 | 1426 | 1137.0 | ⋯ | 24.6 | NA | 2.77 | 78.86 | 0.926 | NA | NA | NA | NA | 2021-11-23 |
OWID_EUN | European Union | 2021-11-23 | 44884187 | 240338 | 233786 | 833289 | 2053 | 1706.9 | ⋯ | NA | NA | NA | NA | NA | NA | NA | NA | NA | 2021-11-23 | |
OWID_SAM | South America | 2021-11-23 | 38804721 | 19241 | 19024 | 1178684 | 439 | 378.4 | ⋯ | NA | NA | NA | NA | NA | NA | NA | NA | NA | 2021-11-23 | |
IND | Asia | India | 2021-11-23 | 34535763 | 9283 | 9881 | 466584 | 437 | 347.3 | ⋯ | 20.6 | 59.55 | 0.53 | 69.66 | 0.645 | NA | NA | NA | NA | 2021-11-23 |
owid_t10_subset = owid[location %in% t10$location]
vars = c('new_cases', 'new_deaths', 'new_tests')
owid_t10_subset[, paste0('denom_', vars):= lapply(.SD, sum), by = date, .SDcols = vars]
owid_t10_subset[, `:=`(
newcase_share = new_cases / denom_new_cases,
newdeath_share = new_deaths / denom_new_deaths,
newtest_share = new_tests / denom_new_tests
)][ newcase_share < 0, newcase_share := 0]
p1 = ggplot(owid_t10_subset[d >="2020-02-15"], aes(x = d, y = newcase_share, fill = location, colour = location)) +
geom_area(position="fill") +
scale_y_continuous(breaks = seq(0, 1, .1))+
scale_fill_brewer(palette = "Spectral") +
scale_colour_brewer(palette = "Spectral") +
ggtitle("New Cases")+ theme(legend.pos = "None")
p2 = ggplot(owid_t10_subset[d >="2020-02-15"], aes(x = d, y = newdeath_share, fill = location, colour = location)) +
geom_area(position="fill") +
scale_y_continuous(breaks = seq(0, 1, .1))+
scale_fill_brewer(palette = "Spectral") +
scale_colour_brewer(palette = "Spectral") + ylim(c(0, 1)) +
ggtitle("New Deaths")
options(repr.plot.width = 20, repr.plot.height = 12)
(p1 / p2 ) + plot_annotation(title = "Shares of Cases and Deaths over time")
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
country_plot = function(country){
npl = owid[location == country][order(-date)]
p1 = npl[d>="2020-03-15"] %>%
ggplot(aes(x = d, y = total_cases_per_million)) + scale_y_log10() +
geom_point(size = 0.5) + geom_smooth(se = F) +
ylim(c(0, NA)) +
ggtitle('Total Cases per Million')
p2 = npl[d>="2020-03-15"] %>%
ggplot(aes(x = d, y = new_cases_per_million)) +
geom_point(size = 0.5) + geom_smooth(se = F) +
ylim(c(0, NA)) +
ggtitle('New Cases per Million')
p3 = npl[d>="2020-03-15"] %>%
ggplot(aes(x = d, y = total_deaths_per_million)) +
ylim(c(0, NA)) +
geom_point(size = 0.5) + geom_smooth(se = F) +
ggtitle('Total Deaths per Million')
p4 = npl[d>="2020-03-15"] %>%
ggplot(aes(x = d, y = new_deaths_per_million)) +
geom_point(size = 0.5) + geom_smooth(se = F) +
ylim(c(0, NA)) +
ggtitle('New Deaths per Million')
p5 = npl[d>="2020-03-15"] %>%
ggplot(aes(x = d, y = total_tests_per_thousand)) + scale_y_log10() +
geom_point(size = 0.5) + geom_smooth(se = F) +
ylim(c(0, NA)) +
ggtitle('Total Tests per thousand')
p6 = npl[d>="2020-03-15"] %>%
ggplot(aes(x = d, y = new_tests_per_thousand)) +
geom_point(size = 0.5) + geom_smooth(se = F) +
ylim(c(0, NA)) +
ggtitle('New Tests per thousand')
p7 = npl[d>="2020-12-01"] %>%
ggplot(aes(x = d, y = total_vaccinations_per_hundred)) + scale_y_log10() +
geom_point(size = 0.5) + geom_smooth(se = F) +
ylim(c(0, NA)) +
ggtitle('Total Vaccines per 100')
p8 = npl[d>="2020-12-01"] %>%
ggplot(aes(x = d, y = people_vaccinated_per_hundred)) +
geom_point(size = 0.5) + geom_smooth(se = F) +
ylim(c(0, NA)) +
ggtitle('New Vaccines per 100')
suppressMessages(suppressWarnings(print(
(p5 | p6) /(p7 | p8) / (p1 | p2) / (p3 | p4) + plot_annotation(title = paste0("Covid Profile : ", country))
)))
}
sa_data = owid[location %in% c("Nepal", "India", "Bangladesh", "Sri Lanka", "Pakistan")][, .(date, location, total_vaccinations_per_hundred)]
sa_data %>% head
date | location | total_vaccinations_per_hundred |
---|---|---|
<date> | <chr> | <dbl> |
2020-03-03 | Bangladesh | NA |
2020-03-04 | Bangladesh | NA |
2020-03-05 | Bangladesh | NA |
2020-03-06 | Bangladesh | NA |
2020-03-07 | Bangladesh | NA |
2020-03-08 | Bangladesh | NA |
options(repr.plot.width = 10, repr.plot.height=10)
suppressMessages(suppressWarnings(print(
sa_data[date >= "2021-01-01"] %>%
ggplot(aes(x = date, y = total_vaccinations_per_hundred, colour = as.factor(location))) +
geom_point() + geom_smooth(alpha = 0.5, se = F) +
labs(title = "total vaccinations per 100 in South Asia", colour = "")
)))
options(repr.plot.width = 20, repr.plot.height=20)
country_plot("Nepal")
country_plot("India")
country_plot("Bangladesh")
country_plot("Pakistan")
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
country_plot("Ghana")
country_plot("Nigeria")
country_plot("Kenya")
country_plot("Uganda")
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
country_plot("Russia")
country_plot("Poland")
country_plot("Belarus")
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
country_plot("United States")
country_plot("Canada")
country_plot("Mexico")
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
country_plot("United Kingdom")
country_plot("France")
country_plot("Germany")
country_plot("Spain")
country_plot("Italy")
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale. Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
exmort_nyt = fread("https://raw.githubusercontent.com/nytimes/covid-19-data/master/excess-deaths/deaths.csv")
exmort_nyt[, `:=`(
start_date = ymd(start_date),
end_date = ymd(start_date),
year = as.numeric(year)
)
]
exmort_nyt %>% glimpse
Rows: 7,258 Columns: 12 $ country <chr> "Austria", "Austria", "Austria", "Austria", "Austria",… $ placename <chr> "", "", "", "", "", "", "", "", "", "", "", "", "", ""… $ frequency <chr> "weekly", "weekly", "weekly", "weekly", "weekly", "wee… $ start_date <date> 2020-01-06, 2020-01-13, 2020-01-20, 2020-01-27, 2020-… $ end_date <date> 2020-01-06, 2020-01-13, 2020-01-20, 2020-01-27, 2020-… $ year <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, … $ month <int> 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, … $ week <int> 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17… $ deaths <int> 1702, 1797, 1779, 1947, 1681, 1721, 1718, 1768, 1744, … $ expected_deaths <int> 1806, 1819, 1831, 1837, 1837, 1829, 1812, 1786, 1753, … $ excess_deaths <int> -104, -22, -52, 110, -156, -108, -94, -18, -9, 1, 158,… $ baseline <chr> "2015-2019 historical data", "2015-2019 historical dat…
exmort_nyt[, .N, by = .(country, placename, frequency)]
country | placename | frequency | N |
---|---|---|---|
<chr> | <chr> | <chr> | <int> |
Austria | weekly | 304 | |
Belgium | weekly | 300 | |
Bolivia | monthly | 59 | |
Brazil | weekly | 97 | |
Chile | weekly | 305 | |
Colombia | weekly | 298 | |
Czech Republic | weekly | 302 | |
Denmark | weekly | 304 | |
Ecuador | monthly | 40 | |
Finland | weekly | 203 | |
France | weekly | 551 | |
Germany | weekly | 252 | |
Hungary | weekly | 302 | |
Ireland | weekly | 292 | |
Israel | weekly | 303 | |
Turkey | Istanbul | weekly | 153 |
Italy | monthly | 22 | |
Indonesia | Jakarta | monthly | 132 |
Mexico | weekly | 299 | |
Russia | Moscow | monthly | 71 |
India | Mumbai | monthly | 19 |
Netherlands | weekly | 254 | |
Norway | weekly | 305 | |
Peru | monthly | 60 | |
Poland | monthly | 59 | |
Portugal | weekly | 304 | |
South Africa | weekly | 102 | |
South Korea | monthly | 70 | |
Spain | weekly | 141 | |
Sweden | weekly | 305 | |
Switzerland | weekly | 255 | |
Thailand | monthly | 64 | |
Japan | monthly | 70 | |
United Kingdom | weekly | 560 | |
United States | weekly | 101 |
wksamp = exmort_nyt[frequency == "weekly" & year == 2020]
eur = exmort_nyt[country %in% c(
'Austria', 'Belgium', 'Denmark', 'Finland', 'France', 'Germany', 'Italy', 'Netherlands',
'Norway', 'Portugal', 'Spain', 'Sweden', 'United Kingdom', "Switzerland"
) & year >= 2020 & placename == ""]
eur[, .SD[1], by = country]
country | placename | frequency | start_date | end_date | year | month | week | deaths | expected_deaths | excess_deaths | baseline |
---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <chr> | <chr> | <date> | <date> | <dbl> | <int> | <int> | <int> | <int> | <int> | <chr> |
Austria | weekly | 2020-01-06 | 2020-01-06 | 2020 | 1 | 2 | 1702 | 1806 | -104 | 2015-2019 historical data | |
Belgium | weekly | 2020-01-06 | 2020-01-06 | 2020 | 1 | 2 | 2381 | 2386 | -5 | 2016-2019 historical data | |
Denmark | weekly | 2020-01-06 | 2020-01-06 | 2020 | 1 | 2 | 1149 | 1120 | 29 | 2015-2019 historical data | |
Finland | weekly | 2020-01-06 | 2020-01-06 | 2020 | 1 | 2 | 1068 | 1155 | -87 | 2017-2019 historical data | |
France | weekly | 2020-01-06 | 2020-01-06 | 2020 | 1 | 2 | 13404 | 13675 | -271 | 2010-2019 historical data | |
Germany | weekly | 2020-01-06 | 2020-01-06 | 2020 | 1 | 2 | 19430 | 19635 | -205 | 2016-2019 historical data | |
Italy | monthly | NA | NA | 2020 | 1 | NA | 61101 | 64713 | -3612 | 2015-2019 historical data | |
Netherlands | weekly | 2020-01-06 | 2020-01-06 | 2020 | 1 | 2 | 3364 | 3329 | 35 | 2016-2019 historical data | |
Norway | weekly | 2020-01-06 | 2020-01-06 | 2020 | 1 | 2 | 951 | 910 | 41 | 2015-2019 historical data | |
Portugal | weekly | 2020-01-06 | 2020-01-06 | 2020 | 1 | 2 | 2654 | 2900 | -246 | 2015-2019 historical data | |
Spain | weekly | 2020-01-06 | 2020-01-06 | 2020 | 1 | 2 | 8936 | 9210 | -274 | 2018-2019 historical data | |
Sweden | weekly | 2020-01-06 | 2020-01-06 | 2020 | 1 | 2 | 1889 | 1884 | 5 | 2015-2019 historical data | |
Switzerland | weekly | 2020-01-06 | 2020-01-06 | 2020 | 1 | 2 | 1353 | 1475 | -122 | 2016-2019 historical data | |
United Kingdom | weekly | 2020-01-04 | 2020-01-04 | 2020 | 1 | 2 | 16020 | 15465 | 555 | 2010-2019 historical data |
eur[is.na(start_date), start_date := ymd(paste0(year, '-', month, "-01"))]
eur[, excess := fifelse(excess_deaths > 0, 1, 0)]
options(repr.plot.width = 25, repr.plot.height=20)
ggplot(eur, aes(x = start_date, y = expected_deaths)) +
geom_point(alpha = 0.6) + geom_line(linetype = 'dotted', alpha = 0.8) +
geom_point(aes(y = deaths), colour = 'red') + geom_line(aes(y = deaths), colour = 'red') +
facet_wrap(~ country, scales = 'free') +
labs(title = "Excess Mortality in Europe", subtitle = "Expected in gray; actual in red")