Prepared by J.Grazzini (Eurostat).
This notebook illustrates the principles of storytelling through literate programming by reproducing some of the figures of the Statistics Explained article on [young people and social inclusion](https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Young_people-_social_inclusion).
library("eurostat")
library("ggplot2")
library("tidyr")
library(repr)
# Set the repr plot width/height options used for the figures below.
options(
  repr.plot.width = 8,
  repr.plot.height = 3
)
# Eurostat table code; the commented-out search below shows the title query
# that can be used to look the code up instead of hard-coding it.
id <- "ilc_li02"
# id <- search_eurostat("At-risk-of-poverty rate by poverty threshold, age and sex")$code[1]

# Download only the slice needed here: age group 16-29, year 2017, unit "PC".
dat <- get_eurostat(
  id,
  time_format = "num",
  filters = list(age = "Y16-29", time = "2017", unit = "PC")
)

# Preview the first rows of the downloaded table.
head(dat)
unit | indic_il | sex | age | geo | time | values |
---|---|---|---|---|---|---|
PC | LI_R_M40 | F | Y16-29 | AT | 2017 | 11.2 |
PC | LI_R_M40 | F | Y16-29 | BE | 2017 | 5.9 |
PC | LI_R_M40 | F | Y16-29 | BG | 2017 | 19.3 |
PC | LI_R_M40 | F | Y16-29 | CH | 2017 | NA |
PC | LI_R_M40 | F | Y16-29 | CY | 2017 | 6.4 |
PC | LI_R_M40 | F | Y16-29 | CZ | 2017 | 3.8 |
# Geo codes retained in the country-level figures (reused by later cells).
ctries <- c(
  "EU28", "DK", "RO", "ES", "EL", "IT", "SE", "BG", "LU", "PT", "NL", "DE",
  "FI", "IE", "BE", "FR", "EE", "AT", "UK", "PL", "HR", "CY", "LV", "HU",
  "SK", "MT", "SI", "CZ", "NO", "CH", "IS", "RS", "MK", "TR"
)

# Bar chart of the LI_R_MD60 indicator for both sexes combined ("T"),
# one bar per selected geo code, sorted by decreasing value.
p <- ggplot(
  data = subset(dat, indic_il == "LI_R_MD60" & sex == "T" & geo %in% ctries),
  mapping = aes(x = reorder(geo, -values), y = values)
) +
  geom_bar(stat = "identity", fill = "orange") +
  labs(
    x = "Share of young people (aged 16-29 years) at risk of poverty, 2017",
    y = "(%)"
  )
print(p)
Warning message: “Removed 8 rows containing missing values (position_stack).”
# Eurostat table code; the commented-out search below shows the title query
# that can be used to look the code up instead of hard-coding it.
id <- "ilc_lvps08"
# id <- search_eurostat("Share of young adults aged 18-34 living with their parents by age and sex", fixed = TRUE)$code[1]

# Download the 2017 values expressed in unit "PC" (all ages, sexes and geos).
dat <- get_eurostat(
  id,
  time_format = "num",
  filters = list(time = "2017", unit = "PC")
)

# Preview the first rows of the downloaded table.
head(dat)
age | sex | unit | geo | time | values |
---|---|---|---|---|---|
Y16-19 | F | PC | AT | 2017 | 96.1 |
Y16-19 | F | PC | BE | 2017 | 95.5 |
Y16-19 | F | PC | BG | 2017 | 87.8 |
Y16-19 | F | PC | CH | 2017 | NA |
Y16-19 | F | PC | CY | 2017 | 98.6 |
Y16-19 | F | PC | CZ | 2017 | 97.2 |
# Side-by-side bars for women ("F") and men ("M") in the 16-29 age group,
# one pair per selected geo code. With two rows per geo, reorder() sorts the
# x axis by its default summary (the mean) of -values.
p <- ggplot(
  data = subset(
    dat,
    geo %in% ctries & sex %in% c("F", "M") & age == "Y16-29"
  ),
  mapping = aes(x = reorder(geo, -values), y = values, fill = sex)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  labs(
    x = "Share of young people (aged 16-29 years) living with their parents, 2017",
    y = "(%)"
  )
print(p)
Warning message: “Removed 16 rows containing missing values (geom_bar).”
# One point per geo code and age group, for both sexes combined ("T"),
# coloured by age group.
#
# The original mapping also contained `fill = sex`. The subset keeps only
# sex == "T", so that aesthetic was constant: it carried no information and
# only added a one-entry legend. It is dropped here; the age groups remain
# distinguished by colour.
p <- ggplot(
  data = subset(
    dat,
    geo %in% ctries & sex == "T" & age %in% c("Y16-29", "Y20-24", "Y25-29")
  ),
  mapping = aes(x = reorder(geo, -values), y = values, colour = age)
) +
  geom_point() +
  xlab("Share of young people (aged 16-29 years) living with their parents, 2017") +
  ylab("(%)")
print(p)
Warning message: “Removed 24 rows containing missing values (geom_point).”
# Eurostat table code; the commented-out search below shows the title query
# that can be used to look the code up instead of hard-coding it.
id <- "ilc_peps01"
# id <- search_eurostat("People at risk of poverty or social exclusion by age and sex")$code[1]

# Download the EU28 series for the 16-29 age group in unit "PC"; no time
# filter is given, so every available year is returned.
dat <- get_eurostat(
  id,
  time_format = "num",
  filters = list(unit = "PC", geo = "EU28", age = "Y16-29")
)

# Preview the first rows of the downloaded table.
head(dat)
unit | age | sex | geo | time | values |
---|---|---|---|---|---|
PC | Y16-29 | F | EU28 | 2007 | NA |
PC | Y16-29 | F | EU28 | 2008 | NA |
PC | Y16-29 | F | EU28 | 2009 | NA |
PC | Y16-29 | F | EU28 | 2010 | 28.0 |
PC | Y16-29 | F | EU28 | 2011 | 29.1 |
PC | Y16-29 | F | EU28 | 2012 | 30.2 |
# Time series of the EU28 rate, one series per sex.
#
# The original mapped sex only to `group`, which does not change how points
# are drawn: all sexes appeared as identical black dots even though the axis
# label announces a breakdown "by sex". Mapping sex to colour makes the
# series distinguishable (and groups them implicitly); geom_line() connects
# the yearly values of each series.
p <- ggplot(data = dat, aes(x = time, y = values, colour = sex)) +
  geom_line() +
  geom_point() +
  xlab("Share of young people (aged 16-29 years) at risk of poverty or social exclusion, EU-28, 2007-2017 - by sex") +
  ylab("(%)")
print(p)
Warning message: “Removed 9 rows containing missing values (geom_point).”
# Eurostat table code; the commented-out search below shows the title query
# that can be used to look the code up instead of hard-coding it.
id <- "ilc_li02"
# id <- search_eurostat("At-risk-of-poverty rate by poverty threshold, age and sex", fixed = TRUE)$code[1]

# Download the EU28 series of the LI_R_MD60 indicator, both sexes combined,
# for three age groups; no time filter is given, so all years are returned.
dat <- get_eurostat(
  id,
  time_format = "num",
  filters = list(
    unit = "PC",
    geo = "EU28",
    indic_il = "LI_R_MD60",
    sex = "T",
    age = c("Y16-29", "Y20-24", "Y25-29")
  )
)

# Preview the first rows of the downloaded table.
head(dat)
unit | indic_il | sex | age | geo | time | values |
---|---|---|---|---|---|---|
PC | LI_R_MD60 | T | Y16-29 | EU28 | 1995 | NA |
PC | LI_R_MD60 | T | Y16-29 | EU28 | 1996 | NA |
PC | LI_R_MD60 | T | Y16-29 | EU28 | 1997 | NA |
PC | LI_R_MD60 | T | Y16-29 | EU28 | 1998 | NA |
PC | LI_R_MD60 | T | Y16-29 | EU28 | 1999 | NA |
PC | LI_R_MD60 | T | Y16-29 | EU28 | 2000 | NA |
# Time series of the EU28 at-risk-of-poverty rate, one series per age group.
#
# Two fixes compared with the original cell:
#  * age was mapped only to `group`, which has no visible effect on points,
#    so the age groups could not be told apart although the label says
#    "by age". Age is now mapped to colour and each series is connected
#    with a line.
#  * the download is not filtered on time and includes years before 2007,
#    whereas the label announces 2007-2017. The data are restricted to that
#    window so the figure matches its caption.
p <- ggplot(
  data = subset(dat, time >= 2007 & time <= 2017),
  mapping = aes(x = time, y = values, colour = age)
) +
  geom_line() +
  geom_point() +
  xlab("Share of young people at risk of poverty, EU-28, 2007-2017 - by age") +
  ylab("(%)")
print(p)
Warning message: “Removed 45 rows containing missing values (geom_point).”
# Eurostat table code; the commented-out search below shows the title query
# that can be used to look the code up instead of hard-coding it.
id <- "yth_incl_060"
# id <- search_eurostat("Young people's at-risk-of-poverty rate by sex, age and living/not living with parents", fixed = TRUE)$code[1]

# Download the 2016 values; only time is filtered, so all other dimensions
# (age, sex, household status, unit, geo) are returned in full.
dat <- get_eurostat(
  id,
  time_format = "num",
  filters = list(time = "2016")
)

# Preview the first rows of the downloaded table.
head(dat)
age | sex | hhstatus | unit | geo | time | values |
---|---|---|---|---|---|---|
Y16-19 | F | P_NPAR | PC_POP | AT | 2016 | 44.8 |
Y16-19 | F | P_NPAR | PC_POP | BE | 2016 | NA |
Y16-19 | F | P_NPAR | PC_POP | BG | 2016 | 74.4 |
Y16-19 | F | P_NPAR | PC_POP | CH | 2016 | NA |
Y16-19 | F | P_NPAR | PC_POP | CY | 2016 | NA |
Y16-19 | F | P_NPAR | PC_POP | CZ | 2016 | NA |
# Side-by-side bars per household status (hhstatus) for the 16-29 age group,
# one group of bars per selected geo code.
#
# The download is filtered on time only, so it contains several values of
# `sex`. The original subset did not restrict sex, which put several rows
# into each (geo, hhstatus) dodge slot and drew them on top of one another.
# Keeping only sex == "T" (both sexes combined, as in the other total-level
# figures of this notebook) leaves one bar per geo and household status.
# NOTE(review): `unit` is not filtered either; confirm the table holds a
# single unit, otherwise add a unit condition here as well.
p <- ggplot(
  data = subset(dat, geo %in% ctries & age == "Y16-29" & sex == "T"),
  mapping = aes(x = reorder(geo, -values), y = values, fill = hhstatus)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  xlab("Share of young people (aged 16-29 years) at risk of poverty, 2016 - by HH status") +
  ylab("(%)")
print(p)
Warning message: “Removed 12 rows containing missing values (geom_bar).”