Prepared by S. Hadj Hassen (EMOS).
The objective of this notebook is to reproduce the figures of the Statistics Explained article on tourism statistics.
If necessary install the following packages and load them afterwards.
# install.packages(c("ggplot2", "plotly", "tidyr", "repr", "dplyr", "devtools", "restatapi", "eurostat",
# "patchwork", "Matching", "ggforce", "h2o", "plyr"))
library(ggplot2)
library(tidyr)
library(dplyr)
# library(eurostat)
library(restatapi)
#library(Matching)
library(stats)
library(base)
library(ggforce)
#library(h2o)
library(plyr)
Attaching package: ‘dplyr’ The following objects are masked from ‘package:stats’: filter, lag The following objects are masked from ‘package:base’: intersect, setdiff, setequal, union restatapi: - config file with the API version 1 loaded from GitHub (the 'current' API version number is 1). - 4 from the 8 cores are used for parallel computing. - 'libcurl' will be used for file download. - the Table of contents (TOC) was not pre-loaded into the deafult cache ('.restatapi_env'). ------------------------------------------------------------------------------ You have loaded plyr after dplyr - this is likely to cause problems. If you need functions from both plyr and dplyr, please load plyr first, then dplyr: library(plyr); library(dplyr) ------------------------------------------------------------------------------ Attaching package: ‘plyr’ The following objects are masked from ‘package:dplyr’: arrange, count, desc, failwith, id, mutate, rename, summarise, summarize
data
countr
data <- as_tibble(get_eurostat_data(id="tour_occ_ninat"))
data$year <- as.integer(substr(data$time,1,4))
countr <- c("ES","IT","FR","EL","AT","DE","HR","PT","NL","CZ","BE","BG","PL","IE","CY","SE","HU","DK","SI",
"MT","FI","SK","RO","EE","LV","LT","LU","UK","CH","NO","IS","LI","TR","ME","RS","MK","XK")
For Ireland, Slovenia, United Kingdom, Switzerland, Norway, Iceland, Montenegro, Serbia and Kosovo we need 2018 monthly data.
data_month
countr_fig0
data_month <- as_tibble(get_eurostat_data(id="tour_occ_nim"))
data_month$year <- as.integer(substr(data_month$time,1,4))
data_month$month <- as.integer(substr(data_month$time,6,7))
countr_fig0 <- c("IE","SI","UK","CH","NO","IS","ME","RS","XK")
data_mod
data_mod <- subset(data_month,
data_month$geo %in% countr_fig0 & #include just the countries of the vector above
data_month$year==2018 & #only values for 2018
data_month$c_resid=="FOR" & #just non-residents
data_month$nace_r2=="I551-I553"& #the classification we need
data_month$unit=="NR") #we need the unit "number"
data_mod <- aggregate(data_mod$values, by = list(geo = data_mod$geo), FUN = sum)
names(data_mod)[2] <- "values"
Build needed subset for the other countries (same like above with the dataset data
): data_mod2
data_mod2 <- subset(data,
data$geo %in% countr &
data$year==2018 &
data$c_resid=="FOR" &
data$nace_r2=="I551-I553"&
data$unit=="NR")
data_mod2 <- data_mod2[,c(4,6)]
For Turkey (countr_fig0_1
) we need data from 2016. Build subset data_mod3
countr_fig0_1 <- c("TR")
data_mod3 <- subset(data,
data$geo %in% countr_fig0_1 &
data$year==2016 &
data$c_resid=="FOR" &
data$nace_r2=="I551-I553"&
data$unit=="NR")
data_mod3 <- data_mod3[,c(4,6)]
Match data_mod1
and data_mod2
and data_mod3
data_mod4
Bring the country names in the order the graphic shows: you can do this due to modify the levels of the variable "geo"
.
data_mod4 <- rbind(data_mod,data_mod2,data_mod3)
data_mod4$geo <- factor(data_mod4$geo,
levels = c("ES","IT","FR","EL","AT","DE","HR","PT","NL","CZ","BE","BG","PL","IE","CY","SE","HU",
"DK","SI", "MT","FI","SK","RO","EE","LV","LT","LU","UK","CH","NO","IS","LI","TR","ME",
"RS","MK","XK"))
head(data_mod4)
geo | values | |
---|---|---|
<fct> | <dbl> | |
1 | CH | 26720046 |
2 | IE | 16759455 |
3 | IS | 7446807 |
4 | ME | 3763857 |
5 | NO | 10138183 |
6 | RS | 3656787 |
Finally use the modified dataset data_mod4
and build the graph by using ggplot
.
options(scipen=999) #prevent scientific notation
label_axes1 <- seq(0, 300, by = 50) #we need this later to label the axes
ggplot(data_mod4, #needed dataset
aes(x=geo,y=values)) + #the axes you want
geom_bar(stat = "identity", position="dodge", width=0.5, fill="steelblue3") + #for creating a bar chart
theme_classic() +
scale_y_continuous(limits = c(0, 310000000), breaks = seq(0, 300000000, by = 50000000), #limit the axes
labels=label_axes1) + #label the axes
ggtitle("Tourism destinations - nights spent at tourist accomodation establishments, 2018") +
ylab("(million nights spent in the country by non-residents") + #name the axes
xlab(" ")
countr_0 <- c("BE","BG","CZ","DK","DE","EE","IE","EL","ES","FR","HR","IT","CY","LV","LT","LU","HU","MT","NL",
"AT","PL","PT","RO","SI","SK","FI","SE","UK","IS","LI","NO","CH","ME","MK","RS","TR","XK")
column 1: number of establishments (units)
data_1
countr_tab1
data_1 <- as_tibble(get_eurostat_data(id="tour_cap_nat"))
data_1$year <- as.integer(substr(data_1$time,1,4))
countr_tab1 <- c("BE","BG","CZ","DK","DE","EE","EL","ES","FR","HR","IT","CY","LV","LT","HU","MT","NL",
"AT","PL","PT","RO","SK","FI","SE","IS","LI","NO","CH","ME","MK","RS")
Build the subset we need for the first column of the table and choose column 4 and 6 again: data_tab1
data_tab1 <- subset(data_1,
data_1$geo %in% countr_tab1 &
data_1$year==2018 &
data_1$nace_r2=="I551-I553"&
data_1$unit=="NR"&
data_1$accommod=="ESTBL")
data_tab1 <- data_tab1[,c(4,6)]
For Ireland and United Kingdom (countr_tab1_1
) we need data from 2016. Build the subset: data_tab1_1
countr_tab1_1 <- c("IE","UK")
data_tab1_1 <- subset(data_1,
data_1$geo %in% countr_tab1_1 &
data_1$year==2016 &
data_1$nace_r2=="I551-I553"&
data_1$unit=="NR"&
data_1$accommod=="ESTBL")
data_tab1_1 <- data_tab1_1[,c(4,6)]
For Luxembourg, Slovenia and Kosovo (countr_tab1_2
) we need data from 2017. Build the subset: data_tab1_2
countr_tab1_2 <- c("LU","SI","XK")
data_tab1_2 <- subset(data_1,
data_1$geo %in% countr_tab1_2 &
data_1$year==2017 &
data_1$nace_r2=="I551-I553"&
data_1$unit=="NR"&
data_1$accommod=="ESTBL")
data_tab1_2 <- data_tab1_2[,c(4,6)]
For Turkey we have NA. Assign to data_tab1_3
data_tab1_3 <- data.frame( geo = "TR", values= NA)
Match the three calculated datasets to data_tab_col1
and bring the country names in the order the graphic shows
data_tab_col1 <- rbind(data_tab1,data_tab1_1,data_tab1_2,data_tab1_3)
data_tab_col1 <- data_tab_col1[order(factor(data_tab_col1$geo, levels= countr_0)),]
head(data_tab_col1)
geo | values |
---|---|
<fct> | <dbl> |
BE | 9211 |
BG | 3458 |
CZ | 9426 |
DK | 1167 |
DE | 50020 |
EE | 1535 |
column 2: number of bed places (thousands)
countr_tab2
data_tab2
countr_tab2 <- c("BE","BG","CZ","DK","DE","EE","EL","ES","FR","HR","IT","CY","LV","LT","HU","MT","NL",
"AT","PL","PT","RO","SK","FI","SE","IS","LI","NO","CH","ME","MK","RS","TR")
data_tab2 <- subset(data_1,
data_1$geo %in% countr_tab2 &
data_1$year==2018 &
data_1$nace_r2=="I551-I553"&
data_1$unit=="NR"&
data_1$accommod=="BEDPL")
data_tab2 <- data_tab2[,c(4,6)]
For Ireland and United Kingdom (countr_tab2_1
) we need data from 2016. Build the subset: data_tab2_1
countr_tab2_1 <- c("IE","UK")
data_tab2_1 <- subset(data_1,
data_1$geo %in% countr_tab2_1 &
data_1$year==2016 &
data_1$nace_r2=="I551-I553"&
data_1$unit=="NR"&
data_1$accommod=="BEDPL")
data_tab2_1 <- data_tab2_1[,c(4,6)]
For Luxembourg, Slovenia and Kosovo (countr_tab2_2
) we need data from 2017. Build the subset: data_tab2_2
countr_tab2_2 <- c("LU","SI","XK")
data_tab2_2 <- subset(data_1,
data_1$geo %in% countr_tab2_2 &
data_1$year==2017 &
data_1$nace_r2=="I551-I553"&
data_1$unit=="NR"&
data_1$accommod=="BEDPL")
data_tab2_2 <- data_tab2_2[,c(4,6)]
For Turkey we have NA. Assign to data_tab2_3
data_tab2_3 <- data.frame( geo = "TR", values= NA)
Match the three calculated datasets to data_tab_col2
and bring the country names in the order the graph shows
data_tab_col2 <- rbind(data_tab2,data_tab2_1,data_tab2_2,data_tab2_3)
data_tab_col2 <- data_tab_col2[order(factor(data_tab_col2$geo, levels= countr_0)),]
head(data_tab_col2)
geo | values |
---|---|
<fct> | <dbl> |
BE | 389826 |
BG | 335597 |
CZ | 741235 |
DK | 426075 |
DE | 3473630 |
EE | 61193 |
column 3: nights spent by residents and non-residents (millions)
countr_tab3
data
) for a graph before. Build the subset: data_tab3
countr_tab3 <- c("BE","BG","CZ","DK","DE","EE","EL","ES","FR","HR","IT","CY","LV","LT","LU","HU","MT","NL",
"AT","PL","PT","RO","SK","FI","SE","LI","MK","TR")
data_tab3 <- subset(data,
data$geo %in% countr_tab3 &
data$year==2018 &
data$c_resid=="TOTAL" &
data$nace_r2=="I551-I553"&
data$unit=="NR")
data_tab3 <- data_tab3[,c(4,6)]
For Ireland, Slovenia, United Kingdom, Switzerland, Norway, Iceland, Montenegro, Serbia and Kosovo (countr_tab3_1
) we need 2018 monthly data
data_month
) for a graph before. Build the subset: data_tab3_1
countr_tab3_1 <- c("IE","SI","UK","CH","NO","IS","ME","RS","XK")
data_tab3_1 <- subset(data_month,
data_month$geo %in% countr_tab3_1 &
data_month$year==2018 &
data_month$c_resid=="TOTAL" &
data_month$nace_r2=="I551-I553"&
data_month$unit=="NR")
data_tab3_1 <- aggregate(data_tab3_1$values, by = list(geo = data_tab3_1$geo), FUN = sum)
names(data_tab3_1)[2] <- "values"
For Turkey (countr_tab3_2
) we need data from 2016. Build the subset: data_tab3_2
countr_tab3_2 <- c("TR")
data_tab3_2 <- subset(data,
data$geo %in% countr_tab3_2 &
data$year==2016 &
data$c_resid=="TOTAL" &
data$nace_r2=="I551-I553"&
data$unit=="NR")
data_tab3_2 <- data_tab3_2[,c(4,6)]
Match the three calculated datasets to data_tab_col3
and bring the country names in the order the graphic shows
data_tab_col3 <- rbind(data_tab3,data_tab3_1,data_tab3_2)
data_tab_col3 <- data_tab_col3[order(factor(data_tab_col3$geo, levels= countr_0)),]
head(data_tab_col3)
geo | values |
---|---|
<fct> | <dbl> |
BE | 41320284 |
BG | 26845013 |
CZ | 55513922 |
DK | 33280395 |
DE | 419556284 |
EE | 6630315 |
countr_names
table
countr_names <- c("Belgium","Bulgaria","Czechia","Denmark","Germany","Estonia", "Ireland", "Greece", "Spain", "France",
"Croatia", "Italy", "Cyprus", "Latvia", "Lithuania", "Luxembourg", "Hungary", "Malta", "Netherlands",
"Austria", "Poland", "Portugal", "Romania", "Slovenia", "Slovakia", "Finland", "Sweden",
"United Kingdom", "Iceland", "Liechtenstein", "Norway", "Switzerland", "Montenegro", "North Macedonia",
"Serbia", "Turkey", "Kosovo")
table <- cbind(countr_names, data_tab_col1[,2], data_tab_col2[,2], data_tab_col3[,2])
head(table)
countr_names | values | values | values | |
---|---|---|---|---|
<fct> | <dbl> | <dbl> | <dbl> | |
1 | Belgium | 9211 | 389826 | 41320284 |
2 | Bulgaria | 3458 | 335597 | 26845013 |
3 | Czechia | 9426 | 741235 | 55513922 |
4 | Denmark | 1167 | 426075 | 33280395 |
5 | Germany | 50020 | 3473630 | 419556284 |
6 | Estonia | 1535 | 61193 | 6630315 |
Calculate the EU-27 value by summing up
EU_27
EU_27 <- c("AT","BE", "BG","HR","CY", "CZ","DK", "DE", "EE","FI","FR","EL","HU","IE",
"IT","LV","LT","LU","MT","NL","PL","PT","RO","SK","SI","ES","SE")
table
from above (the three columns) with shortcuts instead of full names to exclude the not-EU countries: tab_prep
EU_27
vectorcountr_0
vector, which we create in the first stepcountr_0
are within EU_27: EU_27_prep
table
tab_prep <- cbind(countr_0,data_tab_col1[,2], data_tab_col2[,2], data_tab_col3[,2])
EU_27_prep <- tab_prep$countr_0 %in% EU_27
tab_EU <- c("EU-27",sapply(table[EU_27_prep,2:4],sum))
table
from above: table_final
table_final <- rbind(tab_EU,table)
names(table_final) <- c("", "Number of establishments (units)", "Number of bed places (thousands)",
"Nights spent by residents and non-residents (millions)")
table_final[,1] <- as.character(table_final[,1])
table_final[1,1] <- "EU-27"
table_final[,1] <- factor(table_final[,1])
table_final[,3] <- round(as.numeric(table_final[,3])/ 1000,0)
table_final[,4] <- round(as.numeric(table_final[,4])/ 1000000,1)
as.matrix(table_final)
Warning message in `[<-.factor`(`*tmp*`, ri, value = "EU-27"): “invalid factor level, NA generated”
Number of establishments (units) | Number of bed places (thousands) | Nights spent by residents and non-residents (millions) | |
---|---|---|---|
EU-27 | 600157 | 28294 | 2791.2 |
Belgium | 9211 | 390 | 41.3 |
Bulgaria | 3458 | 336 | 26.8 |
Czechia | 9426 | 741 | 55.5 |
Denmark | 1167 | 426 | 33.3 |
Germany | 50020 | 3474 | 419.6 |
Estonia | 1535 | 61 | 6.6 |
Ireland | 3145 | 200 | 33.4 |
Greece | 38180 | 1340 | 123.1 |
Spain | 51418 | 3600 | 466.9 |
France | 29652 | 5112 | 442.8 |
Croatia | 108212 | 1116 | 89.6 |
Italy | 216141 | 5113 | 428.8 |
Cyprus | 802 | 87 | 17.2 |
Latvia | 1145 | 54 | 5.4 |
Lithuania | 3616 | 90 | 8.1 |
Luxembourg | 425 | 64 | 2.9 |
Hungary | 4587 | 419 | 32.8 |
Malta | 211 | 45 | 10.1 |
Netherlands | 9145 | 1398 | 116.1 |
Austria | 21494 | 1046 | 125.2 |
Poland | 11076 | 799 | 88.9 |
Portugal | 5964 | 649 | 74.4 |
Romania | 7720 | 349 | 28.5 |
Slovenia | 3699 | 117 | 15.7 |
Slovakia | 3087 | 191 | 15.2 |
Finland | 1372 | 260 | 22.2 |
Sweden | 4249 | 819 | 60.9 |
United Kingdom | 84580 | 3944 | 354.4 |
Iceland | 1120 | 47 | 8.6 |
Liechtenstein | 96 | 2 | 0.2 |
Norway | 2351 | 588 | 33.8 |
Switzerland | 39057 | 665 | 55.3 |
Montenegro | 382 | 43 | 4.2 |
North Macedonia | 494 | 48 | 2.2 |
Serbia | 3651 | 109 | 9.3 |
Turkey | NA | NA | 112.5 |
Kosovo | 217 | 8 | 0.3 |
EU_27 <- c("AT","BE", "BG","HR","CY", "CZ","DK", "DE", "EE","FI","FR","EL","HU","IE",
"IT","LV","LT","LU","MT","NL","PL","PT","RO","SK","SI","ES","SE")
Build a loop for getting the data for the three lines
night
data_fig1_final
data
, we see that there are some years without datapoints, therefore we can omit themdata_fig1
in each intermediate step to the empty data framenight <- c("FOR","NAT","TOTAL")
data_fig1_final <- list(year = c(2018,2017,2016,2015,2014,2013,2012,2011,2010,2009,2008,2007,2006,2005),
FOR = matrix(ncol=2, nrow=14),
NAT = matrix(ncol=2, nrow=14),
TOTAL = matrix(ncol=2, nrow=14))
# build the loop
for (i in 1:3) {
data_fig1 <- subset(data,
data$geo %in% EU_27 &
data$year %in% c(2005,2006, 2007, 2008,2009,2010,2011,2012, 2013,2014,2015,2016,2017,2018) &
data$c_resid== night[i] & #here we have the essential difference between the columns we create
data$unit=="NR" &
data$nace_r2=="I551-I553")
data_fig1 <- aggregate(data_fig1$values, by = list(year = data_fig1$year), FUN = sum)
# print(data_fig1) #if you want to see the intermediate steps
data_fig1_final[[i+1]] <- data_fig1
}
data_fig1_final <- as.data.frame(data_fig1_final)
data_fig1_final <- data_fig1_final[,c(2,3,5,7)] #attention! the first year-column is wrongly in an decreasing order!
data_fig1_final <- data_fig1_final[order(data_fig1_final[,1]),]
data_fig1_final
FOR.year | FOR.x | NAT.x | TOTAL.x | |
---|---|---|---|---|
<int> | <dbl> | <dbl> | <dbl> | |
1 | 2005 | 861934580 | 1143692910 | 2005627490 |
2 | 2006 | 897421835 | 1178407554 | 2075829389 |
3 | 2007 | 923129509 | 1167029322 | 2090158831 |
4 | 2008 | 917291027 | 1168537632 | 2085828659 |
5 | 2009 | 865984766 | 1161870294 | 2027855060 |
6 | 2010 | 921187045 | 1240792245 | 2161979290 |
7 | 2011 | 988093865 | 1257258087 | 2245351952 |
8 | 2012 | 1028103295 | 1254140891 | 2282244186 |
9 | 2013 | 1073638692 | 1256932157 | 2330570849 |
10 | 2014 | 1107272985 | 1278406482 | 2385679467 |
11 | 2015 | 1153427648 | 1332604334 | 2486031982 |
12 | 2016 | 1212449118 | 1373256963 | 2585706081 |
13 | 2017 | 1274205302 | 1410126730 | 2684332032 |
14 | 2018 | 1301381594 | 1440774340 | 2742155934 |
Now we have to calculate the changes from year to year
index
data_fig1_final_perc
index <- data_fig1_final[1,2:4]
data_fig1_final_perc <- as.data.frame(matrix(ncol=4, nrow=14))
for (i in 1:14) {
data_fig1_final_perc[i,2:4] <- (data_fig1_final[i,2:4]/index)*100 }
data_fig1_final_perc$V1 <- c(2005,2006, 2007, 2008,2009,2010,2011,2012, 2013,2014,2015,2016,2017,2018)
head(data_fig1_final_perc)
V1 | V2 | V3 | V4 | |
---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | |
1 | 2005 | 100.0000 | 100.0000 | 100.0000 |
2 | 2006 | 104.1172 | 103.0353 | 103.5002 |
3 | 2007 | 107.0997 | 102.0404 | 104.2147 |
4 | 2008 | 106.4223 | 102.1723 | 103.9988 |
5 | 2009 | 100.4699 | 101.5894 | 101.1083 |
6 | 2010 | 106.8744 | 108.4900 | 107.7957 |
Finally use the modified dataset data_fig1_final_perc
and build the graph by using ggplot
ggplot(data_fig1_final_perc,aes(x=V1)) +
geom_line(aes(y=V2, group=1, color="line"), color="lightblue", size=1) + #we add three lines
geom_line(aes(y=V3, group=1, color="line"), color="darkblue", size=1) +
geom_line(aes(y=V4, group=1, color="line"), color="darkorange", size=2) +
ggtitle("Trends in nights spent at EU-27 tourist accommodation establishments, EU-27, 2005-2018",
subtitle="(index 2005=100)") +
scale_y_continuous(limits = c(100, 155), breaks = seq(100, 155, by = 5)) +
scale_x_discrete(limits = c(2005,2006, 2007, 2008,2009,2010,2011,2012, 2013,2014,2015,2016,2017,2018)) +
ylab(" ") +
xlab(" ") +
theme(panel.grid.major.y = element_line(color="grey"),
panel.background = element_rect(fill = NA),
axis.ticks.y = element_line(colour= "white"))
Warning message: “Continuous limits supplied to discrete scale. Did you mean `limits = factor(...)` or `scale_*_continuous()`?”
It's in principal the same graph like in the abstract part. Check the code there.
label_axes2 <- seq(0, 320, by = 20)
ggplot(data_mod4, aes(x=geo,y=values)) +
geom_bar(stat = "identity", position="dodge", width=0.5, fill="steelblue3") +
scale_y_continuous(limits = c(0, 320000000), breaks = seq(0, 320000000, by = 20000000),
labels=label_axes2) +
ggtitle("Tourism destinations - nights spent at tourist accomodation establishments, 2018",
subtitle = "(million nights spent in the country by non-residents)") +
coord_flip() +
scale_x_discrete(limits = rev(levels(data_mod4$geo)),
labels = c("Kosovo", "North Macedonia", "Serbia", "Montenegro", "Turkey", "Liechtenstein",
"Iceland", "Norway", "Switzerland", "United Kingdom", "Luxembourg", "Lithuania",
"Latvia", "Estonia", "Romania", "Slovakia", "Finland", "Malta", "Slovenia",
"Denmark", "Hungary", "Sweden", "Cyprus", "Ireland", "Poland", "Bulgaria",
"Belgium", "Czechia", "Netherlands", "Portugal", "Croatia", "Germany", "Austria",
"Greece", "France", "Italy", "Spain")) +
ylab(" ") +
xlab(" ") +
theme(panel.grid.major.x = element_line(color="grey"),
panel.background = element_rect(fill = NA),
axis.line = element_line(colour = "white"),
axis.ticks = element_line(colour= "white"))
EU_27_prep2 <- data_mod4$geo %in% EU_27
tab_EU2 <- sapply(as.data.frame(data_mod4[EU_27_prep2,2]),sum)
Add a column with the ratios we need for the pie chart: tab_ratio
EU_27_prep2
(countries outside the EU should be delete)tab_ratio_top9
and summing up the others to "Rest of EU-27" in tab_ratio_other
by using again the sapply commandtab_ratio_other_sum
to the top 9: tab_ratio_new
tab_ratio <- cbind(data_mod4,data_mod4[2]/tab_EU2*100)
tab_ratio <- tab_ratio[EU_27_prep2,]
tab_ratio <- tab_ratio[order(tab_ratio[,3], decreasing=T),]
tab_ratio_top9 <- tab_ratio[1:9,]
tab_ratio_other <- tab_ratio[10:27,]
tab_ratio_other_sum <- sapply(as.data.frame(tab_ratio_other[,3]),sum)
tab_ratio_new <- rbind(tab_ratio_top9[,c(1,3)], tab_ratio_other_sum)
tab_ratio_new
Warning message in `[<-.factor`(`*tmp*`, ri, value = 15.9651602486859): “invalid factor level, NA generated”
geo | values | |
---|---|---|
<fct> | <dbl> | |
19 | ES | 22.645198 |
24 | IT | 16.287560 |
21 | FR | 10.586126 |
18 | EL | 7.716184 |
10 | AT | 6.717353 |
15 | DE | 6.541928 |
22 | HR | 6.254410 |
33 | PT | 3.750269 |
31 | NL | 3.535812 |
101 | NA | 15.965160 |
Plot the pie chart
"%"
behind it: label_pie1
label_pie2
label_pie3
label_pie1 <- paste(round(tab_ratio_new$values,0), "%")
label_pie2 <- c("Spain","Italy","France","Greece","Austria","Germany","Croatia","Portugal","Netherlands","Rest of EU-27")
label_pie3 <- paste(label_pie2,"\n",label_pie1) #we can add "/n" to achieve a line break
ggplot(tab_ratio_new, aes(x="", y=values, fill=geo)) +
geom_bar(stat = "identity", width=1) +
coord_polar("y", direction = -1, start=0) +
geom_text(label = label_pie3, position = position_stack(vjust = 0.5),size=2.5) +
ylab(" ") +
xlab(" ") +
ggtitle("Share of nights spent at EU-27 tourist accommodation by tourists travelling outside their own country
of residence, 2018",
subtitle = "(% of all nights spent in EU-27 tourist accommodation establishements)") +
theme_classic() +
theme(legend.position = "none",
axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank())
bev <- as_tibble(get_eurostat_data(id="demo_gind"))
bev_subs <- subset(bev,
bev$geo %in% countr_0 &
bev$indic_de=="AVG" &
bev$time=="2018")
bev_subs <- bev_subs[,c(2,4)]
countr_0
) like the table above, that we can add the columns with the tourist accomodationdata_tab_col3
and add the needed ratio-columnbev_subs <- bev_subs[order(factor(bev_subs$geo, levels= countr_0)),]
bev_subs <- cbind(bev_subs,data_tab_col3)
bev_subs <- cbind(bev_subs,bev_subs[,4]/bev_subs[,2])
names(bev_subs)[5] <- "values"
bev_subs <- bev_subs[,c(1,5)]
countr_fig4
to bring the countries in the order the graph showscountr_fig4 <- c("HR", "MT","CY","AT","EL","ES","SI","PT","IT","IE","NL","FR","SE","DK","CZ","DE","EE","LU","FI","BG",
"BE","HU","LT","SK","LV","PL","RO", "UK", "IS","CH","NO","LI","ME","TR","RS","MK","XK")
bev_subs <- bev_subs[order(factor(bev_subs$geo, levels= countr_fig4)),]
Calculate the EU-27 value by summing up
EU_27
: EU_27_prep_2_1
bev_subs
"EU-27"
get generated: we solve this problem two steps later"EU-27"
and this only works by converting it from a factor first, then doing the change and re-convert it backEU_27_prep_2_1 <- bev_subs$geo %in% EU_27
tab_EU2_1 <- c("EU-27",sapply(as.data.frame(bev_subs[EU_27_prep_2_1,2]),mean))
bev_subs <- rbind(tab_EU2_1,bev_subs)
bev_subs[,1] <- as.character(bev_subs[,1])
bev_subs[1,1] <- "EU-27"
bev_subs[,1] <- factor(bev_subs[,1])
head(bev_subs)
Warning message in `[<-.factor`(`*tmp*`, ri, value = "EU-27"): “invalid factor level, NA generated”
geo | values | |
---|---|---|
<fct> | <chr> | |
1 | EU-27 | 7.37476857884317 |
11 | HR | 21.894524392122 |
18 | MT | 20.8699337639024 |
13 | CY | 19.7274029156342 |
20 | AT | 14.1653608424209 |
8 | EL | 11.4682317386886 |
Plot the graph via ggplot
"geo"
bev_subs$geo <- factor(bev_subs$geo,
levels = c("EU-27","HR", "MT","CY","AT","EL","ES","SI","PT","IT","IE","NL","FR","SE","DK","CZ","DE",
"EE","LU","FI","BG","BE","HU","LT","SK","LV","PL","RO", "UK", "IS","CH","NO","LI",
"ME","TR","RS","MK","XK"))
bev_subs$values <- as.numeric(as.character(bev_subs$values))
ggplot(bev_subs, aes(x=geo,y=values)) +
geom_bar(stat = "identity", position="dodge", width=0.5, fill="steelblue3") +
scale_y_continuous(limits = c(0, 30), breaks = seq(0, 30, by = 5)) +
ggtitle("Tourism intensity, 2018",
subtitle="(nights spent by residents and non-residents at tourist accommodation
establishments per inhabitant)") +
coord_flip() +
scale_x_discrete(limits = rev(levels(bev_subs$geo)),
labels = c("Kosovo", "North Macedonia", "Serbia", "Turkey", "Montenegro", "Liechtenstein",
"Norway", "Switzerland", "Iceland", "United Kingdom", "Romania", "Poland",
"Latvia", "Slovakia", "Lithuania", "Hungary", "Belgium", "Bulgaria", "Finland",
"Luxembourg", "Estonia", "Germany", "Czechia", "Denmark", "Sweden", "France",
"Netherlands", "Ireland", "Italy", "Portugal", "Slovenia", "Spain", "Greece",
"Austria", "Cyprus", "Malta", "Croatia", "EU-27")) +
ylab(" ") +
xlab(" ") +
theme(panel.grid.major.x = element_line(color="grey"),
panel.background = element_rect(fill = NA),
axis.line = element_line(colour = "white"),
axis.ticks = element_line(colour= "white"))
First we create the table and afterwards figure 5, because it is easier to build the graph with the data from the table.
We start with the first three columns, which are based on the same dataset
data_2
data_2 <- as_tibble(get_eurostat_data(id="tour_dem_tttot"))
data_2$year <- as.integer(substr(data_2$time,1,4))
Build a loop
duration_vec
data_tab2_part1
duration_vec <- c("N_GE1","N1-3","N_GE4")
data_tab2_part1 <- as.data.frame(matrix(ncol = 3, nrow = 30))
Within the loop create several subsets from data_2
data_2_col1_1
: exclude the countries countr_tab2_1_1
, which need data from 2017data_2_col1_2
: for Poland and Romania in countr_tab2_1_2
we need data from 2017data_2_col1_3
: for United Kingdom we have NAdata_tab2_col1
the three calculated subsets in every step within the loopcountr_0
from beginningdata_tab2_col1
three times for each case of duration_vecdata_tab2_part1
we create beforefor (i in 1:3) {
countr_tab2_1_1 <- c("BE","BG","CZ","DK","DE","EE","IE","EL","ES","FR","HR","IT","CY","LV","LT","LU","HU","MT","NL",
"AT","PT","SI","SK","FI","SE","UK","IS","LI","NO","CH","ME","MK","RS","TR","XK")
data_2_col1_1 <- subset(data_2,
data_2$geo %in% countr_tab2_1_1 &
data_2$year==2018 &
data_2$purpose=="TOTAL"&
data_2$duration==duration_vec[i]&
data_2$unit=="THS" &
data_2$partner=="WORLD")
data_2_col1_1 <- data_2_col1_1[,c(5,7)]
countr_tab2_1_2 <- c("PL","RO")
data_2_col1_2 <- subset(data_2,
data_2$geo %in% countr_tab2_1_2 &
data_2$year==2017 &
data_2$purpose=="TOTAL"&
data_2$duration==duration_vec[i]&
data_2$unit=="THS" &
data_2$partner=="WORLD")
data_2_col1_2 <- data_2_col1_2[,c(5,7)]
data_2_col1_3 <- data.frame( geo = "UK", values= NA)
data_tab2_col1 <- rbind(data_2_col1_1,data_2_col1_2,data_2_col1_3)
data_tab2_col1 <- data_tab2_col1[order(factor(data_tab2_col1$geo, levels= countr_0)),]
data_tab2_col1$values <- round(data_tab2_col1$values,0)
data_tab2_col1
# print(data_tab2_col1)
data_tab2_part1[,i] <- data_tab2_col1[,2]
}
head(data_tab2_part1)
V1 | V2 | V3 | |
---|---|---|---|
<dbl> | <dbl> | <dbl> | |
1 | 16893 | 7058 | 9835 |
2 | 5090 | 2748 | 2343 |
3 | 34569 | 21380 | 13189 |
4 | 18875 | 10380 | 8495 |
5 | 267879 | 132289 | 135591 |
6 | 6468 | 5109 | 1358 |
Create a column with the names of the countries (countr_names2
), this will be the first column of the complete table 2. Add this column to the three calculated columns from above: table2_vers1
countr_names2 <- c("Belgium","Bulgaria","Czechia","Denmark","Germany","Estonia", "Ireland", "Greece", "Spain", "France",
"Croatia", "Italy", "Cyprus", "Latvia", "Lithuania", "Luxembourg", "Hungary", "Malta", "Netherlands",
"Austria", "Poland", "Portugal", "Romania", "Slovenia", "Slovakia", "Finland", "Sweden",
"United Kingdom", "Norway", "Switzerland")
table2_vers1 <- cbind(countr_names2, data_tab2_part1)
head(table2_vers1)
countr_names2 | V1 | V2 | V3 | |
---|---|---|---|---|
<fct> | <dbl> | <dbl> | <dbl> | |
1 | Belgium | 16893 | 7058 | 9835 |
2 | Bulgaria | 5090 | 2748 | 2343 |
3 | Czechia | 34569 | 21380 | 13189 |
4 | Denmark | 18875 | 10380 | 8495 |
5 | Germany | 267879 | 132289 | 135591 |
6 | Estonia | 6468 | 5109 | 1358 |
Continuing with column four and five, which are based on the same dataset like column one to three (data_2
), but with different arguments.
duration_vec2
data_tab2_part2
We want to calculate the ratio, also for the EU (this we will do later on).
EU_tab2_sum1
(first we create the empty vector and assign the sums during the loop)duration_vec2 <- c("N1-3","N_GE4")
data_tab2_part2 <- as.data.frame(matrix(ncol = 2, nrow = 30))
EU_tab2_sum1 <- as.data.frame(matrix(ncol = 2, nrow = 1))
Within the loop create several subsets
data_2_col2_1
: exclude the countries (countr_tab2_1_1
), which need data from 2017countr_tab2_1_2
we need data from 2017. Assign in data_2_col2_2
data_2_col2_3
data_tab2_col2
countr_0
from beginning)Within the loop calculate the ratio for each country and a sum for the EU-value
EU_27_prep_col3
tab_EU3_col3
data_tab2_part2
for (i in 1:2) {
countr_tab2_1_1 <- c("BE","BG","CZ","DK","DE","EE","IE","EL","ES","FR","HR","IT","CY","LV","LT","LU","HU","MT","NL",
"AT","PT","SI","SK","FI","SE","UK","IS","LI","NO","CH","ME","MK","RS","TR","XK")
data_2_col2_1 <- subset(data_2,
data_2$geo %in% countr_tab2_1_1 &
data_2$year==2018 &
data_2$purpose=="TOTAL"&
data_2$duration==duration_vec2[i]&
data_2$unit=="THS" &
data_2$partner=="DOM")
data_2_col2_1 <- data_2_col2_1[,c(5,7)]
countr_tab2_1_2 <- c("PL","RO")
data_2_col2_2 <- subset(data_2,
data_2$geo %in% countr_tab2_1_2 &
data_2$year==2017 &
data_2$purpose=="TOTAL"&
data_2$duration==duration_vec2[i]&
data_2$unit=="THS" &
data_2$partner=="DOM")
data_2_col2_2 <- data_2_col2_2[,c(5,7)]
data_2_col2_3 <- data.frame( geo = "UK", values= NA)
data_tab2_col2 <- rbind(data_2_col2_1,data_2_col2_2,data_2_col2_3)
data_tab2_col2 <- data_tab2_col2[order(factor(data_tab2_col2$geo, levels= countr_0)),]
data_tab2_col2$values <- round(data_tab2_col2$values,0)
EU_27_prep_col3 <- data_tab2_col2$geo %in% EU_27
tab_EU3_col3 <- sapply(data_tab2_col2[EU_27_prep_col3,2],sum)
EU_tab2_sum1[,i] <- tab_EU3_col3
data_tab2_part2[,i] <- round(data_tab2_col2[,2]/table2_vers1[,2]*100,1)
}
head(data_tab2_part2)
V1 | V2 | |
---|---|---|
<dbl> | <dbl> | |
1 | 15.7 | 6.8 |
2 | 48.0 | 33.5 |
3 | 55.7 | 23.0 |
4 | 44.0 | 19.2 |
5 | 40.0 | 19.5 |
6 | 63.5 | 4.5 |
Continue with column six and seven, which are based on the same data like column four to five, the only difference is the variable "partner".
duration_vec2 <- c("N1-3","N_GE4")
data_tab2_part3 <- as.data.frame(matrix(ncol = 2, nrow = 30))
EU_tab2_sum2 <- as.data.frame(matrix(ncol = 2, nrow = 1))
for (i in 1:2) {
countr_tab2_1_1 <- c("BE","BG","CZ","DK","DE","EE","IE","EL","ES","FR","HR","IT","CY","LV","LT","LU","HU","MT","NL",
"AT","PT","SI","SK","FI","SE","UK","IS","LI","NO","CH","ME","MK","RS","TR","XK")
data_2_col3_1 <- subset(data_2,
data_2$geo %in% countr_tab2_1_1 &
data_2$year==2018 &
data_2$purpose=="TOTAL"&
data_2$duration==duration_vec2[i]&
data_2$unit=="THS" &
data_2$partner=="OUT") #the only difference to column four and five
data_2_col3_1 <- data_2_col3_1[,c(5,7)]
countr_tab2_1_2 <- c("PL","RO")
data_2_col3_2 <- subset(data_2,
data_2$geo %in% countr_tab2_1_2 &
data_2$year==2017 &
data_2$purpose=="TOTAL"&
data_2$duration==duration_vec2[i]&
data_2$unit=="THS" &
data_2$partner=="OUT") #the only difference to column four and five
data_2_col3_2 <- data_2_col3_2[,c(5,7)]
data_2_col3_3 <- data.frame( geo = "UK", values= NA)
data_tab2_col3 <- rbind(data_2_col3_1,data_2_col3_2,data_2_col3_3)
data_tab2_col3 <- data_tab2_col3[order(factor(data_tab2_col3$geo, levels= countr_0)),]
data_tab2_col3$values <- round(data_tab2_col3$values,0)
EU_27_prep_col3 <- data_tab2_col3$geo %in% EU_27
tab_EU3_col3 <- sapply(data_tab2_col3[EU_27_prep_col3,2],sum)
EU_tab2_sum2[,i] <- tab_EU3_col3
data_tab2_part3[,i] <- round(data_tab2_col3[,2]/table2_vers1[,2]*100,1)
}
head(data_tab2_part3)
V1 | V2 | |
---|---|---|
<dbl> | <dbl> | |
1 | 26.1 | 51.4 |
2 | 6.0 | 12.5 |
3 | 6.2 | 15.2 |
4 | 11.0 | 25.8 |
5 | 9.4 | 31.1 |
6 | 15.5 | 16.5 |
Match table2_vers1
with the remaining columns (i.e. column 4-7; data_tab2_part2
,data_tab2_part3
) to table2_vers2
table2_vers2 <- cbind(table2_vers1, data_tab2_part2, data_tab2_part3)
head(table2_vers2)
countr_names2 | V1 | V2 | V3 | V1 | V2 | V1 | V2 | |
---|---|---|---|---|---|---|---|---|
<fct> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
1 | Belgium | 16893 | 7058 | 9835 | 15.7 | 6.8 | 26.1 | 51.4 |
2 | Bulgaria | 5090 | 2748 | 2343 | 48.0 | 33.5 | 6.0 | 12.5 |
3 | Czechia | 34569 | 21380 | 13189 | 55.7 | 23.0 | 6.2 | 15.2 |
4 | Denmark | 18875 | 10380 | 8495 | 44.0 | 19.2 | 11.0 | 25.8 |
5 | Germany | 267879 | 132289 | 135591 | 40.0 | 19.5 | 9.4 | 31.1 |
6 | Estonia | 6468 | 5109 | 1358 | 63.5 | 4.5 | 15.5 | 16.5 |
Now we add the last column, which has another dataset data_3
data_3 <- as_tibble(get_eurostat_data(id="tour_dem_totot"))
data_3$year <- as.integer(substr(data_3$time,1,4))
Exclude the countries, which need data from 2016 and 2017 (countr_tab2_2_1
). Build the subset data_2_col4_1
countr_tab2_2_1 <- c("BE","BG","CZ","DK","DE","EE","IE","EL","ES","FR","HR","IT","CY","LT","LU","HU","MT","NL",
"PL","PT","RO","SI","SK","FI","SE","IS","LI","NO","CH","ME","RS","TR")
data_2_col4_1 <- subset(data_3,
data_3$geo %in% countr_tab2_2_1 &
data_3$year==2018 &
data_3$duration=="N_GE1"&
data_3$unit=="PC_POP"&
data_3$partner=="WORLD")
data_2_col4_1 <- data_2_col4_1[,c(4,6)]
For United Kingdom (countr_tab2_2_2
) we need data from 2016. Build the subset data_2_col4_2
countr_tab2_2_2 <- c("UK")
data_2_col4_2 <- subset(data_3,
data_3$geo %in% countr_tab2_2_2 &
data_3$year==2016 &
data_3$duration=="N_GE1"&
data_3$unit=="PC_POP"&
data_3$partner=="WORLD")
data_2_col4_2 <- data_2_col4_2[,c(4,6)]
For Austria and Latvia (countr_tab2_2_3
) we need data from 2017. Build the subset data_2_col4_3
countr_tab2_2_3 <- c("LV","AT")
data_2_col4_3 <- subset(data_3,
data_3$geo %in% countr_tab2_2_3 &
data_3$year==2017 &
data_3$duration=="N_GE1"&
data_3$unit=="PC_POP"&
data_3$partner=="WORLD")
data_2_col4_3 <- data_2_col4_3[,c(4,6)]
match the subsets to data_tab2_part4
bring the country names in the order the table shows (use countr_0
)
here we can keep the country shortcuts, because we need them to calculate the EU-value later
by matching the last column to table2_vers2
, we can delete the shortcut column, because we need the full names here: table2_vers3
data_tab2_part4 <- rbind(data_2_col4_1, data_2_col4_2, data_2_col4_3)
data_tab2_part4 <- data_tab2_part4[order(factor(data_tab2_part4$geo, levels= countr_0)),]
data_tab2_part4$values <- round(data_tab2_part4$values, 1)
table2_vers3 <- cbind(table2_vers2, data_tab2_part4[,2])
head(table2_vers3)
countr_names2 | V1 | V2 | V3 | V1 | V2 | V1 | V2 | values | |
---|---|---|---|---|---|---|---|---|---|
<fct> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
1 | Belgium | 16893 | 7058 | 9835 | 15.7 | 6.8 | 26.1 | 51.4 | 67.7 |
2 | Bulgaria | 5090 | 2748 | 2343 | 48.0 | 33.5 | 6.0 | 12.5 | 33.8 |
3 | Czechia | 34569 | 21380 | 13189 | 55.7 | 23.0 | 6.2 | 15.2 | 78.1 |
4 | Denmark | 18875 | 10380 | 8495 | 44.0 | 19.2 | 11.0 | 25.8 | 69.9 |
5 | Germany | 267879 | 132289 | 135591 | 40.0 | 19.5 | 9.4 | 31.1 | 75.3 |
6 | Estonia | 6468 | 5109 | 1358 | 63.5 | 4.5 | 15.5 | 16.5 | 79.5 |
Calculate the EU value
table2_vers3
with shortcuts instead of full names to exclude the not-EU countries by using the EU_27
vector.table2_vers3
we keeped the shortcuts for the countries: "data_tab2_part4[,1]" --> this are the shortcutstable2_vers3
tab_prep3 <- cbind(data_tab2_part4[,1], table2_vers3[,2:9])
tab_prep3
are within EU_27
: EU_27_prep3
table2_vers3
: tab_EU3_1
EU_27_prep3 <- tab_prep3$geo %in% EU_27
tab_EU3_1 <- sapply(table2_vers3[EU_27_prep3,2:4],sum)
EU_tab2_sum1
and EU_tab2_sum2
tab_EU3_2 <- round(EU_tab2_sum1/tab_EU3_1[1]*100,1)
tab_EU3_3 <- round(EU_tab2_sum2/tab_EU3_1[1]*100,1)
tab_EU3_4 <- round(mean(table2_vers3[EU_27_prep3,9]),1)
Match all EU-values in tab_EU3
table2_final
tab_EU3 <- as.numeric(c("EU-27", tab_EU3_1, tab_EU3_2, tab_EU3_3, tab_EU3_4))
table2_final <- rbind(tab_EU3,table2_vers3)
names(table2_final) <- c("",
"All trips",
"Short trips (1-3 nights)",
"Long trips (4+ nights)",
"Short domestic trips (1-3 nights)",
"Long domestic trips (4+ nights)",
"Short outbound trips (1-3 nights)",
"Long outbound trips (4+ nights)",
"Share of the population (aged 15+) taking part in tourism trips for personal purposes (%)")
Warning message in eval(expr, envir, enclos): “NAs introduced by coercion”
table2_final[,1] <- as.character(table2_final[,1])
table2_final[1,1] <- "EU-27"
table2_final[,1] <- factor(table2_final[,1])
as.matrix(table2_final)
All trips | Short trips (1-3 nights) | Long trips (4+ nights) | Short domestic trips (1-3 nights) | Long domestic trips (4+ nights) | Short outbound trips (1-3 nights) | Long outbound trips (4+ nights) | Share of the population (aged 15+) taking part in tourism trips for personal purposes (%) | |
---|---|---|---|---|---|---|---|---|
EU-27 | 1130622 | 619562 | 511063 | 46.0 | 24.5 | 8.8 | 20.7 | 65.1 |
Belgium | 16893 | 7058 | 9835 | 15.7 | 6.8 | 26.1 | 51.4 | 67.7 |
Bulgaria | 5090 | 2748 | 2343 | 48.0 | 33.5 | 6.0 | 12.5 | 33.8 |
Czechia | 34569 | 21380 | 13189 | 55.7 | 23.0 | 6.2 | 15.2 | 78.1 |
Denmark | 18875 | 10380 | 8495 | 44.0 | 19.2 | 11.0 | 25.8 | 69.9 |
Germany | 267879 | 132289 | 135591 | 40.0 | 19.5 | 9.4 | 31.1 | 75.3 |
Estonia | 6468 | 5109 | 1358 | 63.5 | 4.5 | 15.5 | 16.5 | 79.5 |
Ireland | 15941 | 10110 | 5831 | 45.9 | 8.0 | 17.5 | 28.6 | 74.7 |
Greece | 6565 | 1834 | 4731 | 25.0 | 61.7 | 2.9 | 10.4 | 42.8 |
Spain | 156370 | 110284 | 46086 | 66.9 | 22.6 | 3.6 | 6.9 | 67.4 |
France | 220225 | 98403 | 121822 | 37.3 | 38.5 | 7.4 | 16.9 | 74.3 |
Croatia | 5255 | 2754 | 2501 | 31.9 | 28.9 | 20.5 | 18.6 | 46.5 |
Italy | 68186 | 35285 | 32901 | 45.1 | 33.9 | 6.7 | 14.4 | 44.9 |
Cyprus | 3001 | 1717 | 1284 | 44.7 | 7.5 | 12.5 | 35.3 | 69.5 |
Latvia | 3952 | 2952 | 1000 | 58.6 | 6.7 | 16.0 | 18.6 | 57.7 |
Lithuania | 4885 | 2925 | 1960 | 45.1 | 9.0 | 14.8 | 31.1 | 57.3 |
Luxembourg | 2555 | 1167 | 1388 | 1.5 | 0.3 | 44.2 | 54.0 | 81.1 |
Hungary | 19230 | 12506 | 6724 | 46.2 | 15.8 | 18.9 | 19.2 | 57.6 |
Malta | 874 | 391 | 483 | 23.2 | 4.9 | 21.6 | 50.3 | 65.5 |
Netherlands | 44551 | 21955 | 22596 | 37.3 | 15.9 | 12.0 | 34.9 | 85.0 |
Austria | 24391 | 13395 | 10997 | 36.8 | 14.4 | 18.1 | 30.7 | 76.6 |
Poland | 57910 | 30364 | 27546 | 48.3 | 31.1 | 4.1 | 16.5 | 61.9 |
Portugal | 18298 | 12846 | 5452 | 66.3 | 21.8 | 3.9 | 8.0 | 44.8 |
Romania | 17902 | 11291 | 6612 | 62.3 | 31.7 | 0.8 | 5.2 | 27.0 |
Slovenia | 5191 | 3203 | 1988 | 32.1 | 6.5 | 29.6 | 31.8 | 69.3 |
Slovakia | 12423 | 7497 | 4926 | 45.6 | 16.9 | 14.8 | 22.8 | 73.2 |
Finland | 38602 | 27837 | 10765 | 60.4 | 15.0 | 11.7 | 12.9 | 91.0 |
Sweden | 54541 | 31882 | 22659 | 47.2 | 18.3 | 11.3 | 23.3 | 84.9 |
United Kingdom | NA | NA | NA | NA | NA | NA | NA | 64.1 |
Norway | 27084 | 17813 | 9270 | 53.6 | 16.5 | 12.2 | 17.7 | 92.5 |
Switzerland | 21946 | 9605 | 12341 | 19.6 | 10.8 | 24.2 | 45.5 | 89.8 |
countr_tab2_2_4 <- c("AL","MK")
data_2_col4_4 <- subset(data_3,
data_3$geo %in% countr_tab2_2_4 &
data_3$year==2018 &
data_3$duration=="N_GE1"&
data_3$unit=="PC_POP"&
data_3$partner=="WORLD")
data_2_col4_4 <- data_2_col4_4[,c(4,6)]
countr_fig5
data_tab2_part4.1
and the two additional countries data_2_col4_4
in data_tab2_part5
countr_fig5 <- c("EU-27","FI", "NL","SE","LU","EE","CZ","AT","DE","IE","FR","SK","DK","CY","SI","BE","ES","MT","PL","LV",
"HU","LT","HR","IT","PT","EL","BG","RO", "UK", "NO","CH","AL","MK")
data_tab2_part4.1 <- data_tab2_part4[order(factor(data_tab2_part4$geo, levels= countr_fig5)),]
data_tab2_part5 <- rbind(tab_EU3[9], data_tab2_part4.1, data_2_col4_4)
Warning message in `[<-.factor`(`*tmp*`, ri, value = 65.1): “invalid factor level, NA generated”
We have to convert the class, because we have to add "EU-27" and this only works by converting it from a factor first, then doing the change and re-convert it back.
data_tab2_part5[,1] <- as.character(data_tab2_part5[,1])
data_tab2_part5[1,1] <- "EU-27"
data_tab2_part5[,1] <- factor(countr_fig5)
head(data_tab2_part5)
geo | values |
---|---|
<fct> | <dbl> |
EU-27 | 65.1 |
FI | 91.0 |
NL | 85.0 |
SE | 84.9 |
LU | 81.1 |
EE | 79.5 |
Bring the countries in the order the graph shows. You can do this due to modify the levels of the variable "geo".
data_tab2_part5$geo <- factor(data_tab2_part5$geo,
levels = c("EU-27","FI", "NL","SE","LU","EE","CZ","AT","DE","IE","FR","SK","DK","CY","SI","BE",
"ES","MT","PL","LV","HU","LT","HR","IT","PT","EL","BG","RO", "UK", "NO","CH","AL","MK"))
Plot the graph via ggplot
ggplot(data_tab2_part5, aes(x=geo,y=values)) +
geom_bar(stat = "identity", position="dodge", width=0.5, fill="steelblue3") +
scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 10)) +
ggtitle("Share of population participating in tourism, 2018",
subtitle="(% of population aged 15 years or more)") +
coord_flip()+
scale_x_discrete(limits = rev(levels(data_tab2_part5$geo)),
labels = c("North Macedonia", "Albania", "Switzerland", "Norway", "United Kingdom",
"Romania", "Bulgaria", "Greece", "Portugal", "Italy", "Croatia", "Lithuania",
"Hungary", "Latvia", "Poland", "Malta", "Spain", "Belgium", "Slovenia", "Cyprus",
"Denmark", "Slovakia", "France", "Ireland", "Germany", "Austria", "Czechia",
"Estonia", "Luxembourg", "Sweden", "Netherlands", "Finland", "EU-27")) +
ylab(" ") +
xlab(" ") +
theme(panel.grid.major.x = element_line(color="grey"),
panel.background = element_rect(fill = NA),
axis.line = element_line(colour = "white"),
axis.ticks = element_line(colour= "white"))
data_fig6 <- as_tibble(get_eurostat_data(id="tour_dem_tntot"))
data_fig6$year <- as.integer(substr(data_fig6$time,1,4))
Create a new contry vector countr_fig6
and exclude the countries, which need data from 2017. Build the subset data_fig6_subs
countr_fig6 <- countr <- c("ES","IT","FR","EL","AT","DE","HR","PT","NL","CZ","BE","BG","IE","CY","SE","HU","DK","SI",
"MT","FI","SK","EE","LV","LT","LU","UK","CH","NO","IS","LI","TR","ME","RS","MK","XK")
data_fig6_subs <- subset(data_fig6,
data_fig6$geo %in% countr_fig6 &
data_fig6$year==2018 &
data_fig6$purpose=="TOTAL" &
data_fig6$duration=="N_GE1"&
data_fig6$unit=="NR"&
data_fig6$partner=="OUT")
data_fig6_subs <- data_fig6_subs[,c(5,7)]
For Poland (countr_fig6_2
) we need data from 2017. Build the subset data_fig6_subs2
countr_fig6_2 <- c("PL","RO")
data_fig6_subs2 <- subset(data_fig6,
data_fig6$geo %in% countr_fig6_2 &
data_fig6$year==2017 &
data_fig6$purpose=="TOTAL" &
data_fig6$duration=="N_GE1"&
data_fig6$unit=="NR"&
data_fig6$partner=="OUT")
data_fig6_subs2 <- data_fig6_subs2[,c(5,7)]
Match the two calculated subsets in data_fig6_subs
data_fig6_subs <- rbind(data_fig6_subs,data_fig6_subs2)
head(data_fig6_subs)
geo | values |
---|---|
<fct> | <dbl> |
AT | 74626464 |
BE | 104276180 |
BG | 6397986 |
CH | 118774558 |
CY | 11955548 |
CZ | 46994452 |
Calculate the EU-27 value by summing up
data_fig6_subs
are within EU_27
: EU_27_prep4
tab_EU4
EU_27_prep4 <- data_fig6_subs$geo %in% EU_27
tab_EU4 <- sapply(as.data.frame(data_fig6_subs[EU_27_prep4,2]),sum)
Add a column with the ratio we need for the pie chart
tab_ratio2
tab_ratio_top9_2
) and summing up the others to "Rest of EU-27" (tab_ratio_other_2_sum
) by using again the sapply commandtab_ratio_new_2
tab_ratio2 <- cbind(data_fig6_subs,data_fig6_subs[2]/tab_EU4*100)
tab_ratio2 <- tab_ratio2[EU_27_prep4,]
tab_ratio2 <- tab_ratio2[order(tab_ratio2[,3], decreasing=T),]
tab_ratio_top9_2 <- tab_ratio2[1:9,]
tab_ratio_other_2 <- tab_ratio2[10:27,]
tab_ratio_other_2_sum <- sapply(as.data.frame(tab_ratio_other_2[,3]),sum)
tab_ratio_new_2 <- rbind(tab_ratio_top9_2[,c(1,3)], tab_ratio_other_2_sum)
tab_ratio_new_2
Warning message in `[<-.factor`(`*tmp*`, ri, value = 15.3142164698757): “invalid factor level, NA generated”
geo | values | |
---|---|---|
<fct> | <dbl> | |
7 | DE | 35.407430 |
13 | FR | 15.544253 |
22 | NL | 7.500622 |
25 | SE | 5.559875 |
11 | ES | 4.806601 |
28 | PL | 4.630022 |
17 | IT | 4.284631 |
2 | BE | 4.052285 |
1 | AT | 2.900065 |
10 | NA | 15.314216 |
Create the labels we need for the pie chart
label_pie2_1
label_pie2_2
label_pie2_3
label_pie2_1 <- paste(round(tab_ratio_new_2$values,0), "%")
label_pie2_2 <- c("Germany","France","Netherlands","Sweden","Spain","Poland","Italy","Belgium",
"Austria","Rest of EU-27")
label_pie2_3 <- paste(label_pie2_2,"\n",label_pie2_1) #we can use a "/n" to get a line break
Create variables for the position of the labels in the pie chart. Additionally change the colors in the vector colors
, if you want.
Plot the pie chart
tab_ratio_new_2 <- tab_ratio_new_2 %>%
mutate(end = 2 * pi * cumsum(values)/sum(values),
start = lag(end, default = 0),
middle = 0.5 * (start + end),
hjust = ifelse(middle > pi, 1, 0),
vjust = ifelse(middle < pi/2 | middle > 3 * pi/2, 0, 1))
# colors <- c("#ffd700","skyblue1","lightblue1","tomato2","tan1","peachpuff1","dodgerblue3","blue","tan")
ggplot(tab_ratio_new_2) +
geom_arc_bar(aes(x0 = 0, y0 = 0, r0 = 0, r = 1,
start = start, end = end, fill = geo)) +
geom_text(aes(x = 1.05 * sin(middle), y = 1.05 * cos(middle), label = label_pie2_3,
hjust = hjust, vjust = vjust)) +
# scale_fill_manual(values=colors) +
coord_fixed() +
scale_x_continuous(limits = c(-1.5, 1.4), #Adjust so labels are not cut off
name = "", breaks = NULL, labels = NULL) +
scale_y_continuous(limits = c(-1.15, 1.05), #Adjust so labels are not cut off
name = "", breaks = NULL, labels = NULL) +
ylab(" ") +
xlab(" ") +
theme_classic() +
theme(legend.position = "none",
axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank()) +
ggtitle("Share of nights spent during outbound holidays of Europeans, by country of residence of the tourist, 2018",
subtitle="(% of nights spent abroad by residents of the EU-27)")
data_fig7 <- as_tibble(get_eurostat_data(id="demo_pjanbroad"))
data_fig7$year <- as.integer(substr(data_fig7$time,1,4))
countr_fig7 <- c("LU","SE","CY","NL","IE","DE","FI","EE","BE","AT","DK","SI","MT","FR","LT","SK",
"CZ","LV","HU","PL","HR","ES","IT","PT","BG","EL","RO","CH","NO")
Build the subset data_fig7_subs
data_fig7_subs <- subset(data_fig7,
data_fig7$geo %in% countr_fig7 &
data_fig7$year==2018 &
data_fig7$age %in% c("Y15-64","Y_GE65") &
data_fig7$sex=="T")
data_fig7_subs <- data_fig7_subs[,c(4,6)]
data_fig7_subs <- aggregate(data_fig7_subs$values, by = list(geo = data_fig7_subs$geo), FUN = sum)
Bring the countries in the order the graphik shows (we have to do this for both needed datasets, i.e. data_fig7_subs
and data_fig6_subs
).
data_fig7_final
data_fig6_subs_fig7 <- data_fig6_subs[order(factor(data_fig6_subs$geo, levels= countr_fig7)),]
data_fig7_subs_2 <- data_fig7_subs[order(factor(data_fig7_subs$geo, levels= countr_fig7)),]
data_fig7_final <- cbind(data_fig6_subs_fig7,data_fig7_subs_2[,2])
data_fig7_final$ratio <- round(data_fig7_final[,2]/data_fig7_final[,3],1)
Calculate the EU-27 value by summing up
data_fig7_final
are within EU_27
: EU_27_prep5
tab_EU5
tab_EU5_ratio
and match them to tab_EU5_final
(at this stage we get the warning message, that one NA for "Eu-27" get generated: we solve this problem two steps later)EU_27_prep5 <- data_fig7_final$geo %in% EU_27
tab_EU5 <- sapply(as.data.frame(data_fig7_final[EU_27_prep5,2:3]),sum)
tab_EU5_ratio <- round(tab_EU5[1]/tab_EU5[2],1)
tab_EU5_final <- c("EU-27",tab_EU5,tab_EU5_ratio)
data_fig7_final
data_fig7_final <- rbind(tab_EU5_final,data_fig7_final)
data_fig7_final[,1] <- as.character(data_fig7_final[,1])
data_fig7_final[1,1] <- "EU-27"
data_fig7_final[,1] <- factor(data_fig7_final[,1])
head(data_fig7_final)
Warning message in `[<-.factor`(`*tmp*`, ri, value = "EU-27"): “invalid factor level, NA generated”
geo | values | data_fig7_subs_2[, 2] | ratio | |
---|---|---|---|---|
<fct> | <chr> | <chr> | <chr> | |
1 | EU-27 | 2573268471 | 378201085 | 6.8 |
2 | LU | 15819287 | 504883 | 31.3 |
3 | SE | 143070504 | 8325565 | 17.2 |
4 | CY | 11955548 | 723947 | 16.5 |
5 | NL | 193011133 | 14418460 | 13.4 |
6 | IE | 50197497 | 3823944 | 13.1 |
"geo"
Plot the bar chart
data_fig7_final$geo <- factor(data_fig7_final$geo,
levels = c("EU-27","LU","SE","CY","NL","IE","DE","FI","EE","BE","AT","DK","SI","MT","FR","LT",
"SK","CZ","LV","HU","PL","HR","ES","IT","PT","BG","EL","RO","CH","NO"))
data_fig7_final$ratio <- as.numeric(as.character(data_fig7_final$ratio))
ggplot(data_fig7_final, aes(x=geo,y=ratio)) +
geom_bar(stat = "identity", position="dodge", width=0.5, fill="steelblue3") +
theme_classic() +
ggtitle("Country of origin for outbound tourism trips, 2018",
subtitle = "(average nights spent abroad per inhabitant aged 15 years or more)") +
coord_flip()+
scale_x_discrete(limits = rev(levels(data_fig7_final$geo)),
labels = c("Norway", "Switzerland", "Romania", "Greece", "Bulgaria", "Portugal", "Italy",
"Spain", "Croatia", "Poland", "Hungary", "Latvia", "Czechia", "Slovakia",
"Lithuania", "France", "Malta", "Slovenia", "Denmark", "Austria",
"Belgium", "Estonia", "Finland", "Germany", "Ireland", "Netherlands",
"Cyprus", "Sweden", "Luxembourg","EU-27")) +
ylab(" ") +
xlab(" ") +
theme(axis.title.x=element_blank(), #remove labels at x-axes
axis.text.x=element_blank(),
axis.ticks.x=element_blank()) +
geom_text(aes(label=ratio)) #add the ratio values after the bars
memory.limit(size=9999999999)
data_3 <- as_tibble(get_eurostat_data(id="bop_c6_a",filters=c("CRE","DEB","BAL","MIO_EUR","SD","S1","WRL_REST"),date_filter=c(2013,2018),verbose=TRUE,name=FALSE))
data_3$year <- as.integer(substr(data_3$time,1,4))
Warning message: “'memory.limit()' is Windows-specific”
0.9.8 id%3Dbop_c6_a filters%3Dc("CRE", "DEB", "BAL", "MIO_EUR", "SD", "S1", "WRL_REST") date_filter%3Dc(2013, 2018) verbose%3DTRUE name%3DFALSE Trying to download the DSD from: http://ec.europa.eu/eurostat/SDMX/diss-web/rest/datastructure/ESTAT/DSD_bop_c6_a Trying to extract the DSD from: /tmp/RtmpF5luDK/file1211abd98fa The DSD of the bop_c6_a dataset was cached in memory ('bop_c6_a.dsd' in '.restatapi_env'). date_filter: c20132018 filters: c20132018; is numeric: FALSE; call parents: 26 length df: 3 -*- df: c, 2013, 2018 2013, 2018 date filter length: 2, nchar date_filter: 4,4 date_filter: 2013, 2018 nrow dft: 2 .MIO_EUR.SD.S1.S1.CRE+DEB+BAL.WRL_REST.-?startPeriod=2013-01-01&endPeriod=2013-12-31?startPeriod=2018-01-01&endPeriod=2018-12-31 http://ec.europa.eu/eurostat/SDMX/diss-web/rest/data/bop_c6_a/.MIO_EUR.SD.S1.S1.CRE+DEB+BAL.WRL_REST./?startPeriod=2013-01-01&endPeriod=2013-12-31 http://ec.europa.eu/eurostat/SDMX/diss-web/rest/data/bop_c6_a/.MIO_EUR.SD.S1.S1.CRE+DEB+BAL.WRL_REST./?startPeriod=2013-01-01&endPeriod=2013-12-31 code: - severity: http://ec.europa.eu/eurostat/SDMX/diss-web/rest/data/bop_c6_a/.MIO_EUR.SD.S1.S1.CRE+DEB+BAL.WRL_REST./?startPeriod=2018-01-01&endPeriod=2018-12-31 http://ec.europa.eu/eurostat/SDMX/diss-web/rest/data/bop_c6_a/.MIO_EUR.SD.S1.S1.CRE+DEB+BAL.WRL_REST./?startPeriod=2018-01-01&endPeriod=2018-12-31 code: - severity: restat - nrow:228;ncol:10;colnames:CURRENCY/PARTNER/BOP_ITEM/STK_FLOW/SECTOR10/SECTPART/GEO/FREQ/obsTime/obsValue
countr_tab3_1_1 <- c("BE","BG","CZ","DK","DE","EE","IE","EL","ES","FR","HR","IT","CY","LV","LT","LU","HU","MT","NL",
"AT","PL","PT","RO","SI","SK","FI","SE","UK","IS","NO","CH","ME","MK","AL","RS","TR","BA","XK")
We create first the columns for 2018 and afterwards the same for 2013
cred_deb_bal
data_tab3_part1
Build the subset data_3_col1
within the loop
data_tab3_part1
from abovecred_deb_bal <- c("CRE","DEB","BAL")
data_tab3_part1 <- as.data.frame(matrix(ncol = 4, nrow = 38))
for (i in 1:3) {
data_3_col1 <- subset(data_3,
data_3$geo %in% countr_tab3_1_1 &
data_3$year == 2018 &
data_3$currency =="MIO_EUR"&
data_3$bop_item== "SD" &
data_3$stk_flow == cred_deb_bal[i] &
data_3$partner == "WRL_REST")
data_3_col1 <- data_3_col1[,c(7,9)]
data_tab3_part1[,1] <- data_3_col1[,1]
data_tab3_part1[,1+i] <- data_3_col1[,2]
}
Bring the countries in the order the graphik shows.
Finally we have three of the seven columns filled with data.
data_tab3_part1 <- data_tab3_part1[order(factor(data_tab3_part1$V1, levels= countr_tab3_1_1)),]
head(data_tab3_part1)
V1 | V2 | V3 | V4 | |
---|---|---|---|---|
<fct> | <dbl> | <dbl> | <dbl> | |
4 | BE | 7528.0 | 15654.0 | -8126.0 |
5 | BG | 3808.2 | 1584.4 | 2223.8 |
8 | CZ | 6305.3 | 5055.5 | 1249.8 |
10 | DK | 7718.8 | 8926.6 | -1207.7 |
9 | DE | 36390.0 | 80933.0 | -44543.0 |
11 | EE | 1509.6 | 1244.8 | 264.8 |
Now we create the 2013-columns and repeat the loop from above.
cred_deb_bal2
data_tab3_part2
and assign NA after the loop in data_3_col2_1
countr_tab3_1_2
Build the subset data_3_col2
within the loop and match the loop's result afterwards with the NAs of the three countries (data_tab3_part2
).
cred_deb_bal2 <- c("CRE","DEB")
data_tab3_part2 <- as.data.frame(matrix(ncol = 3, nrow = 35))
countr_tab3_1_2 <- c("BE","BG","CZ","DK","DE","EE","IE","EL","ES","FR","HR","IT","CY","LV","LT","LU","HU","MT","NL",
"AT","PL","RO","SI","SK","FI","SE","UK","IS","CH","ME","MK","RS","TR","BA","XK")
for (i in 1:2) {
data_3_col2 <- subset(data_3,
data_3$geo %in% countr_tab3_1_2 &
data_3$year == 2013 &
data_3$currency =="MIO_EUR"&
data_3$bop_item== "SD" &
data_3$stk_flow == cred_deb_bal2[i] &
data_3$partner == "WRL_REST")
data_3_col2 <- data_3_col2[,c(7,9)]
data_tab3_part2[,1] <- data_3_col2[,1]
data_tab3_part2[,1+i] <- data_3_col2[,2]
}
data_3_col2_1 <- data.frame( V1=c("AL","PT","NO"),
V2=c(NA,NA,NA),
V3=c(NA,NA,NA))
data_tab3_part2 <- rbind(data_tab3_part2,data_3_col2_1)
Bring the countries in the order the graphik shows.
Finally we have two further of the seven columns filled with data.
data_tab3_part2 <- data_tab3_part2[order(factor(data_tab3_part2$V1, levels= countr_tab3_1_1)),]
head(data_tab3_part2)
V1 | V2 | V3 | |
---|---|---|---|
<fct> | <dbl> | <dbl> | |
3 | BE | 10074.0 | 16692.0 |
4 | BG | 2890.8 | 840.0 |
7 | CZ | 5303.1 | 3493.8 |
9 | DK | 5385.1 | 7584.1 |
8 | DE | 31081.0 | 68793.0 |
10 | EE | 1255.8 | 802.8 |
Calculate the "relative to GDP"-column.
data_4
countr_tab3_2_1
data_4_col3_2
Build the subset data_4_col3_1
and match the result afterwards with the NA of Norway to data_tab3_part3
data_4 <- as_tibble(get_eurostat_data(id="nama_10_gdp"))
data_4$year <- as.integer(substr(data_4$time,1,4))
countr_tab3_2_1 <- c("BE","BG","CZ","DK","DE","EE","IE","EL","ES","FR","HR","IT","CY","LV","LT","LU","HU","MT","NL",
"AT","PL","PT","RO","SI","SK","FI","SE","UK","IS","CH","ME","MK","AL","RS","TR","BA","XK")
data_4_col3_1 <- subset(data_4,
data_4$geo %in% countr_tab3_2_1 &
data_4$year == 2018 &
data_4$na_item == "B1GQ" &
data_4$unit == "CP_MEUR")
data_4_col3_1 <- data_4_col3_1[,c(3,5)]
data_4_col3_2 <- data.frame( geo= "NO", values= NA)
data_tab3_part3 <- rbind(data_4_col3_1,data_4_col3_2)
Bring the countries in the order the graphik shows.
data_tab3_part3 <- data_tab3_part3[order(factor(data_tab3_part3$geo, levels= countr_tab3_1_1)),]
head(data_tab3_part3)
geo | values |
---|---|
<fct> | <dbl> |
BE | 460419.4 |
BG | 56111.8 |
CZ | 210927.8 |
DK | 302361.1 |
DE | 3356410.0 |
EE | 25937.6 |
Now we have to calculate the ratio.
col4_1
,col4_2
)col4_1 <- data_tab3_part1[,2]/data_tab3_part3[,2]*100
col4_2 <- data_tab3_part1[,3]/data_tab3_part3[,2]*100
head(col4_1)
head(col4_2)
values | |
---|---|
<dbl> | |
1 | 1.635031 |
2 | 6.786808 |
3 | 2.989317 |
4 | 2.552842 |
5 | 1.084194 |
6 | 5.820122 |
values | |
---|---|
<dbl> | |
1 | 3.399944 |
2 | 2.823649 |
3 | 2.396792 |
4 | 2.952298 |
5 | 2.411297 |
6 | 4.799210 |
countr_names2
table3
countr_names2 <- c("Belgium","Bulgaria","Czechia","Denmark","Germany","Estonia", "Ireland", "Greece", "Spain", "France",
"Croatia", "Italy", "Cyprus", "Latvia", "Lithuania", "Luxembourg", "Hungary", "Malta", "Netherlands",
"Austria", "Poland", "Portugal", "Romania", "Slovenia", "Slovakia", "Finland", "Sweden",
"United Kingdom","Iceland","Norway", "Switzerland","Montenegro","North Macedonia","Albania",
"Serbia","Turkey","Bosnia and Herzegovina","Kosovo")
table3 <- cbind(countr_names2,
round(data_tab3_part2[,2],0),
round(data_tab3_part1[,2],0),
round(col4_1,1),
round(data_tab3_part2[,3],0),
round(data_tab3_part1[,3],0),
round(col4_2,1),
round(data_tab3_part1[,4],0))
names(table3) <- c("",
"Receipts (million EUR) 2013",
"Receipts (million EUR) 2018",
"Relative to GDP 2018 (%)",
"Expenditure (million EUR) 2013",
"Expenditure (million EUR) 2018",
"Relative to GDP 2018 (%)",
"Balance (million EUR) 2018")
head(table3)
Receipts (million EUR) 2013 | Receipts (million EUR) 2018 | Relative to GDP 2018 (%) | Expenditure (million EUR) 2013 | Expenditure (million EUR) 2018 | Relative to GDP 2018 (%) | Balance (million EUR) 2018 | ||
---|---|---|---|---|---|---|---|---|
<fct> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
1 | Belgium | 10074 | 7528 | 1.6 | 16692 | 15654 | 3.4 | -8126 |
2 | Bulgaria | 2891 | 3808 | 6.8 | 840 | 1584 | 2.8 | 2224 |
3 | Czechia | 5303 | 6305 | 3.0 | 3494 | 5056 | 2.4 | 1250 |
4 | Denmark | 5385 | 7719 | 2.6 | 7584 | 8927 | 3.0 | -1208 |
5 | Germany | 31081 | 36390 | 1.1 | 68793 | 80933 | 2.4 | -44543 |
6 | Estonia | 1256 | 1510 | 5.8 | 803 | 1245 | 4.8 | 265 |
We need additionally the values for the EU
data_5
and add the year-columndata_5 <- as_tibble(get_eurostat_data(id="bop_eu6_q",select_freq="A"))
No data retrieved for the given filter(s), because the results are too big to download immediately through the REST API. The whole dataset is downloaded through the raw download and the filters are applied locally.
We create first the columns for 2018 and afterwards the same for 2013
cred_deb_bal <- c("CRE","DEB","BAL")
data_tab3_part4 <- as.data.frame(matrix(ncol = 3, nrow = 1))
for (i in 1:3) {
data_5_col1 <- subset(data_5,
data_5$geo == "EU27_2020" &
data_5$time == 2018 &
data_5$currency =="MIO_EUR"&
data_5$bop_item== "SD" &
data_5$stk_flow == cred_deb_bal[i] &
data_5$partner == "EXT_EU27_2020")
data_5_col1 <- data_5_col1[,10]
data_tab3_part4[,i] <- data_5_col1
}
data_tab3_part4
V1 | V2 | V3 |
---|---|---|
<dbl> | <dbl> | <dbl> |
156587 | 109167.3 | 47419.7 |
Now we create the 2013-columns and repeat the loop from above.
cred_deb_bal2 <- c("CRE","DEB")
data_tab3_part5 <- as.data.frame(matrix(ncol = 2, nrow = 1))
for (i in 1:2) {
data_5_col2 <- subset(data_5,
data_5$geo == "EU27_2020" &
data_5$time == 2013 &
data_5$currency =="MIO_EUR"&
data_5$bop_item== "SD" &
data_5$stk_flow == cred_deb_bal[i] &
data_5$partner == "EXT_EU27_2020")
data_5_col2 <- data_5_col2[,10]
data_tab3_part5[,i] <- data_5_col2
}
data_tab3_part5
V1 | V2 |
---|---|
<dbl> | <dbl> |
126010.7 | 86737.8 |
Calculate the "relative to GDP"-column
data_4
to receive the GDPs per countrydata_tab3_part4
) through the GDP-column calculated just now (data_5_col3
): data_5_col4_1
, data_5_col4_2
data_5_col3 <- subset(data_4,
data_4$geo == "EU27_2020" &
data_4$year == 2018 &
data_4$na_item == "B1GQ"&
data_4$unit == "CP_MEUR")
data_5_col3 <- data_5_col3[,5]
data_5_col4_1 <- data_tab3_part4[,1]/data_5_col3*100
data_5_col4_2 <- data_tab3_part4[,2]/data_5_col3*100
data_5_col4_1
data_5_col4_2
values |
---|
<dbl> |
1.158366 |
values |
---|
<dbl> |
0.8075745 |
Finally put the calculated columns all together and round them correspondingly: table3_EU
Give the columns the correct names
countr_names3 <- c("EU-27")
table3_EU <- cbind(countr_names3,
round(data_tab3_part5[,1],0),
round(data_tab3_part4[,1],0),
round(data_5_col4_1,1),
round(data_tab3_part5[,2],0),
round(data_tab3_part4[,2],0),
round(data_5_col4_2,1),
round(data_tab3_part4[,3],0))
names(table3_EU) <- c("",
"Receipts (million EUR) 2013",
"Receipts (million EUR) 2018",
"Relative to GDP 2018 (%)",
"Expenditure (million EUR) 2013",
"Expenditure (million EUR) 2018",
"Relative to GDP 2018 (%)",
"Balance (million EUR) 2018")
table3_EU
Receipts (million EUR) 2013 | Receipts (million EUR) 2018 | Relative to GDP 2018 (%) | Expenditure (million EUR) 2013 | Expenditure (million EUR) 2018 | Relative to GDP 2018 (%) | Balance (million EUR) 2018 | |
---|---|---|---|---|---|---|---|
<fct> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> |
EU-27 | 126011 | 156587 | 1.2 | 86738 | 109167 | 0.8 | 47420 |
Add the EU values (table3_EU
) to table3
: table3_final
table3_final <- rbind(table3_EU,table3)
as.matrix(table3_final)
Receipts (million EUR) 2013 | Receipts (million EUR) 2018 | Relative to GDP 2018 (%) | Expenditure (million EUR) 2013 | Expenditure (million EUR) 2018 | Relative to GDP 2018 (%) | Balance (million EUR) 2018 | |
---|---|---|---|---|---|---|---|
EU-27 | 126011 | 156587 | 1.2 | 86738 | 109167 | 0.8 | 47420 |
Belgium | 10074 | 7528 | 1.6 | 16692 | 15654 | 3.4 | -8126 |
Bulgaria | 2891 | 3808 | 6.8 | 840 | 1584 | 2.8 | 2224 |
Czechia | 5303 | 6305 | 3.0 | 3494 | 5056 | 2.4 | 1250 |
Denmark | 5385 | 7719 | 2.6 | 7584 | 8927 | 3.0 | -1208 |
Germany | 31081 | 36390 | 1.1 | 68793 | 80933 | 2.4 | -44543 |
Estonia | 1256 | 1510 | 5.8 | 803 | 1245 | 4.8 | 265 |
Ireland | 3370 | 5760 | 1.8 | 4669 | 6399 | 2.0 | -639 |
Greece | 12152 | 16086 | 9.0 | 1835 | 2191 | 1.2 | 13895 |
Spain | 51589 | 69168 | 5.7 | 12359 | 22396 | 1.9 | 46772 |
France | 53103 | 55912 | 2.4 | 31787 | 41398 | 1.8 | 14514 |
Croatia | 6136 | 9489 | 18.3 | 679 | 1434 | 2.8 | 8054 |
Italy | 33063 | 41712 | 2.4 | 20309 | 25484 | 1.4 | 16228 |
Cyprus | 2211 | 2940 | 13.7 | 944 | 1330 | 6.2 | 1610 |
Latvia | 651 | 897 | 3.1 | 538 | 660 | 2.3 | 237 |
Lithuania | 1035 | 1274 | 2.8 | 805 | 1186 | 2.6 | 89 |
Luxembourg | 5387 | 5175 | 8.6 | 1896 | 2813 | 4.7 | 2362 |
Hungary | 4043 | 5850 | 4.3 | 1437 | 2239 | 1.6 | 3612 |
Malta | 1057 | 1574 | 12.6 | 289 | 440 | 3.5 | 1133 |
Netherlands | 10343 | 15057 | 1.9 | 15589 | 17680 | 2.3 | -2623 |
Austria | 15237 | 19559 | 5.1 | 7738 | 10020 | 2.6 | 9539 |
Poland | 8405 | 11759 | 2.4 | 6483 | 8042 | 1.6 | 3717 |
Portugal | NA | 17053 | 8.3 | NA | 4585 | 2.2 | 12468 |
Romania | 1392 | 2876 | 1.4 | 1508 | 4522 | 2.2 | -1646 |
Slovenia | 2094 | 2704 | 5.9 | 1068 | 1482 | 3.2 | 1221 |
Slovakia | 1998 | 2710 | 3.0 | 1782 | 2225 | 2.5 | 484 |
Finland | 3044 | 3104 | 1.3 | 3989 | 5151 | 2.2 | -2047 |
Sweden | 6908 | 8464 | 1.8 | 10661 | 13297 | 2.8 | -4833 |
United Kingdom | 35080 | 42387 | 1.8 | 45556 | 60006 | 2.5 | -17619 |
Iceland | 818 | 2664 | 12.0 | 642 | 1437 | 6.5 | 1227 |
Norway | NA | 5029 | NA | NA | 14472 | NA | -9443 |
Switzerland | 12562 | 15232 | 2.4 | 12819 | 16147 | 2.6 | -914 |
Montenegro | 666 | 1001 | 21.5 | 37 | 58 | 1.2 | 943 |
North Macedonia | 201 | 325 | 3.0 | 98 | 220 | 2.0 | 105 |
Albania | NA | 1856 | 14.5 | NA | 1426 | 11.1 | 430 |
Serbia | 792 | 1317 | 3.1 | 841 | 1396 | 3.3 | -79 |
Turkey | 21089 | 21482 | 3.3 | 3623 | 3888 | 0.6 | 17594 |
Bosnia and Herzegovina | 516 | 933 | 5.5 | 101 | 227 | 1.3 | 706 |
Kosovo | 648 | 1228 | 18.3 | 135 | 302 | 4.5 | 926 |