rm(list = ls()) library(readr) library(ggplot2) library(dplyr) library(tidyr) library(RSocrata) library(ca) library(forcats) library(reshape2) library(lubridate) library(repr) library(stringr) my.read.csv <- function(url, filename) { if (!file.exists(filename)) { write_csv(read.socrata(url), filename) } return(read_csv(filename)) } crops.herbaceous <- my.read.csv("https://analisis.datosabiertos.jcyl.es/resource/agu2-cspz.csv", "./data/crops-herbaceous.csv") crops.woody <- my.read.csv("https://analisis.datosabiertos.jcyl.es/resource/2vwa-si9n.csv", "./data/crops-woody.csv") code.province.to.province <- function(code) { recode(code, "47" = "Valladolid", "24" = "León", "34" = "Palencia", "37" = "Salamanca", "9" = "Burgos", "49" = "Zamora", "5" = "Ávila", "42" = "Soria", "40" = "Segovia") } crops.herbaceous.use <- crops.herbaceous %>% select(a_o, codigo_provincia:ocupaci_n_primera_secano) %>% rename(year = a_o, code.province = codigo_provincia, crop = cultivo, crop.group = grupo_de_cultivo, region = comarca, town = municipio, irrigation = ocupaci_n_primera_regad_o, dry = ocupaci_n_primera_secano) %>% gather(c(dry, irrigation), key = "crop.technique", value = "area") %>% filter(!is.na(area) & area > 0 ) %>% mutate(crop = factor(crop), crop.group = as.factor(crop.group), region = as.factor(region), town = as.factor(town), province = as.factor(code.province.to.province(code.province))) %>% select(-code.province) crops.woody.use <- crops.woody %>% select(a_o, codigo_provincia:municipio, superficie_regad_o_en_producci_n, superficie_secano_en_producci_n) %>% rename(year = a_o, code.province = codigo_provincia, crop = cultivo, crop.group = grupo_de_cultivo, region = comarca, town = municipio, irrigation = superficie_regad_o_en_producci_n, dry = superficie_secano_en_producci_n) %>% gather(c(dry, irrigation), key = "crop.technique", value = "area") %>% filter(!is.na(area) & area > 0 ) %>% mutate(crop = as.factor(crop), crop.group = as.factor(crop.group), region = as.factor(region), town = as.factor(town), province = as.factor(code.province.to.province(code.province)), year = year(round(as.POSIXct(year), "days")), crop.group = recode(crop.group, "VIÑEDO OCUPACIÓN PRINCIPAL" = "VIÑEDO", "CITRICOS" = "FRUTALES"), crop.group = replace(crop.group, crop == "MIMBRERO" | crop == "MORERA Y OTROS", "OTROS CULTIVOS LEÑOSOS"), crop.group = replace(crop.group, crop == "OLI ACEITUNA ACEITE" | crop == "OLIVAR ACEIUNA MESA", "OLIVAR")) %>% select(-code.province) crops.woody.use %>% select(crop.group) %>% pull(crop.group) %>% droplevels() %>% levels() # crops.woody.use %>% # select(crop.group, crop) %>% # filter(crop.group == "VIÑEDO") %>% # pull(crop) %>% # droplevels() %>% # levels() # # crops.woody.use %>% # select(crop.group, crop) %>% # filter(crop.group == "VIÑEDO OCUPACIÓN PRINCIPAL") %>% # pull(crop) %>% # droplevels() %>% # levels() # # crops.woody.use %>% # select(crop.group, crop) %>% # filter(crop.group == "CITRICOS") %>% # pull(crop) %>% # droplevels() %>% # levels() crops.woody.use %>% select(crop.group, crop) %>% filter(crop.group == "OLIVAR") %>% pull(crop) %>% droplevels() %>% levels() crops.woody.use %>% select(crop.group, crop) %>% filter(crop.group == "OLIVAR Y OTROS C.I.") %>% pull(crop) %>% droplevels() %>% levels() crops.woody.use %>% select(crop.group, crop) %>% filter(crop.group == "OTROS CULTIVOS LEÑOSOS") %>% pull(crop) %>% droplevels() %>% levels() crops.use <- rbind(crops.woody.use %>% mutate(crop.type = factor("woody")), crops.herbaceous.use %>% mutate(crop.type = factor("herbaceous"))) sample_n(crops.use, 10) summary(crops.use) options(repr.plot.width = 10, repr.plot.height = 5) crops.use %>% select(year, area, crop.type, crop.technique, province) %>% group_by(year, crop.type, crop.technique, province) %>% summarise(area = sum(area)) %>% group_by(crop.type, crop.technique, province) %>% mutate(area = area / sum(area)) %>% ggplot(aes(x=year, y = area, group = province, col = province)) + labs(title = "Crop Area Ratio by Year and irrigation type", x = "Year", y = "Area") + facet_wrap( ~ crop.type + crop.technique, scales = "free_y") + geom_line() options(repr.plot.width = 10, repr.plot.height = 15) crops.use %>% select(crop, crop.type, crop.group, crop.technique, area) %>% group_by(crop.group, crop.type, crop.technique, crop) %>% summarise(area = sum(area) / (max(crops.use$year) - min(crops.use$year))) %>% top_n(5, area) %>% ggplot(aes(x = crop.technique, y = area, fill = crop, label = str_to_title(crop) )) + geom_bar(width = 1, stat = "identity") + theme(legend.position='none') + labs(title = "Woody Crop Decomposition", x = "", y = "Area") + facet_wrap(crop.group ~ crop.type, scales = "free_y", ncol = 3)+ geom_text(size = 3, position = position_stack(vjust = 0.5), colour = "white") options(repr.plot.width = 10, repr.plot.height = 4) crops.use %>% select(area, crop.type, crop.technique, province) %>% group_by(province, crop.type, crop.technique) %>% summarise(area = sum(area)) %>% group_by(province, crop.type) %>% mutate(area = area / sum(area)) %>% ggplot(aes(x = "", y = area, fill = crop.technique)) + geom_bar(stat = "identity", width = 1) + labs(title = "Crop Method Predominance", x = "", y = "", fill = "Irrigated") + coord_polar(theta = "y") + facet_wrap(crop.type ~ province, ncol= 9) + theme(axis.text = element_blank(), axis.ticks = element_blank(), panel.grid = element_blank()) options(repr.plot.width = 10, repr.plot.height = 35) crops.use %>% select(year, crop.technique, crop.group,crop.type, crop, area) %>% group_by(year, crop.technique, crop.group,crop.type, crop) %>% summarise(area = sum(area)) %>% group_by(year, crop.technique, crop.type) %>% top_n(10, area) %>% ggplot(aes(x = interaction(crop, area), y = area, fill = crop.group)) + geom_col() + theme(legend.position = 'top') + guides(fill = guide_legend(nrow = 2)) + labs(title = "Dry Crop's Area by Year", x = "Crop", y = "Area", fill = "Crop Group") + coord_flip() + facet_wrap( ~ crop.type + year + crop.technique, scales = "free", ncol = 2) + scale_x_discrete(labels = function(x) str_to_title(gsub("\\..+", "", x))) crops.woody.use.ca <- crops.woody.use %>% select(crop.group, province, area) %>% filter(area > 0) %>% xtabs(formula = area ~ ., drop.unused.levels = TRUE) %>% ca() summary(crops.woody.use.ca) options(repr.plot.width = 7, repr.plot.height = 5) ggplot(mapping = aes(x =Dim1, y = Dim2 )) + labs(title = "Woody Crop Groups by Province", colour = "") + geom_point(data = as.data.frame(crops.woody.use.ca$colcoord), mapping = aes(colour = 'Province')) + geom_point(data = as.data.frame(crops.woody.use.ca$rowcoord), mapping = aes(colour = 'Crop Type')) + geom_text(data = as.data.frame(crops.woody.use.ca$rowcoord), label = str_to_title(rownames(crops.woody.use.ca$rowcoord)), hjust = 0.5, vjust = - 1, size = 3) + geom_text(data = as.data.frame(crops.woody.use.ca$colcoord), label = str_to_title(rownames(crops.woody.use.ca$colcoord)), hjust = 0.5, vjust = 1.5, size = 4) crops.herbaceous.use.ca <- crops.herbaceous.use %>% select(crop.group, province, area) %>% filter(area > 0) %>% xtabs(formula = area ~ ., drop.unused.levels = TRUE) %>% ca() summary(crops.herbaceous.use.ca) options(repr.plot.width = 7, repr.plot.height = 5) ggplot(mapping = aes(x =Dim1, y = Dim2 )) + labs(title = "Herbaceous Crop Groups by Province", colour = "") + geom_point(data = as.data.frame(crops.herbaceous.use.ca$colcoord), mapping = aes(colour = 'Province')) + geom_point(data = as.data.frame(crops.herbaceous.use.ca$rowcoord), mapping = aes(colour = 'Crop Type')) + geom_text(data = as.data.frame(crops.herbaceous.use.ca$rowcoord), label =str_to_title(rownames(crops.herbaceous.use.ca$rowcoord)), hjust = 0.5, vjust = 1.5, size = 3) + geom_text(data = as.data.frame(crops.herbaceous.use.ca$colcoord), label = str_to_title(rownames(crops.herbaceous.use.ca$colcoord)), hjust = 0.5, vjust = - 0.75, size = 4)