import os import sys sys.path.insert(0, os.path.abspath('../../../')) os.chdir(os.path.abspath('../../../')) import src.cdsretrieve as retrieve import src.preprocess as preprocess import numpy as np import xarray as xr retrieve.retrieve_SEAS5( variables=['2m_temperature', '2m_dewpoint_temperature'], target_months=[8], area=[70, -130, 20, -70], years=np.arange(1981, 2021), folder='../California_example/SEAS5/') retrieve.retrieve_ERA5(variables=['2m_temperature', '2m_dewpoint_temperature'], target_months=[8], area=[70, -130, 20, -70], folder='../California_example/ERA5/') SEAS5_California = preprocess.merge_SEAS5(folder ='../California_example/SEAS5/', target_months = [8]) ERA5_California = xr.open_mfdataset('../California_example/ERA5/ERA5_????.nc',combine='by_coords') ERA5_anomaly = ERA5_California['t2m'] - ERA5_California['t2m'].sel(time=slice('1979','2010')).mean('time') ERA5_sd_anomaly = ERA5_anomaly / ERA5_California['t2m'].sel(time=slice('1979','2010')).std('time') LSMask = xr.open_dataset('../California_example/ERA_landsea_mask.nc') # convert the longitude from 0:360 to -180:180 LSMask['longitude'] = (((LSMask['longitude'] + 180) % 360) - 180) area_weights = np.cos(np.deg2rad(ERA5_sd_anomaly.latitude)) ERA5_California_events = ( ERA5_California['t2m'].sel( # Select 2 metre temperature longitude = slice(-125,-100), # Select the longitude latitude = slice(45,20)). # And the latitude where(ERA5_sd_anomaly.sel(time = '2020').squeeze('time') > 2). ##Mask the region where 2020 sd >2. where(LSMask['lsm'].sel(time = '1979').squeeze('time') > 0.5). #Select land-only gridcells weighted(area_weights). mean(['longitude', 'latitude']) #And take the mean ) ERA5_California_events.plot() SEAS5_California_events = ( SEAS5_California['t2m'].sel( longitude = slice(-125,-100), # Select the longitude latitude = slice(45,20)). # And the latitude where(ERA5_sd_anomaly.sel(time = '2020').squeeze('time') > 2). #Mask the region where 2020 sd >2. where(LSMask['lsm'].sel(time = '1979').squeeze('time') > 0.5). #Select land-only gridcells weighted(area_weights). mean(['longitude', 'latitude'])) SEAS5_California_events.rename('t2m').to_dataframe().to_csv('Data/SEAS5_California_events.csv') ERA5_California_events.rename('t2m').to_dataframe().to_csv('Data/ERA5_California_events.csv') setwd('../../..') getwd() SEAS5_California_events <- read.csv("Data/SEAS5_California_events.csv", stringsAsFactors=FALSE) ERA5_California_events <- read.csv("Data/ERA5_California_events.csv", stringsAsFactors=FALSE) ## Convert Kelvin to Celsius SEAS5_California_events$t2m <- SEAS5_California_events$t2m - 273.15 ERA5_California_events$t2m <- ERA5_California_events$t2m - 273.15 ## Convert character time to Date format ERA5_California_events$time <- lubridate::ymd(ERA5_California_events$time) SEAS5_California_events$time <- lubridate::ymd(SEAS5_California_events$time) require(UNSEEN) require(ggplot2) timeseries = unseen_timeseries( ensemble = SEAS5_California_events, obs = ERA5_California_events, ensemble_yname = "t2m", ensemble_xname = "time", obs_yname = "t2m", obs_xname = "time", ylab = "August California temperature (C)") timeseries + theme(text = element_text(size = 14)) SEAS5_California_events_hindcast <- SEAS5_California_events[ SEAS5_California_events$time < '2017-02-01' & SEAS5_California_events$number < 25,] SEAS5_California_events_forecasts <- SEAS5_California_events[ SEAS5_California_events$time > '2017-02-01',] ERA5_California_events_hindcast <- ERA5_California_events[ ERA5_California_events$time > '1981-02-01' & ERA5_California_events$time < '2017-02-01',] unseen_timeseries( ensemble = SEAS5_California_events_hindcast, obs = ERA5_California_events_hindcast, ensemble_yname = "t2m", ensemble_xname = "time", obs_yname = "t2m", obs_xname = "time", ylab = "August California temperature (C)") + theme(text = element_text(size = 14)) Independence_California = independence_test( ensemble = SEAS5_California_events_hindcast, var_name = "t2m" ) Independence_California + theme(text = element_text(size = 14)) Stability_California = stability_test( ensemble = SEAS5_California_events_hindcast, lab = 'August California temperature (C)', var_name = 't2m' ) Stability_California Stability_California = stability_test( ensemble = SEAS5_California_events_hindcast, lab = 'August temperature (C)', var_name = 't2m', fontsize = 10 ) # ggsave(Stability_California,height = 120, width = 120, units = 'mm', filename = "graphs/California_stability.pdf") Fidelity_California = fidelity_test( obs = ERA5_California_events_hindcast$t2m, ensemble = SEAS5_California_events_hindcast$t2m, units = 'C', biascor = FALSE, fontsize = 14 ) Fidelity_California obs = ERA5_California_events_hindcast$t2m ensemble = SEAS5_California_events_hindcast$t2m ensemble_biascor = ensemble + (mean(obs) - mean(ensemble)) fidelity_test( obs = obs, ensemble = ensemble_biascor, units = 'C', biascor = FALSE, fontsize = 14 ) timeseries_font10 = timeseries + theme(text = element_text(size = 10)) Independence_font10 = Independence_California + theme(text = element_text(size = 10)) Stability_font10 = stability_test( ensemble = SEAS5_California_events_hindcast, lab = 'August temperature (C)', var_name = 't2m', fontsize = 10, panel_labels = c("c", "d") ) Fidelity_font10 = fidelity_test( obs = ERA5_California_events_hindcast$t2m, ensemble = SEAS5_California_events_hindcast$t2m, ylab = '', yticks = FALSE, units = 'C', biascor = FALSE, fontsize = 10, panel_labels = c("e", "f", "g", "h") ) Evaluations = ggpubr::ggarrange(timeseries_font10, Independence_font10, Stability_font10, Fidelity_font10, labels = c("a","b", "", ""), font.label = list(size = 10, color = "black", face = "bold", family = NULL), ncol = 2, nrow = 2) Evaluations # ggsave(Evaluations,height = 180, width = 180, units = 'mm', filename = "graphs/California_evaluation_test2.pdf") require('extRemes') source('src/evt_plot.r') obs <- ERA5_California_events[ ERA5_California_events$time > '1981-02-01',] UNSEEN_bc <- SEAS5_California_events[SEAS5_California_events$leadtime < 6 & SEAS5_California_events$number < 25,] UNSEEN_bc$t2m <- (UNSEEN_bc$t2m + mean(ERA5_California_events_hindcast$t2m) - mean(SEAS5_California_events_hindcast$t2m) ) str(UNSEEN_bc) timeseries = unseen_timeseries( ensemble = UNSEEN_bc, obs = obs, ensemble_yname = "t2m", ensemble_xname = "time", obs_yname = "t2m", obs_xname = "time", ylab = "August California temperature (C)") timeseries + theme(text = element_text(size = 14)) ## Fit stationary distributions fit_obs_Gumbel <- fevd(x = obs$t2m, type = "Gumbel" ) fit_obs_GEV <- fevd(x = obs$t2m, type = "GEV" ) ## And the nonstationary distribution fit_obs_GEV_nonstat <- fevd(x = obs$t2m, type = "GEV", location.fun = ~ c(1:length(obs$time)), ##Fitting the gev with a location and scale parameter linearly correlated to the covariate (years) scale.fun = ~ c(1:length(obs$time)), use.phi = TRUE ) #And test the fit ##1. Stationary Gumbel vs stationary GEV lr.test(fit_obs_Gumbel, fit_obs_GEV_nonstat) ##2. Stationary GEV vs Nonstationary GEV lr.test(fit_obs_GEV, fit_obs_GEV_nonstat) #Create the ensemble covariate year_vector = as.integer(format(UNSEEN_bc$time, format="%Y")) covariate_ens = year_vector - 1980 # Fit the stationary distribution fit_unseen_GEV <- fevd(x = UNSEEN_bc$t2m, type = 'GEV', use.phi = TRUE) fit_unseen_Gumbel <- fevd(x = UNSEEN_bc$t2m, type = 'Gumbel', use.phi = TRUE) # Fit the nonstationary distribution fit_unseen_GEV_nonstat <- fevd(x = UNSEEN_bc$t2m, type = 'GEV', location.fun = ~ covariate_ens, ##Fitting the gev with a location and scale parameter linearly correlated to the covariate (years) scale.fun = ~ covariate_ens, use.phi = TRUE) #And test the fit ##1. Stationary Gumbel vs stationary GEV lr.test(fit_unseen_Gumbel,fit_unseen_GEV) ##2. Stationary GEV vs Nonstationary GEV lr.test(fit_unseen_GEV, fit_unseen_GEV_nonstat) year_vector = as.integer(format(UNSEEN_bc$time, format="%Y")) covariate_ens = year_vector - 1980 Trend_2year <- unseen_trends1(ensemble = UNSEEN_bc$t2m, x_ens = year_vector, x_obs = 1981:2020, rp = 2, obs = obs$t2m, covariate_ens = covariate_ens, covariate_obs = c(1:(length(obs$time)-1)), covariate_values = c(1:length(obs$time)), GEV_type = 'GEV', ylab = 'August temperature (C)', title = '2-year') + ylim(c(20,28.5)) Trend_100year <- unseen_trends1(ensemble = UNSEEN_bc$t2m, x_ens = year_vector, x_obs = 1981:2020, rp = 100, obs = obs$t2m, covariate_ens = covariate_ens, covariate_obs = c(1:(length(obs$time)-1)), covariate_values = c(1:length(obs$time)), GEV_type = 'GEV', ylab = '', title = '100-year') + ylim(c(20,28.5)) ggpubr::ggarrange(Trend_2year,Trend_100year, labels = c("a","b"), common.legend = TRUE, font.label = list(size = 14, color = "black", face = "bold", family = NULL), ncol = 2, nrow = 1) #+ # ggsave(height = 100, width = 180, units = 'mm', filename = "graphs/California_trends.png") p2 <- unseen_trends2(ensemble = UNSEEN_bc$t2m, obs = obs[1:(length(obs$time)-1),]$t2m, covariate_ens = covariate_ens, covariate_obs = c(1:(length(obs$time)-1)), GEV_type = 'GEV', ylab = 'August temperature (C)') Distributions = p2 + geom_hline(yintercept = obs[obs$time == '2020-08-01',]$t2m) #+ Distributions Trends = ggpubr::ggarrange(Trend_2year,Trend_100year, labels = c("a","b"), common.legend = TRUE, font.label = list(size = 10, color = "black", face = "bold", family = NULL), ncol = 2, nrow = 1) ggpubr::ggarrange(Trends,Distributions, labels = c("","c"), font.label = list(size = 10, color = "black", face = "bold", family = NULL), ncol = 1, nrow = 2) + ggsave(height = 180, width = 180, units = 'mm', filename = "graphs/California_trends2.pdf")