import os
import sys
sys.path.insert(0, os.path.abspath('../../../'))
os.chdir(os.path.abspath('../../../'))

import src.cdsretrieve as retrieve
import src.preprocess as preprocess

import numpy as np
import xarray as xr

retrieve.retrieve_SEAS5(
    variables=['2m_temperature', '2m_dewpoint_temperature'],
    target_months=[8],
    area=[70, -130, 20, -70],
    years=np.arange(1981, 2021),
    folder='../California_example/SEAS5/')

retrieve.retrieve_ERA5(variables=['2m_temperature', '2m_dewpoint_temperature'],
                       target_months=[8],
                       area=[70, -130, 20, -70],
                       folder='../California_example/ERA5/')

SEAS5_California = preprocess.merge_SEAS5(folder ='../California_example/SEAS5/', target_months = [8])

ERA5_California = xr.open_mfdataset('../California_example/ERA5/ERA5_????.nc',combine='by_coords')

ERA5_anomaly = ERA5_California['t2m'] - ERA5_California['t2m'].sel(time=slice('1979','2010')).mean('time')
ERA5_sd_anomaly = ERA5_anomaly / ERA5_California['t2m'].sel(time=slice('1979','2010')).std('time')

LSMask = xr.open_dataset('../California_example/ERA_landsea_mask.nc')
# convert the longitude from 0:360 to -180:180
LSMask['longitude'] = (((LSMask['longitude'] + 180) % 360) - 180)

area_weights = np.cos(np.deg2rad(ERA5_sd_anomaly.latitude))

ERA5_California_events = (
    ERA5_California['t2m'].sel(  # Select 2 metre temperature
        longitude = slice(-125,-100),    # Select the longitude
        latitude = slice(45,20)).        # And the latitude
    where(ERA5_sd_anomaly.sel(time = '2020').squeeze('time') > 2). ##Mask the region where 2020 sd >2. 
    where(LSMask['lsm'].sel(time = '1979').squeeze('time') > 0.5). #Select land-only gridcells
    weighted(area_weights).
    mean(['longitude', 'latitude']) #And take the mean
)

ERA5_California_events.plot()

SEAS5_California_events = (
    SEAS5_California['t2m'].sel(
        longitude = slice(-125,-100),    # Select the longitude
        latitude = slice(45,20)).        # And the latitude
    where(ERA5_sd_anomaly.sel(time = '2020').squeeze('time') > 2). #Mask the region where 2020 sd >2.
    where(LSMask['lsm'].sel(time = '1979').squeeze('time') > 0.5). #Select land-only gridcells
    weighted(area_weights).
    mean(['longitude', 'latitude']))

SEAS5_California_events.rename('t2m').to_dataframe().to_csv('Data/SEAS5_California_events.csv')
ERA5_California_events.rename('t2m').to_dataframe().to_csv('Data/ERA5_California_events.csv')

setwd('../../..')
getwd()

SEAS5_California_events <- read.csv("Data/SEAS5_California_events.csv", stringsAsFactors=FALSE)
ERA5_California_events <- read.csv("Data/ERA5_California_events.csv", stringsAsFactors=FALSE)

## Convert Kelvin to Celsius
SEAS5_California_events$t2m <- SEAS5_California_events$t2m - 273.15
ERA5_California_events$t2m <- ERA5_California_events$t2m - 273.15

## Convert character time to Date format
ERA5_California_events$time <- lubridate::ymd(ERA5_California_events$time)
SEAS5_California_events$time <- lubridate::ymd(SEAS5_California_events$time)

require(UNSEEN)
require(ggplot2)

timeseries = unseen_timeseries(
    ensemble = SEAS5_California_events,
    obs = ERA5_California_events,
    ensemble_yname = "t2m",
    ensemble_xname = "time",
    obs_yname = "t2m",
    obs_xname = "time",
    ylab = "August California temperature (C)") 

timeseries + theme(text = element_text(size = 14))

SEAS5_California_events_hindcast <- SEAS5_California_events[
    SEAS5_California_events$time < '2017-02-01' &
    SEAS5_California_events$number < 25,]

SEAS5_California_events_forecasts <- SEAS5_California_events[
    SEAS5_California_events$time > '2017-02-01',]

ERA5_California_events_hindcast <- ERA5_California_events[
    ERA5_California_events$time > '1981-02-01' &
    ERA5_California_events$time < '2017-02-01',]

unseen_timeseries(
    ensemble = SEAS5_California_events_hindcast,
    obs = ERA5_California_events_hindcast,
    ensemble_yname = "t2m",
    ensemble_xname = "time",
    obs_yname = "t2m",
    obs_xname = "time",
    ylab = "August California temperature (C)") + 
theme(text = element_text(size = 14))

Independence_California = independence_test(
    ensemble = SEAS5_California_events_hindcast,
    var_name = "t2m"
    ) 

Independence_California +
    theme(text = element_text(size = 14))

Stability_California = stability_test(
        ensemble = SEAS5_California_events_hindcast,
        lab = 'August California temperature (C)',
        var_name = 't2m'
    )
Stability_California


Stability_California = stability_test(
        ensemble = SEAS5_California_events_hindcast,
        lab = 'August temperature (C)',
        var_name = 't2m',
        fontsize = 10
    
    )
# ggsave(Stability_California,height = 120, width = 120, units = 'mm', filename = "graphs/California_stability.pdf")

Fidelity_California = fidelity_test(
    obs = ERA5_California_events_hindcast$t2m,
    ensemble = SEAS5_California_events_hindcast$t2m,
    units = 'C',
    biascor = FALSE,
    fontsize = 14
)
Fidelity_California

obs = ERA5_California_events_hindcast$t2m
ensemble = SEAS5_California_events_hindcast$t2m
ensemble_biascor = ensemble + (mean(obs) - mean(ensemble))

fidelity_test(
    obs = obs,
    ensemble = ensemble_biascor,
    units = 'C',
    biascor = FALSE,
    fontsize = 14
)

timeseries_font10 = timeseries + theme(text = element_text(size = 10))
Independence_font10 = Independence_California + theme(text = element_text(size = 10))
Stability_font10 = stability_test(
    ensemble = SEAS5_California_events_hindcast,
    lab = 'August temperature (C)',
    var_name = 't2m',
    fontsize = 10,
    panel_labels = c("c", "d")
)
Fidelity_font10 = fidelity_test(
    obs = ERA5_California_events_hindcast$t2m,
    ensemble = SEAS5_California_events_hindcast$t2m,
    ylab = '', 
    yticks = FALSE, 
    units = 'C',
    biascor = FALSE,
    fontsize = 10,
    panel_labels = c("e", "f", "g", "h")
)

Evaluations = ggpubr::ggarrange(timeseries_font10,
                                Independence_font10,
                                Stability_font10,
                                Fidelity_font10,
                                labels = c("a","b", "", ""),
                                font.label = list(size = 10,
                                                  color = "black",
                                                  face = "bold",
                                                  family = NULL),
                                ncol = 2,
                                nrow = 2)
Evaluations
# ggsave(Evaluations,height = 180, width = 180, units = 'mm', filename = "graphs/California_evaluation_test2.pdf")

require('extRemes')

source('src/evt_plot.r')

obs <- ERA5_California_events[
    ERA5_California_events$time > '1981-02-01',]

UNSEEN_bc <- SEAS5_California_events[SEAS5_California_events$leadtime < 6 &
                                     SEAS5_California_events$number < 25,]

UNSEEN_bc$t2m <- (UNSEEN_bc$t2m +
                  mean(ERA5_California_events_hindcast$t2m) - mean(SEAS5_California_events_hindcast$t2m)
                               )
str(UNSEEN_bc)

timeseries = unseen_timeseries(
    ensemble = UNSEEN_bc,
    obs = obs,
    ensemble_yname = "t2m",
    ensemble_xname = "time",
    obs_yname = "t2m",
    obs_xname = "time",
    ylab = "August California temperature (C)") 

timeseries + theme(text = element_text(size = 14))

## Fit stationary distributions
fit_obs_Gumbel <- fevd(x = obs$t2m,
                    type = "Gumbel"
                   )
fit_obs_GEV <- fevd(x = obs$t2m,
                    type = "GEV"
                   )
## And the nonstationary distribution
fit_obs_GEV_nonstat <- fevd(x = obs$t2m,
                            type = "GEV",
                            location.fun = ~ c(1:length(obs$time)), ##Fitting the gev with a location and scale parameter linearly correlated to the covariate (years)
                            scale.fun = ~ c(1:length(obs$time)), 
                            use.phi = TRUE
                           )
#And test the fit
##1. Stationary Gumbel vs stationary GEV
lr.test(fit_obs_Gumbel, fit_obs_GEV_nonstat) 
##2. Stationary GEV vs Nonstationary GEV
lr.test(fit_obs_GEV, fit_obs_GEV_nonstat)

#Create the ensemble covariate
year_vector = as.integer(format(UNSEEN_bc$time, format="%Y"))
covariate_ens = year_vector - 1980 

# Fit the stationary distribution
fit_unseen_GEV <- fevd(x = UNSEEN_bc$t2m,
                       type = 'GEV',
                       use.phi = TRUE)

fit_unseen_Gumbel <- fevd(x = UNSEEN_bc$t2m,
                          type = 'Gumbel',
                          use.phi = TRUE)

# Fit the nonstationary distribution
fit_unseen_GEV_nonstat <- fevd(x = UNSEEN_bc$t2m,
                               type = 'GEV',
                               location.fun = ~ covariate_ens, ##Fitting the gev with a location and scale parameter linearly correlated to the covariate (years)
                               scale.fun = ~ covariate_ens, 
                               use.phi = TRUE)

#And test the fit
##1. Stationary Gumbel vs stationary GEV
lr.test(fit_unseen_Gumbel,fit_unseen_GEV) 
##2. Stationary GEV vs Nonstationary GEV
lr.test(fit_unseen_GEV, fit_unseen_GEV_nonstat) 

year_vector = as.integer(format(UNSEEN_bc$time, format="%Y"))
covariate_ens = year_vector - 1980 

Trend_2year <- unseen_trends1(ensemble = UNSEEN_bc$t2m,
                     x_ens = year_vector,
                     x_obs = 1981:2020,
                     rp = 2,
                     obs = obs$t2m,
                     covariate_ens = covariate_ens,
                     covariate_obs = c(1:(length(obs$time)-1)),
                     covariate_values = c(1:length(obs$time)),
                     GEV_type = 'GEV',
                     ylab = 'August temperature (C)',
                     title = '2-year') +
ylim(c(20,28.5))

Trend_100year <- unseen_trends1(ensemble = UNSEEN_bc$t2m,
                     x_ens = year_vector,
                     x_obs = 1981:2020,
                     rp = 100,
                     obs = obs$t2m,
                     covariate_ens = covariate_ens,
                     covariate_obs = c(1:(length(obs$time)-1)),
                     covariate_values = c(1:length(obs$time)),
                     GEV_type = 'GEV',
                     ylab = '',
                     title = '100-year') +
ylim(c(20,28.5))

ggpubr::ggarrange(Trend_2year,Trend_100year,
                  labels = c("a","b"),
                  common.legend = TRUE,
                  font.label = list(size = 14,
                                    color = "black",
                                    face = "bold",
                                    family = NULL),
                  ncol = 2,
                  nrow = 1) #+
# ggsave(height = 100, width = 180, units = 'mm', filename = "graphs/California_trends.png")

p2 <- unseen_trends2(ensemble = UNSEEN_bc$t2m,
                    obs = obs[1:(length(obs$time)-1),]$t2m,
                    covariate_ens = covariate_ens,
                    covariate_obs = c(1:(length(obs$time)-1)), 
                    GEV_type = 'GEV', 
                    ylab = 'August temperature (C)')

Distributions = p2 + geom_hline(yintercept = obs[obs$time == '2020-08-01',]$t2m) #+ 
Distributions

Trends = ggpubr::ggarrange(Trend_2year,Trend_100year,
                  labels = c("a","b"),
                  common.legend = TRUE,
                  font.label = list(size = 10,
                                    color = "black",
                                    face = "bold",
                                    family = NULL),
                  ncol = 2,
                  nrow = 1) 


ggpubr::ggarrange(Trends,Distributions,
                  labels = c("","c"),
                  font.label = list(size = 10,
                                    color = "black",
                                    face = "bold",
                                    family = NULL),
                  ncol = 1,
                  nrow = 2) +
ggsave(height = 180, width = 180, units = 'mm', filename = "graphs/California_trends2.pdf")