#!/usr/bin/env python
# coding: utf-8

# # Run the fusion EasyVVUQ campaign
# 
# Run an EasyVVUQ campaign to analyze the sensitivity of the temperature
# profile predicted by a simplified model of heat conduction in a
# tokamak plasma.
# 
# This is done with SC.

# In[ ]:


# import packages that we will use

import os
import easyvvuq as uq
import chaospy as cp
import pickle
import time
import numpy as np
import pandas as pd
import matplotlib
if not os.getenv("DISPLAY"): matplotlib.use('Agg')
import matplotlib.pylab as plt
from IPython.display import display
get_ipython().run_line_magic('matplotlib', 'inline')


# In[ ]:


# we need fipy -- install if not already available

try:
    import fipy
except ModuleNotFoundError:
    get_ipython().system(' pip install future')
    get_ipython().system(' pip install fipy')
    import fipy


# In[ ]:


# routine to write out (if needed) the fusion .template file

def write_template(params):
    str = ""
    first = True
    for k in params.keys():
        if first:
            str += '{"%s": "$%s"' % (k,k) ; first = False
        else:
            str += ', "%s": "$%s"' % (k,k)
    str += '}'
    print(str, file=open('fusion.template','w'))


# In[ ]:


# define parameters of the fusion model
def define_params():
    return {
        "Qe_tot":   {"type": "float",   "min": 1.0e6, "max": 50.0e6, "default": 2e6},
        "H0":       {"type": "float",   "min": 0.00,  "max": 1.0,    "default": 0},
        "Hw":       {"type": "float",   "min": 0.01,  "max": 100.0,  "default": 0.1},
        "Te_bc":    {"type": "float",   "min": 10.0,  "max": 1000.0, "default": 100},
        "chi":      {"type": "float",   "min": 0.01,  "max": 100.0,  "default": 1},
        "a0":       {"type": "float",   "min": 0.2,   "max": 10.0,   "default": 1},
        "R0":       {"type": "float",   "min": 0.5,   "max": 20.0,   "default": 3},
        "E0":       {"type": "float",   "min": 1.0,   "max": 10.0,   "default": 1.5},
        "b_pos":    {"type": "float",   "min": 0.95,  "max": 0.99,   "default": 0.98},
        "b_height": {"type": "float",   "min": 3e19,  "max": 10e19,  "default": 6e19},
        "b_sol":    {"type": "float",   "min": 2e18,  "max": 3e19,   "default": 2e19},
        "b_width":  {"type": "float",   "min": 0.005, "max": 0.025,  "default": 0.01},
        "b_slope":  {"type": "float",   "min": 0.0,   "max": 0.05,   "default": 0.01},
        "nr":       {"type": "integer", "min": 10,    "max": 1000,   "default": 100},
        "dt":       {"type": "float",   "min": 1e-3,  "max": 1e3,    "default": 100},
        "out_file": {"type": "string",  "default": "output.csv"}
    }


# In[ ]:


# define varying quantities
def define_vary():
    vary_all = {
        "Qe_tot":   cp.Uniform(1.8e6, 2.2e6),
        "H0":       cp.Uniform(0.0,   0.2),
        "Hw":       cp.Uniform(0.1,   0.5),
        "chi":      cp.Uniform(0.8,   1.2),
        "Te_bc":    cp.Uniform(80.0,  120.0),
        "a0":       cp.Uniform(0.9,   1.1),
        "R0":       cp.Uniform(2.7,   3.3),
        "E0":       cp.Uniform(1.4,   1.6),
        "b_pos":    cp.Uniform(0.95,  0.99),
        "b_height": cp.Uniform(5e19,  7e19),
        "b_sol":    cp.Uniform(1e19,  3e19),
        "b_width":  cp.Uniform(0.015, 0.025),
        "b_slope":  cp.Uniform(0.005, 0.020)
    }
    vary_2 =  {
        "Qe_tot":   cp.Uniform(1.8e6, 2.2e6),
        "Te_bc":    cp.Uniform(80.0,  120.0)
    }
    vary_5 =  {
        "Qe_tot":   cp.Uniform(1.8e6, 2.2e6),
        "H0":       cp.Uniform(0.0,   0.2),
        "Hw":       cp.Uniform(0.1,   0.5),
        "chi":      cp.Uniform(0.8,   1.2),
        "Te_bc":    cp.Uniform(80.0,  120.0)
    }
    vary_10 = {
        "Qe_tot":   cp.Uniform(1.8e6, 2.2e6),
        "H0":       cp.Uniform(0.0,   0.2),
        "Hw":       cp.Uniform(0.1,   0.5),
        "chi":      cp.Uniform(0.8,   1.2),
        "Te_bc":    cp.Uniform(80.0,  120.0),
        "b_pos":    cp.Uniform(0.95,  0.99),
        "b_height": cp.Uniform(5e19,  7e19),
        "b_sol":    cp.Uniform(1e19,  3e19),
        "b_width":  cp.Uniform(0.015, 0.025),
        "b_slope":  cp.Uniform(0.005, 0.020)
    }
    return vary_10


# In[ ]:


# define a model to run the fusion code directly from python, expecting a dictionary and returning a dictionary
def run_fusion_model(input):
    import json
    import fusion
    qois = ["te", "ne", "rho", "rho_norm"]
    del input['out_file']
    return {q: v for q,v in zip(qois, [t.tolist() for t in fusion.solve_Te(**input, plots=False, output=False)])}


# In[ ]:


# routine to run a SC campaign

def run_sc_case(sc_order=2, local=True, dask=True, batch_size=os.cpu_count(), use_files=True):
    """
    Inputs:
        sc_order: order of the sc expansion
        local: if using Dask, whether to use the local option (True)
        dask: whether to use dask (True)
        batch_size: for the non Dask option, number of cases to run in parallel (16)
    Outputs:
        results_df: Pandas dataFrame containing inputs to and output from the model
        results: Results of the sc analysis
        times: Information about the elapsed time for the various phases of the calculation
        sc_order: sc_order 
        count: number of sc samples
    """
    
    if dask:
        if local:
            print('Running locally')
            import multiprocessing.popen_spawn_posix
            from dask.distributed import Client, LocalCluster
            cluster = LocalCluster(threads_per_worker=1)
            client = Client(cluster) # processes=True, threads_per_worker=1)
        else:
            print('Running using SLURM')
            from dask.distributed import Client
            from dask_jobqueue import SLURMCluster
            cluster = SLURMCluster(
                job_extra=['--qos=p.tok.openmp.2h', '--mail-type=end', '--mail-user=dpc@rzg.mpg.de', '-t 2:00:00'], 
                queue='p.tok.openmp', 
                cores=8, 
                memory='8 GB',
                processes=8)
            cluster.scale(32)
            print(cluster)
            print(cluster.job_script())
            client = Client(cluster)
        print(client)

    else:
        import concurrent.futures
#        client = concurrent.futures.ProcessPoolExecutor(max_workers=batch_size)
        client = concurrent.futures.ThreadPoolExecutor(max_workers=batch_size)
#        client = None
    
    times = np.zeros(7)

    time_start = time.time()
    time_start_whole = time_start
    # Set up a fresh campaign called "fusion_sc."
    my_campaign = uq.Campaign(name='fusion_sc.')        

    # Define parameter space
    params = define_params()

    # Create an encoder and decoder for sc test app
    if use_files:
        encoder = uq.encoders.GenericEncoder(template_fname='fusion.template',
                                             delimiter='$',
                                             target_filename='fusion_in.json')


        decoder = uq.decoders.SimpleCSV(target_filename="output.csv",
                                        output_columns=["te", "ne", "rho", "rho_norm"])

        execute = uq.actions.ExecuteLocal('python3 %s/fusion_model.py fusion_in.json' % (os.getcwd()))

        actions = uq.actions.Actions(uq.actions.CreateRunDirectory('/tmp'), 
                                     uq.actions.Encode(encoder), execute, uq.actions.Decode(decoder))
    else:
        actions = uq.actions.Actions(uq.actions.ExecutePython(run_fusion_model))


    # Add the app (automatically set as current app)
    my_campaign.add_app(name="fusion", params=params, actions=actions)

    time_end = time.time()
    times[1] = time_end-time_start
    print('Time for phase 1 = %.3f' % (times[1]))

    time_start = time.time()
    # Associate a sampler with the campaign
    my_campaign.set_sampler(uq.sampling.SCSampler(vary=define_vary(), polynomial_order=sc_order))
    my_campaign.draw_samples()
    print('Number of samples = %s' % my_campaign.get_active_sampler().count)

    time_end = time.time()
    times[2] = time_end-time_start
    print('Time for phase 2 = %.3f' % (times[2]))

    time_start = time.time()
    # Perform the actions
    my_campaign.execute(pool=client).collate(progress_bar=True)

    if dask:
        client.close()
        client.shutdown()

    time_end = time.time()
    times[3] = time_end-time_start
    print('Time for phase 3 = %.3f' % (times[3]))

    time_start = time.time()
    # Collate the results
    results_df = my_campaign.get_collation_result()

    time_end = time.time()
    times[4] = time_end-time_start
    print('Time for phase 4 = %.3f' % (times[4]))

    time_start = time.time()
    # Post-processing analysis
    results = my_campaign.analyse(qoi_cols=["te", "ne", "rho", "rho_norm"])

    time_end = time.time()
    times[5] = time_end-time_start
    print('Time for phase 5 = %.3f' % (times[5]))

    time_start = time.time()
    # Save the results
    pickle.dump(results, open('fusion_results.pickle','bw'))
    time_end = time.time()
    times[6] = time_end-time_start
    print('Time for phase 6 = %.3f' % (times[6]))

    times[0] = time_end - time_start_whole

    return results_df, results, times, sc_order, my_campaign.get_active_sampler().count


# In[ ]:


# routines for plotting the results

def plot_Te(results, title=None):
    # plot the calculated Te: mean, with std deviation, 1, 10, 90 and 99%
    plt.figure()
    rho = results.describe('rho', 'mean')
    plt.plot(rho, results.describe('te', 'mean'), 'b-', label='Mean')
    plt.plot(rho, results.describe('te', 'mean')-results.describe('te', 'std'), 'b--', label='+1 std deviation')
    plt.plot(rho, results.describe('te', 'mean')+results.describe('te', 'std'), 'b--')
    plt.fill_between(rho, results.describe('te', 'mean')-results.describe('te', 'std'), results.describe('te', 'mean')+results.describe('te', 'std'), color='b', alpha=0.2)
    try:
        plt.plot(rho, results.describe('te', '10%'), 'b:', label='10 and 90 percentiles')
        plt.plot(rho, results.describe('te', '90%'), 'b:')
        plt.fill_between(rho, results.describe('te', '10%'), results.describe('te', '90%'), color='b', alpha=0.1)
        plt.fill_between(rho, results.describe('te', '1%'), results.describe('te', '99%'), color='b', alpha=0.05)
    except:
        print('Problem with some of the percentiles')
    plt.legend(loc=0)
    plt.xlabel('rho [$m$]')
    plt.ylabel('Te [$eV$]')
    if not title is None: plt.title(title)
    plt.savefig('Te.png')
    plt.savefig('Te.pdf')

def plot_ne(results, title=None):
    # plot the calculated ne: mean, with std deviation, 1, 10, 90 and 99%
    plt.figure()
    rho = results.describe('rho', 'mean')
    plt.plot(rho, results.describe('ne', 'mean'), 'b-', label='Mean')
    plt.plot(rho, results.describe('ne', 'mean')-results.describe('ne', 'std'), 'b--', label='+1 std deviation')
    plt.plot(rho, results.describe('ne', 'mean')+results.describe('ne', 'std'), 'b--')
    plt.fill_between(rho, results.describe('ne', 'mean')-results.describe('ne', 'std'), results.describe('ne', 'mean')+results.describe('ne', 'std'), color='b', alpha=0.2)
    try:
        plt.plot(rho, results.describe('ne', '10%'), 'b:', label='10 and 90 percentiles')
        plt.plot(rho, results.describe('ne', '90%'), 'b:')
        plt.fill_between(rho, results.describe('ne', '10%'), results.describe('ne', '90%'), color='b', alpha=0.1)
        plt.fill_between(rho, results.describe('ne', '1%'), results.describe('ne', '99%'), color='b', alpha=0.05)
    except:
        print('Problem with some of the percentiles')
    plt.legend(loc=0)
    plt.xlabel('rho [$m$]')
    plt.ylabel('ne [$m^{-3}$]')
    if not title is None: plt.title(title)
    plt.savefig('ne.png')
    plt.savefig('ne.pdf')

def plot_sobols_first(results, title=None, field='te'):
    # plot the first Sobol results
    plt.figure()
    rho = results.describe('rho', 'mean')
    for k in results.sobols_first()[field].keys(): plt.plot(rho, results.sobols_first()[field][k], label=k)
    plt.legend(loc=0)
    plt.xlabel('rho [$m$]')
    plt.ylabel('sobols_first')
    if not title is None: plt.title(field + ': ' + title)
    plt.savefig('sobols_first_%s.png' % (field))
    plt.savefig('sobols_first_%s.pdf' % (field))

def plot_sobols_second(results, title=None, field='te'):
    # plot the second Sobol results
    plt.figure()
    rho = results.describe('rho', 'mean')
    for k1 in results.sobols_second()[field].keys():
        for k2 in results.sobols_second()[field][k1].keys():
            plt.plot(rho, results.sobols_second()[field][k1][k2], label=k1+'/'+k2)
    plt.legend(loc=0, ncol=2)
    plt.xlabel('rho [$m$]')
    plt.ylabel('sobols_second')
    if not title is None: plt.title(field + ': ' + title)
    plt.savefig('sobols_second_%s.png' % (field))
    plt.savefig('sobols_second_%s.pdf' % (field))

def plot_sobols_total(results, title=None, field='te'):
    # plot the total Sobol results
    plt.figure()
    rho = results.describe('rho', 'mean')
    for k in results.sobols_total()[field].keys(): plt.plot(rho, results.sobols_total()[field][k], label=k)
    plt.legend(loc=0)
    plt.xlabel('rho [$m$]')
    plt.ylabel('sobols_total')
    if not title is None: plt.title(field + ': ' + title)
    plt.savefig('sobols_total_%s.png' % (field))
    plt.savefig('sobols_total_%s.pdf' % (field))

def plot_distribution(results, results_df, title=None):
    te_dist = results.raw_data['output_distributions']['te']
    rho_norm = results.describe('rho_norm', 'mean')
    for i in [np.maximum(0, int(i-1)) 
              for i in np.linspace(0,1,5) * rho_norm.shape]:
        plt.figure()
        pdf_raw_samples = cp.GaussianKDE(results_df.te[i])
        pdf_kde_samples = cp.GaussianKDE(te_dist.samples[i])
        plt.hist(results_df.te[i], density=True, bins=50, label='histogram of raw samples', alpha=0.25)
        if hasattr(te_dist, 'samples'):
            plt.hist(te_dist.samples[i], density=True, bins=50, label='histogram of kde samples', alpha=0.25)

        plt.plot(np.linspace(pdf_raw_samples.lower, pdf_raw_samples.upper), pdf_raw_samples.pdf(np.linspace(pdf_raw_samples.lower, pdf_raw_samples.upper)), label='PDF (raw samples)')
        plt.plot(np.linspace(pdf_kde_samples.lower, pdf_kde_samples.upper), pdf_kde_samples.pdf(np.linspace(pdf_kde_samples.lower, pdf_kde_samples.upper)), label='PDF (kde samples)')

        plt.legend(loc=0)
        plt.xlabel('Te [$eV$]')
        if title is None:
            plt.title('Distributions for rho_norm = %0.4f' % (rho_norm[i]))
        else:
            plt.title('%s\nDistributions for rho_norm = %0.4f' % (title, rho_norm[i]))
        plt.savefig('distribution_function_rho_norm=%0.4f.png' % (rho_norm[i]))
        plt.savefig('distribution_function_rho_norm=%0.4f.pdf' % (rho_norm[i]))


# In[ ]:


# Calculate the stochastic collocation expansion for a range of orders

if __name__ == '__main__':
    local = False        # if True, use local cores; if False, use SLURM
    dask = False         # if True, use DASK; if False, use a fall-back non-DASK option

    R = {}
    for sc_order in range(1, 4):
        R[sc_order] = {}
        (R[sc_order]['results_df'], 
         R[sc_order]['results'], 
         R[sc_order]['times'], 
         R[sc_order]['order'], 
         R[sc_order]['number_of_samples']) = run_sc_case(sc_order=sc_order, 
                                                           local=local, dask=dask, 
                                                           batch_size=7, use_files=False)


# In[ ]:


# save the results

if __name__ == '__main__':

    pickle.dump(R, open('collected_results.pickle','bw'))


# In[ ]:


# produce a table of the time taken for various phases
# the phases are:
#   1: creation of campaign
#   2: creation of samples
#   3: running the cases
#   4: calculation of statistics including Sobols
#   5: returning of analysed results
#   6: saving campaign and pickled results

if __name__ == '__main__':

    Timings = pd.DataFrame(np.array([R[r]['times'] for r in list(R.keys())]), 
                 columns=['Total', 'Phase 1', 'Phase 2', 'Phase 3', 'Phase 4', 'Phase 5', 'Phase 6'], 
                 index=[R[r]['order'] for r in list(R.keys())])
    Timings.to_csv(open('Timings.csv', 'w'))
    display(Timings)


# In[ ]:


# plot the convergence of the mean and standard deviation to that of the highest order

if __name__ == '__main__':
    last = -1
    O = [R[r]['order'] for r in list(R.keys())]
    if len(O[0:last]) > 0:
        plt.figure()
        plt.semilogy([o for o in O[0:last]],
                     [np.sqrt(np.mean((R[o]['results'].describe('te', 'mean') - 
                                       R[O[last]]['results'].describe('te', 'mean'))**2)) for o in O[0:last]],
                     'o-', label='mean')
        plt.semilogy([o for o in O[0:last]],
                     [np.sqrt(np.mean((R[o]['results'].describe('te', 'std') - 
                                       R[O[last]]['results'].describe('te', 'std'))**2)) for o in O[0:last]],
                     'o-', label='std')
        plt.xlabel('SC order')
        plt.ylabel('RMSerror compared to order=%s' % (O[last]))
        plt.legend(loc=0)
        plt.savefig('Convergence_mean_std.png')
        plt.savefig('Convergence_mean_std.pdf')


# In[ ]:


# plot the convergence of the first sobol to that of the highest order

if __name__ == '__main__':
    last = -1
    O = [R[r]['order'] for r in list(R.keys())]
    if len(O[0:last]) > 0:
        plt.figure()
        O = [R[r]['order'] for r in list(R.keys())]
        for v in list(R[O[last]]['results'].sobols_first('te').keys()):
            plt.semilogy([o for o in O[0:last]],
                         [np.sqrt(np.mean((R[o]['results'].sobols_first('te')[v] - 
                                           R[O[last]]['results'].sobols_first('te')[v])**2)) for o in O[0:last]],
                         'o-',
                         label=v)
        plt.xlabel('SC order')
        plt.ylabel('RMSerror for 1st sobol compared to order=%s' % (O[last]))
        plt.legend(loc=0)
        plt.savefig('Convergence_sobol_first.png')
        plt.savefig('Convergence_sobol_first.pdf')


# In[ ]:


# plot a standard set of graphs for the highest order case

if __name__ == '__main__':
    last = -1
    title = 'fusion test case with SC order = %i' % list(R.values())[last]['order']
    plot_Te(list(R.values())[last]['results'], title=title,)
    plot_ne(list(R.values())[last]['results'], title=title)
    plot_sobols_first(list(R.values())[last]['results'], title=title)
    try:
        plot_sobols_second(list(R.values())[last]['results'], title=title)
    except:
        print('Problem with sobols_second')
    try:
        plot_sobols_total(list(R.values())[last]['results'], title=title)
    except:
        print('Problem with sobols_total')
    try:
        plot_distribution(list(R.values())[last]['results'], list(R.values())[last]['results_df'], title=title)
    except:
        print('Problem with distribution')
    plot_sobols_first(list(R.values())[last]['results'], title=title, field='ne')
    try:
        plot_sobols_second(list(R.values())[last]['results'], title=title, field='ne')
    except:
        print('Problem with sobols_second')
    try:
        plot_sobols_total(list(R.values())[last]['results'], title=title, field='ne')
    except:
        print('Problem with sobols_total')


# In[ ]:


# prepare the test data

if __name__ == '__main__':

    test_campaign = uq.Campaign(name='fusion_pce.')        

    # Add the app (automatically set as current app)
    test_campaign.add_app(name="fusion", params=define_params(), 
                          actions=uq.actions.Actions(uq.actions.ExecutePython(run_fusion_model)))

    # Associate a sampler with the campaign
    test_campaign.set_sampler(uq.sampling.quasirandom.LHCSampler(vary=define_vary(), count=100))

    # Perform the actions
    test_campaign.execute(nsamples=1000).collate(progress_bar=True)

    # Collate the results
    test_df = test_campaign.get_collation_result()


# In[ ]:


# calculate the SC surrogates
if __name__ == '__main__':

    test_points = test_df[test_campaign.get_active_sampler().vary.get_keys()]
    test_results = test_df['te'].values
    test_predictions = {}
    for i in list(R.keys()):
        test_predictions[i] = np.squeeze(np.array(R[i]['results'].surrogate()(test_points)['te']))


# In[ ]:


# plot the performance of the SC surrogates
if __name__ == '__main__':

    for i in list(R.keys()):
        plt.semilogy(R[i]['results'].describe('rho', 'mean'), 
                 np.sqrt(((test_predictions[i] - test_results)**2).mean(axis=0)) / test_results.mean(axis=0), 
                 label='SC order %s with %s samples' % (R[i]['order'], R[i]['number_of_samples']))
    plt.xlabel('rho [m]') ; plt.ylabel('fractional RMS for predicted Te') ; plt.legend(loc=0)
    plt.title('Performance of the SC surrogate')
    plt.savefig('SC_surrogate.png')
    plt.savefig('SC_surrogate.pdf')


# In[ ]:


# plot the convergence of the surrogate based on 1000 random points
if __name__ == '__main__':
    _o = []
    _RMS = []
    for r in R.values():
        _RMS.append((np.sqrt((((test_predictions[r['order']] - test_results) / test_results)**2).mean())))
        _o.append(r['order'])

    plt.figure()
    plt.semilogy(_o, _RMS, 'o-')
    plt.xlabel('SC order')
    plt.ylabel('fractional RMS error for the SC surrogate')
    plt.legend(loc=0)
    plt.savefig('Convergence_SC_surrogate.png')
    plt.savefig('Convergence_SC_surrogate.pdf')