#!/usr/bin/env python # coding: utf-8 # # Run the fusion EasyVVUQ campaign # # Run an EasyVVUQ campaign to analyze the sensitivity of the temperature # profile predicted by a simplified model of heat conduction in a # tokamak plasma. # # This is done with SC. # In[ ]: # import packages that we will use import os import easyvvuq as uq import chaospy as cp import pickle import time import numpy as np import pandas as pd import matplotlib if not os.getenv("DISPLAY"): matplotlib.use('Agg') import matplotlib.pylab as plt from IPython.display import display get_ipython().run_line_magic('matplotlib', 'inline') # In[ ]: # we need fipy -- install if not already available try: import fipy except ModuleNotFoundError: get_ipython().system(' pip install future') get_ipython().system(' pip install fipy') import fipy # In[ ]: # routine to write out (if needed) the fusion .template file def write_template(params): str = "" first = True for k in params.keys(): if first: str += '{"%s": "$%s"' % (k,k) ; first = False else: str += ', "%s": "$%s"' % (k,k) str += '}' print(str, file=open('fusion.template','w')) # In[ ]: # define parameters of the fusion model def define_params(): return { "Qe_tot": {"type": "float", "min": 1.0e6, "max": 50.0e6, "default": 2e6}, "H0": {"type": "float", "min": 0.00, "max": 1.0, "default": 0}, "Hw": {"type": "float", "min": 0.01, "max": 100.0, "default": 0.1}, "Te_bc": {"type": "float", "min": 10.0, "max": 1000.0, "default": 100}, "chi": {"type": "float", "min": 0.01, "max": 100.0, "default": 1}, "a0": {"type": "float", "min": 0.2, "max": 10.0, "default": 1}, "R0": {"type": "float", "min": 0.5, "max": 20.0, "default": 3}, "E0": {"type": "float", "min": 1.0, "max": 10.0, "default": 1.5}, "b_pos": {"type": "float", "min": 0.95, "max": 0.99, "default": 0.98}, "b_height": {"type": "float", "min": 3e19, "max": 10e19, "default": 6e19}, "b_sol": {"type": "float", "min": 2e18, "max": 3e19, "default": 2e19}, "b_width": {"type": "float", "min": 0.005, "max": 0.025, "default": 0.01}, "b_slope": {"type": "float", "min": 0.0, "max": 0.05, "default": 0.01}, "nr": {"type": "integer", "min": 10, "max": 1000, "default": 100}, "dt": {"type": "float", "min": 1e-3, "max": 1e3, "default": 100}, "out_file": {"type": "string", "default": "output.csv"} } # In[ ]: # define varying quantities def define_vary(): vary_all = { "Qe_tot": cp.Uniform(1.8e6, 2.2e6), "H0": cp.Uniform(0.0, 0.2), "Hw": cp.Uniform(0.1, 0.5), "chi": cp.Uniform(0.8, 1.2), "Te_bc": cp.Uniform(80.0, 120.0), "a0": cp.Uniform(0.9, 1.1), "R0": cp.Uniform(2.7, 3.3), "E0": cp.Uniform(1.4, 1.6), "b_pos": cp.Uniform(0.95, 0.99), "b_height": cp.Uniform(5e19, 7e19), "b_sol": cp.Uniform(1e19, 3e19), "b_width": cp.Uniform(0.015, 0.025), "b_slope": cp.Uniform(0.005, 0.020) } vary_2 = { "Qe_tot": cp.Uniform(1.8e6, 2.2e6), "Te_bc": cp.Uniform(80.0, 120.0) } vary_5 = { "Qe_tot": cp.Uniform(1.8e6, 2.2e6), "H0": cp.Uniform(0.0, 0.2), "Hw": cp.Uniform(0.1, 0.5), "chi": cp.Uniform(0.8, 1.2), "Te_bc": cp.Uniform(80.0, 120.0) } vary_10 = { "Qe_tot": cp.Uniform(1.8e6, 2.2e6), "H0": cp.Uniform(0.0, 0.2), "Hw": cp.Uniform(0.1, 0.5), "chi": cp.Uniform(0.8, 1.2), "Te_bc": cp.Uniform(80.0, 120.0), "b_pos": cp.Uniform(0.95, 0.99), "b_height": cp.Uniform(5e19, 7e19), "b_sol": cp.Uniform(1e19, 3e19), "b_width": cp.Uniform(0.015, 0.025), "b_slope": cp.Uniform(0.005, 0.020) } return vary_10 # In[ ]: # define a model to run the fusion code directly from python, expecting a dictionary and returning a dictionary def run_fusion_model(input): import json import fusion qois = ["te", "ne", "rho", "rho_norm"] del input['out_file'] return {q: v for q,v in zip(qois, [t.tolist() for t in fusion.solve_Te(**input, plots=False, output=False)])} # In[ ]: # routine to run a SC campaign def run_sc_case(sc_order=2, local=True, dask=True, batch_size=os.cpu_count(), use_files=True): """ Inputs: sc_order: order of the sc expansion local: if using Dask, whether to use the local option (True) dask: whether to use dask (True) batch_size: for the non Dask option, number of cases to run in parallel (16) Outputs: results_df: Pandas dataFrame containing inputs to and output from the model results: Results of the sc analysis times: Information about the elapsed time for the various phases of the calculation sc_order: sc_order count: number of sc samples """ if dask: if local: print('Running locally') import multiprocessing.popen_spawn_posix from dask.distributed import Client, LocalCluster cluster = LocalCluster(threads_per_worker=1) client = Client(cluster) # processes=True, threads_per_worker=1) else: print('Running using SLURM') from dask.distributed import Client from dask_jobqueue import SLURMCluster cluster = SLURMCluster( job_extra=['--qos=p.tok.openmp.2h', '--mail-type=end', '--mail-user=dpc@rzg.mpg.de', '-t 2:00:00'], queue='p.tok.openmp', cores=8, memory='8 GB', processes=8) cluster.scale(32) print(cluster) print(cluster.job_script()) client = Client(cluster) print(client) else: import concurrent.futures # client = concurrent.futures.ProcessPoolExecutor(max_workers=batch_size) client = concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) # client = None times = np.zeros(7) time_start = time.time() time_start_whole = time_start # Set up a fresh campaign called "fusion_sc." my_campaign = uq.Campaign(name='fusion_sc.') # Define parameter space params = define_params() # Create an encoder and decoder for sc test app if use_files: encoder = uq.encoders.GenericEncoder(template_fname='fusion.template', delimiter='$', target_filename='fusion_in.json') decoder = uq.decoders.SimpleCSV(target_filename="output.csv", output_columns=["te", "ne", "rho", "rho_norm"]) execute = uq.actions.ExecuteLocal('python3 %s/fusion_model.py fusion_in.json' % (os.getcwd())) actions = uq.actions.Actions(uq.actions.CreateRunDirectory('/tmp'), uq.actions.Encode(encoder), execute, uq.actions.Decode(decoder)) else: actions = uq.actions.Actions(uq.actions.ExecutePython(run_fusion_model)) # Add the app (automatically set as current app) my_campaign.add_app(name="fusion", params=params, actions=actions) time_end = time.time() times[1] = time_end-time_start print('Time for phase 1 = %.3f' % (times[1])) time_start = time.time() # Associate a sampler with the campaign my_campaign.set_sampler(uq.sampling.SCSampler(vary=define_vary(), polynomial_order=sc_order)) my_campaign.draw_samples() print('Number of samples = %s' % my_campaign.get_active_sampler().count) time_end = time.time() times[2] = time_end-time_start print('Time for phase 2 = %.3f' % (times[2])) time_start = time.time() # Perform the actions my_campaign.execute(pool=client).collate(progress_bar=True) if dask: client.close() client.shutdown() time_end = time.time() times[3] = time_end-time_start print('Time for phase 3 = %.3f' % (times[3])) time_start = time.time() # Collate the results results_df = my_campaign.get_collation_result() time_end = time.time() times[4] = time_end-time_start print('Time for phase 4 = %.3f' % (times[4])) time_start = time.time() # Post-processing analysis results = my_campaign.analyse(qoi_cols=["te", "ne", "rho", "rho_norm"]) time_end = time.time() times[5] = time_end-time_start print('Time for phase 5 = %.3f' % (times[5])) time_start = time.time() # Save the results pickle.dump(results, open('fusion_results.pickle','bw')) time_end = time.time() times[6] = time_end-time_start print('Time for phase 6 = %.3f' % (times[6])) times[0] = time_end - time_start_whole return results_df, results, times, sc_order, my_campaign.get_active_sampler().count # In[ ]: # routines for plotting the results def plot_Te(results, title=None): # plot the calculated Te: mean, with std deviation, 1, 10, 90 and 99% plt.figure() rho = results.describe('rho', 'mean') plt.plot(rho, results.describe('te', 'mean'), 'b-', label='Mean') plt.plot(rho, results.describe('te', 'mean')-results.describe('te', 'std'), 'b--', label='+1 std deviation') plt.plot(rho, results.describe('te', 'mean')+results.describe('te', 'std'), 'b--') plt.fill_between(rho, results.describe('te', 'mean')-results.describe('te', 'std'), results.describe('te', 'mean')+results.describe('te', 'std'), color='b', alpha=0.2) try: plt.plot(rho, results.describe('te', '10%'), 'b:', label='10 and 90 percentiles') plt.plot(rho, results.describe('te', '90%'), 'b:') plt.fill_between(rho, results.describe('te', '10%'), results.describe('te', '90%'), color='b', alpha=0.1) plt.fill_between(rho, results.describe('te', '1%'), results.describe('te', '99%'), color='b', alpha=0.05) except: print('Problem with some of the percentiles') plt.legend(loc=0) plt.xlabel('rho [$m$]') plt.ylabel('Te [$eV$]') if not title is None: plt.title(title) plt.savefig('Te.png') plt.savefig('Te.pdf') def plot_ne(results, title=None): # plot the calculated ne: mean, with std deviation, 1, 10, 90 and 99% plt.figure() rho = results.describe('rho', 'mean') plt.plot(rho, results.describe('ne', 'mean'), 'b-', label='Mean') plt.plot(rho, results.describe('ne', 'mean')-results.describe('ne', 'std'), 'b--', label='+1 std deviation') plt.plot(rho, results.describe('ne', 'mean')+results.describe('ne', 'std'), 'b--') plt.fill_between(rho, results.describe('ne', 'mean')-results.describe('ne', 'std'), results.describe('ne', 'mean')+results.describe('ne', 'std'), color='b', alpha=0.2) try: plt.plot(rho, results.describe('ne', '10%'), 'b:', label='10 and 90 percentiles') plt.plot(rho, results.describe('ne', '90%'), 'b:') plt.fill_between(rho, results.describe('ne', '10%'), results.describe('ne', '90%'), color='b', alpha=0.1) plt.fill_between(rho, results.describe('ne', '1%'), results.describe('ne', '99%'), color='b', alpha=0.05) except: print('Problem with some of the percentiles') plt.legend(loc=0) plt.xlabel('rho [$m$]') plt.ylabel('ne [$m^{-3}$]') if not title is None: plt.title(title) plt.savefig('ne.png') plt.savefig('ne.pdf') def plot_sobols_first(results, title=None, field='te'): # plot the first Sobol results plt.figure() rho = results.describe('rho', 'mean') for k in results.sobols_first()[field].keys(): plt.plot(rho, results.sobols_first()[field][k], label=k) plt.legend(loc=0) plt.xlabel('rho [$m$]') plt.ylabel('sobols_first') if not title is None: plt.title(field + ': ' + title) plt.savefig('sobols_first_%s.png' % (field)) plt.savefig('sobols_first_%s.pdf' % (field)) def plot_sobols_second(results, title=None, field='te'): # plot the second Sobol results plt.figure() rho = results.describe('rho', 'mean') for k1 in results.sobols_second()[field].keys(): for k2 in results.sobols_second()[field][k1].keys(): plt.plot(rho, results.sobols_second()[field][k1][k2], label=k1+'/'+k2) plt.legend(loc=0, ncol=2) plt.xlabel('rho [$m$]') plt.ylabel('sobols_second') if not title is None: plt.title(field + ': ' + title) plt.savefig('sobols_second_%s.png' % (field)) plt.savefig('sobols_second_%s.pdf' % (field)) def plot_sobols_total(results, title=None, field='te'): # plot the total Sobol results plt.figure() rho = results.describe('rho', 'mean') for k in results.sobols_total()[field].keys(): plt.plot(rho, results.sobols_total()[field][k], label=k) plt.legend(loc=0) plt.xlabel('rho [$m$]') plt.ylabel('sobols_total') if not title is None: plt.title(field + ': ' + title) plt.savefig('sobols_total_%s.png' % (field)) plt.savefig('sobols_total_%s.pdf' % (field)) def plot_distribution(results, results_df, title=None): te_dist = results.raw_data['output_distributions']['te'] rho_norm = results.describe('rho_norm', 'mean') for i in [np.maximum(0, int(i-1)) for i in np.linspace(0,1,5) * rho_norm.shape]: plt.figure() pdf_raw_samples = cp.GaussianKDE(results_df.te[i]) pdf_kde_samples = cp.GaussianKDE(te_dist.samples[i]) plt.hist(results_df.te[i], density=True, bins=50, label='histogram of raw samples', alpha=0.25) if hasattr(te_dist, 'samples'): plt.hist(te_dist.samples[i], density=True, bins=50, label='histogram of kde samples', alpha=0.25) plt.plot(np.linspace(pdf_raw_samples.lower, pdf_raw_samples.upper), pdf_raw_samples.pdf(np.linspace(pdf_raw_samples.lower, pdf_raw_samples.upper)), label='PDF (raw samples)') plt.plot(np.linspace(pdf_kde_samples.lower, pdf_kde_samples.upper), pdf_kde_samples.pdf(np.linspace(pdf_kde_samples.lower, pdf_kde_samples.upper)), label='PDF (kde samples)') plt.legend(loc=0) plt.xlabel('Te [$eV$]') if title is None: plt.title('Distributions for rho_norm = %0.4f' % (rho_norm[i])) else: plt.title('%s\nDistributions for rho_norm = %0.4f' % (title, rho_norm[i])) plt.savefig('distribution_function_rho_norm=%0.4f.png' % (rho_norm[i])) plt.savefig('distribution_function_rho_norm=%0.4f.pdf' % (rho_norm[i])) # In[ ]: # Calculate the stochastic collocation expansion for a range of orders if __name__ == '__main__': local = False # if True, use local cores; if False, use SLURM dask = False # if True, use DASK; if False, use a fall-back non-DASK option R = {} for sc_order in range(1, 4): R[sc_order] = {} (R[sc_order]['results_df'], R[sc_order]['results'], R[sc_order]['times'], R[sc_order]['order'], R[sc_order]['number_of_samples']) = run_sc_case(sc_order=sc_order, local=local, dask=dask, batch_size=7, use_files=False) # In[ ]: # save the results if __name__ == '__main__': pickle.dump(R, open('collected_results.pickle','bw')) # In[ ]: # produce a table of the time taken for various phases # the phases are: # 1: creation of campaign # 2: creation of samples # 3: running the cases # 4: calculation of statistics including Sobols # 5: returning of analysed results # 6: saving campaign and pickled results if __name__ == '__main__': Timings = pd.DataFrame(np.array([R[r]['times'] for r in list(R.keys())]), columns=['Total', 'Phase 1', 'Phase 2', 'Phase 3', 'Phase 4', 'Phase 5', 'Phase 6'], index=[R[r]['order'] for r in list(R.keys())]) Timings.to_csv(open('Timings.csv', 'w')) display(Timings) # In[ ]: # plot the convergence of the mean and standard deviation to that of the highest order if __name__ == '__main__': last = -1 O = [R[r]['order'] for r in list(R.keys())] if len(O[0:last]) > 0: plt.figure() plt.semilogy([o for o in O[0:last]], [np.sqrt(np.mean((R[o]['results'].describe('te', 'mean') - R[O[last]]['results'].describe('te', 'mean'))**2)) for o in O[0:last]], 'o-', label='mean') plt.semilogy([o for o in O[0:last]], [np.sqrt(np.mean((R[o]['results'].describe('te', 'std') - R[O[last]]['results'].describe('te', 'std'))**2)) for o in O[0:last]], 'o-', label='std') plt.xlabel('SC order') plt.ylabel('RMSerror compared to order=%s' % (O[last])) plt.legend(loc=0) plt.savefig('Convergence_mean_std.png') plt.savefig('Convergence_mean_std.pdf') # In[ ]: # plot the convergence of the first sobol to that of the highest order if __name__ == '__main__': last = -1 O = [R[r]['order'] for r in list(R.keys())] if len(O[0:last]) > 0: plt.figure() O = [R[r]['order'] for r in list(R.keys())] for v in list(R[O[last]]['results'].sobols_first('te').keys()): plt.semilogy([o for o in O[0:last]], [np.sqrt(np.mean((R[o]['results'].sobols_first('te')[v] - R[O[last]]['results'].sobols_first('te')[v])**2)) for o in O[0:last]], 'o-', label=v) plt.xlabel('SC order') plt.ylabel('RMSerror for 1st sobol compared to order=%s' % (O[last])) plt.legend(loc=0) plt.savefig('Convergence_sobol_first.png') plt.savefig('Convergence_sobol_first.pdf') # In[ ]: # plot a standard set of graphs for the highest order case if __name__ == '__main__': last = -1 title = 'fusion test case with SC order = %i' % list(R.values())[last]['order'] plot_Te(list(R.values())[last]['results'], title=title,) plot_ne(list(R.values())[last]['results'], title=title) plot_sobols_first(list(R.values())[last]['results'], title=title) try: plot_sobols_second(list(R.values())[last]['results'], title=title) except: print('Problem with sobols_second') try: plot_sobols_total(list(R.values())[last]['results'], title=title) except: print('Problem with sobols_total') try: plot_distribution(list(R.values())[last]['results'], list(R.values())[last]['results_df'], title=title) except: print('Problem with distribution') plot_sobols_first(list(R.values())[last]['results'], title=title, field='ne') try: plot_sobols_second(list(R.values())[last]['results'], title=title, field='ne') except: print('Problem with sobols_second') try: plot_sobols_total(list(R.values())[last]['results'], title=title, field='ne') except: print('Problem with sobols_total') # In[ ]: # prepare the test data if __name__ == '__main__': test_campaign = uq.Campaign(name='fusion_pce.') # Add the app (automatically set as current app) test_campaign.add_app(name="fusion", params=define_params(), actions=uq.actions.Actions(uq.actions.ExecutePython(run_fusion_model))) # Associate a sampler with the campaign test_campaign.set_sampler(uq.sampling.quasirandom.LHCSampler(vary=define_vary(), count=100)) # Perform the actions test_campaign.execute(nsamples=1000).collate(progress_bar=True) # Collate the results test_df = test_campaign.get_collation_result() # In[ ]: # calculate the SC surrogates if __name__ == '__main__': test_points = test_df[test_campaign.get_active_sampler().vary.get_keys()] test_results = test_df['te'].values test_predictions = {} for i in list(R.keys()): test_predictions[i] = np.squeeze(np.array(R[i]['results'].surrogate()(test_points)['te'])) # In[ ]: # plot the performance of the SC surrogates if __name__ == '__main__': for i in list(R.keys()): plt.semilogy(R[i]['results'].describe('rho', 'mean'), np.sqrt(((test_predictions[i] - test_results)**2).mean(axis=0)) / test_results.mean(axis=0), label='SC order %s with %s samples' % (R[i]['order'], R[i]['number_of_samples'])) plt.xlabel('rho [m]') ; plt.ylabel('fractional RMS for predicted Te') ; plt.legend(loc=0) plt.title('Performance of the SC surrogate') plt.savefig('SC_surrogate.png') plt.savefig('SC_surrogate.pdf') # In[ ]: # plot the convergence of the surrogate based on 1000 random points if __name__ == '__main__': _o = [] _RMS = [] for r in R.values(): _RMS.append((np.sqrt((((test_predictions[r['order']] - test_results) / test_results)**2).mean()))) _o.append(r['order']) plt.figure() plt.semilogy(_o, _RMS, 'o-') plt.xlabel('SC order') plt.ylabel('fractional RMS error for the SC surrogate') plt.legend(loc=0) plt.savefig('Convergence_SC_surrogate.png') plt.savefig('Convergence_SC_surrogate.pdf')