#!/usr/bin/env python
# coding: utf-8
#
# Notebook export: trains the regressor + uncertainty predictor in three
# configurations (single train/test split, 4-fold full-set predictor, and a
# model trained on all data) and draws true-vs-predicted plots.

# In[1]:

import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

pd.options.display.max_columns = 99


# In[2]:

# The relative path './data/d_data.pkl' below is resolved from this directory.
# NOTE: the move is relative to the current directory, so run this only once
# per session; running it again moves one more level up.
os.chdir('..')  # change to root directory


# In[3]:

# Load data
import pickle

from firenet.util import add_uncertainty_features

# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# Only load a d_data.pkl that comes from a trusted source.
with open('./data/d_data.pkl', 'rb') as infile:
    d_data = pickle.load(infile)
d_data = add_uncertainty_features(d_data)
d_data.keys()


# # Single train/test split

# In[4]:

from firenet.ml import RegUncPredictor

pred = RegUncPredictor(d_data)
pred.preprocess()
pred.train_regressor()


# In[5]:

pred.train_uncertainty()


# In[6]:

pred.reg.test()


# In[7]:

pred.unc.test()


# In[8]:

y_t, y_p, y_perr = pred.get_target_set()


# In[9]:

# Storing and loading models
# from firenet.ml import ModelStore
# ModelStore().store(pred, name='nnet')  # Save to './models/nnet.pkl' by default
# pred = ModelStore().load(d_data, name='nnet')  # Load "nnet" model


# In[10]:

from firenet.plotting.truevspred import TrueVSPredPlotter


def log_flux_errors(d_data, idx, bands):
    """Return ``fullbayerr / (fullbay * ln(10))`` for rows *idx* and columns *bands*.

    Both frames are restricted to the same rows AND the same columns before
    dividing. pandas aligns DataFrame operands on their column labels, so
    dividing by the unrestricted 'fullbay' frame would yield the union of all
    columns, with an all-NaN column for every band not in *bands*.

    NOTE(review): err / (value * ln 10) is the first-order error on
    log10(value); presumably the targets are log10 fluxes - confirm.
    """
    flux = d_data['fullbay'].loc[idx, bands]
    flux_err = d_data['fullbayerr'].loc[idx, bands]
    return flux_err / (flux * np.log(10))


def plot_true_vs_pred(y_t, y_p, y_terr, y_perr):
    """Draw a one-panel true-vs-predicted figure using the 'firflux' style.

    Returns the plotter and its panel so they can be adjusted afterwards.
    """
    plotter = TrueVSPredPlotter(figsize=(12.8, 8.8))
    plotter.create_panels(nrows=1)
    first_panel = plotter.get_panel(0)
    first_panel.stylized_plot(y_t, y_p, y_terr=y_terr, y_perr=y_perr,
                              style='firflux')
    return plotter, first_panel


firbands = d_data['fullbay'].columns[-6:]  # last six columns of 'fullbay'
idx = y_t.index
y_terr = log_flux_errors(d_data, idx, firbands)
tvpplot, panel = plot_true_vs_pred(y_t, y_p, y_terr, y_perr)


# # Full set predictor
# Using a 4-fold train/test split, training 4 predictors. Each galaxy is used as a test sample once.

# In[11]:

from firenet.ml.fullsetpredictor import FullSetPredictor

fspred = FullSetPredictor(d_data)


# In[12]:

fspred.prepare_splits(shuffle_state=123)


# In[13]:

fspred.train()


# In[14]:

# Storing and loading models
# from firenet.ml.modelstore import ModelStore
# ModelStore().store(fspred, name='fsnnet')  # Save to './models/fsnnet.pkl' by default
# fspred = ModelStore().load(d_data, name='fsnnet')  # Load "fsnnet" model


# In[15]:

y_t, y_p, y_perr = fspred.get_combined_test()


# In[16]:

# See paper Fig. 3
# Same plot as in In[10], now for the combined test sets of all folds.
firbands = d_data['fullbay'].columns[-6:]
idx = y_t.index
y_terr = log_flux_errors(d_data, idx, firbands)
tvpplot, panel = plot_true_vs_pred(y_t, y_p, y_terr, y_perr)


# # Train a single model on all (DustPedia + H-ATLAS) data
# This model can then be used for other data sets (see notebook `04_predicting.ipynb`)

# In[17]:

# Use all data for training and testing (no longer unbiased test set)
idx_tot = d_data['fullbay'].index.values.copy()  # copy: shuffle below is in place
np.random.seed(123)  # seeds the global NumPy RNG so the shuffle is repeatable
np.random.shuffle(idx_tot)
pred = RegUncPredictor(d_data)
pred.preprocess(idx_train=idx_tot, idx_test=idx_tot)
pred.train_regressor()


# In[18]:

pred.train_uncertainty()


# In[19]:

pred.reg.test(tset='train')


# In[20]:

# Store all data model
# ModelStore().store(pred, name='nnet_alldata')