Nathanaël Perraudin, Michaël Defferrard, Tomasz Kacprzak, Raphael Sgier
This notebook shows how to regress (multiple) parameters from (multiple) input channels (spherical maps). It doesn't use real cosmological data.
%load_ext autoreload
%autoreload 2
%matplotlib inline
import os
import shutil
# Run on CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import healpy as hp
import tensorflow as tf
from deepsphere import models, experiment_helper, plot
from deepsphere.data import LabeledDataset
from deepsphere.utils import HiddenPrints
plt.rcParams['figure.figsize'] = (17, 5)
EXP_NAME = 'regression'
Create a dataset by smoothing random noise with Gaussian kernels of different widths. The two kernel widths used for each sample are the parameters to regress.
Nside = 32
Nsamples = 1000
def arcmin2rad(x):
    """Convert arcminutes to radians."""
    return x / 60 / 360 * 2 * np.pi
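healpy's smoothing routine takes sigma in radians, hence this conversion helper (it is not used below, but here is a quick sanity check of the formula):
# 60 arcmin = 1 degree = pi/180 radians.
assert np.isclose(arcmin2rad(60), np.pi / 180)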
def gaussian_smoothing(sig, sigma, nest=True):
    """Smooth a HEALPix map with a Gaussian kernel of width sigma (radians)."""
    if nest:
        sig = hp.reorder(sig, n2r=True)  # Spherical harmonic transforms expect RING ordering.
    smooth = hp.sphtfunc.smoothing(sig, sigma=sigma)
    if nest:
        smooth = hp.reorder(smooth, r2n=True)
    smooth /= np.linalg.norm(smooth)  # Normalize each map to unit norm.
    return smooth
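A minimal check of the helper on a single noise map (the sigma value here is arbitrary; HiddenPrints silences healpy's console output, as in the cell below):
with HiddenPrints():
    demo = gaussian_smoothing(np.random.randn(hp.nside2npix(Nside)), 1 / Nside)
assert np.isclose(np.linalg.norm(demo), 1)  # Unit norm by construction.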
x = np.random.randn(Nsamples, hp.nside2npix(Nside))
t = np.random.rand(Nsamples, 2) + 0.5  # Two parameters per sample, drawn from [0.5, 1.5).
with HiddenPrints():
    xs1 = np.array(list(map(gaussian_smoothing, x, t[:, 0] / Nside)))
    xs2 = np.array(list(map(gaussian_smoothing, x, t[:, 1] / Nside)))
xs = np.stack((xs1, xs2), axis=2)
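The resulting array is shaped (samples, pixels, channels), the layout the input_channel parameter below refers to:
assert xs.shape == (Nsamples, hp.nside2npix(Nside), 2)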
Plot the spherical map before and after smoothing.
idx = 0
cm = plt.cm.RdBu_r
cm.set_under('w')
hp.mollview(x[idx], title='Gaussian noise', nest=True, cmap=cm)
hp.mollview(xs[idx, :, 0], title='Gaussian noise smoothed with sigma={:.4f} rad'.format(t[idx, 0] / Nside), nest=True, cmap=cm)
# Scale the data. A single global factor preserves the relative differences between samples.
x_raw = xs / np.mean(xs**2)
# Create the label vector: shift the parameters from [0.5, 1.5) to [0, 1).
labels = t - 0.5
# Train / test split. The samples are i.i.d., so a sequential 80/20 split is effectively random.
ntrain = int(0.8 * Nsamples)
x_raw_train = x_raw[:ntrain]
x_raw_test = x_raw[ntrain:]
labels_train = labels[:ntrain]
labels_test = labels[ntrain:]
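An explicitly shuffled split would give the same kind of partition; a sketch with scikit-learn (the random_state is arbitrary):
# x_raw_train, x_raw_test, labels_train, labels_test = train_test_split(
#     x_raw, labels, test_size=0.2, random_state=42)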
params = dict()
params['dir_name'] = EXP_NAME
# Types of layers.
params['conv'] = 'chebyshev5' # Graph convolution: chebyshev5 or monomials.
params['pool'] = 'max' # Pooling: max or average.
params['activation'] = 'relu' # Non-linearity: relu, elu, leaky_relu, softmax, tanh, etc.
params['statistics'] = 'mean' # Statistics (for invariance): None, mean, var, meanvar, hist. The mean makes predictions invariant to rotation.
# Architecture.
params['nsides'] = [Nside, Nside//2, Nside//4, Nside//8] # Pooling: HEALPix Nside at each layer (each halving is a pooling step).
params['F'] = [16, 32, 64] # Graph convolutional layers: number of feature maps.
params['M'] = [64, 2] # Fully connected layers: output dimensionalities. Predict two parameters.
params['input_channel'] = 2 # Two channels (spherical maps) per sample.
params['loss'] = 'l2' # Regression loss.
params['K'] = [5] * len(params['F']) # Polynomial orders.
params['batch_norm'] = [True] * len(params['F']) # Batch normalization.
# Regularization.
params['regularization'] = 0 # Amount of L2 regularization over the weights (will be divided by the number of weights).
params['dropout'] = 1 # Fraction of neurons to keep (1 means no dropout).
# Training.
params['num_epochs'] = 10 # Number of passes through the training data.
params['batch_size'] = 16 # Number of samples per training batch. Should be a power of 2 for greater speed.
params['eval_frequency'] = 15 # Frequency of model evaluations during training (influences training time).
params['scheduler'] = lambda step: 1e-4 # Constant learning rate.
params['optimizer'] = lambda lr: tf.train.GradientDescentOptimizer(lr)
#params['optimizer'] = lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5)
#params['optimizer'] = lambda lr: tf.train.AdamOptimizer(lr, beta1=0.9, beta2=0.99, epsilon=1e-8)
model = models.deepsphere(**params)
NN architecture
  input: M_0 = 12288
  layer 1: cgconv1
    representation: M_0 * F_1 / p_1 = 12288 * 16 / 4 = 49152
    weights: F_0 * F_1 * K_1 = 2 * 16 * 5 = 160
    biases: F_1 = 16
    batch normalization
  layer 2: cgconv2
    representation: M_1 * F_2 / p_2 = 3072 * 32 / 4 = 24576
    weights: F_1 * F_2 * K_2 = 16 * 32 * 5 = 2560
    biases: F_2 = 32
    batch normalization
  layer 3: cgconv3
    representation: M_2 * F_3 / p_3 = 768 * 64 / 4 = 12288
    weights: F_2 * F_3 * K_3 = 32 * 64 * 5 = 10240
    biases: F_3 = 64
    batch normalization
  Statistical layer: mean
    representation: 1 * 64 = 64
  layer 4: fc1
    representation: M_4 = 64
    weights: M_3 * M_4 = 64 * 64 = 4096
    biases: M_4 = 64
  layer 5: fc2
    representation: M_5 = 2
    weights: M_4 * M_5 = 64 * 2 = 128
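Summing the weights and biases listed above gives the trainable parameter count (excluding batch-normalization parameters and any biases not printed):
print(160 + 16 + 2560 + 32 + 10240 + 64 + 4096 + 64 + 128)  # 17360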
# Cleanup before running again.
shutil.rmtree('summaries/{}/'.format(EXP_NAME), ignore_errors=True)
shutil.rmtree('checkpoints/{}/'.format(EXP_NAME), ignore_errors=True)
training = LabeledDataset(x_raw_train, labels_train)
testing = LabeledDataset(x_raw_test, labels_test)
accuracy_validation, loss_validation, loss_training, t_step = model.fit(training, testing)
step 15 / 500 (epoch 0.30 / 10): learning_rate = 1.00e-04, training loss = 1.04e+00, validation loss: 1.80e+00, CPU time: 162s, wall time: 14s
step 30 / 500 (epoch 0.60 / 10): learning_rate = 1.00e-04, training loss = 5.58e-01, validation loss: 1.43e+00, CPU time: 324s, wall time: 26s
step 45 / 500 (epoch 0.90 / 10): learning_rate = 1.00e-04, training loss = 5.38e-01, validation loss: 1.30e+00, CPU time: 486s, wall time: 37s
step 60 / 500 (epoch 1.20 / 10): learning_rate = 1.00e-04, training loss = 6.24e-01, validation loss: 1.26e+00, CPU time: 647s, wall time: 48s
step 75 / 500 (epoch 1.50 / 10): learning_rate = 1.00e-04, training loss = 3.10e-01, validation loss: 1.20e+00, CPU time: 807s, wall time: 59s
step 90 / 500 (epoch 1.80 / 10): learning_rate = 1.00e-04, training loss = 3.11e-01, validation loss: 1.16e+00, CPU time: 970s, wall time: 70s
step 105 / 500 (epoch 2.10 / 10): learning_rate = 1.00e-04, training loss = 2.80e-01, validation loss: 1.14e+00, CPU time: 1131s, wall time: 81s
step 120 / 500 (epoch 2.40 / 10): learning_rate = 1.00e-04, training loss = 4.30e-01, validation loss: 1.12e+00, CPU time: 1293s, wall time: 92s
step 135 / 500 (epoch 2.70 / 10): learning_rate = 1.00e-04, training loss = 4.26e-01, validation loss: 1.13e+00, CPU time: 1460s, wall time: 103s
step 150 / 500 (epoch 3.00 / 10): learning_rate = 1.00e-04, training loss = 2.24e-01, validation loss: 1.10e+00, CPU time: 1623s, wall time: 115s
step 165 / 500 (epoch 3.30 / 10): learning_rate = 1.00e-04, training loss = 2.93e-01, validation loss: 1.08e+00, CPU time: 1786s, wall time: 126s
step 180 / 500 (epoch 3.60 / 10): learning_rate = 1.00e-04, training loss = 2.20e-01, validation loss: 1.07e+00, CPU time: 1947s, wall time: 137s
step 195 / 500 (epoch 3.90 / 10): learning_rate = 1.00e-04, training loss = 2.08e-01, validation loss: 1.06e+00, CPU time: 2107s, wall time: 148s
step 210 / 500 (epoch 4.20 / 10): learning_rate = 1.00e-04, training loss = 2.60e-01, validation loss: 1.06e+00, CPU time: 2270s, wall time: 159s
step 225 / 500 (epoch 4.50 / 10): learning_rate = 1.00e-04, training loss = 1.62e-01, validation loss: 1.04e+00, CPU time: 2434s, wall time: 170s
step 240 / 500 (epoch 4.80 / 10): learning_rate = 1.00e-04, training loss = 1.69e-01, validation loss: 1.00e+00, CPU time: 2595s, wall time: 181s
step 255 / 500 (epoch 5.10 / 10): learning_rate = 1.00e-04, training loss = 1.70e-01, validation loss: 1.00e+00, CPU time: 2759s, wall time: 192s
step 270 / 500 (epoch 5.40 / 10): learning_rate = 1.00e-04, training loss = 3.43e-01, validation loss: 9.98e-01, CPU time: 2924s, wall time: 204s
step 285 / 500 (epoch 5.70 / 10): learning_rate = 1.00e-04, training loss = 2.94e-01, validation loss: 1.01e+00, CPU time: 3085s, wall time: 215s
step 300 / 500 (epoch 6.00 / 10): learning_rate = 1.00e-04, training loss = 1.34e-01, validation loss: 1.00e+00, CPU time: 3246s, wall time: 226s
step 315 / 500 (epoch 6.30 / 10): learning_rate = 1.00e-04, training loss = 1.83e-01, validation loss: 9.85e-01, CPU time: 3408s, wall time: 237s
step 330 / 500 (epoch 6.60 / 10): learning_rate = 1.00e-04, training loss = 1.46e-01, validation loss: 9.83e-01, CPU time: 3571s, wall time: 248s
step 345 / 500 (epoch 6.90 / 10): learning_rate = 1.00e-04, training loss = 1.37e-01, validation loss: 9.71e-01, CPU time: 3722s, wall time: 259s
step 360 / 500 (epoch 7.20 / 10): learning_rate = 1.00e-04, training loss = 1.49e-01, validation loss: 9.82e-01, CPU time: 3881s, wall time: 270s
step 375 / 500 (epoch 7.50 / 10): learning_rate = 1.00e-04, training loss = 1.08e-01, validation loss: 9.63e-01, CPU time: 4037s, wall time: 280s
step 390 / 500 (epoch 7.80 / 10): learning_rate = 1.00e-04, training loss = 1.22e-01, validation loss: 9.29e-01, CPU time: 4199s, wall time: 292s
step 405 / 500 (epoch 8.10 / 10): learning_rate = 1.00e-04, training loss = 1.33e-01, validation loss: 9.37e-01, CPU time: 4360s, wall time: 302s
step 420 / 500 (epoch 8.40 / 10): learning_rate = 1.00e-04, training loss = 2.94e-01, validation loss: 9.35e-01, CPU time: 4521s, wall time: 314s
step 435 / 500 (epoch 8.70 / 10): learning_rate = 1.00e-04, training loss = 2.24e-01, validation loss: 9.54e-01, CPU time: 4684s, wall time: 325s
step 450 / 500 (epoch 9.00 / 10): learning_rate = 1.00e-04, training loss = 9.48e-02, validation loss: 9.43e-01, CPU time: 4845s, wall time: 336s
step 465 / 500 (epoch 9.30 / 10): learning_rate = 1.00e-04, training loss = 1.42e-01, validation loss: 9.30e-01, CPU time: 5008s, wall time: 347s
step 480 / 500 (epoch 9.60 / 10): learning_rate = 1.00e-04, training loss = 1.13e-01, validation loss: 9.33e-01, CPU time: 5168s, wall time: 358s
step 495 / 500 (epoch 9.90 / 10): learning_rate = 1.00e-04, training loss = 1.07e-01, validation loss: 9.22e-01, CPU time: 5333s, wall time: 369s
step 500 / 500 (epoch 10.00 / 10): learning_rate = 1.00e-04, training loss = 8.68e-02, validation loss: 9.30e-01, CPU time: 5410s, wall time: 374s
plot.plot_loss(loss_training, loss_validation, t_step, params['eval_frequency'])
pred_test = model.predict(x_raw_test)
pred_train = model.predict(x_raw_train)
/mnt/scratch/lts2/mdeff/deepsphere/deepsphere/../checkpoints/regression
INFO:tensorflow:Restoring parameters from /mnt/scratch/lts2/mdeff/deepsphere/deepsphere/../checkpoints/regression/model-500
/mnt/scratch/lts2/mdeff/deepsphere/deepsphere/../checkpoints/regression
INFO:tensorflow:Restoring parameters from /mnt/scratch/lts2/mdeff/deepsphere/deepsphere/../checkpoints/regression/model-500
# Mean absolute error on the test set, relative to the spread of the labels.
np.mean(np.abs(pred_test - labels_test)) / np.std(labels_test)
0.1659902090010819
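A per-parameter breakdown of the same relative error (a quick diagnostic, not part of the original notebook):
rel_err = np.mean(np.abs(pred_test - labels_test), axis=0) / np.std(labels_test, axis=0)
print(rel_err)  # One value per regressed parameter.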
def plot_prediction(param=0):
    fig, ax = plt.subplots()
    ax.plot(labels_train[:, param], pred_train[:, param], 'o', label='train')
    ax.plot(labels_test[:, param], pred_test[:, param], 'o', label='test')
    ax.plot([0, 1], [0, 1], linewidth=4, label='ground truth')
    ax.set_xlabel('true parameter value')
    ax.set_ylabel('predicted parameter value')
    ax.set_title('Prediction of parameter {}'.format(param))
    ax.legend()
plot_prediction(0)
plot_prediction(1)