import keras
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Dropout, Activation, Flatten, Input, Lambda
from keras.layers import Conv2D, MaxPooling2D, Conv1D, MaxPooling1D, LSTM, ConvLSTM2D, GRU, BatchNormalization, LocallyConnected2D, Permute
from keras.layers import Concatenate, Reshape, Softmax, Conv2DTranspose, Embedding, Multiply
from keras.callbacks import ModelCheckpoint, EarlyStopping, Callback
from keras import regularizers
from keras import backend as K
import keras.losses
import tensorflow as tf
from tensorflow.python.framework import ops
import isolearn.keras as iso
import numpy as np
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)
import pandas as pd
import os
import pickle
import numpy as np
import scipy.sparse as sp
import scipy.io as spio
import matplotlib.pyplot as plt
import isolearn.keras as iso
from seqprop.visualization import *
from seqprop.generator import *
from seqprop.predictor import *
from seqprop.optimizer import *
from definitions.mpradragonn_deep_factorized_model import load_saved_predictor
import warnings
warnings.simplefilter("ignore")
from keras.backend.tensorflow_backend import set_session
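#Let TensorFlow allocate GPU memory on demand (allow_growth) instead of reserving all of it upfront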
def contain_tf_gpu_mem_usage() :
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
set_session(sess)
contain_tf_gpu_mem_usage()
Using TensorFlow backend.
#Download MPRA-DragoNN models
#!wget https://github.com/kundajelab/MPRA-DragoNN/raw/master/kipoi/ConvModel/pretrained.hdf5
#!wget https://github.com/kundajelab/MPRA-DragoNN/raw/master/kipoi/DeepFactorizedModel/pretrained.hdf5
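#(Note: both files download as pretrained.hdf5; the DeepFactorizedModel weights are presumably saved locally as pretrained_deep_factorized_model.hdf5, the model_name used further below.)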
def get_punish_margin_conv_activity(activity_margin=1.) :
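#Hinge-style penalty on summed convolutional activations: zero while the absolute total stays below activity_margin, growing linearly beyond it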
def _penalty(conv_out) :
total_conv_out = K.abs(K.sum(conv_out, axis=-2))
margin_cost = K.switch(total_conv_out < K.constant(activity_margin, shape=(1,)), K.zeros_like(total_conv_out), total_conv_out - K.constant(activity_margin, shape=(1,)))
return K.mean(margin_cost, -1)
return _penalty
#Define the target activity loss function
def get_earthmover_loss(target_output_ixs, pwm_start=0, pwm_end=70, pwm_target_bits=1.8, pwm_entropy_weight=0.0, conv_1_penalty=0.0, conv_1_margin=1.0, conv_2_penalty=0.0, conv_2_margin=1.0, conv_3_penalty=0.0, conv_3_margin=1.0) :
punish_c = 0.0
punish_g = 0.0
entropy_mse = get_margin_entropy(pwm_start=pwm_start, pwm_end=pwm_end, min_bits=pwm_target_bits)
punish_c_func = get_punish_c(pwm_start=pwm_start, pwm_end=pwm_end)
punish_g_func = get_punish_g(pwm_start=pwm_start, pwm_end=pwm_end)
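#One-hot mask over the predictor's 12 output tasks, selecting only the target outputs for the fitness term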
pred_mask = np.zeros((1, 1, 12))
pred_mask[0, 0, target_output_ixs] = 1.
pred_mask = K.constant(pred_mask, dtype=tf.float32)
n_masked = len(target_output_ixs)
def loss_func(predictor_outputs) :
pwm_logits, pwm, sampled_pwm, pred_score = predictor_outputs
#Specify costs
#fitness_loss = -1.0 * K.mean(pred_score[..., 0], axis=0)
fitness_loss = -1.0 * K.sum(pred_score * pred_mask, axis=(0, -1)) / (n_masked * K.cast(K.shape(pred_score)[0], tf.float32))
seq_loss = 0.0
seq_loss += punish_c * K.mean(punish_c_func(sampled_pwm), axis=0)
seq_loss += punish_g * K.mean(punish_g_func(sampled_pwm), axis=0)
entropy_loss = pwm_entropy_weight * entropy_mse(pwm)
#Compute total loss
total_loss = fitness_loss + seq_loss + entropy_loss
return K.reshape(K.sum(total_loss, axis=0), (1,))
def val_loss_func(predictor_outputs) :
pwm_logits, pwm, sampled_pwm, pred_score = predictor_outputs
#Specify costs
#fitness_loss = -1.0 * K.mean(pred_score[..., 0], axis=0)
fitness_loss = -1.0 * K.sum(pred_score * pred_mask, axis=(0, -1)) / (n_masked * K.cast(K.shape(pred_score)[0], tf.float32))
seq_loss = 0.0
seq_loss += punish_c * K.mean(punish_c_func(sampled_pwm), axis=0)
seq_loss += punish_g * K.mean(punish_g_func(sampled_pwm), axis=0)
entropy_loss = pwm_entropy_weight * entropy_mse(pwm)
#Compute total loss
total_loss = fitness_loss + seq_loss + entropy_loss
return K.reshape(K.mean(total_loss, axis=0), (1,))
return loss_func, val_loss_func
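#A minimal NumPy sketch of the masked fitness term above (shapes are assumptions;
#pred_score is taken here to be (n_sequences, n_samples, 12) with 12 predictor outputs):
#the term averages the selected output channels over sequences and negates the result,
#so minimizing the loss maximizes the targeted MPRA-DragoNN activity.
_pred_score = np.random.randn(10, 1, 12)
_pred_mask = np.zeros((1, 1, 12))
_pred_mask[0, 0, [5]] = 1.
_fitness_loss = -1.0 * np.sum(_pred_score * _pred_mask, axis=(0, -1)) / (1 * _pred_score.shape[0])
#_fitness_loss has shape (n_samples,), matching the per-sample Keras loss before the final sum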
def get_nop_transform() :
def _transform_func(pwm) :
return pwm
return _transform_func
class ValidationCallback(Callback):
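#Callback that evaluates a given loss model once at construction and again after every training batch, recording the values in val_loss_history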
def __init__(self, val_name, val_loss_model, val_steps) :
self.val_name = val_name
self.val_loss_model = val_loss_model
self.val_steps = val_steps
self.val_loss_history = []
#Track val loss
self.val_loss_history.append(self.val_loss_model.predict(x=None, steps=self.val_steps)[0])
def on_batch_end(self, batch, logs={}) :
#Track val loss
val_loss_value = self.val_loss_model.predict(x=None, steps=self.val_steps)[0]
self.val_loss_history.append(val_loss_value)
#Function for running SeqProp on a set of objectives to optimize
def run_seqprop(target_output_ixs_list, sequence_templates, loss_funcs, val_loss_funcs, transform_funcs, n_sequences=1, n_samples=1, n_valid_samples=1, eval_mode='sample', normalize_logits=False, n_epochs=10, steps_per_epoch=100) :
n_objectives = len(sequence_templates)
seqprop_predictors = []
valid_monitors = []
train_histories = []
valid_histories = []
for obj_ix in range(n_objectives) :
print("Optimizing objective " + str(obj_ix) + '...')
sequence_template = sequence_templates[obj_ix]
loss_func = loss_funcs[obj_ix]
val_loss_func = val_loss_funcs[obj_ix]
transform_func = transform_funcs[obj_ix]
target_output_ixs = target_output_ixs_list[obj_ix]
#Build Generator Network
_, seqprop_generator = build_generator(seq_length=len(sequence_template), n_sequences=n_sequences, n_samples=n_samples, sequence_templates=[sequence_template] * n_sequences, batch_normalize_pwm=normalize_logits, pwm_transform_func=transform_func, validation_sample_mode='sample')
#for layer in seqprop_generator.layers :
# if 'policy' not in layer.name :
# layer.name += "_trainversion"
_, valid_generator = build_generator(seq_length=len(sequence_template), n_sequences=n_sequences, n_samples=n_valid_samples, sequence_templates=[sequence_template] * n_sequences, batch_normalize_pwm=normalize_logits, pwm_transform_func=None, validation_sample_mode='sample', master_generator=seqprop_generator)
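#The validation generator shares its trainable weights with the training generator (master_generator) but draws n_valid_samples one-hot samples; its layers are renamed below to avoid name clashes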
for layer in valid_generator.layers :
#if 'policy' not in layer.name :
layer.name += "_valversion"
#Build Predictor Network and hook it on the generator PWM output tensor
_, seqprop_predictor = build_predictor(seqprop_generator, load_saved_predictor(model_path, library_context=None), n_sequences=n_sequences, n_samples=n_samples, eval_mode=eval_mode)
#for layer in seqprop_predictor.layers :
# if '_trainversion' not in layer.name and 'policy' not in layer.name :
# layer.name += "_trainversion"
_, valid_predictor = build_predictor(valid_generator, load_saved_predictor(model_path, library_context=None), n_sequences=n_sequences, n_samples=n_valid_samples, eval_mode='sample')
for layer in valid_predictor.layers :
if '_valversion' not in layer.name :# and 'policy' not in layer.name :
layer.name += "_valversion"
#Build Loss Model (In: Generator seed, Out: Loss function)
_, loss_model = build_loss_model(seqprop_predictor, loss_func)
_, valid_loss_model = build_loss_model(valid_predictor, val_loss_func)
#Specify Optimizer to use
#opt = keras.optimizers.SGD(lr=0.5)
#opt = keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=0, nesterov=True)
opt = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
#Compile Loss Model (Minimize self)
loss_model.compile(loss=lambda true, pred: pred, optimizer=opt)
def get_logit(p) :
return np.log(p / (1. - p))
#Specify callback entities
#measure_func = lambda pred_outs: np.mean(get_logit(np.expand_dims(pred_outs[0], axis=0) if len(pred_outs[0].shape) <= 2 else pred_outs[0]), axis=0)
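#Monitoring metric: mean predicted activity across samples and the selected target output channels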
measure_func = lambda pred_outs: np.expand_dims(np.mean(np.expand_dims(pred_outs[0][..., target_output_ixs], axis=0) if len(pred_outs[0].shape) <= 2 else pred_outs[0][..., target_output_ixs], axis=(0, -1)), axis=-1)
#train_monitor = FlexibleSeqPropMonitor(predictor=seqprop_predictor, plot_on_train_end=False, plot_every_epoch=False, track_every_step=True, measure_func=measure_func, measure_name='Activity', plot_pwm_start=500, plot_pwm_end=700, sequence_template=sequence_template, plot_pwm_indices=np.arange(n_sequences).tolist(), figsize=(12, 1.0))
valid_monitor = FlexibleSeqPropMonitor(predictor=valid_predictor, plot_on_train_end=True, plot_every_epoch=False, track_every_step=True, measure_func=measure_func, measure_name='Activity', plot_pwm_start=0, plot_pwm_end=145, sequence_template=sequence_template, plot_pwm_indices=np.arange(n_sequences).tolist(), figsize=(12, 1.0))
train_history = ValidationCallback('loss', loss_model, 1)
valid_history = ValidationCallback('val_loss', valid_loss_model, 1)
callbacks =[
#EarlyStopping(monitor='loss', min_delta=0.001, patience=5, verbose=0, mode='auto'),
valid_monitor,
train_history,
valid_history
]
#Fit Loss Model
_ = loss_model.fit(
[], np.ones((1, 1)), #Dummy training example
epochs=n_epochs,
steps_per_epoch=steps_per_epoch,
callbacks=callbacks
)
valid_monitor.predictor = None
train_history.val_loss_model = None
valid_history.val_loss_model = None
seqprop_predictors.append(seqprop_predictor)
valid_monitors.append(valid_monitor)
train_histories.append(train_history)
valid_histories.append(valid_history)
return seqprop_predictors, valid_monitors, train_histories, valid_histories
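#For reference, the per-position argmax decoding used in the cells below can be written more compactly.
#A sketch, assuming the optimized PWM has shape (n_sequences, seq_length, 4, 1) with channels ordered A, C, G, T:
def pwm_to_consensus(pwm, alphabet='ACGT') :
    #Pick the most probable nucleotide at every position of every PWM
    max_nt_ixs = np.argmax(pwm[..., 0], axis=-1)
    return [''.join([alphabet[ix] for ix in row]) for row in max_nt_ixs]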
#Specify the file path to the pre-trained predictor network
save_dir = os.path.join(os.getcwd(), '')
model_name = 'pretrained_deep_factorized_model.hdf5'
model_path = os.path.join(save_dir, model_name)
import random
def set_seed(seed_value) :
# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
os.environ['PYTHONHASHSEED']=str(seed_value)
# 2. Set the `python` built-in pseudo-random generator at a fixed value
random.seed(seed_value)
# 3. Set the `numpy` pseudo-random generator at a fixed value
np.random.seed(seed_value)
# 4. Set the `tensorflow` pseudo-random generator at a fixed value
tf.set_random_seed(seed_value)
# 5. Configure a new global `tensorflow` session
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
seq_template = 'N' * 145
rand_seed = 14755
#Run SeqProp Optimization
print("Running optimization experiment 'MPRA-DragoNN Activity Maximization'")
#Number of PWMs to generate per objective
n_sequences = 10
#Number of One-hot sequences to sample from the PWM at each grad step
n_samples = 1
#Number of epochs per objective to optimize
n_epochs = 1
#Number of steps (grad updates) per epoch
steps_per_epoch = 200
#Number of One-hot validation sequences to sample from the PWM
n_valid_samples = 10
experiment_name_list = ['PWM', 'PWM-IN', 'Sampled', 'Sampled-IN']
eval_mode_list = ['pwm', 'pwm', 'sample', 'sample']
normalize_logits_list = [False, True, False, True]
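#Four configurations: loss evaluated on the continuous softmax PWM ('pwm') or on sampled one-hot sequences ('sample'), each without and with logit normalization in the generator (the -IN variants set batch_normalize_pwm=True)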
result_dict = {
'PWM' : {},
'PWM-IN' : {},
'Sampled' : {},
'Sampled-IN' : {}
}
for experiment_name, eval_mode, normalize_logits in zip(experiment_name_list, eval_mode_list, normalize_logits_list) :
print("Experiment name = " + str(experiment_name))
print("Eval mode = " + str(eval_mode))
print("Normalize logits = " + str(normalize_logits))
K.clear_session()
set_seed(rand_seed)
target_output_ixs = [
[5]
]
sequence_templates = [
seq_template
]
losses, val_losses = zip(*[
get_earthmover_loss(
target_output_ixs[0],
pwm_start=0,
pwm_end=145,
pwm_target_bits=1.8,
pwm_entropy_weight=0.0
)
])
transforms = [
None
]
seqprop_predictors, valid_monitors, train_histories, valid_histories = run_seqprop(target_output_ixs, sequence_templates, losses, val_losses, transforms, n_sequences, n_samples, n_valid_samples, eval_mode, normalize_logits, n_epochs, steps_per_epoch)
seqprop_predictor, valid_monitor, train_history, valid_history = seqprop_predictors[0], valid_monitors[0], train_histories[0], valid_histories[0]
#Retrieve optimized PWMs and predicted activity scores
_, optimized_pwm, _, _ = seqprop_predictor.predict(x=None, steps=1)
consensus_seqs = []
for i in range(optimized_pwm.shape[0]) :
consensus_seq = ''
for j in range(optimized_pwm.shape[1]) :
max_nt_ix = np.argmax(optimized_pwm[i, j, :, 0])
if max_nt_ix == 0 :
consensus_seq += 'A'
elif max_nt_ix == 1 :
consensus_seq += 'C'
elif max_nt_ix == 2 :
consensus_seq += 'G'
elif max_nt_ix == 3 :
consensus_seq += 'T'
consensus_seqs.append(consensus_seq)
result_dict[experiment_name] = {
'seqprop_predictor' : seqprop_predictor,
'valid_monitor' : valid_monitor,
'train_history' : train_history,
'valid_history' : valid_history,
'consensus_seqs' : consensus_seqs
}
Running optimization experiment 'MPRA-DragoNN Activity Maximization'
Experiment name = PWM
Eval mode = pwm
Normalize logits = False
Optimizing objective 0...
Epoch 1/1
200/200 [==============================] - 12s 60ms/step - loss: -24.5096
Experiment name = PWM-IN
Eval mode = pwm
Normalize logits = True
Optimizing objective 0...
Epoch 1/1
200/200 [==============================] - 13s 63ms/step - loss: -34.4220
Experiment name = Sampled
Eval mode = sample
Normalize logits = False
Optimizing objective 0...
Epoch 1/1
200/200 [==============================] - 12s 61ms/step - loss: 0.2476
Experiment name = Sampled-IN
Eval mode = sample
Normalize logits = True
Optimizing objective 0...
Epoch 1/1
200/200 [==============================] - 12s 62ms/step - loss: -2.3397
save_figs = True
fig_prefix = "eval_seqprop_mpradragonn_k562_sv40_earthmover_experiment_200_updates_"
for experiment_ix, experiment_name in enumerate(experiment_name_list) :
print("Experiment name = " + str(experiment_name))
seqprop_predictor = result_dict[experiment_name]['seqprop_predictor']
valid_monitor = result_dict[experiment_name]['valid_monitor']
train_history = result_dict[experiment_name]['train_history']
valid_history = result_dict[experiment_name]['valid_history']
consensus_seqs = result_dict[experiment_name]['consensus_seqs']
#Store statistics for optimized sequences
fig_name = fig_prefix + experiment_name + "_" if save_figs else None
valid_monitor.plot_metrics_and_pwm(fig_name=fig_name)
f = plt.figure(figsize=(6, 4))
l1 = plt.plot(np.arange(n_epochs * steps_per_epoch + 1), np.array(train_history.val_loss_history) / n_sequences, color='darkgreen', linewidth=2, linestyle='-', label='Train')
l2 = plt.plot(np.arange(n_epochs * steps_per_epoch + 1), np.array(valid_history.val_loss_history), color='darkorange', linewidth=2, linestyle='--', label='Valid')
plt.xlabel("Weight Updates", fontsize=16)
plt.ylabel("Loss", fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim(0, n_epochs * steps_per_epoch)
plt.ylim(min(np.min(train_history.val_loss_history) / n_sequences, np.min(valid_history.val_loss_history)), max(np.max(train_history.val_loss_history) / n_sequences, np.max(valid_history.val_loss_history)))
plt.legend(handles=[l1[0], l2[0]], fontsize=14)
plt.tight_layout()
if save_figs :
plt.savefig(fig_name + '_loss.png', transparent=True, dpi=150)
plt.savefig(fig_name + '_loss.svg')
plt.savefig(fig_name + '_loss.eps')
plt.show()
for i in range(len(consensus_seqs)) :
consensus_seq = consensus_seqs[i]
print(">consensus_sequence_" + str(i))
print(consensus_seq)
print("--- Comparison of loss convergence ---")
for history_prefix in ['train', 'valid'] :
loss_normalizer = n_sequences if history_prefix == 'train' else 1.
y_label_prefix = 'Train' if history_prefix == 'train' else 'Validation'
f = plt.figure(figsize=(6, 4))
ls = []
min_y_val = -1.0
max_y_val = 0.1
for experiment_ix, experiment_name in enumerate(experiment_name_list) :
curr_history = result_dict[experiment_name][history_prefix + '_history']
l1 = plt.plot(np.arange(n_epochs * steps_per_epoch + 1), np.array(curr_history.val_loss_history) / loss_normalizer, linewidth=2, linestyle='-', label=experiment_name)
ls.append(l1[0])
#max_y_val = max(max_y_val, np.max(curr_history.val_loss_history) / loss_normalizer)
plt.xlabel("Weight Updates", fontsize=16)
plt.ylabel(y_label_prefix + " Loss", fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim(0, n_epochs * steps_per_epoch)
plt.ylim(min_y_val, max_y_val)
plt.legend(handles=ls, fontsize=14)
plt.tight_layout()
if save_figs :
plt.savefig(fig_prefix + history_prefix + '_loss_cmp.png', transparent=True, dpi=150)
plt.savefig(fig_prefix + history_prefix + '_loss_cmp.svg')
plt.savefig(fig_prefix + history_prefix + '_loss_cmp.eps')
plt.show()
print("--- Comparison of activity convergence ---")
f = plt.figure(figsize=(6, 4))
ls = []
min_y_val = -0.1
max_y_val = 1.0
for experiment_ix, experiment_name in enumerate(experiment_name_list) :
curr_monitor = result_dict[experiment_name]['valid_monitor']
meas_history = curr_monitor.measure_history
meas_history = [np.mean(meas_history[k]) for k in range(len(meas_history))]
l1 = plt.plot(np.arange(n_epochs * steps_per_epoch + 1), np.array(meas_history), linewidth=2, linestyle='-', label=experiment_name)
ls.append(l1[0])
#max_y_val = max(max_y_val, np.max(meas_history))
plt.xlabel("Weight Updates", fontsize=16)
plt.ylabel("Validation Activity Score", fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim(0, n_epochs * steps_per_epoch)
plt.ylim(min_y_val, max_y_val)
plt.legend(handles=ls, fontsize=14)
plt.tight_layout()
if save_figs :
plt.savefig(fig_prefix + '_valid_logodds_cmp.png', transparent=True, dpi=150)
plt.savefig(fig_prefix + '_valid_logodds_cmp.svg')
plt.savefig(fig_prefix + '_valid_logodds_cmp.eps')
plt.show()
Experiment name = PWM
>consensus_sequence_0 TCTTCCTCCCCCCCCCCCCCCCCCTCGAACCCCCCCCCCCCCAACTCCCCGTGCATTTTCCGAACCATAGGAGCGTAGGTCTTTCAACACTGCGCAGAGTGGTTTCCCCCTCCCCCCCCCCCCCCCCCCCCCCCCCCCGCCGGGA >consensus_sequence_1 GCTTCCTCCCCCCCCCCCCCCCCCCTTAGTCCCCCCCCCCCCCCCCCCCCCCCACTCCTTGCGCATTTGCGGTGCAGCGGTTGTTATTTCTGCGTATGGGGTCCCCCTTTTTTTTCCCCCCCCCCCCCCCCCCCCCTCCGCCAGA >consensus_sequence_2 TCCTCTTCCCTCGGTTTTCCCCCCCCCCCCCCCCCCCCGAACCCCCCACCCCCTTCCCGTGCGTTTGATGGGCGGTGGGTTTCTTTCTTTGCGGGTTTGCGGTTTTTTTTTTCCCCCCCCCCCCCCCCCCCCCCCCCCCGCGCGA >consensus_sequence_3 GCCTCTCCCCCCCCTCCCCCCCCCCTTAGTCCCCCCCCCCCCCCCCCCCCCCCCCCCCTGCGCATTAGTTACATGAATGTGTTTCTTTAAATACGGCGGGTGTTTTCTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGCGGGA >consensus_sequence_4 GCCTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCTTCCCGTTCGTTCAGTTTAGATAGACGGGTTCCTTTAAAACTTGGGCGTGTTCCCTCTTCTCCCCCCCCCCCCCCCCCCCCCCTTCCCCGAGGGA >consensus_sequence_5 GCCTCCTCCCCCCCTCCCCCCCCCAATTTTTCCCCCCCCCCCCCCCCCCCCCACCCCCATTGGCATTTGGTAGCAGATGGTTCTCAACCCCTGGCTTAGCTCCTTGGTGTTTCCCCCCCCCCCCCCCCCCCTCCCCCCCGCCGGA >consensus_sequence_6 TCCTCCTCCCCCCCCCCCCCCCCCCTTCTCCCCCCCCCCCCCCCCCCCAATGGGAATTCGGTGGAGTGGCCTCTAAAACAATGCGCTTGGTAGTCCCTGGGGGGTTCTTTTCCCCCCCCCCCCCCCCCCCCCCCCCCTCGCCCGA >consensus_sequence_7 GCCTCTCCCCCCCCTTCCCCCCTGGGGTGTTCTTCCCATGGGTATCCCCCCCCCTTCCCGTTTAGTTGGGTGGGGGGTTTGTGTGCTCTGTGGTTTTCCCCCCCTTACGTTTGTTCCCCCCCCCCCCCCCCCCCCCCCCGCCGGA >consensus_sequence_8 TCCTCCTCCCCCCCCCCCCCCCCCCGTTCTCCCCCCCCCCCCCCCCCCATCCCACCCCTTGCGCAGTTGGTCGGTAGGTGGTTCTTCATTTTCGTCGGGGTTCCCCCTCCCCTTTCTCCCCCCCCCCCCCCCCCCCCCCCCGCGA >consensus_sequence_9 GCCTCCCCCCCCCCTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCTTGCGTTTGGGAGTTGCTCGGTGGGGTTTCCCAATTATTCGGTGGGGTGTTCCCTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGGA Experiment name = PWM-IN
>consensus_sequence_0 TGGAGCGCCACCGCTACATGCTGGACAGGAATGCACCAATCGTCATGCCGCTGCGTCGTCGCATCACGTCAATGATCGGGAGGACAGCGTGATGCAGAGTCTAAGCCGTGCGCTTGCATCAGCCTCCGCATGCCCCCGCCTCCCG >consensus_sequence_1 TAACACGCCCCCGCAAGGGGTGAACTATCGTTGCGTAATGAATGGAGCTCCCCCCCCAAAAACCTAGTTCGCGTGCGACACGCTCACGAACCCGTATCCAACGATGCCCCGCTACATCATCCCCCCCCCCCCCCCCCGCGGGGAG >consensus_sequence_2 TAACATACCCCCGCGCGGAAGGTAACCATCAAGTCGTCGGTCCCCATCCACCACCACCACTGCACATAAGCGCGAGCGCGGAGCGATCCCGAACAGATGGCACGTCGCCCCTTCTGCAATATGCCCCCCCCCCCCCGCCGCGGGC >consensus_sequence_3 CGACATGACGTGACGCGACCAGGGATGCGACATCGATGAGTCTCAGAACCCCCCCCCCGAAACATGACGCAACATCAAGTCAGACGTCCTGCAACTGTCCATCGTGAGAAGTCTCGCGAAGCGTCCCCCCTCCCGGCCAACCCCG >consensus_sequence_4 TAAGACGCAACATCTGCGAATGTGCAGCAGAAGTACTCTCGTCGCGACTCCTAAACCACCACAAAAACACCCCCGCGCAGATCCAGCGTTTTTCGTCCTGTGGCGCAACTTCCATCATGGGCTCGTCGCACGCCTGCGCATGCGC >consensus_sequence_5 TAAAAAAGATACGCAAATAGTTAGTATTTTCGCTCCCCTCCACCCCAAAACCGTTCCAGCGCCAAGAAAATCGTAAGGCGGAGCGAACGCACGCCTTTCGTCGCTCCCGCCTCTATGATCCACCCCCCCCCCCCCCGGGGGGCGG >consensus_sequence_6 GTCATGTCCCGTCACATGGACTCACTACCGATCGAACTCATCGACATGTCGCGTGCGTCACGTCCCCTGCATCATCGTGATGGGGCAATGAGCTCGCTGCGGAAAGGCGGTACTGCCGCATGTCCGCAACACCGGGCGCGCCAGT >consensus_sequence_7 TTACGTCATCACGCATGCTGAAACCTTAAGGAAAGGCGAGGTCAGACCGCGTTGCGTCAGATCGCACGCCCGCGCCTTAGTGGGATTGCATCGACGTTTATCGAGTCCGCATGCGTCATCCTCCCGCCCCTCCGCGCCACTCCCG >consensus_sequence_8 TGACTCATGACGTCATCTTGCAGACGGTTCGCCACTCCAAACAACCCCGACTTCCATCGAGTTAAATGCTTCGGGACATGCCTGAGCGGTATCGTTGGAAACGGAGTCCGCCTGCTGACGACTACGCCTCCGGCCCCGGACGCGC >consensus_sequence_9 TGACACAACACGCTTCAAGCGAGGACGTCACCTCCACCTGTCGTGACATCACGGGTGCGTTAGCACTCCGCAGTCATTAGACGTGTGCTCATGACTTCCCTGAAGCGGCGCGCTGCGAGATGCAAGCCTCCGCCCGCCCGCGCGC Experiment name = Sampled
>consensus_sequence_0 GGGAAAGTCTGATTCCCCAGTATTAAACCATCAAACCAACTGGAAAACCTATGGCTCGTTGCTCCTAAATTAACATTAAGAGTTCATCATTATACATAATCCGAGATGAACGGAAACACATAACTGAATCTTGAAGACGTATTAC >consensus_sequence_1 GGGGCTTCAAGGACCGATTGCGTACTGTCGGGACACCACCAATGGACCTACAGACCAAACATACTACTTCCCAGGATTAATGATCACTAACCCAATAGACCCCTTTCATTAGGTTTGGTTTCCGGCGTTCGTGATTTACCGTTAA >consensus_sequence_2 CGCCGTAAATAACCTAACCTGCTCACCGATATCCCCTAGGAGGCCCTCAAAGAACACCATATGGTAAAATCGATTACTCATATAAACTGAGAGGATGTGGGAAGTCCAAATCTCTTTTATATGAGCCAATATTCCACTTTCGCCA >consensus_sequence_3 GGTAGACGGTTCACCAATCATCGGAGCTATCTGCCAATTGAGTTGCTACCCAAGCGGTTAAGAACCATTTACCCTCAAGTAGGCCTGAAAATAAATGGCCTTCGTCACACTGGCCGCTAAGGTTTATCCTAACATATCCATTCAC >consensus_sequence_4 TGCCATCATAAGGGAAGAGTTGAATAGATGAAGACTCCACGTCGTTGCTCACGACCTCCGGTAAAATCTGGGGCTAGGAATACCATAAACGTTTGTCATGTCGCGATAAACTTAGTAGTGGGTCGTCTGAAATTTTAATTATCAC >consensus_sequence_5 GGCATACAATGTTATTATCTTTACCATAAGCTGTTTCAGCAACGCTATACGACTACCAATTAGAAGGTCGTATCTAGTCACAGGCGATGCCTTGGTTAACATGGACGAGCCTATGAACGATGTCCAAACCGATAAGTGGGTTGGA >consensus_sequence_6 GTGGGTACTATCCCGGTCTCACCACTCTCCTTAGATCTAATATATAATTTTGATGTTGACCCTCATCATTTGCTTACCGTTTTCAGAATGTATACAGTAGAAGAGACAGTAAATGTTGTATGACCGGAATGGTTTGTATTTTACC >consensus_sequence_7 TTTGAACTTATCAATCGTTTGTTTGTTCAGTCATGGCTGATTCACCACTTCACTCAACACGTCTGCTACACACGGGTTAATGAGCTCCCAAGGGCTTTGTTGTTTTTTGGTACCGTCATCTGCGTTTTGTGTCAATCCATCACGA >consensus_sequence_8 TGGAGACATAGTAAGGCTTGCAGAAGTTAGCTCTACTCAAACAGCTCACGAACCGACATATCACAGCGTTTGGGGAGTGCACTGTGTAGTAATACAGGTATCAGTATAAGCGATGGGGTTAAAAACTGTAAAGCTGCCGATACCC >consensus_sequence_9 GGCAGCCTGAGGCATGAAAATGAATCTTCCCTTCGGCCTGCGGATTAATAACAGTTCGTTAAACACACGGTTGGCATACGACCTAAACTCGGCAGTCGATTCCGTACAATGGAAAAGTTAGACATGCGTACGCTTTTGGTTTGAC Experiment name = Sampled-IN
>consensus_sequence_0 TGGTCGGTTAGAGTAACATATTCTTCTCTTATCGTCACGTAGGCCTTGCTCTTACGTAATGTTGTGATATTATCATAAGATTATAATGGCTACACATATCATCAAATGACGTGATATTAATAACGGCTTAAGAGAGGCTACGCCA >consensus_sequence_1 CGGAAATTAAAGAGAGCGTACGGCCGGAAGTGACGTCAACCATCGGCCTGCACCCTAATCACGGACATGACGTAAGTGTTTTATAACCGACTATCAAGCATACTGACGTCATATTGTCGTTGTGGTGTTCGTCATATAATGTAAC >consensus_sequence_2 GGCAGTGACGTAAGGCCTAAGCAACCCGATATGTTGGGTCTCGGTATGACGTAACCCATCGTGACTCATGCGCCTACGCTTAACTAACGCGATAGCAAGAGATGACGCCATTTGTTCGGAATGACGCAATCCTCTCTTAAGGGCC >consensus_sequence_3 TAGACTATAGGATGACGTCATCTAATGTAACCCGCCCTTGCCGTATGAATGACCCAACCACACATAACTGATGACGAATGATGACGTGACATAAGTTCGCTTCTGGCTACTTGCCGCAAACGGTGACGTCACCCTACCCTTTGGG >consensus_sequence_4 GTCGGACTCAACAGAACGGCTGACGTCATGCCGCCATGACCTCACGCGACATAAAGTGACGGCATATCACGACATATCGGGTGCGACAAGCTACGTCATATGACGTCACTTCAAGTGGGGTTACGCTTCCGGTTTGAGGTCTCAC >consensus_sequence_5 GGGTTACGTTATTACGTGATCTCGCCCTACGGGTGTCGCGAAACCCATCACGGGACTGAACGTGATTTCGTATCCAGTATGACGTAATACCGGACTTAAGGAACTCGCGCGGACAAAAGCGGTCTTTACGATTAATTATGTCAAC >consensus_sequence_6 GTGAGTCACGCAATGGTATGCCCTCACGCCCTCTAACTGACGCAATTGGACGACGATTGACCTCCGTTTCATGACAGAGCTTGCGGAATGAACTTACGCCAGAAAGCCAGATAGGCCATATGACGTAATAAGCCCTTATGGTGAC >consensus_sequence_7 GTTTTGTGACATGAATGTATGTTGGGTCATGTAACGTAACCTCTGACCTACGCCATATCCCCCCGCCCAAAGCAGGTTATGACGCAACTCACGTGACGTAATGAGAACGCTGACGTCATCTTGGGGTGGTTAGACGTCATGTCAG >consensus_sequence_8 CGGCATGACCTAAAGTCAACATGACGTAAGGTCTCGCGCAATAGTTCGGGCAACCGCCCACGCCGTTATCTCGGCCGAACATTGATTATGACTGACGGTTACGTCATACGGAAGTTAGGGGAGACGTCTCGATCTTCAGATTGGC >consensus_sequence_9 GCCGGCCTTTCGACTATAAAACAGGACTTGACGGCGCCAACCGACGTCATACGGGCCGTAATGCAAACCGGCGCCTTATGACGTACTTTCGATAGTCGGTTGGCCTTACGGGAGACATGACGTAAGCCTGACGTTATTGCGTCAC --- Comparison of loss convergence ---
--- Comparison of activity convergence ---
seq_template = 'N' * 145
rand_seed = 14755
#Run SeqProp Optimization
print("Running optimization experiment 'MPRA-DragoNN Activity Maximization'")
#Number of PWMs to generate per objective
n_sequences = 10
#Number of One-hot sequences to sample from the PWM at each grad step
n_samples = 1
#Number of epochs per objective to optimize
n_epochs = 1
#Number of steps (grad updates) per epoch
steps_per_epoch = 2000
#Number of One-hot validation sequences to sample from the PWM
n_valid_samples = 10
experiment_name_list = ['PWM', 'PWM-IN', 'Sampled', 'Sampled-IN']
eval_mode_list = ['pwm', 'pwm', 'sample', 'sample']
normalize_logits_list = [False, True, False, True]
result_dict = {
'PWM' : {},
'PWM-IN' : {},
'Sampled' : {},
'Sampled-IN' : {}
}
for experiment_name, eval_mode, normalize_logits in zip(experiment_name_list, eval_mode_list, normalize_logits_list) :
print("Experiment name = " + str(experiment_name))
print("Eval mode = " + str(eval_mode))
print("Normalize logits = " + str(normalize_logits))
K.clear_session()
set_seed(rand_seed)
target_output_ixs = [
[5]
]
sequence_templates = [
seq_template
]
losses, val_losses = zip(*[
get_earthmover_loss(
target_output_ixs[0],
pwm_start=0,
pwm_end=145,
pwm_target_bits=1.8,
pwm_entropy_weight=0.0
)
])
transforms = [
None
]
seqprop_predictors, valid_monitors, train_histories, valid_histories = run_seqprop(target_output_ixs, sequence_templates, losses, val_losses, transforms, n_sequences, n_samples, n_valid_samples, eval_mode, normalize_logits, n_epochs, steps_per_epoch)
seqprop_predictor, valid_monitor, train_history, valid_history = seqprop_predictors[0], valid_monitors[0], train_histories[0], valid_histories[0]
#Retrieve optimized PWMs and predicted activity scores
_, optimized_pwm, _, _ = seqprop_predictor.predict(x=None, steps=1)
consensus_seqs = []
for i in range(optimized_pwm.shape[0]) :
consensus_seq = ''
for j in range(optimized_pwm.shape[1]) :
max_nt_ix = np.argmax(optimized_pwm[i, j, :, 0])
if max_nt_ix == 0 :
consensus_seq += 'A'
elif max_nt_ix == 1 :
consensus_seq += 'C'
elif max_nt_ix == 2 :
consensus_seq += 'G'
elif max_nt_ix == 3 :
consensus_seq += 'T'
consensus_seqs.append(consensus_seq)
result_dict[experiment_name] = {
'seqprop_predictor' : seqprop_predictor,
'valid_monitor' : valid_monitor,
'train_history' : train_history,
'valid_history' : valid_history,
'consensus_seqs' : consensus_seqs
}
Running optimization experiment 'MPRA-DragoNN Activity Maximization'
Experiment name = PWM
Eval mode = pwm
Normalize logits = False
Optimizing objective 0...
Epoch 1/1
2000/2000 [==============================] - 115s 57ms/step - loss: -30.1295
Experiment name = PWM-IN
Eval mode = pwm
Normalize logits = True
Optimizing objective 0...
Epoch 1/1
2000/2000 [==============================] - 117s 58ms/step - loss: -62.9490
Experiment name = Sampled
Eval mode = sample
Normalize logits = False
Optimizing objective 0...
Epoch 1/1
2000/2000 [==============================] - 114s 57ms/step - loss: 0.1781
Experiment name = Sampled-IN
Eval mode = sample
Normalize logits = True
Optimizing objective 0...
Epoch 1/1
2000/2000 [==============================] - 116s 58ms/step - loss: -25.5014
save_figs = True
fig_prefix = "eval_seqprop_mpradragonn_k562_sv40_earthmover_experiment_2000_updates_"
for experiment_ix, experiment_name in enumerate(experiment_name_list) :
print("Experiment name = " + str(experiment_name))
seqprop_predictor = result_dict[experiment_name]['seqprop_predictor']
valid_monitor = result_dict[experiment_name]['valid_monitor']
train_history = result_dict[experiment_name]['train_history']
valid_history = result_dict[experiment_name]['valid_history']
consensus_seqs = result_dict[experiment_name]['consensus_seqs']
#Store statistics for optimized sequences
fig_name = fig_prefix + experiment_name + "_" if save_figs else None
valid_monitor.plot_metrics_and_pwm(fig_name=fig_name)
f = plt.figure(figsize=(6, 4))
l1 = plt.plot(np.arange(n_epochs * steps_per_epoch + 1), np.array(train_history.val_loss_history) / n_sequences, color='darkgreen', linewidth=2, linestyle='-', label='Train')
l2 = plt.plot(np.arange(n_epochs * steps_per_epoch + 1), np.array(valid_history.val_loss_history), color='darkorange', linewidth=2, linestyle='--', label='Valid')
plt.xlabel("Weight Updates", fontsize=16)
plt.ylabel("Loss", fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim(0, n_epochs * steps_per_epoch)
plt.ylim(min(np.min(train_history.val_loss_history) / n_sequences, np.min(valid_history.val_loss_history)), max(np.max(train_history.val_loss_history) / n_sequences, np.max(valid_history.val_loss_history)))
plt.legend(handles=[l1[0], l2[0]], fontsize=14)
plt.tight_layout()
if save_figs :
plt.savefig(fig_name + '_loss.png', transparent=True, dpi=150)
plt.savefig(fig_name + '_loss.svg')
plt.savefig(fig_name + '_loss.eps')
plt.show()
for i in range(len(consensus_seqs)) :
consensus_seq = consensus_seqs[i]
print(">consensus_sequence_" + str(i))
print(consensus_seq)
print("--- Comparison of loss convergence ---")
for history_prefix in ['train', 'valid'] :
loss_normalizer = n_sequences if history_prefix == 'train' else 1.
y_label_prefix = 'Train' if history_prefix == 'train' else 'Validation'
f = plt.figure(figsize=(6, 4))
ls = []
min_y_val = -3.5
max_y_val = 0.1
for experiment_ix, experiment_name in enumerate(experiment_name_list) :
curr_history = result_dict[experiment_name][history_prefix + '_history']
l1 = plt.plot(np.arange(n_epochs * steps_per_epoch + 1), np.array(curr_history.val_loss_history) / loss_normalizer, linewidth=2, linestyle='-', label=experiment_name)
ls.append(l1[0])
#max_y_val = max(max_y_val, np.max(curr_history.val_loss_history) / loss_normalizer)
plt.xlabel("Weight Updates", fontsize=16)
plt.ylabel(y_label_prefix + " Loss", fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim(0, n_epochs * steps_per_epoch)
plt.ylim(min_y_val, max_y_val)
plt.legend(handles=ls, fontsize=14)
plt.tight_layout()
if save_figs :
plt.savefig(fig_prefix + history_prefix + '_loss_cmp.png', transparent=True, dpi=150)
plt.savefig(fig_prefix + history_prefix + '_loss_cmp.svg')
plt.savefig(fig_prefix + history_prefix + '_loss_cmp.eps')
plt.show()
print("--- Comparison of activity convergence ---")
f = plt.figure(figsize=(6, 4))
ls = []
min_y_val = -0.1
max_y_val = 3.5
for experiment_ix, experiment_name in enumerate(experiment_name_list) :
curr_monitor = result_dict[experiment_name]['valid_monitor']
meas_history = curr_monitor.measure_history
meas_history = [np.mean(meas_history[k]) for k in range(len(meas_history))]
l1 = plt.plot(np.arange(n_epochs * steps_per_epoch + 1), np.array(meas_history), linewidth=2, linestyle='-', label=experiment_name)
ls.append(l1[0])
#max_y_val = max(max_y_val, np.max(meas_history))
plt.xlabel("Weight Updates", fontsize=16)
plt.ylabel("Validation Activity Score", fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim(0, n_epochs * steps_per_epoch)
plt.ylim(min_y_val, max_y_val)
plt.legend(handles=ls, fontsize=14)
plt.tight_layout()
if save_figs :
plt.savefig(fig_prefix + '_valid_logodds_cmp.png', transparent=True, dpi=150)
plt.savefig(fig_prefix + '_valid_logodds_cmp.svg')
plt.savefig(fig_prefix + '_valid_logodds_cmp.eps')
plt.show()
Experiment name = PWM
>consensus_sequence_0 TCCCCTCCCCGGGCCCCCCCCCCCCGCAACCCCCCCCCCCCCCCCCCCCCCGGCGTTTCCCCTTCCTCAGGCCTTTTCTTTTTTTCTTTCCCCCCCGCGGGTTCCCCCCCCCCCCCCTCCCCCCCCCCCCCCCCCCCCCCCGGGA >consensus_sequence_1 TCCCCTTCCCGGGCCCCCCCCCCCCGGGGTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGCGGATGCGCGTCAGTCTTTTTTCTTCCCCCCCGGGGGTTCCCCCCCCCCGTCTCTTTTCCCCCCCCCCCCCCCCCCGCCCGA >consensus_sequence_2 TCCCCTTCCCGGGGCCCCTCCCCCCCCCCCCCCCCCCCCCATCCCCCCCCCCCCCCCCGCGCGGACGTTCGGGGTTTTTTTTTTTTTTTCCCGCCGGTCCCCTTCTCCCCCCGTGTTTTTCCCCCCCCCCCCCCCCCGCGCCGGG >consensus_sequence_3 GCCTCTTCCCCCCCCCCCCCCCCGCCGGGTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCGCGCGTTCCCTTCCTCCTTTTTTTTTTTTTCCCCCCGCGCGTTTCTCCCCCCCCCCTCTCCCCCCCCCCCCCCCCCCCGCGCCCGA >consensus_sequence_4 CCCTCTCCCGGGGGGCCCCCCCCCCCCCCCCCGGGGAATTCCCCCCCCCCCCCCCGGGCCTGCCGACCCCTTCGACTGTTTCTCTTTCCCCCCCCCCGGGTCTCCCCCCGCCGGTTTTCCCCCCCCCCCCCCCCCCCGCGCCCGG >consensus_sequence_5 GCCTCTTCCCGGGCCTCCCCCCCCCGGGTTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGCAATTCGTTTCGTCCGTTTTTTCCCTCCCCTCCGGCGCCTCCCCCCCCGTCTTCCCCCCCCCCCCCCCCCCCCCCGCGCCCGA >consensus_sequence_6 TCCTCTCCCCCCCCCCCCCCCCCCCGCGCCCCCCCCCCCCCCCCCCCCCGGGGGAACCCCGGCCCTTTCCCCTTTCCCTCCCCCCCCTGGTTCCCCCCCCCGGTTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGCGCCCGA >consensus_sequence_7 TCCCCCCCGCGCCTCCCCCCCGCCCGTTTTTCCCCCCCTCCGTTTCCCCCCCCCCCCCCGCGCGTGCACGCCGGGCCTTTTTTTGTTTTTTGTTTTTCCCCCCCCCCCGCGTGTTCCCCCCCCCCCCCCCCCCCCCCGCGCCCGA >consensus_sequence_8 GCCTCTTCCGGGCCCCCCCCCCCCCGGCGTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGCGTGACTCGTGAGTTTTTTCTCTTTCCCCGCCCGGGTTTCCCCCCCCCGCCCATCTCCCCCCCCCCCCCCCCCGCGCCCGA >consensus_sequence_9 CCCTCTTCCCGGGCCCCCCCCCCCCCCCCCCCCCGGCGCTTTTCCCCCCCCCCCCGCGGGTGCCCGTCCCTCCGTGGCCTTTTCCCCTTCCCCCCCGCCGCTTCCCCCTCCCGGATCCCCCCCCCCCCCCCCCCCCCGCGCCCGG Experiment name = PWM-IN
>consensus_sequence_0 TGACGCATGATGTCGGGGGTGACTCCAGCTGTGCACCAATCGTCACGCCACTTCCGGATGTCACGTCCCGTATGATCAGGAGGAGTGCGTCGTCCAGAGTCTAAGACGTGCGGATGCATCAGCCTCCGCCTGCCCAAGCATGCTC >consensus_sequence_1 GGACGTCACACATGTCCGGGTGTACTGACGTAGCGAGGTGCATGTAGCTCCGCCCCCTGATGGTAAGATCCCGTGCATCAGGGTGACGATCCCTAATGCATGCGTGCCTCGTTACATCATTCCCCTCCCCCCCCCCCGGCGGGAG >consensus_sequence_2 CGACATGTCCTCTCGCGAGATTTGGGCAACATGTCGTAGGTCTCGTCGCACCTACGCAACACCTCTTCCGGGACATGCCGGAGCGATCCGGAACTCATCGAACGCCGGAACTGACGCAATATGCCCCCCCCCTCCCGCCGCGGGC >consensus_sequence_3 GGAAATGACGTATCGCGAGCACGCATGATTCATCCATGGGACTCAGCACGACCGTGACGAAGCATGCCGCATCATCAAGTCTGACATCCTGCGACTGCCCATGGTCACAAGTCTCGCGATATGTCCCCCCTCCCCGCCCCGCCCG >consensus_sequence_4 TATGATGCAACATGTCCGCGACTACAGCAGAAGCACTCTCGACCCGACCCTGCCGGAAGGACTTCCGCACCTCCGCGCGGATCCAGCGTTTGTCGACCTGAGTCGTCACTTGACTCATCAAGTCGTCGCAAGCTTGCGCATGCGC >consensus_sequence_5 TGCGCATGCTGATGCAATATTAAGGAATCTGAGTGCGTCGTCCCCCCCTGCTGTTCCCGTGCACTCAACATGTCAGGGGGGAACGCACGCACGCGTTACCTCCCGCCCCGTTGCATCATGGAACCCCCCCCCCCCGGGGGGGGGG >consensus_sequence_6 GTCACGTCGGGTCAGGCGGACACATTTCCCATGGACCTCAACGACATGTCTCGTACGTCACGTCCCCTGCATGATCGACATGTGACATTGACGTCGCTGTCCCTACGCGGTCCTGCCGCATGTCCGCAACACCGCGCGCGCCACT >consensus_sequence_7 TTACATCATGACGCTTGCGACGTTGCTCAGGAATGGCGAGGACTCACGCCGTTGCATCATGTCGCACGCCCGCGCCTTAATGGGCTTGCATCGACGTTTTACGGAACCGGATGCGTCATCCTCCCGTCCCGTCGCGCCGGATCCG >consensus_sequence_8 GGACGCGTGACGTCGTCAGGACGGACGGAATGCACCTCGCTCTCCCCCCCCTTGCGTCGACTCAGATGCTTCGGGACATGCCTGAGCGGTATCGTTGGATATGGCGTACGCATCCTGACGACTTCGCCGCATGCCCGCCACGCGC >consensus_sequence_9 TAAAACAGGACGTCTCATGCGTTGTCGTCACGACCAAGTGTCGACCCGTCGCGTGTGGCGATGATCTCCGCATTCATTGGACGCATGCTCAAGACGTCCCTGCAGGCGGATGTTCCGAAATGCAAGCCTCCGCACGCCCGCGGGA Experiment name = Sampled
>consensus_sequence_0 TGGAATAATTGGGACACAATAATTACACCACTAAACCAATTAACACTATCATAGCTCGATAATAACTAATTTACTTTTATAATTCGTGGTTGAAAATTTTTGGATACGATGGTTTATAAGTATTGACATTAACTTTAAATTTCCC >consensus_sequence_1 TGGACTTTAAAGACCAAATTAAGGCTCAAAGCCAATAACCTATTGAAATCCCGACACTATAATAAATAACAATCCATCATTAATCATAAATATCTATATATTGATTGAACGTTTAGCGATTACTATCTTCATCATTAAAACTCCA >consensus_sequence_2 TGCGATATATAAGGCCATATAATTTTGTACAAACCATACACTTATGAAACTTATTAATACATCTACTAATTTTATCAATATAATTATCGAAAAGGTTAATATAATACTAATTTGTGGTATAAAATGCTTTAATTTTATTATTCCA >consensus_sequence_3 TGGAATAATTATTTCAACATAAGCAAATACCTACAAAACCTGTCAACTCTACTACATTTAATCATTTGTTAAACTTAATTTGATATTAATTTAAATTGTATTTATAAAATTGGTTTACGTGATATGTTATAATATTATTATTCCA >consensus_sequence_4 TGGAATAATATTTAATAAAAAAAATTATTACTTACCCCACTTGTTGGCCCTCATAATCAACTAAAATATTGACAAACGTAGCAAATTTGTGTTTAAGATTACCAAATTTGTGTTTTAGTTATTTTTTTTAAAATTTATTATTCCC >consensus_sequence_5 TGCAATAAATAATATTTATATAATCATAATCTACTCTACCAACTATATAATCTTACTCGTTATAATTTCATATTTATACAATTGCGTTAAATAAATTAATATCTATTTGTATATGATAAATATAATTTTTAACATTTTTATTCCA >consensus_sequence_6 TGCATTAATTTTAACTAACTACAAATACAACCTACTCTCCGAACCAATATTGATTATGACTATCATAAATTTTATACTATTTTTGGATATAACCAATTTGATTAGAAGTTGATTTTTAAAATAACATATTAATTTTATTATTCCC >consensus_sequence_7 TGGTAATGTAGCGAATATTTATAAACTCAAACTAAACACATTTAGTTGTACCAACAACACCACTGTTTCTCTAAACTTTTTATCAAACACATTTAAATAAGTTATTTTGGCACATTGTTTTAATTTATTGATTATTAATTTTCCA >consensus_sequence_8 TGGAATTTTAAGTATAAATATACAATTTACCAATACCCCAACAACTCACAAACCGATAACTGCAAAATATTCATTATTTTACTTATAATTATGAAATGAATCTGTATAGACGTAATGATTAAAAATAGTGAACCTATTTATTCCA >consensus_sequence_9 TGCAAAATTTTGAACCAACTACAAAAAACCGCTCCCCCTGTCCCTCTCCTCACTTACGCTAACAAAAAGTTTGTAAAGTGACCAAAACGTGATCTTTGAATAAATTTATATGAAAATTAAATTTATCTTTTGCTTTTTTATTCAA Experiment name = Sampled-IN
>consensus_sequence_0 TGGTTCGGCACGCTGACGCAATATGGCGGTCTCCCCGTCACGAGGGGCGGGCGACGCCATAAGGGGGCGGCAACGCTACGTCATCCCGACGGGCATGCAGATGACGTCATCCCAAGGCTCCGCCATCTTAGGTTGCGTCACTTCC >consensus_sequence_1 GTGAAATATCGCGAGATCCGGCCCCGGAAATGATGTAATCCGTGCGTCATCGCCCGTCCTTGCGGGGCGTCGTTTGACCGAAAGGGGGGGGGGTGACGTAACGCAAAGTCATCGCTCGACCGGAAATGATGTAATCTTGCGTCAC >consensus_sequence_2 GGAAATGACGTCATCCCGCCGGAACACGTGATACCGCCCCGCGACAGCCCGCCCTTGCGCGAACGAAATCACGGGGCGGTCACGCGACGTTAAGGCGGGGGGTGACGCAATCGTAACCCGATGACGTCATACTTCTAGTATGGGC >consensus_sequence_3 GGGATGACGTCATTTCGTCAGCCCCCGCAGACGCGATATGGCGTCCCGTCGTCTGGCCGTCACGGGGCGCACGTCACACGGCAAGGGGGCGCCACACGGCATATGACGTATTGCTCCGAATGCTGACGATATGGCGTCATATTGA >consensus_sequence_4 CGGGCCCGAACAGGAAGTGATGACGTCATCTCGCCGAAATCTCGCGAGAGATTTTGCGACGTCATGACGCGGCATAGCGATCGATCCGGGCCCCGCCATCTTGCGTCAGCCCAAGTGCGCATGCGCTTACGGTATGACGTCACAC >consensus_sequence_5 GTCGGACGGCATGACGTCATCACGCGGGACGGATGTCGCGAGACTTATCGCGCGGCACTCCCCCCATAGGGGGAGGGCATGACGTATGCCCGGACGTTTACGCCCCGCCCCCCCGTTATGACGCGATATGACGGCATGACGTCAC >consensus_sequence_6 GCCTATGACGTCATTTTGTCGCGTCATACGGTCGGCCTGACGCCATAAGGGGGGGCATGACGTAACTGACACGGCATCGTTACCGGCCCGAACATCCGGTAAAGCGCCGCTGACGCAATATGGCGCCCCCCCCCGGAAATGGCGC >consensus_sequence_7 GGGCCGTGACGTGACGTAACCCCCCGTCACGTGAACGAACGCCCCCGTTACGCCATCTTGCGCCGCCGTTTTGTCGGAAAGAGGGGCGGGTCGTCTGGACGTCATAACGATGACGCCATCTTGGGCCGCCGCAATATGACGTCAT >consensus_sequence_8 GGAAATGACGTAAACGTCACATGACGTAGTCTCGCGAGAAACGATGGCGTCATGCCATCCCCCCGTTCTTTCGCGCGCCCGTTCGTCATTACGGGGGGTCACGACATCCCGATATGACGTAATTCGCCGCGCCGCCCTGATTGGC >consensus_sequence_9 GCCCGCCATTTTGGGCCCCGCCATCTTGGGGTGACGTAACCCCCCGGCATAAGGGGGGTGGAACAAACGCACGTCATTAGGGGGCGGTCCGGATGTCGTTTCGGCTAGGCCCCCCGATGACGTAATGCTGACAGTATGACGTCAC --- Comparison of loss convergence ---
--- Comparison of activity convergence ---
seq_template = 'N' * 145
rand_seed = 14755
#Run SeqProp Optimization
print("Running optimization experiment 'MPRA-DragoNN Activity Maximization'")
#Number of PWMs to generate per objective
n_sequences = 10
#Number of One-hot sequences to sample from the PWM at each grad step
n_samples = 1
#Number of epochs per objective to optimize
n_epochs = 1
#Number of steps (grad updates) per epoch
steps_per_epoch = 20000
#Number of One-hot validation sequences to sample from the PWM
n_valid_samples = 10
experiment_name_list = ['PWM', 'PWM-IN', 'Sampled', 'Sampled-IN']
eval_mode_list = ['pwm', 'pwm', 'sample', 'sample']
normalize_logits_list = [False, True, False, True]
result_dict = {
'PWM' : {},
'PWM-IN' : {},
'Sampled' : {},
'Sampled-IN' : {}
}
for experiment_name, eval_mode, normalize_logits in zip(experiment_name_list, eval_mode_list, normalize_logits_list) :
print("Experiment name = " + str(experiment_name))
print("Eval mode = " + str(eval_mode))
print("Normalize logits = " + str(normalize_logits))
K.clear_session()
set_seed(rand_seed)
target_output_ixs = [
[5]
]
sequence_templates = [
seq_template
]
losses, val_losses = zip(*[
get_earthmover_loss(
target_output_ixs[0],
pwm_start=0,
pwm_end=145,
pwm_target_bits=1.8,
pwm_entropy_weight=0.0
)
])
transforms = [
None
]
seqprop_predictors, valid_monitors, train_histories, valid_histories = run_seqprop(target_output_ixs, sequence_templates, losses, val_losses, transforms, n_sequences, n_samples, n_valid_samples, eval_mode, normalize_logits, n_epochs, steps_per_epoch)
seqprop_predictor, valid_monitor, train_history, valid_history = seqprop_predictors[0], valid_monitors[0], train_histories[0], valid_histories[0]
#Retrieve optimized PWMs and predicted activity scores
_, optimized_pwm, _, _ = seqprop_predictor.predict(x=None, steps=1)
consensus_seqs = []
for i in range(optimized_pwm.shape[0]) :
consensus_seq = ''
for j in range(optimized_pwm.shape[1]) :
max_nt_ix = np.argmax(optimized_pwm[i, j, :, 0])
if max_nt_ix == 0 :
consensus_seq += 'A'
elif max_nt_ix == 1 :
consensus_seq += 'C'
elif max_nt_ix == 2 :
consensus_seq += 'G'
elif max_nt_ix == 3 :
consensus_seq += 'T'
consensus_seqs.append(consensus_seq)
result_dict[experiment_name] = {
'seqprop_predictor' : seqprop_predictor,
'valid_monitor' : valid_monitor,
'train_history' : train_history,
'valid_history' : valid_history,
'consensus_seqs' : consensus_seqs
}
Running optimization experiment 'MPRA-DragoNN Activity Maximization'
Experiment name = PWM
Eval mode = pwm
Normalize logits = False
Optimizing objective 0...
Epoch 1/1
20000/20000 [==============================] - 1137s 57ms/step - loss: -43.8387
Experiment name = PWM-IN
Eval mode = pwm
Normalize logits = True
Optimizing objective 0...
Epoch 1/1
20000/20000 [==============================] - 1157s 58ms/step - loss: -70.7330
Experiment name = Sampled
Eval mode = sample
Normalize logits = False
Optimizing objective 0...
Epoch 1/1
20000/20000 [==============================] - 1147s 57ms/step - loss: -4.2241
Experiment name = Sampled-IN
Eval mode = sample
Normalize logits = True
Optimizing objective 0...
Epoch 1/1
20000/20000 [==============================] - 1164s 58ms/step - loss: -33.8914
save_figs = True
fig_prefix = "eval_seqprop_mpradragonn_k562_sv40_earthmover_experiment_20000_updates_"
for experiment_ix, experiment_name in enumerate(experiment_name_list) :
print("Experiment name = " + str(experiment_name))
seqprop_predictor = result_dict[experiment_name]['seqprop_predictor']
valid_monitor = result_dict[experiment_name]['valid_monitor']
train_history = result_dict[experiment_name]['train_history']
valid_history = result_dict[experiment_name]['valid_history']
consensus_seqs = result_dict[experiment_name]['consensus_seqs']
#Store statistics for optimized sequences
fig_name = fig_prefix + experiment_name + "_" if save_figs else None
valid_monitor.plot_metrics_and_pwm(fig_name=fig_name)
f = plt.figure(figsize=(6, 4))
l1 = plt.plot(np.arange(n_epochs * steps_per_epoch + 1), np.array(train_history.val_loss_history) / n_sequences, color='darkgreen', linewidth=2, linestyle='-', label='Train')
l2 = plt.plot(np.arange(n_epochs * steps_per_epoch + 1), np.array(valid_history.val_loss_history), color='darkorange', linewidth=2, linestyle='--', label='Valid')
plt.xlabel("Weight Updates", fontsize=16)
plt.ylabel("Loss", fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim(0, n_epochs * steps_per_epoch)
plt.ylim(min(np.min(train_history.val_loss_history) / n_sequences, np.min(valid_history.val_loss_history)), max(np.max(train_history.val_loss_history) / n_sequences, np.max(valid_history.val_loss_history)))
plt.legend(handles=[l1[0], l2[0]], fontsize=14)
plt.tight_layout()
if save_figs :
plt.savefig(fig_name + '_loss.png', transparent=True, dpi=150)
plt.savefig(fig_name + '_loss.svg')
plt.savefig(fig_name + '_loss.eps')
plt.show()
for i in range(len(consensus_seqs)) :
consensus_seq = consensus_seqs[i]
print(">consensus_sequence_" + str(i))
print(consensus_seq)
print("--- Comparison of loss convergence ---")
for history_prefix in ['train', 'valid'] :
loss_normalizer = n_sequences if history_prefix == 'train' else 1.
y_label_prefix = 'Train' if history_prefix == 'train' else 'Validation'
f = plt.figure(figsize=(6, 4))
ls = []
min_y_val = -4.0
max_y_val = 0.1
for experiment_ix, experiment_name in enumerate(experiment_name_list) :
curr_history = result_dict[experiment_name][history_prefix + '_history']
l1 = plt.plot(np.arange(n_epochs * steps_per_epoch + 1), np.array(curr_history.val_loss_history) / loss_normalizer, linewidth=2, linestyle='-', label=experiment_name)
ls.append(l1[0])
#max_y_val = max(max_y_val, np.max(curr_history.val_loss_history) / loss_normalizer)
plt.xlabel("Weight Updates", fontsize=16)
plt.ylabel(y_label_prefix + " Loss", fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim(0, n_epochs * steps_per_epoch)
plt.ylim(min_y_val, max_y_val)
plt.legend(handles=ls, fontsize=14)
plt.tight_layout()
if save_figs :
plt.savefig(fig_prefix + history_prefix + '_loss_cmp.png', transparent=True, dpi=150)
plt.savefig(fig_prefix + history_prefix + '_loss_cmp.svg')
plt.savefig(fig_prefix + history_prefix + '_loss_cmp.eps')
plt.show()
print("--- Comparison of activity convergence ---")
f = plt.figure(figsize=(6, 4))
ls = []
min_y_val = -0.1
max_y_val = 4.0
for experiment_ix, experiment_name in enumerate(experiment_name_list) :
curr_monitor = result_dict[experiment_name]['valid_monitor']
meas_history = curr_monitor.measure_history
meas_history = [np.mean(meas_history[k]) for k in range(len(meas_history))]
l1 = plt.plot(np.arange(n_epochs * steps_per_epoch + 1), np.array(meas_history), linewidth=2, linestyle='-', label=experiment_name)
ls.append(l1[0])
#max_y_val = max(max_y_val, np.max(meas_history))
plt.xlabel("Weight Updates", fontsize=16)
plt.ylabel("Validation Activity Score", fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim(0, n_epochs * steps_per_epoch)
plt.ylim(min_y_val, max_y_val)
plt.legend(handles=ls, fontsize=14)
plt.tight_layout()
if save_figs :
plt.savefig(fig_prefix + '_valid_logodds_cmp.png', transparent=True, dpi=150)
plt.savefig(fig_prefix + '_valid_logodds_cmp.svg')
plt.savefig(fig_prefix + '_valid_logodds_cmp.eps')
plt.show()
Experiment name = PWM
>consensus_sequence_0 CCCCCTCCCCGGGGCCCCCCCCCCCGGAATTCCCCCCCCCCCCCCCCCCGGGGGGGATGCGCCTGTTCCGGTCCTTCCGTTCCGCACGTCCCCGCCGGGGCCCCCCCCCCCCCCCCTACCAACCCCCCCCCCCCCCCCGGGGGGC >consensus_sequence_1 GCCACTTCCGGGTCTCCCCCCCCCGCGGGTTCCCCCCCCCCGCGCCTCCCCCCCCCCCCGGGGCGGATGCGCGTCAGTCCTTTCCCCTCCCCCCCCGGGGCCCCCCCCCCGCGTCCCCCCCCCCCCCCCCCCCCCCCGGGCGGGC >consensus_sequence_2 CCCCCTCCCCGCGGCTCCGCCCCCCCCCCCCCCCCCCGGCTTCCCCCCCCCCCCCCCGGGGGGGATGTCCGCGAGTCCTTTCCCCACTTCCCGCACGCCCGCGTCCCCCTCCGCGTCCCCCCCCCCCCCCCCCCCCCGGGGGGGG >consensus_sequence_3 GCCACTTCCGGTGTCGCCTCCCCGGGGGGTTTCCCCCCCCCCCCCCCCCCCCCCCCCGGGCGTCTGCACTTCCCGAATGTCTTCCCTTCCCGCTTGCGGATGCACTTCCCCCCCCCAGCAACCCCCCCCCCCCCCCCGGGGGGGG >consensus_sequence_4 CCCCCCCCCGGGGGGGCCCCCCCCCCCCCCCGGGGGAATCCCCCCCCCCCCCCCCGGGCCAGGCACTCCCTTCGTCTGTTTTTCCAATTCCCCCCGGGCGTCCCCCCCCCCGGGACCCCCCCCCCCCCCCCCCCGGCGCGCCTGC >consensus_sequence_5 GCCACTTCCGCCCGCTCCGGTCCGGGGGGTTCCCCCCCCCCCCCCCCCCCCCCCCCCGGCGGAAGTTCGTGTCGTGTGTTTTCCGCTTCTCCTGCGCATGCCCCCCCCCCGCGTTTCCCCCCCCCCCCCCCCCCCCCGGGGGGGG >consensus_sequence_6 GCCTCTTCCGGGCCCCCCCCCCCCCGGGGTGTTCCCCCCCCCCCCCCCGGGGGGAACTCCGTCGCGTGCGCCTTTTCCCCCTGCAAGTTCCGCCCCCCCGGGGCGCCCCCCCCTCCCCAACCCCCCCCCCCCCCCCCGCGCCGGG >consensus_sequence_7 CCCCCCCCGCGCCTCCGCGCCGCCGGGGCCCTCCCCGCTGCCGGTTCCCCCCCCCCCGCGCGGATGCACGCCCTGCGTTTTCCGCATGCTCCCGGTCCCCCCCCCCCCGCGGGGCCCCCCCCCCCCCCCCCCCCCCCGCGCCGGC >consensus_sequence_8 GCAACTTCCGGGACGTCTCTTCCGGGGGATTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGGGGGTCTGTCGTGTGTTTCGCGAGATCGCCTGTCCGGGCTTCCCCCCCCCGCCGCGGAACCCCCCCCCCCCCCCCGCGGGGGC >consensus_sequence_9 CCCCCTTCCCGGGGGGGCCCCCCCTTCCCCCGCTGCAACTCCCCCCCCCCCCCCCGCGGGTGCCAGTCCGTCCCTTGCGTTTCCGCCTCCCCCGCCGGGTACTCCCCCCCCGGGATCCCCCCCCCCCCCCCCCCCGCGCGCGGGC Experiment name = PWM-IN
>consensus_sequence_0 CGGAACATCACTTCCGGTTTGACACGACATCTGCATCCCACGTTACCCTTCCGGAAGAACGGATGTCCGGAATCATGAGGAGGAGTGCGTCGTGCCGAGTCTTTGCCGTCCGGATGCATCAGCCTCCGCATGCCCGCGCATGCGC >consensus_sequence_1 CGACACATCTCCGCATGGGGAGATCTCACGTTGCGTATCGCATCCCCCTCCGCCTTGTGTCAGGACGTACGCGTGCATGATGGTCCGGAACCTGAATGCATGCATGCCTCGTTACATCATCCGCCCCCCCCCCCCCCGGGGCGCG >consensus_sequence_2 TATCATATCAACGCATGTCCGGACAGCAACATGACGTTCGCATCGTCGCATGACCCAACCTCGCCATGTGGCGCATGCATGATCGACCCGGAAGAGAGGGGACGACGGACCTGACGCAATATGCCCCCCCCCCCCCTCCGCGGGC >consensus_sequence_3 GGAAATGGCGCATCACGAAAAGACTTGTGGCATCGATGAGTCTCAGCCCCGCCTTCCATCAGCATGTCGCTGGGTGATGTGACTCATCCCTCAACAGTACGTCATGACAACTCTTGCGAAGGGTCCCGCCTCCCGCGCATGCCCG >consensus_sequence_4 TCATGTCCCGCCACGTCGGAAGTTCAGCAGAAGCACTCTCGTCGGAACTTCCGCATCACTTCAAACACTTCCGCGCGCAGATCCAGCGTTTCTGAACCTCAGACGCAAGTTCCATCATCAACTCGTCTCCCGACCGCGCATGCGC >consensus_sequence_5 TGCGCATGCTCTTCCGTTTGCGACGTATCCCTATCCGCATCACTCCCACTCACATGTAGAGCACGCAACATGTCCGGGTGGAGCGAACGCATCCGTTACGTCCGCCGCCGTTACATCATGCTTCCCCCCCCCCCCCGGGGGGGGG >consensus_sequence_6 GTCATGTCCGGTCGCGTGCACGCACTTCCGAACGACCTCATCGACATGTCTCGTCCGTCACGTCAGCTGCATGATGGTCATGTAGCGATGACGTCGCTGCCCCTAGGCGCGTCAGACGCATGTCCGCAACATCGGGCGCCCCACT >consensus_sequence_7 TTACGTCATGCCGCATGTCCGGACGTACAGGAATGGCGAGGAGACATGTCGTTGCGTCAGATCGCACGCCCCCGCCTTAGTGGGATTGCATCGACGTTTAACGAATCCGGATGCGTCATCCTCCCGTCCCTTCACGGGAGATCCG >consensus_sequence_8 CGACGCATGATGCAATTCTTCAGACGTATCGCGTCGTCGCTCTCCCCCCACAACCATCGAGTCATGTGCGACGGGACATGCCTGTGCGGTATCGATCCATGCGGCGTGCGCATGATGATGACTACCCCTCCTGCCCCGCCGGCGC >consensus_sequence_9 GGACGCATGCCGCATGATGCGTACACGTCTCGACCACCAGTCGACATGTCGGACGCACGCAAGGTCACCGTACTCATGTCAGGCGGGGTCATGACTTCGACGCACGGCGGACTTGCGTAATGCACGCCTCCGCATGCGCATGCTC Experiment name = Sampled
>consensus_sequence_0 TAGAATATAATGTTATAATATAATATAATATAATAAAAAAAAAAAAAAAAATAAAATAAAATATAATAATATAATAAAAAAAAATATAATATAATATTATAATATAATAAAATAATAAAAAAAAAAAATTAAAATTCTTATGCTA >consensus_sequence_1 TAGAATATAATGTTATATTATAAAATATAATATAATATAAAATAAAAAAATAAAAAATAAAATATAATATAATATAATAAAAAAAAAAATATAATATAATAAAAAAAAATAAATTATAAAATAAAAATTTAAAATTCTTATGCTA >consensus_sequence_2 TAGAATATAATATTATATTATATAATATAATATAAAAAAAATAATAAAATAAAATAAAAATAAAATATAATATTATAATATAATAATAATATAATATAATAAAATATAATATAATATAATAATAAAATTAAAATATATTATGCTA >consensus_sequence_3 TATAATATAATGTTATATTATAATATAATAATAAAATAAAATAAAAAAATAATAATATAATAATATAATATAATAAAATAAAATAATATAATATAATATAAAAAAAAAAAATAATATAATATTATAATATAAAATTCTTATGCTA >consensus_sequence_4 TAGAATATAATGTTATAATATAATATAATAATATAATATAATAAAATATAATATAATATAATATAATATAATATAAAAAAAAATAATATAATATAATATAATAAAATAATATTATAATATAATAAAATTAAAATATATTATGCTA >consensus_sequence_5 TATAATATAATGTTATAATATAATAATATAATAATATAAAATAAAAAAATAATATTATATTATATTATATTATATTATAAAAAAAAATATAAAAAAAAAAAAAATATAATATAATATTATAATATAATATAAAATTCTTATGCTA >consensus_sequence_6 TATAATATAATGATATTATTATAATAATATAATATAATATAATAAAATAATATAATATAATATAATAAAATAATAAAATAAAATAATATTATAATATAATATAATATAAAAAATAATAAAAAAAAAATATAAAATTCTTATGCTA >consensus_sequence_7 TATAATATAATATTATATTATAAAATAAAAAATAAAAAAATATAATAAAATATAATATAATATTATAATATAATAAAAAAAAAAAAAAAAATAAAATAAAAAAATAATATAATATAATAAAAAAAAAATTAAAATTCTTATGCCA >consensus_sequence_8 TATAATATAAGGTAATATATTATATTATATAATATATAATATAATATAATATAATAATAAAATATAATATAATAAAATAAAAAAAAAATATAATATAATATAATATAATAAAATATAAAAAAAAAAAATTAAAATTCTTATGCTA >consensus_sequence_9 TATAATATAATGTAATATAATAAAAAAAAAAAAAAAAAAAATAAAAAAATATAATATAATATAATATAATATAATATTATAATATAATATAATATATTATAAAATATAAAATATAATATAAAATAAATTAAAATATCTTATGCTA Experiment name = Sampled-IN
>consensus_sequence_0 GTGGCGCCCCGCGTTACGCCATCTTGGGGCAGCCCGGAAATGCCGGGGTCCGATCGGCATTAACCCCCCGGAAAAAGGGGGGCCCCCGTATTGTTAAGGGCGGACGTCATTCCGCCGTCCCGCCATCTTGGGTTACGTCACTTCC >consensus_sequence_1 CGGATATGACGTCACTTCCTGTCCCGGAAATGACGTCATCCGTCCGGAATCGCCCGTCATTTCGGGCCCGCCTTTGTCGCCATAAGGGGGCGCTTACGTAATGACTGAGGATGACTGGACCGGAAATGACGTAATGCTGCGTCAC >consensus_sequence_2 GGAAATGACGTAATCCCCCCGGCATTTTGGCGCCCCCCCCGCCATCTTGGGGGGTGACGTGACGGGTCGCACGTCAGGCTTTTCGCCCCCCGAAAGGGGGGGTCACGTAATCCCCCCCCGATTGCGTCAGCGAGCGGCTATGGGC >consensus_sequence_3 GTTATGACGTCATTACGTCAGCCCCCGGAAACGTCATGTAGCCCCCCGACGTCAGGCCCCTACGGGCGGAACGTTACGCCATTAGGGGCGGTGACGCAACTTTTGTCGCGCAGTACGGAACAAGCCGGAAATGACGTCATTTCCG >consensus_sequence_4 GCCCCCCCCGCCAGTCAGGATGACGTCATCCCGCGGAAATCTCGCGAGATTTAGGGTTACGTCATCCCCCCCCTTTTCAGAGATACGGGGCCCCGCCATCTTGGGCCAGCCCGTGCCCCCTAGCGCCGGCGATATGACGTGACAC >consensus_sequence_5 GGCGGACGCCATTACGTCATCTCGCGCGGCCGGTATCGCGAGAGTTATCGCGCGGCCCTCCCCCGATAAGGGGGCGGCACGACGTAAGGGGCGACCCTTTCGCCCCGCCCCCCCGTTATGACGCAATATGACGTCATTACGTCAC >consensus_sequence_6 GCGAATGACGTCATTTTGTCGCGAGATTTTGTCGGCCCGTCCCGATAGGGCGGGCGGTCACGTAATGAACCCCCGGAAATCGCGCCCCCCCTTAGCTAGGGGGGGCCCGATGACGTCATCCTGGCCCCCCCCCCGGAAATGGCGC >consensus_sequence_7 CGGAAGTCTCGCGAGATTTTGTCGCGCCATCATAGGTCAAGCCGCCGGTCCGCCATCTTGCGCCGCCGTTTTGTCGCCAAAAACGGGCGGACGCAAGGACGTCAAAGCGCCGCCGCCATCTTGTGGCCCCGCCATTTTGGGCCAG >consensus_sequence_8 GGACGTAACTCACACGTCACGTGACCAAATCTCGCGAGAAACGATGGCGTCATCCCGTGCCCCCCATTATGGGGGGGGCCCAACGTCATGTCGGACGGTCACGACATATCGTTATGACGTAGTTCGCCCCGCCGCGCTTATTGGT >consensus_sequence_9 GCCCGCCATTTTGGGCCCCGCCATCTTGGGGTGACGTAACCCCCCGGGATTAACGCGGCACGACGTCATCCCCCCCACCAGGGGGCGGAGCGACGTTATGACATATCGCCCCCCCGATGACGTCATCCCGCCGATATGACGTCAC --- Comparison of loss convergence ---
--- Comparison of activity convergence ---