from __future__ import print_function
import keras
from keras.models import Sequential, Model, load_model
from keras import backend as K
import tensorflow as tf
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sns
import aparent.visualization as vis
from aparent.predictor import *
import urllib
import urllib.request
import pickle
from time import sleep
from scipy.stats import ttest_ind
from scipy.stats import pearsonr, spearmanr
from sklearn.linear_model import LinearRegression
from scipy.optimize import minimize
Using TensorFlow backend.
df = pd.read_csv('../../../aparent/data/polyadb_features_pas_3_utr3_perturb.csv', sep='\t')
save_dict = np.load("../../../aparent/data/polyadb_features_pas_3_utr3_perturb_no_x.npz")
m, l, c, y = save_dict['m'], save_dict['l'], save_dict['c'], save_dict['y']
s = np.load('../predictions/apa_perturb_data/aparent_all_libs_resnet_no_clinvar_wt_ep_5_utr3_native_scores.npy')
'''
min_total_c = 10.
total_c = np.sum(c, axis=(1, 2))
s = s[total_c >= min_total_c, :]
m = m[total_c >= min_total_c, :]
l = l[total_c >= min_total_c, :]
c = c[total_c >= min_total_c, :]
y = y[total_c >= min_total_c, :]
'''
min_total_c = 100.
total_c = np.sum(c[..., 1], axis=-1)
min_total_l = 100
min_l = np.min(l + (l == 0.) * 1e6, axis=-1)
max_total_l = 10000
max_l = np.max(l, axis=-1)
s = s[(total_c >= min_total_c) & ((min_l >= min_total_l) & (max_l <= max_total_l)), :]
m = m[(total_c >= min_total_c) & ((min_l >= min_total_l) & (max_l <= max_total_l)), :]
l = l[(total_c >= min_total_c) & ((min_l >= min_total_l) & (max_l <= max_total_l)), :]
c = c[(total_c >= min_total_c) & ((min_l >= min_total_l) & (max_l <= max_total_l)), :]
y = y[(total_c >= min_total_c) & ((min_l >= min_total_l) & (max_l <= max_total_l)), :]
print("s.shape = " + str(s.shape))
print("m.shape = " + str(m.shape))
print("l.shape = " + str(l.shape))
print("c.shape = " + str(c.shape))
print("y.shape = " + str(y.shape))
s.shape = (5022, 10) m.shape = (5022, 10) l.shape = (5022, 10) c.shape = (5022, 10, 28) y.shape = (5022, 10, 28)
#Define tissue-/cell- types
cell_types = np.array([
'rpm',
'NT',
'CDC73',
'CPSF1',
'CPSF2',
'CPSF3',
'CPSF3L',
'CPSF4',
'CPSF6',
'CSTF1',
'CSTF3',
'CTR9',
'FIP1L1',
'LEO1',
'NUDT21',
'PABPC1',
'PABPN1',
'PAF1',
'PAPOLA',
'PCF11',
'RBBP6',
'RPRD1A',
'RPRD1B',
'SCAF8',
'SF3A1',
'SRSF3',
'SYMPK',
'THOC5'
], dtype=np.object)
cell_type_dict = {
cell_type : cell_type_i for cell_type_i, cell_type in enumerate(cell_types)
}
#Slice celltypes
cell_type_1 = 'NT'
cell_type_2 = 'CPSF6'
c_1 = c[:, :, cell_type_dict[cell_type_1]]
y_1 = y[:, :, cell_type_dict[cell_type_1]]
c_2 = c[:, :, cell_type_dict[cell_type_2]]
y_2 = y[:, :, cell_type_dict[cell_type_2]]
l_prox_cumulative = np.log(np.cumsum(l[:, ::-1], axis=1) * m[:, ::-1] + 1.)[:, ::-1]
l_cumulative = np.log(np.cumsum(l, axis=1) * m + 1.)
l = np.log(l * m + 1.)
prox_index = np.array([np.nonzero(m[i, :])[0][0] for i in range(m.shape[0])])
dist_index = np.array([np.nonzero(m[i, :])[0][-1] for i in range(m.shape[0])])
#Proximal-most vs Distal-most PAS scores
s_prox = []
s_dist = []
for i in range(s.shape[0]) :
s_prox.append(s[i, prox_index[i]])
s_dist.append(s[i, dist_index[i]])
s_prox = np.array(s_prox)
s_dist = np.array(s_dist)
s_min = -8.
s_max = 4.
prox_hist, bin_edges = np.histogram(s_prox, bins=50, range=(s_min, s_max), density=True)
dist_hist, _ = np.histogram(s_dist, bins=50, range=(s_min, s_max), density=True)
median_prox = np.median(s_prox)
median_dist = np.median(s_dist)
bin_width = bin_edges[1] - bin_edges[0]
bin_centers = bin_edges[:-1] + bin_width / 2.
f = plt.figure(figsize=(6, 4))
plt.bar(bin_centers, prox_hist, width=bin_width, color='green', alpha=0.5, label='Proximal')
plt.bar(bin_centers, dist_hist, width=bin_width, color='red', alpha=0.5, label='Distal')
plt.axvline(x=median_prox, linestyle='--', linewidth=2, color='darkgreen')
plt.axvline(x=median_dist, linestyle='--', linewidth=2, color='darkred')
plt.xlim(s_min, s_max)
plt.ylim(0, 0.35)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.xlabel("APARENT-ResNet Score", fontsize=12)
plt.ylabel("% Sequences", fontsize=12)
plt.legend(fontsize=12)
plt.tight_layout()
plt.show()
l_dist = []
l_cumulative_dist = []
for i in range(s.shape[0]) :
l_dist.append(l[i, dist_index[i]])
l_cumulative_dist.append(l_cumulative[i, dist_index[i]])
l_dist = np.array(l_dist)
l_cumulative_dist = np.array(l_cumulative_dist)
#Proximal-most vs Distal-most PAS scores
s_dists = []
s_dist_means = []
s_dist_medians = []
s_dist_stds = []
for k in range(0, 10) :
s_d = []
for i in range(s.shape[0]) :
if dist_index[i] - k >= 0 :
s_d.append(s[i, dist_index[i] - k])
s_dists.append(np.array(s_d))
s_dist_means.append(np.mean(s_dists[-1]))
s_dist_medians.append(np.median(s_dists[-1]))
s_dist_stds.append(np.std(s_dists[-1]))
s_dist_means = np.array(s_dist_means)
s_dist_medians = np.array(s_dist_medians)
s_dist_stds = np.array(s_dist_stds)
f = plt.figure(figsize=(8, 4))
sns.stripplot(data=s_dists[::-1], alpha=0.1, jitter=0.25)
plt.errorbar(np.arange(10), s_dist_medians[::-1], yerr=s_dist_stds[::-1], linewidth=3, color='red', linestyle='-')
plt.xlim(-0.5, 9.5)
plt.ylim(-8., 4.)
plt.xticks(
np.arange(10),
["-" + str(9-j) if j != 29 else "Distal\nPAS" for j in range(10)], fontsize=12, rotation=45
)
plt.yticks(fontsize=12)
plt.ylabel("APARENT-ResNet Score", fontsize=12)
plt.tight_layout()
plt.show()
f = plt.figure(figsize=(8, 4))
sns.stripplot(data=s_dists[::-1], alpha=0.1, jitter=0.25)
sns.boxplot(data=s_dists[::-1], linewidth=2, fliersize=0.)
plt.xlim(-0.5, 9.5)
plt.ylim(-8., 4.)
plt.xticks(
np.arange(10),
["-" + str(9-j) if j != 9 else "Distal\nPAS" for j in range(10)], fontsize=12, rotation=45
)
plt.yticks(fontsize=12)
plt.ylabel("APARENT-ResNet Score", fontsize=12)
plt.tight_layout()
plt.show()
f = plt.figure(figsize=(8, 4))
plt.errorbar(np.arange(10), s_dist_medians[::-1], yerr=s_dist_stds[::-1], linewidth=3, color='red', linestyle='-')
plt.xlim(-0.5, 9.5)
plt.ylim(-3.5, 1.)
plt.xticks(
np.arange(10),
["-" + str(9-j) if j != 29 else "Distal\nPAS" for j in range(10)], fontsize=12, rotation=45
)
plt.yticks(fontsize=12)
plt.ylabel("APARENT-ResNet Score", fontsize=12)
plt.tight_layout()
plt.show()
#Fit regression / classification models on native measures
def logistic_model_predict(s, m, l, w_pas, w_len, w_bias) :
score_exp = np.exp(w_pas * s + w_len * l + w_bias) * m
return score_exp / np.sum(score_exp, axis=-1)[:, None]
def logistic_model_mse(w_bundle, s, m, l, y_true) :
w_pas = w_bundle[0]
w_len = w_bundle[1]
w_bias = w_bundle[2:2+10]
y_pred = logistic_model_predict(s, m, l, w_pas, w_len, w_bias)
y_pred_clip = np.clip(y_pred, 1e-7, 1. - 1e-7)
y_true_clip = np.clip(y_true, 1e-7, 1. - 1e-7)
kl = np.sum(y_true_clip * np.log(y_true_clip / y_pred_clip), axis=-1)
return np.mean(kl)
chosen_tissue_types = ['NT']
model_spearman_rs = np.zeros(len(chosen_tissue_types))
y_preds = []
for tissue_ix in range(len(chosen_tissue_types)) :
print("Training on tissue = '" + chosen_tissue_types[tissue_ix] + "'")
y_tissue = y[..., cell_type_dict[chosen_tissue_types[tissue_ix]]]
w0 = np.zeros(2+10)
res = minimize(logistic_model_mse, w0, args=(s, m, l_cumulative, y_tissue), method='BFGS', options={'disp': True})
w_pas = res.x[0]
w_len = res.x[1]
w_bias = res.x[2:2+10]
print(res.x)
y_pred_tissue = logistic_model_predict(s, m, l_cumulative, w_pas, w_len, w_bias)
y_preds.append(y_pred_tissue)
spearman_r_val, _ = spearmanr(y_pred_tissue[np.arange(y_tissue.shape[0]), dist_index], y_tissue[np.arange(y_tissue.shape[0]), dist_index])
model_spearman_rs[tissue_ix] = spearman_r_val
print("- Spearman r = " + str(round(spearman_r_val, 3)))
print("- n = " + str(y_tissue.shape[0]))
f = plt.figure(figsize=(3, 3))
plt.scatter(y_pred_tissue[np.arange(y_tissue.shape[0]), dist_index], y_tissue[np.arange(y_tissue.shape[0]), dist_index], color='black', s=8, alpha=0.2)
plt.xlim(0., 1.)
plt.ylim(0., 1.)
plt.tight_layout()
plt.show()
Training on tissue = 'NT' Optimization terminated successfully. Current function value: 0.297444 Iterations: 105 Function evaluations: 1498 Gradient evaluations: 107 [ 0.46941184 -0.35526653 -1.30689904 0.52319977 0.4682442 0.2754141 0.1836425 0.23573602 -0.11909025 0.01295298 0.67265352 -0.94624276] - Spearman r = 0.625 - n = 5022
#Fit regression / classification models on native measures (Independent regression weights per PAS)
def logistic_model_predict(s, m, l, w_pas, w_len, w_bias) :
score_exp = np.exp(w_pas * s + w_len * l + w_bias) * m
return score_exp / np.sum(score_exp, axis=-1)[:, None]
def logistic_model_mse(w_bundle, s, m, l, y_true) :
w_pas = w_bundle[0:10]
w_len = w_bundle[10:20]
w_bias = w_bundle[20:30]
y_pred = logistic_model_predict(s, m, l, w_pas, w_len, w_bias)
y_pred_clip = np.clip(y_pred, 1e-7, 1. - 1e-7)
y_true_clip = np.clip(y_true, 1e-7, 1. - 1e-7)
kl = np.sum(y_true_clip * np.log(y_true_clip / y_pred_clip), axis=-1)
return np.mean(kl)
chosen_tissue_types = ['NT']
model_spearman_rs = np.zeros(len(chosen_tissue_types))
y_preds = []
for tissue_ix in range(len(chosen_tissue_types)) :
print("Training on tissue = '" + chosen_tissue_types[tissue_ix] + "'")
y_tissue = y[..., cell_type_dict[chosen_tissue_types[tissue_ix]]]
w0 = np.zeros(30)
res = minimize(logistic_model_mse, w0, args=(s, m, l_cumulative, y_tissue), method='BFGS', options={'disp': True})
w_pas = res.x[0:10]
w_len = res.x[10:20]
w_bias = res.x[20:30]
print(res.x)
y_pred_tissue = logistic_model_predict(s, m, l_cumulative, w_pas, w_len, w_bias)
y_preds.append(y_pred_tissue)
spearman_r_val, _ = spearmanr(y_pred_tissue[np.arange(y_tissue.shape[0]), dist_index], y_tissue[np.arange(y_tissue.shape[0]), dist_index])
model_spearman_rs[tissue_ix] = spearman_r_val
print("- Spearman r = " + str(round(spearman_r_val, 3)))
print("- n = " + str(y_tissue.shape[0]))
f = plt.figure(figsize=(3, 3))
plt.scatter(y_pred_tissue[np.arange(y_tissue.shape[0]), dist_index], y_tissue[np.arange(y_tissue.shape[0]), dist_index], color='black', s=8, alpha=0.2)
plt.xlim(0., 1.)
plt.ylim(0., 1.)
plt.tight_layout()
plt.show()
Training on tissue = 'NT' Optimization terminated successfully. Current function value: 0.292747 Iterations: 143 Function evaluations: 4640 Gradient evaluations: 145 [ 0.38278536 0.56378862 0.55127036 0.50562707 0.48221289 0.6771202 0.10395366 0.6936327 0.66904288 0.12772881 0. -0.3646651 -0.41507464 -0.52916818 -0.3768265 -0.38065306 -0.41928044 -0.40489881 -0.34176212 -0.50646243 -1.8648517 0.23405808 0.53503883 1.25696534 -0.06523703 0.04892737 -0.12228334 -0.04533058 0.08163913 -0.05900107] - Spearman r = 0.632 - n = 5022
#Right-justify feature matrices
s_r = np.zeros(s.shape)
m_r = np.zeros(m.shape)
l_r = np.zeros(l.shape)
l_r = np.zeros(l.shape)
l_cumulative_r = np.zeros(l_cumulative.shape)
l_prox_cumulative_r = np.zeros(l_prox_cumulative.shape)
y_r = np.zeros(y.shape)
dist_index_r = np.copy(dist_index)
dist_index_r[:] = m.shape[1]-1
for i in range(m.shape[0]) :
n_pas = int(np.sum(m[i, :]))
for j in range(0, m.shape[1]) :
if n_pas-1-j >= 0 and m[i, n_pas-1-j] == 1. :
s_r[i, m.shape[1]-1-j] = s[i, n_pas-1-j]
m_r[i, m.shape[1]-1-j] = m[i, n_pas-1-j]
l_r[i, m.shape[1]-1-j] = l[i, n_pas-1-j]
l_cumulative_r[i, m.shape[1]-1-j] = l_cumulative[i, n_pas-1-j]
l_prox_cumulative_r[i, m.shape[1]-1-j] = l_prox_cumulative[i, n_pas-1-j]
y_r[i, m.shape[1]-1-j] = y[i, n_pas-1-j]
#Fit regression / classification models on native measures (right-justified)
def logistic_model_predict(s, m, l, w_pas, w_len, w_bias) :
score_exp = np.exp(w_pas * s + w_len * l + w_bias) * m
return score_exp / np.sum(score_exp, axis=-1)[:, None]
def logistic_model_mse(w_bundle, s, m, l, y_true) :
w_pas = w_bundle[0]
w_len = w_bundle[1]
w_bias = w_bundle[2:2+10]
y_pred = logistic_model_predict(s, m, l, w_pas, w_len, w_bias)
y_pred_clip = np.clip(y_pred, 1e-7, 1. - 1e-7)
y_true_clip = np.clip(y_true, 1e-7, 1. - 1e-7)
kl = np.sum(y_true_clip * np.log(y_true_clip / y_pred_clip), axis=-1)
return np.mean(kl)
chosen_tissue_types = ['NT']
model_spearman_rs = np.zeros(len(chosen_tissue_types))
y_preds = []
for tissue_ix in range(len(chosen_tissue_types)) :
print("Training on tissue = '" + chosen_tissue_types[tissue_ix] + "'")
y_tissue_r = y_r[..., cell_type_dict[chosen_tissue_types[tissue_ix]]]
w0 = np.zeros(2+10)
res = minimize(logistic_model_mse, w0, args=(s_r, m_r, l_cumulative_r, y_tissue_r), method='BFGS', options={'disp': True})
w_pas = res.x[0]
w_len = res.x[1]
w_bias = res.x[2:2+10]
print(res.x)
y_pred_tissue_r = logistic_model_predict(s_r, m_r, l_cumulative_r, w_pas, w_len, w_bias)
y_preds.append(y_pred_tissue_r)
spearman_r_val, _ = spearmanr(y_pred_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r], y_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r])
model_spearman_rs[tissue_ix] = spearman_r_val
print("- Spearman r = " + str(round(spearman_r_val, 3)))
print("- n = " + str(y_tissue_r.shape[0]))
f = plt.figure(figsize=(3, 3))
plt.scatter(y_pred_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r], y_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r], color='black', s=8, alpha=0.2)
plt.xlim(0., 1.)
plt.ylim(0., 1.)
plt.tight_layout()
plt.show()
Training on tissue = 'NT' Optimization terminated successfully. Current function value: 0.302773 Iterations: 89 Function evaluations: 1274 Gradient evaluations: 91 [ 0.45093615 -0.0673798 2.03546178 0.91747305 0.70005375 0.33132832 -0.15047316 -0.27811664 -0.52313202 -0.81023784 -1.01919246 -1.20324994] - Spearman r = 0.622 - n = 5022
#Fit regression / classification models on native measures (right-justified)
def logistic_model_predict(s, m, l, w_pas, w_len, w_bias) :
score_exp = np.exp(w_pas * s + w_len * l + w_bias) * m
return score_exp / np.sum(score_exp, axis=-1)[:, None]
def logistic_model_mse(w_bundle, s, m, l, y_true) :
w_pas = w_bundle[0:10]
w_len = w_bundle[10:20]
w_bias = w_bundle[20:30]
y_pred = logistic_model_predict(s, m, l, w_pas, w_len, w_bias)
y_pred_clip = np.clip(y_pred, 1e-7, 1. - 1e-7)
y_true_clip = np.clip(y_true, 1e-7, 1. - 1e-7)
kl = np.sum(y_true_clip * np.log(y_true_clip / y_pred_clip), axis=-1)
return np.mean(kl)
chosen_tissue_types = ['NT']
model_spearman_rs = np.zeros(len(chosen_tissue_types))
y_preds = []
for tissue_ix in range(len(chosen_tissue_types)) :
print("Training on tissue = '" + chosen_tissue_types[tissue_ix] + "'")
y_tissue_r = y_r[..., cell_type_dict[chosen_tissue_types[tissue_ix]]]
w0 = np.zeros(30)
res = minimize(logistic_model_mse, w0, args=(s_r, m_r, l_cumulative_r, y_tissue_r), method='BFGS', options={'disp': True})
w_pas = res.x[0:10]
w_len = res.x[10:20]
w_bias = res.x[20:30]
print(res.x)
y_pred_tissue_r = logistic_model_predict(s_r, m_r, l_cumulative_r, w_pas, w_len, w_bias)
y_preds.append(y_pred_tissue_r)
spearman_r_val, _ = spearmanr(y_pred_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r], y_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r])
model_spearman_rs[tissue_ix] = spearman_r_val
print("- Spearman r = " + str(round(spearman_r_val, 3)))
print("- n = " + str(y_tissue_r.shape[0]))
f = plt.figure(figsize=(3, 3))
plt.scatter(y_pred_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r], y_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r], color='black', s=8, alpha=0.2)
plt.xlim(0., 1.)
plt.ylim(0., 1.)
plt.tight_layout()
plt.show()
Training on tissue = 'NT' Optimization terminated successfully. Current function value: 0.295065 Iterations: 168 Function evaluations: 5440 Gradient evaluations: 170 [-0.54813119 0.26427883 0.30412155 0.15813086 0.28523733 0.33724423 0.3261071 0.35811057 0.45170473 0.59936153 0. 0.09826074 -0.01367696 -0.07057042 -0.06002848 -0.06163822 -0.0718277 -0.0539588 -0.0877641 -0.34386414 0.26174613 0.47367662 0.4353824 -0.02046226 -0.27616845 -0.28070102 -0.48793646 -0.72895501 -0.61745249 1.24071716] - Spearman r = 0.635 - n = 5022
y_1_prox = []
y_2_prox = []
for i in range(s.shape[0]) :
y_1_prox.append(y_1[i, prox_index[i]])
y_2_prox.append(y_2[i, prox_index[i]])
y_1_prox = np.array(y_1_prox)
y_2_prox = np.array(y_2_prox)
logodds_1_prox = np.log(np.clip(y_1_prox, 1e-6, 1. - 1e-6) / (1. - np.clip(y_1_prox, 1e-6, 1. - 1e-6)))
logodds_2_prox = np.log(np.clip(y_2_prox, 1e-6, 1. - 1e-6) / (1. - np.clip(y_2_prox, 1e-6, 1. - 1e-6)))
y_1_dist = []
y_2_dist = []
for i in range(s.shape[0]) :
y_1_dist.append(y_1[i, dist_index[i]])
y_2_dist.append(y_2[i, dist_index[i]])
y_1_dist = np.array(y_1_dist)
y_2_dist = np.array(y_2_dist)
logodds_1_dist = np.log(np.clip(y_1_dist, 1e-6, 1. - 1e-6) / (1. - np.clip(y_1_dist, 1e-6, 1. - 1e-6)))
logodds_2_dist = np.log(np.clip(y_2_dist, 1e-6, 1. - 1e-6) / (1. - np.clip(y_2_dist, 1e-6, 1. - 1e-6)))
keep_index = ((y_1_prox > 0.01) & (y_1_prox < .99))
keep_index = keep_index & ((y_2_prox > 0.01) & (y_2_prox < .99))
s_prox_std = (s_prox - np.mean(s_prox)) / np.std(s_prox)
s_dist_std = (s_dist - np.mean(s_dist)) / np.std(s_dist)
f = plt.figure(figsize=(4, 4))
plt.scatter(y_1_prox[keep_index], y_2_prox[keep_index], cmap='bwr', c=(s_prox_std - s_dist_std)[keep_index], vmin=-2., vmax=2., s=15, alpha=0.5)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xticks([0.0, 0.25, 0.5, 0.75, 1.0], fontsize=12)
plt.yticks([0.0, 0.25, 0.5, 0.75, 1.0], fontsize=12)
plt.xlabel(cell_type_1, fontsize=12)
plt.ylabel(cell_type_2, fontsize=12)
plt.title("Measured PPUI", fontsize=12)
plt.tight_layout()
plt.show()
keep_index = ((y_1_dist > 0.01) & (y_1_dist < .99))
keep_index = keep_index & ((y_2_dist > 0.01) & (y_2_dist < .99))
s_prox_std = (s_prox - np.mean(s_prox)) / np.std(s_prox)
s_dist_std = (s_dist - np.mean(s_dist)) / np.std(s_dist)
f = plt.figure(figsize=(4, 4))
plt.scatter(y_1_dist[keep_index], y_2_dist[keep_index], cmap='bwr', c=(s_prox_std - s_dist_std)[keep_index], vmin=-2., vmax=2., s=15, alpha=0.5)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xticks([0.0, 0.25, 0.5, 0.75, 1.0], fontsize=12)
plt.yticks([0.0, 0.25, 0.5, 0.75, 1.0], fontsize=12)
plt.xlabel(cell_type_1, fontsize=12)
plt.ylabel(cell_type_2, fontsize=12)
plt.title("Measured PDUI", fontsize=12)
plt.tight_layout()
plt.show()
#Proximal-most vs Distal-most PAS scores
ys_1_dists = []
ys_1_dist_means = []
ys_1_dist_medians = []
ys_1_dist_stds = []
for k in range(0, 10) :
s_d = []
for i in range(s.shape[0]) :
if dist_index[i] - k >= 0 and (y_1[i, dist_index[i] - k] > 0. and y_1[i, dist_index[i] - k] < 1.) :
s_d.append(np.log(y_1[i, dist_index[i] - k] / (1. - y_1[i, dist_index[i] - k])))
ys_1_dists.append(np.array(s_d))
ys_1_dist_means.append(np.mean(ys_1_dists[-1]))
ys_1_dist_medians.append(np.median(ys_1_dists[-1]))
ys_1_dist_stds.append(np.std(ys_1_dists[-1]))
ys_1_dist_means = np.array(ys_1_dist_means)
ys_1_dist_medians = np.array(ys_1_dist_medians)
ys_1_dist_stds = np.array(ys_1_dist_stds)
#Proximal-most vs Distal-most PAS scores
ys_2_dists = []
ys_2_dist_means = []
ys_2_dist_medians = []
ys_2_dist_stds = []
for k in range(0, 10) :
s_d = []
for i in range(s.shape[0]) :
if dist_index[i] - k >= 0 and (y_2[i, dist_index[i] - k] > 0. and y_2[i, dist_index[i] - k] < 1.) :
s_d.append(np.log(y_2[i, dist_index[i] - k] / (1. - y_2[i, dist_index[i] - k])))
ys_2_dists.append(np.array(s_d))
ys_2_dist_means.append(np.mean(ys_2_dists[-1]))
ys_2_dist_medians.append(np.median(ys_2_dists[-1]))
ys_2_dist_stds.append(np.std(ys_2_dists[-1]))
ys_2_dist_means = np.array(ys_2_dist_means)
ys_2_dist_medians = np.array(ys_2_dist_medians)
ys_2_dist_stds = np.array(ys_2_dist_stds)
f = plt.figure(figsize=(8, 4))
plt.errorbar(np.arange(10), ys_1_dist_medians[::-1], yerr=ys_1_dist_stds[::-1], alpha=0.5, linewidth=3, color='red', linestyle='-')
plt.errorbar(np.arange(10), ys_2_dist_medians[::-1], yerr=ys_2_dist_stds[::-1], alpha=0.5, linewidth=3, color='deepskyblue', linestyle='-')
plt.xlim(-0.5, 9.5)
plt.ylim(-6.0, 2.5)
plt.tight_layout()
plt.show()