Notebook

In [1]:

from __future__ import print_function
import keras
from keras.models import Sequential, Model, load_model
from keras import backend as K

import tensorflow as tf

import os
import pandas as pd

import numpy as np

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sns

import aparent.visualization as vis

from aparent.predictor import *

import urllib
import urllib.request
import pickle
from time import sleep

from scipy.stats import ttest_ind
from scipy.stats import pearsonr, spearmanr

from sklearn.linear_model import LinearRegression

from scipy.optimize import minimize

Using TensorFlow backend.

In [2]:

df = pd.read_csv('../../../aparent/data/polyadb_features_pas_3_utr3_perturb.csv', sep='\t')

save_dict = np.load("../../../aparent/data/polyadb_features_pas_3_utr3_perturb_no_x.npz")
m, l, c, y = save_dict['m'], save_dict['l'], save_dict['c'], save_dict['y']

s = np.load('../predictions/apa_perturb_data/aparent_all_libs_resnet_no_clinvar_wt_ep_5_utr3_native_scores.npy')

'''
min_total_c = 10.
total_c = np.sum(c, axis=(1, 2))

s = s[total_c >= min_total_c, :]
m = m[total_c >= min_total_c, :]
l = l[total_c >= min_total_c, :]
c = c[total_c >= min_total_c, :]
y = y[total_c >= min_total_c, :]
'''

min_total_c = 100.
total_c = np.sum(c[..., 1], axis=-1)

min_total_l = 100
min_l = np.min(l + (l == 0.) * 1e6, axis=-1)

max_total_l = 10000
max_l = np.max(l, axis=-1)

s = s[(total_c >= min_total_c) & ((min_l >= min_total_l) & (max_l <= max_total_l)), :]
m = m[(total_c >= min_total_c) & ((min_l >= min_total_l) & (max_l <= max_total_l)), :]
l = l[(total_c >= min_total_c) & ((min_l >= min_total_l) & (max_l <= max_total_l)), :]
c = c[(total_c >= min_total_c) & ((min_l >= min_total_l) & (max_l <= max_total_l)), :]
y = y[(total_c >= min_total_c) & ((min_l >= min_total_l) & (max_l <= max_total_l)), :]

print("s.shape = " + str(s.shape))
print("m.shape = " + str(m.shape))
print("l.shape = " + str(l.shape))
print("c.shape = " + str(c.shape))
print("y.shape = " + str(y.shape))

s.shape = (5022, 10)
m.shape = (5022, 10)
l.shape = (5022, 10)
c.shape = (5022, 10, 28)
y.shape = (5022, 10, 28)

In [3]:

#Define tissue-/cell- types

cell_types = np.array([
    'rpm',
    'NT',
    'CDC73',
    'CPSF1',
    'CPSF2',
    'CPSF3',
    'CPSF3L',
    'CPSF4',
    'CPSF6',
    'CSTF1',
    'CSTF3',
    'CTR9',
    'FIP1L1',
    'LEO1',
    'NUDT21',
    'PABPC1',
    'PABPN1',
    'PAF1',
    'PAPOLA',
    'PCF11',
    'RBBP6',
    'RPRD1A',
    'RPRD1B',
    'SCAF8',
    'SF3A1',
    'SRSF3',
    'SYMPK',
    'THOC5'
], dtype=np.object)

cell_type_dict = {
    cell_type : cell_type_i for cell_type_i, cell_type in enumerate(cell_types)
}

#Slice celltypes

cell_type_1 = 'NT'
cell_type_2 = 'CPSF6'

c_1 = c[:, :, cell_type_dict[cell_type_1]]
y_1 = y[:, :, cell_type_dict[cell_type_1]]

c_2 = c[:, :, cell_type_dict[cell_type_2]]
y_2 = y[:, :, cell_type_dict[cell_type_2]]

In [5]:

l_prox_cumulative = np.log(np.cumsum(l[:, ::-1], axis=1) * m[:, ::-1] + 1.)[:, ::-1]
l_cumulative = np.log(np.cumsum(l, axis=1) * m + 1.)
l = np.log(l * m + 1.)

In [6]:

prox_index = np.array([np.nonzero(m[i, :])[0][0] for i in range(m.shape[0])])
dist_index = np.array([np.nonzero(m[i, :])[0][-1] for i in range(m.shape[0])])

In [7]:

#Proximal-most vs Distal-most PAS scores

s_prox = []
s_dist = []
for i in range(s.shape[0]) :
    s_prox.append(s[i, prox_index[i]])
    s_dist.append(s[i, dist_index[i]])

s_prox = np.array(s_prox)
s_dist = np.array(s_dist)

s_min = -8.
s_max = 4.

prox_hist, bin_edges = np.histogram(s_prox, bins=50, range=(s_min, s_max), density=True)
dist_hist, _ = np.histogram(s_dist, bins=50, range=(s_min, s_max), density=True)

median_prox = np.median(s_prox)
median_dist = np.median(s_dist)

bin_width = bin_edges[1] - bin_edges[0]
bin_centers = bin_edges[:-1] + bin_width / 2.

f = plt.figure(figsize=(6, 4))

plt.bar(bin_centers, prox_hist, width=bin_width, color='green', alpha=0.5, label='Proximal')
plt.bar(bin_centers, dist_hist, width=bin_width, color='red', alpha=0.5, label='Distal')

plt.axvline(x=median_prox, linestyle='--', linewidth=2, color='darkgreen')
plt.axvline(x=median_dist, linestyle='--', linewidth=2, color='darkred')

plt.xlim(s_min, s_max)
plt.ylim(0, 0.35)

plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

plt.xlabel("APARENT-ResNet Score", fontsize=12)
plt.ylabel("% Sequences", fontsize=12)

plt.legend(fontsize=12)

plt.tight_layout()

plt.show()

In [8]:

l_dist = []
l_cumulative_dist = []
for i in range(s.shape[0]) :
    l_dist.append(l[i, dist_index[i]])
    l_cumulative_dist.append(l_cumulative[i, dist_index[i]])

l_dist = np.array(l_dist)
l_cumulative_dist = np.array(l_cumulative_dist)

In [9]:

#Proximal-most vs Distal-most PAS scores

s_dists = []
s_dist_means = []
s_dist_medians = []
s_dist_stds = []
for k in range(0, 10) :
    
    s_d = []
    for i in range(s.shape[0]) :
        if dist_index[i] - k >= 0 :
            s_d.append(s[i, dist_index[i] - k])
    
    s_dists.append(np.array(s_d))
    
    s_dist_means.append(np.mean(s_dists[-1]))
    s_dist_medians.append(np.median(s_dists[-1]))
    s_dist_stds.append(np.std(s_dists[-1]))

s_dist_means = np.array(s_dist_means)
s_dist_medians = np.array(s_dist_medians)
s_dist_stds = np.array(s_dist_stds)

In [10]:

f = plt.figure(figsize=(8, 4))

sns.stripplot(data=s_dists[::-1], alpha=0.1, jitter=0.25)

plt.errorbar(np.arange(10), s_dist_medians[::-1], yerr=s_dist_stds[::-1], linewidth=3, color='red', linestyle='-')

plt.xlim(-0.5, 9.5)
plt.ylim(-8., 4.)

plt.xticks(
    np.arange(10),
    ["-" + str(9-j) if j != 29 else "Distal\nPAS" for j in range(10)], fontsize=12, rotation=45
)

plt.yticks(fontsize=12)

plt.ylabel("APARENT-ResNet Score", fontsize=12)

plt.tight_layout()

plt.show()

f = plt.figure(figsize=(8, 4))

sns.stripplot(data=s_dists[::-1], alpha=0.1, jitter=0.25)
sns.boxplot(data=s_dists[::-1], linewidth=2, fliersize=0.)

plt.xlim(-0.5, 9.5)
plt.ylim(-8., 4.)

plt.xticks(
    np.arange(10),
    ["-" + str(9-j) if j != 9 else "Distal\nPAS" for j in range(10)], fontsize=12, rotation=45
)

plt.yticks(fontsize=12)

plt.ylabel("APARENT-ResNet Score", fontsize=12)

plt.tight_layout()

plt.show()

f = plt.figure(figsize=(8, 4))

plt.errorbar(np.arange(10), s_dist_medians[::-1], yerr=s_dist_stds[::-1], linewidth=3, color='red', linestyle='-')

plt.xlim(-0.5, 9.5)
plt.ylim(-3.5, 1.)

plt.xticks(
    np.arange(10),
    ["-" + str(9-j) if j != 29 else "Distal\nPAS" for j in range(10)], fontsize=12, rotation=45
)

plt.yticks(fontsize=12)

plt.ylabel("APARENT-ResNet Score", fontsize=12)

plt.tight_layout()

plt.show()

In [11]:

#Fit regression / classification models on native measures

def logistic_model_predict(s, m, l, w_pas, w_len, w_bias) :
    
    score_exp = np.exp(w_pas * s + w_len * l + w_bias) * m
    
    return score_exp / np.sum(score_exp, axis=-1)[:, None]

def logistic_model_mse(w_bundle, s, m, l, y_true) :
    
    w_pas = w_bundle[0]
    w_len = w_bundle[1]
    w_bias = w_bundle[2:2+10]
    
    y_pred = logistic_model_predict(s, m, l, w_pas, w_len, w_bias)
    
    y_pred_clip = np.clip(y_pred, 1e-7, 1. - 1e-7)
    y_true_clip = np.clip(y_true, 1e-7, 1. - 1e-7)
    
    kl = np.sum(y_true_clip * np.log(y_true_clip / y_pred_clip), axis=-1)
    
    return np.mean(kl)

chosen_tissue_types = ['NT']

model_spearman_rs = np.zeros(len(chosen_tissue_types))

y_preds = []

for tissue_ix in range(len(chosen_tissue_types)) :
    
    print("Training on tissue = '" + chosen_tissue_types[tissue_ix] + "'")
    
    y_tissue = y[..., cell_type_dict[chosen_tissue_types[tissue_ix]]]
    
    w0 = np.zeros(2+10)

    res = minimize(logistic_model_mse, w0, args=(s, m, l_cumulative, y_tissue), method='BFGS', options={'disp': True})

    w_pas = res.x[0]
    w_len = res.x[1]
    w_bias = res.x[2:2+10]
    
    print(res.x)
    
    y_pred_tissue = logistic_model_predict(s, m, l_cumulative, w_pas, w_len, w_bias)
    
    y_preds.append(y_pred_tissue)
    
    spearman_r_val, _ = spearmanr(y_pred_tissue[np.arange(y_tissue.shape[0]), dist_index], y_tissue[np.arange(y_tissue.shape[0]), dist_index])
    model_spearman_rs[tissue_ix] = spearman_r_val
    
    print("- Spearman r = " + str(round(spearman_r_val, 3)))
    print("- n = " + str(y_tissue.shape[0]))

    f = plt.figure(figsize=(3, 3))

    plt.scatter(y_pred_tissue[np.arange(y_tissue.shape[0]), dist_index], y_tissue[np.arange(y_tissue.shape[0]), dist_index], color='black', s=8, alpha=0.2)

    plt.xlim(0., 1.)
    plt.ylim(0., 1.)

    plt.tight_layout()
    plt.show()

Training on tissue = 'NT'
Optimization terminated successfully.
         Current function value: 0.297444
         Iterations: 105
         Function evaluations: 1498
         Gradient evaluations: 107
[ 0.46941184 -0.35526653 -1.30689904  0.52319977  0.4682442   0.2754141
  0.1836425   0.23573602 -0.11909025  0.01295298  0.67265352 -0.94624276]
- Spearman r = 0.625
- n = 5022

In [12]:

#Fit regression / classification models on native measures (Independent regression weights per PAS)

def logistic_model_predict(s, m, l, w_pas, w_len, w_bias) :
    
    score_exp = np.exp(w_pas * s + w_len * l + w_bias) * m
    
    return score_exp / np.sum(score_exp, axis=-1)[:, None]

def logistic_model_mse(w_bundle, s, m, l, y_true) :
    
    w_pas = w_bundle[0:10]
    w_len = w_bundle[10:20]
    w_bias = w_bundle[20:30]
    
    y_pred = logistic_model_predict(s, m, l, w_pas, w_len, w_bias)
    
    y_pred_clip = np.clip(y_pred, 1e-7, 1. - 1e-7)
    y_true_clip = np.clip(y_true, 1e-7, 1. - 1e-7)
    
    kl = np.sum(y_true_clip * np.log(y_true_clip / y_pred_clip), axis=-1)
    
    return np.mean(kl)

chosen_tissue_types = ['NT']

model_spearman_rs = np.zeros(len(chosen_tissue_types))

y_preds = []

for tissue_ix in range(len(chosen_tissue_types)) :
    
    print("Training on tissue = '" + chosen_tissue_types[tissue_ix] + "'")
    
    y_tissue = y[..., cell_type_dict[chosen_tissue_types[tissue_ix]]]
    
    w0 = np.zeros(30)

    res = minimize(logistic_model_mse, w0, args=(s, m, l_cumulative, y_tissue), method='BFGS', options={'disp': True})

    w_pas = res.x[0:10]
    w_len = res.x[10:20]
    w_bias = res.x[20:30]
    
    print(res.x)
    
    y_pred_tissue = logistic_model_predict(s, m, l_cumulative, w_pas, w_len, w_bias)
    
    y_preds.append(y_pred_tissue)
    
    spearman_r_val, _ = spearmanr(y_pred_tissue[np.arange(y_tissue.shape[0]), dist_index], y_tissue[np.arange(y_tissue.shape[0]), dist_index])
    model_spearman_rs[tissue_ix] = spearman_r_val
    
    print("- Spearman r = " + str(round(spearman_r_val, 3)))
    print("- n = " + str(y_tissue.shape[0]))

    f = plt.figure(figsize=(3, 3))

    plt.scatter(y_pred_tissue[np.arange(y_tissue.shape[0]), dist_index], y_tissue[np.arange(y_tissue.shape[0]), dist_index], color='black', s=8, alpha=0.2)

    plt.xlim(0., 1.)
    plt.ylim(0., 1.)

    plt.tight_layout()
    plt.show()

Training on tissue = 'NT'
Optimization terminated successfully.
         Current function value: 0.292747
         Iterations: 143
         Function evaluations: 4640
         Gradient evaluations: 145
[ 0.38278536  0.56378862  0.55127036  0.50562707  0.48221289  0.6771202
  0.10395366  0.6936327   0.66904288  0.12772881  0.         -0.3646651
 -0.41507464 -0.52916818 -0.3768265  -0.38065306 -0.41928044 -0.40489881
 -0.34176212 -0.50646243 -1.8648517   0.23405808  0.53503883  1.25696534
 -0.06523703  0.04892737 -0.12228334 -0.04533058  0.08163913 -0.05900107]
- Spearman r = 0.632
- n = 5022

In [13]:

#Right-justify feature matrices

s_r = np.zeros(s.shape)
m_r = np.zeros(m.shape)
l_r = np.zeros(l.shape)
l_r = np.zeros(l.shape)
l_cumulative_r = np.zeros(l_cumulative.shape)
l_prox_cumulative_r = np.zeros(l_prox_cumulative.shape)
y_r = np.zeros(y.shape)

dist_index_r = np.copy(dist_index)
dist_index_r[:] = m.shape[1]-1

for i in range(m.shape[0]) :
    
    n_pas = int(np.sum(m[i, :]))
    for j in range(0, m.shape[1]) :
        
        if n_pas-1-j >= 0 and m[i, n_pas-1-j] == 1. :
            s_r[i, m.shape[1]-1-j] = s[i, n_pas-1-j]
            m_r[i, m.shape[1]-1-j] = m[i, n_pas-1-j]
            l_r[i, m.shape[1]-1-j] = l[i, n_pas-1-j]
            l_cumulative_r[i, m.shape[1]-1-j] = l_cumulative[i, n_pas-1-j]
            l_prox_cumulative_r[i, m.shape[1]-1-j] = l_prox_cumulative[i, n_pas-1-j]
            y_r[i, m.shape[1]-1-j] = y[i, n_pas-1-j]

In [14]:

#Fit regression / classification models on native measures (right-justified)

def logistic_model_predict(s, m, l, w_pas, w_len, w_bias) :
    
    score_exp = np.exp(w_pas * s + w_len * l + w_bias) * m
    
    return score_exp / np.sum(score_exp, axis=-1)[:, None]

def logistic_model_mse(w_bundle, s, m, l, y_true) :
    
    w_pas = w_bundle[0]
    w_len = w_bundle[1]
    w_bias = w_bundle[2:2+10]
    
    y_pred = logistic_model_predict(s, m, l, w_pas, w_len, w_bias)
    
    y_pred_clip = np.clip(y_pred, 1e-7, 1. - 1e-7)
    y_true_clip = np.clip(y_true, 1e-7, 1. - 1e-7)
    
    kl = np.sum(y_true_clip * np.log(y_true_clip / y_pred_clip), axis=-1)
    
    return np.mean(kl)

chosen_tissue_types = ['NT']

model_spearman_rs = np.zeros(len(chosen_tissue_types))

y_preds = []

for tissue_ix in range(len(chosen_tissue_types)) :
    
    print("Training on tissue = '" + chosen_tissue_types[tissue_ix] + "'")
    
    y_tissue_r = y_r[..., cell_type_dict[chosen_tissue_types[tissue_ix]]]
    
    w0 = np.zeros(2+10)

    res = minimize(logistic_model_mse, w0, args=(s_r, m_r, l_cumulative_r, y_tissue_r), method='BFGS', options={'disp': True})

    w_pas = res.x[0]
    w_len = res.x[1]
    w_bias = res.x[2:2+10]
    
    print(res.x)
    
    y_pred_tissue_r = logistic_model_predict(s_r, m_r, l_cumulative_r, w_pas, w_len, w_bias)
    
    y_preds.append(y_pred_tissue_r)
    
    spearman_r_val, _ = spearmanr(y_pred_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r], y_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r])
    model_spearman_rs[tissue_ix] = spearman_r_val
    
    print("- Spearman r = " + str(round(spearman_r_val, 3)))
    print("- n = " + str(y_tissue_r.shape[0]))

    f = plt.figure(figsize=(3, 3))

    plt.scatter(y_pred_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r], y_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r], color='black', s=8, alpha=0.2)

    plt.xlim(0., 1.)
    plt.ylim(0., 1.)

    plt.tight_layout()
    plt.show()

Training on tissue = 'NT'
Optimization terminated successfully.
         Current function value: 0.302773
         Iterations: 89
         Function evaluations: 1274
         Gradient evaluations: 91
[ 0.45093615 -0.0673798   2.03546178  0.91747305  0.70005375  0.33132832
 -0.15047316 -0.27811664 -0.52313202 -0.81023784 -1.01919246 -1.20324994]
- Spearman r = 0.622
- n = 5022

In [15]:

#Fit regression / classification models on native measures (right-justified)

def logistic_model_predict(s, m, l, w_pas, w_len, w_bias) :
    
    score_exp = np.exp(w_pas * s + w_len * l + w_bias) * m
    
    return score_exp / np.sum(score_exp, axis=-1)[:, None]

def logistic_model_mse(w_bundle, s, m, l, y_true) :
    
    w_pas = w_bundle[0:10]
    w_len = w_bundle[10:20]
    w_bias = w_bundle[20:30]
    
    y_pred = logistic_model_predict(s, m, l, w_pas, w_len, w_bias)
    
    y_pred_clip = np.clip(y_pred, 1e-7, 1. - 1e-7)
    y_true_clip = np.clip(y_true, 1e-7, 1. - 1e-7)
    
    kl = np.sum(y_true_clip * np.log(y_true_clip / y_pred_clip), axis=-1)
    
    return np.mean(kl)

chosen_tissue_types = ['NT']

model_spearman_rs = np.zeros(len(chosen_tissue_types))

y_preds = []

for tissue_ix in range(len(chosen_tissue_types)) :
    
    print("Training on tissue = '" + chosen_tissue_types[tissue_ix] + "'")
    
    y_tissue_r = y_r[..., cell_type_dict[chosen_tissue_types[tissue_ix]]]
    
    w0 = np.zeros(30)

    res = minimize(logistic_model_mse, w0, args=(s_r, m_r, l_cumulative_r, y_tissue_r), method='BFGS', options={'disp': True})

    w_pas = res.x[0:10]
    w_len = res.x[10:20]
    w_bias = res.x[20:30]
    
    print(res.x)
    
    y_pred_tissue_r = logistic_model_predict(s_r, m_r, l_cumulative_r, w_pas, w_len, w_bias)
    
    y_preds.append(y_pred_tissue_r)
    
    spearman_r_val, _ = spearmanr(y_pred_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r], y_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r])
    model_spearman_rs[tissue_ix] = spearman_r_val
    
    print("- Spearman r = " + str(round(spearman_r_val, 3)))
    print("- n = " + str(y_tissue_r.shape[0]))

    f = plt.figure(figsize=(3, 3))

    plt.scatter(y_pred_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r], y_tissue_r[np.arange(y_tissue_r.shape[0]), dist_index_r], color='black', s=8, alpha=0.2)

    plt.xlim(0., 1.)
    plt.ylim(0., 1.)

    plt.tight_layout()
    plt.show()

Training on tissue = 'NT'
Optimization terminated successfully.
         Current function value: 0.295065
         Iterations: 168
         Function evaluations: 5440
         Gradient evaluations: 170
[-0.54813119  0.26427883  0.30412155  0.15813086  0.28523733  0.33724423
  0.3261071   0.35811057  0.45170473  0.59936153  0.          0.09826074
 -0.01367696 -0.07057042 -0.06002848 -0.06163822 -0.0718277  -0.0539588
 -0.0877641  -0.34386414  0.26174613  0.47367662  0.4353824  -0.02046226
 -0.27616845 -0.28070102 -0.48793646 -0.72895501 -0.61745249  1.24071716]
- Spearman r = 0.635
- n = 5022

In [16]:

y_1_prox = []
y_2_prox = []
for i in range(s.shape[0]) :
    y_1_prox.append(y_1[i, prox_index[i]])
    y_2_prox.append(y_2[i, prox_index[i]])

y_1_prox = np.array(y_1_prox)
y_2_prox = np.array(y_2_prox)

logodds_1_prox = np.log(np.clip(y_1_prox, 1e-6, 1. - 1e-6) / (1. - np.clip(y_1_prox, 1e-6, 1. - 1e-6)))
logodds_2_prox = np.log(np.clip(y_2_prox, 1e-6, 1. - 1e-6) / (1. - np.clip(y_2_prox, 1e-6, 1. - 1e-6)))

y_1_dist = []
y_2_dist = []
for i in range(s.shape[0]) :
    y_1_dist.append(y_1[i, dist_index[i]])
    y_2_dist.append(y_2[i, dist_index[i]])

y_1_dist = np.array(y_1_dist)
y_2_dist = np.array(y_2_dist)

logodds_1_dist = np.log(np.clip(y_1_dist, 1e-6, 1. - 1e-6) / (1. - np.clip(y_1_dist, 1e-6, 1. - 1e-6)))
logodds_2_dist = np.log(np.clip(y_2_dist, 1e-6, 1. - 1e-6) / (1. - np.clip(y_2_dist, 1e-6, 1. - 1e-6)))

In [17]:

keep_index = ((y_1_prox > 0.01) & (y_1_prox < .99))
keep_index = keep_index & ((y_2_prox > 0.01) & (y_2_prox < .99))

s_prox_std = (s_prox - np.mean(s_prox)) / np.std(s_prox)
s_dist_std = (s_dist - np.mean(s_dist)) / np.std(s_dist)

f = plt.figure(figsize=(4, 4))

plt.scatter(y_1_prox[keep_index], y_2_prox[keep_index], cmap='bwr', c=(s_prox_std - s_dist_std)[keep_index], vmin=-2., vmax=2., s=15, alpha=0.5)

plt.xlim(0, 1)
plt.ylim(0, 1)

plt.xticks([0.0, 0.25, 0.5, 0.75, 1.0], fontsize=12)
plt.yticks([0.0, 0.25, 0.5, 0.75, 1.0], fontsize=12)

plt.xlabel(cell_type_1, fontsize=12)
plt.ylabel(cell_type_2, fontsize=12)

plt.title("Measured PPUI", fontsize=12)

plt.tight_layout()

plt.show()

In [18]:

keep_index = ((y_1_dist > 0.01) & (y_1_dist < .99))
keep_index = keep_index & ((y_2_dist > 0.01) & (y_2_dist < .99))

s_prox_std = (s_prox - np.mean(s_prox)) / np.std(s_prox)
s_dist_std = (s_dist - np.mean(s_dist)) / np.std(s_dist)

f = plt.figure(figsize=(4, 4))

plt.scatter(y_1_dist[keep_index], y_2_dist[keep_index], cmap='bwr', c=(s_prox_std - s_dist_std)[keep_index], vmin=-2., vmax=2., s=15, alpha=0.5)

plt.xlim(0, 1)
plt.ylim(0, 1)

plt.xticks([0.0, 0.25, 0.5, 0.75, 1.0], fontsize=12)
plt.yticks([0.0, 0.25, 0.5, 0.75, 1.0], fontsize=12)

plt.xlabel(cell_type_1, fontsize=12)
plt.ylabel(cell_type_2, fontsize=12)

plt.title("Measured PDUI", fontsize=12)

plt.tight_layout()

plt.show()

In [19]:

#Proximal-most vs Distal-most PAS scores

ys_1_dists = []
ys_1_dist_means = []
ys_1_dist_medians = []
ys_1_dist_stds = []
for k in range(0, 10) :
    
    s_d = []
    for i in range(s.shape[0]) :
        if dist_index[i] - k >= 0 and (y_1[i, dist_index[i] - k] > 0. and y_1[i, dist_index[i] - k] < 1.) :
            s_d.append(np.log(y_1[i, dist_index[i] - k] / (1. - y_1[i, dist_index[i] - k])))
    
    ys_1_dists.append(np.array(s_d))
    
    ys_1_dist_means.append(np.mean(ys_1_dists[-1]))
    ys_1_dist_medians.append(np.median(ys_1_dists[-1]))
    ys_1_dist_stds.append(np.std(ys_1_dists[-1]))

ys_1_dist_means = np.array(ys_1_dist_means)
ys_1_dist_medians = np.array(ys_1_dist_medians)
ys_1_dist_stds = np.array(ys_1_dist_stds)

In [20]:

#Proximal-most vs Distal-most PAS scores

ys_2_dists = []
ys_2_dist_means = []
ys_2_dist_medians = []
ys_2_dist_stds = []
for k in range(0, 10) :
    
    s_d = []
    for i in range(s.shape[0]) :
        if dist_index[i] - k >= 0 and (y_2[i, dist_index[i] - k] > 0. and y_2[i, dist_index[i] - k] < 1.) :
            s_d.append(np.log(y_2[i, dist_index[i] - k] / (1. - y_2[i, dist_index[i] - k])))
    
    ys_2_dists.append(np.array(s_d))
    
    ys_2_dist_means.append(np.mean(ys_2_dists[-1]))
    ys_2_dist_medians.append(np.median(ys_2_dists[-1]))
    ys_2_dist_stds.append(np.std(ys_2_dists[-1]))

ys_2_dist_means = np.array(ys_2_dist_means)
ys_2_dist_medians = np.array(ys_2_dist_medians)
ys_2_dist_stds = np.array(ys_2_dist_stds)

In [21]:

f = plt.figure(figsize=(8, 4))

plt.errorbar(np.arange(10), ys_1_dist_medians[::-1], yerr=ys_1_dist_stds[::-1], alpha=0.5, linewidth=3, color='red', linestyle='-')
plt.errorbar(np.arange(10), ys_2_dist_medians[::-1], yerr=ys_2_dist_stds[::-1], alpha=0.5, linewidth=3, color='deepskyblue', linestyle='-')

plt.xlim(-0.5, 9.5)
plt.ylim(-6.0, 2.5)

plt.tight_layout()

plt.show()

In [ ]: