#!/usr/bin/env python
# coding: utf-8

# In[ ]:


import ipywidgets as widgets
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import wot

# input paths
FULL_DS_PATH = 'data/ExprMatrix.h5ad'
CELL_DAYS_PATH = 'data/cell_days.txt'
VAR_DS_PATH = 'data/ExprMatrix.var.genes.h5ad'
TMAP_PATH = 'tmaps/serum'
CELL_SETS_PATH = 'data/major_cell_sets.gmt'
COORDS_PATH = 'data/fle_coords.txt'


# # Notebook 4: Ancestors, Descendants and Trajectories
# 
# In their raw form, the transport maps show the descendants and ancestors of each individual cell. Here we explore several different techniques to analyze the ancestors, descendants and trajectories of *sets* of cells.
# 
# Given a set $C$ of cells at time $t_j$, we can compute the descendant distribution at time $t_{j+1}$ by pushing the cell set through the transport matrix. 
# This push forward operation is implemented in a matrix multiplication as follows. We first form a probability vector $p_{t_j}$ to represent the cell set $C$ as follows:
# 
# $$p_{t_j}(x) = \begin{cases} \frac 1 {|C|} & \quad x \in C \\ 0 & \quad \text{otherwise}.\end{cases}$$
# 
# In the following code block we generate probability vectors for the cell sets IPS, Neural, Trophoblast, and Stromal on day 18. The $\texttt{populations}$ object is a list of these probability vectors for different cell sets. 

# In[ ]:


tmap_model = wot.tmap.TransportMapModel.from_directory(TMAP_PATH)
cell_sets = wot.io.read_sets(CELL_SETS_PATH, as_dict=True)
populations = tmap_model.population_from_cell_sets(cell_sets, at_time=12)


# We can then **push forward** each probability vector by multiplying by the transport map on the right 
# 
# $$p_{t_{j+1}}^T = p_{t_{j}}^T \pi_{t_j,t_{j+1}}.$$
# 
# Multiplying $p_{t_{j+1}}$ by $\pi_{t_{j+1},t_{j+2}}$ would push it forward again to give the descendant distribution at time $t_{j+2}$. 
# Continuing in this way, we can compute the descendant distribution at any later time point $t_{\ell} > t_j$. 
# 
# To compute the **ancestors** of $C$ at an earlier time point $t_i < t_j$, we pull the cell set back through the transport map. 
# This **pull-back** operation is also implemented as a matrix multiplication
# $$p_{t_{j-1}} = \pi_{t_{j-1},t_j} p_{t_j}.$$
# The **trajectory** of a cell set $C$ refers to the sequence of ancestor distributions at earlier time points and descendant distributions at later time points. 
# 
# The method ```trajectories``` takes a list of probability vectors and returns the trajectories containing descendant distributions at later time points and ancestor distributions at earlier time points. 

# In[3]:


trajectory_ds = tmap_model.trajectories(populations)


# We can now visualize trajectories on the force layout embedding coordinates.

# In[4]:


# Load embedding coordinates
coord_df = pd.read_csv(COORDS_PATH, sep='\t', index_col=0)
nbins = 500
xrange = coord_df['x'].min(), coord_df['x'].max()
yrange = coord_df['y'].min(), coord_df['y'].max()
coord_df['x'] = np.floor(
    np.interp(coord_df['x'], [xrange[0], xrange[1]], [0, nbins - 1])).astype(int)
coord_df['y'] = np.floor(
    np.interp(coord_df['y'], [yrange[0], yrange[1]], [0, nbins - 1])).astype(int)
trajectory_ds.obs = trajectory_ds.obs.join(coord_df)


# In[5]:


# Visualize trajectories
trajectory_dropdown = widgets.Dropdown(
    options=trajectory_ds.var.index,
    description='Trajectory:'
)

def update_trajectory_vis(name):
    figure = plt.figure(figsize=(10, 10))
    plt.axis('off')
    plt.tight_layout()
    plt.title(name)
    plt.scatter(coord_df['x'], coord_df['y'], c='#f0f0f0',
                   s=4, marker=',', edgecolors='none', alpha=0.8)
    binned_df = trajectory_ds.obs.copy()
    binned_df['values'] = trajectory_ds[:, name].X
    binned_df = binned_df.groupby(['x', 'y'], as_index=False).sum()
    plt.scatter(binned_df['x'], binned_df['y'], c=binned_df['values'],
                   s=6, marker=',', edgecolors='none', vmax=binned_df['values'].quantile(0.975))
    plt.colorbar().ax.set_title('Trajectory')

widgets.interact(update_trajectory_vis, name=trajectory_dropdown)


# We can also generate a trajectory movie across time

# In[6]:


# from matplotlib import animation
# from IPython.display import Video
# name = 'IPS'
# movie_file_name = '{}_trajectory.mov'.format(name)
# figure = plt.figure(figsize=(10, 10))
# plt.axis('off')

# plt.tight_layout()
# unique_days = trajectory_ds.obs['day'].unique()

# binned_df = trajectory_ds.obs.copy()
# binned_df['values'] = trajectory_ds[:, name].X
# binned_df = binned_df.groupby(['x', 'y'], as_index=False).sum()
# vmax=binned_df['values'].quantile(0.975)

# def animate(i):
#     _trajectory_ds = trajectory_ds[trajectory_ds.obs['day']==unique_days[i]]
#     binned_df = _trajectory_ds.obs.copy()
#     plt.suptitle('Day {}, {:,} cells'.format(unique_days[i], _trajectory_ds.shape[0]), y=0.95)
#     plt.scatter(coord_df['x'], coord_df['y'], c='#f0f0f0', s=4, marker=',', edgecolors='none')
#     binned_df['values'] = _trajectory_ds[:, name].X
#     binned_df = binned_df.groupby(['x', 'y'], as_index=False).sum()
#     plt.scatter(binned_df['x'], binned_df['y'], c=binned_df['values'],
#                 s=6, marker=',', edgecolors='none', vmin=0, vmax=vmax)
 
    
# anim = animation.FuncAnimation(figure, func=animate, frames=range(0, len(unique_days)), init_func=lambda **args:None, repeat=False, interval=400)
# anim.save(movie_file_name)
# plt.close(figure)
# Video(movie_file_name)


# # Expression trends along trajectories
# 
# We now show how to compute trends in expression along trajectories. Mathematically, the trajectory is represented by a probability distribution over cells at each time point. Therefore we can easily compute the average expression according to this probability distribution. 
# 
# We begin by computing expression trends for all genes and saving the `trajectory_trends` to disk. 

# In[7]:


#Load expression data
adata = wot.io.read_dataset(FULL_DS_PATH) 

#Compute trends for all genes
trajectory_trends = wot.tmap.trajectory_trends_from_trajectory(trajectory_ds, adata)

# Save each trajectory in a separate file
for i in range(len(trajectory_trends)):
    wot.io.write_dataset(trajectory_trends[i], trajectory_ds.var.index[i] + '_trends.txt')


# Read in trajectory trends

# In[8]:


trajectory_trend_datasets = []
trajectory_names = []

for i in range(trajectory_ds.shape[1]):
    trajectory_names.append(trajectory_ds.var.index[i]) 
    trajectory_trend_datasets.append(wot.io.read_dataset(trajectory_ds.var.index[i] + '_trends.txt'))


# Visualize trends

# In[9]:


trajectory_dropdown = widgets.SelectMultiple(
    options=trajectory_ds.var.index,
    value=[trajectory_ds.var.index[0]],
    description='Trajectory:'
)

gene_input = widgets.Text(
    placeholder='',
    description='Genes:',
    value='Nanog',
    continuous_update=False
)

def update_trends_vis(selected_trajectories, gene_names):
    gene_names = gene_names.replace(' ', '').split(',')
    
    figure = plt.figure(figsize=(10, 10))
    
    
    for selected_trajectory in selected_trajectories:
        trajectory_index = trajectory_names.index(selected_trajectory)
        mean = trajectory_trend_datasets[trajectory_index]
        mean = mean[:, mean.var.index.str.lower().isin([gene.lower() for gene in gene_names])]
        timepoints = mean.obs.index.values.astype(float)
        mean.obs.index = mean.obs.index.astype('category')
    
        if mean.shape[1] > 0:
            for i in range(mean.shape[1]):  # each gene
                mean_i = mean[:, i].X
                plt.plot(timepoints, mean_i, label=mean.var.index[i] + ', ' + selected_trajectory)
            plt.xlabel("Day")
            plt.ylabel("Expression")
            plt.legend()

widgets.interact(update_trends_vis, selected_trajectories=trajectory_dropdown, gene_names=gene_input)


# # Shared Ancestry

# In[10]:


adata_var = wot.io.read_dataset(VAR_DS_PATH) 


# In[11]:


divergence_df = wot.tmap.trajectory_divergence(adata_var, trajectory_ds, distance_metric='total_variation')


# Plot divergence

# In[12]:


divergence_df['name'] = divergence_df['name1'].str.split('/').str.get(0) + ' vs. ' + divergence_df['name2'].str.split('/').str.get(
        0)
plt.figure(figsize=(10, 10))
plt.xlabel("Day")
plt.ylabel("Distance")
for p, d in divergence_df.groupby('name'):
    plt.plot(d['day2'], d['distance'], '-o', label=p)
plt.legend(loc='best')