import pandas as pd
import numpy as np
import scanpy as sc
import os
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics.cluster import homogeneity_score
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
# Load the metadata table; its first column is used as the row index.
metadata = pd.read_csv('../input/metadata.tsv', index_col=0, sep='\t')

# Number of distinct values in the 'label' column, i.e. the number of
# reference groups the clustering solutions are compared against.
distinct_labels = np.unique(metadata['label'])
num_clusters = distinct_labels.size
print(num_clusters)
10
# Score table, filled in later with one row per clustering method.
df_metrics = pd.DataFrame(columns=['ARI','AMI','Homogeneity'])
# Collector for cluster assignments; starts with only metadata's index.
df_clusters = pd.DataFrame(index=metadata.index)

# Walk the current directory tree and join every "clustering*.tsv" file onto
# the collector by row index.
for dirpath, dirnames, filenames in os.walk("./"):
    for filename in filenames:
        if not (filename.startswith("clustering") and filename.endswith(".tsv")):
            continue
        solution_path = os.path.join(dirpath, filename)
        print(solution_path)
        df = pd.read_csv(solution_path, index_col=0, sep='\t')
        # Default (inner) join on the index: rows absent from `df` are dropped.
        df_clusters = df_clusters.merge(df, left_index=True, right_index=True)
./SnapATAC/clusteringSolution.tsv ./Cusanovich2018/clusteringSolution.tsv ./scABC/clusteringSolution.tsv ./cisTopic/clusteringSolution.tsv ./Scasat/clusteringSolution.tsv ./Cicero/clusteringSolution.tsv
# Preview the first rows of the merged cluster-assignment table.
df_clusters.head()
| | SnapATAC | cusanovich2018 | scABC | cisTopic | Scasat | Cicero |
---|---|---|---|---|---|---|
BM1077-CLP-Frozen-160106-13 | 4 | 6 | 6 | 1 | 1 | 4 |
BM1077-CLP-Frozen-160106-14 | 4 | 6 | 6 | 1 | 1 | 9 |
BM1077-CLP-Frozen-160106-2 | 4 | 6 | 6 | 1 | 1 | 3 |
BM1077-CLP-Frozen-160106-21 | 4 | 6 | 6 | 1 | 1 | 4 |
BM1077-CLP-Frozen-160106-27 | 1 | 6 | 9 | 2 | 1 | 4 |
# Score each clustering solution (one column of df_clusters) against the
# reference labels in metadata['label'] and store the results in df_metrics.
reference_labels = metadata['label']
for method in df_clusters.columns:
    print(method)
    predicted_labels = df_clusters[method]
    scores = {
        # adjusted Rand index
        'ARI': adjusted_rand_score(reference_labels, predicted_labels),
        # adjusted mutual information
        'AMI': adjusted_mutual_info_score(
            reference_labels, predicted_labels, average_method='arithmetic'
        ),
        # homogeneity
        'Homogeneity': homogeneity_score(reference_labels, predicted_labels),
    }
    # Write the three scores into the row for this method, in column order.
    for metric_name, metric_value in scores.items():
        df_metrics.loc[method, metric_name] = metric_value
SnapATAC cusanovich2018 scABC cisTopic Scasat Cicero
# Display the score table (one row per clustering method).
df_metrics
| | ARI | AMI | Homogeneity |
---|---|---|---|
SnapATAC | 0.323942 | 0.587034 | 0.559376 |
cusanovich2018 | 0.48362 | 0.662329 | 0.68703 |
scABC | 0.270214 | 0.464873 | 0.446248 |
cisTopic | 0.51701 | 0.661236 | 0.682697 |
Scasat | 0.111576 | 0.324815 | 0.328444 |
Cicero | 0.22272 | 0.349726 | 0.352056 |
# Persist the per-method score table as CSV in the current directory.
scores_path = './clustering_scores.csv'
df_metrics.to_csv(scores_path)