8,640 cells from the melanoma WM989 cell line were sequenced using Drop-seq, where 32,287 genes were detected (MELANOMA). In addition, RNA FISH experiment of across 7,000-88,000 cells from the same cell line was conducted and 26 genes were detected (MELANOMA_FISH).
import numpy as np
import pandas as pd
import h5py
Download melanoma RNA-seq data for imputation.
melanoma_rnaseq_path = "E:/DISC/reproducibility/data/MELANOMA/original_data/GSE99330_dropseqHumanDge.txt.gz"
melanoma_rnaseq_pd = pd.read_csv(melanoma_rnaseq_path, sep=" ", compression='gzip', index_col=0, skiprows=1)
melanoma_rnaseq_pd
CTCGCGAGTAGC | CGGAGGCACTCG | GCAAGTCGATAT | GGACAATTTGTA | TGACAATTGACC | TAAGACTTCCCT | GAGGAAGGACTC | GAAACGGACAGA | TCGATTGGAGAA | ATCTAGTCCCCA | ... | AGCCCTGACAAC | ACTCTCGATTCC | GGTCAAATAAGA | ACCTCCCCTATA | ACCTCCCCTACC | CCATTTTTTCCT | TAAAGCGTGTAC | GATCAGAAGGTA | AGCGAGACGATG | ATTCTTGTGTAC | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
A1BG | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
A1BG-AS1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
A1CF | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
A2M | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
A2M-AS1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
A2ML1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
A2ML1-AS1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
A2MP1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
A3GALT2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
A4GALT | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AAAS | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AACS | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AADACL3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AADAT | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AAED1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AAGAB | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AAK1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
AAMDC | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 |
AAMP | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AANAT | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AAR2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
AARD | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AARS | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
AARS2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 |
AARSD1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AASDH | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AASDHPPT | 0 | 0 | 1 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | ... | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AASS | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AATF | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
AATK | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
ZSCAN5A | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZSCAN9 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZSWIM1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZSWIM3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZSWIM4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZSWIM5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZSWIM6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZSWIM7 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 2 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZSWIM8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZUFSP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZW10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZWILCH | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
ZWINT | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZXDA | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZXDB | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZXDC | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZYG11AP1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZYG11B | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZYX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ZZEF1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
ZZZ3 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 1 |
bP-21201H5.1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
bP-21264C1.2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
bP-2171C21.3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
bP-2171C21.4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
bP-2171C21.6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
bP-2189O9.2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
bP-2189O9.3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
chr22-38_28785274-29006793.1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
yR211F11.2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
32287 rows × 8640 columns
We download melanoma FISH data for validation.
melanoma_fish_path = "E:/DISC/reproducibility/data/MELANOMA/original_data/fishSubset.txt"
melanoma_fish_pd = pd.read_csv(melanoma_fish_path, sep=" ", index_col=0).T
melanoma_fish_pd
fish1.1 | fish1.2 | fish1.3 | fish1.4 | fish1.5 | fish1.6 | fish1.8 | fish1.9 | fish1.10 | fish1.11 | ... | fish7.21305 | fish7.21306 | fish7.21307 | fish7.21308 | fish7.21309 | fish7.21310 | fish7.21311 | fish7.21312 | fish7.21313 | fish7.21314 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
EGFR | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 2.0 | 0.0 | ... | 2.0 | 0.0 | 0.0 | 0.0 | 1.0 | 4.0 | 0.0 | 0.0 | 0.0 | 0.0 |
SOX10 | 191.0 | 115.0 | 86.0 | 40.0 | 74.0 | 97.0 | 151.0 | 98.0 | 97.0 | 167.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
CCNA2 | 2.0 | 2.0 | 7.0 | 4.0 | 4.0 | 5.0 | 10.0 | 7.0 | 14.0 | 15.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
GAPDH | 270.0 | 241.0 | 192.0 | 87.0 | 149.0 | 165.0 | 267.0 | 199.0 | 292.0 | 225.0 | ... | 183.0 | 306.0 | 142.0 | 114.0 | 462.0 | 1031.0 | 60.0 | 51.0 | 105.0 | 41.0 |
WNT5A | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
PDGFRB | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
PDGFC | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
SERPINE1 | 21.0 | 7.0 | 4.0 | 0.0 | 2.0 | 6.0 | 16.0 | 2.0 | 7.0 | 12.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
NGFR | 4.0 | 8.0 | 4.0 | 2.0 | 1.0 | 4.0 | 5.0 | 1.0 | 2.0 | 3.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
NRG1 | 1.0 | 5.0 | 0.0 | 1.0 | 2.0 | 1.0 | 11.0 | 3.0 | 2.0 | 6.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
FOSL1 | 3.0 | 3.0 | 3.0 | 0.0 | 0.0 | 1.0 | 0.0 | 3.0 | 0.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
VEGFC | 11.0 | 19.0 | 2.0 | 3.0 | 4.0 | 6.0 | 4.0 | 4.0 | 2.0 | 2.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
AXL | 3.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
MITF | 34.0 | 20.0 | 10.0 | 8.0 | 12.0 | 14.0 | 40.0 | 31.0 | 30.0 | 14.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
LOXL2 | 4.0 | 43.0 | 8.0 | 0.0 | 1.0 | 7.0 | 28.0 | 7.0 | 8.0 | 17.0 | ... | 38.0 | 0.0 | 1.0 | 2.0 | 11.0 | 14.0 | 1.0 | 7.0 | 7.0 | 1.0 |
RUNX2 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
FGFR1 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
JUN | 0.0 | 4.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
VGF | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
BABAM1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
KDM5A | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
LMNA | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
KDM5B | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
C1S | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
VCL | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
TXNRD1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 40.0 | 68.0 | 19.0 | 12.0 | 42.0 | 154.0 | 9.0 | 4.0 | 5.0 | 24.0 |
26 rows × 88040 columns
We fill missing values with -1 as loom not support np.nan dtype.
melanoma_fish_pd = melanoma_fish_pd.fillna(-1)
melanoma_fish_pd
fish1.1 | fish1.2 | fish1.3 | fish1.4 | fish1.5 | fish1.6 | fish1.8 | fish1.9 | fish1.10 | fish1.11 | ... | fish7.21305 | fish7.21306 | fish7.21307 | fish7.21308 | fish7.21309 | fish7.21310 | fish7.21311 | fish7.21312 | fish7.21313 | fish7.21314 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
EGFR | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 2.0 | 0.0 | ... | 2.0 | 0.0 | 0.0 | 0.0 | 1.0 | 4.0 | 0.0 | 0.0 | 0.0 | 0.0 |
SOX10 | 191.0 | 115.0 | 86.0 | 40.0 | 74.0 | 97.0 | 151.0 | 98.0 | 97.0 | 167.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
CCNA2 | 2.0 | 2.0 | 7.0 | 4.0 | 4.0 | 5.0 | 10.0 | 7.0 | 14.0 | 15.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
GAPDH | 270.0 | 241.0 | 192.0 | 87.0 | 149.0 | 165.0 | 267.0 | 199.0 | 292.0 | 225.0 | ... | 183.0 | 306.0 | 142.0 | 114.0 | 462.0 | 1031.0 | 60.0 | 51.0 | 105.0 | 41.0 |
WNT5A | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
PDGFRB | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
PDGFC | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
SERPINE1 | 21.0 | 7.0 | 4.0 | 0.0 | 2.0 | 6.0 | 16.0 | 2.0 | 7.0 | 12.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
NGFR | 4.0 | 8.0 | 4.0 | 2.0 | 1.0 | 4.0 | 5.0 | 1.0 | 2.0 | 3.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
NRG1 | 1.0 | 5.0 | 0.0 | 1.0 | 2.0 | 1.0 | 11.0 | 3.0 | 2.0 | 6.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
FOSL1 | 3.0 | 3.0 | 3.0 | 0.0 | 0.0 | 1.0 | 0.0 | 3.0 | 0.0 | 0.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
VEGFC | 11.0 | 19.0 | 2.0 | 3.0 | 4.0 | 6.0 | 4.0 | 4.0 | 2.0 | 2.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
AXL | 3.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
MITF | 34.0 | 20.0 | 10.0 | 8.0 | 12.0 | 14.0 | 40.0 | 31.0 | 30.0 | 14.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
LOXL2 | 4.0 | 43.0 | 8.0 | 0.0 | 1.0 | 7.0 | 28.0 | 7.0 | 8.0 | 17.0 | ... | 38.0 | 0.0 | 1.0 | 2.0 | 11.0 | 14.0 | 1.0 | 7.0 | 7.0 | 1.0 |
RUNX2 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
FGFR1 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
JUN | 0.0 | 4.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
VGF | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
BABAM1 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
KDM5A | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
LMNA | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
KDM5B | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
C1S | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
VCL | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | ... | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 |
TXNRD1 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | ... | 40.0 | 68.0 | 19.0 | 12.0 | 42.0 | 154.0 | 9.0 | 4.0 | 5.0 | 24.0 |
26 rows × 88040 columns
DISC
uses loom as its I/O format so we save these data as loom-formatted files.
with h5py.File("E:/DISC/reproducibility/data/MELANOMA/original.loom", "w") as out_f:
out_f.create_group("row_graphs")
out_f.create_group("col_graphs")
out_f.create_group("layers")
out_f["row_attrs/Gene"] = melanoma_rnaseq_pd.index.values.astype(np.string_)
out_f["col_attrs/CellID"] = melanoma_rnaseq_pd.columns.values.astype(np.string_)
out_f.create_dataset("matrix", shape=melanoma_rnaseq_pd.shape,
chunks=(melanoma_rnaseq_pd.shape[0], 1), dtype=np.float32, fletcher32=False,
compression="gzip", shuffle=False, compression_opts=2)
out_f["matrix"][...] = melanoma_rnaseq_pd.values
with h5py.File("E:/DISC/reproducibility/data/MELANOMA/fish.loom", "w") as out_f:
out_f.create_group("row_graphs")
out_f.create_group("col_graphs")
out_f.create_group("layers")
out_f["row_attrs/Gene"] = melanoma_fish_pd.index.values.astype(np.string_)
out_f["col_attrs/CellID"] = melanoma_fish_pd.columns.values.astype(np.string_)
out_f.create_dataset("matrix", shape=melanoma_fish_pd.shape,
chunks=(melanoma_fish_pd.shape[0], 1), dtype=np.float32, fletcher32=False,
compression="gzip", shuffle=False, compression_opts=2)
out_f["matrix"][...] = melanoma_fish_pd.values
We remove cells with library size less than 500 or greater than 20,000 for RNA-seq data as SAVER. does.
with h5py.File("E:/DISC/reproducibility/data/MELANOMA/raw.loom", "w") as out_f:
with h5py.File("E:/DISC/reproducibility/data/MELANOMA/original.loom", "r", libver='latest', swmr=True) as f:
gene_bc_mat = f["matrix"][...]
gene_name = f["row_attrs/Gene"][...]
cell_id = f["col_attrs/CellID"][...]
out_f.create_group("row_graphs")
out_f.create_group("col_graphs")
out_f.create_group("layers")
out_f["row_attrs/Gene"] = gene_name
cell_filter = np.bitwise_and(gene_bc_mat.sum(0) >= 500, gene_bc_mat.sum(0) <= 20000)
out_f["col_attrs/CellID"] = cell_id[cell_filter]
gene_bc_filt = gene_bc_mat[:, cell_filter]
out_f.create_dataset("matrix", shape=gene_bc_filt.shape,
chunks=(gene_bc_filt.shape[0], 1), dtype=np.float32, fletcher32=False,
compression="gzip", shuffle=False, compression_opts=2)
out_f["matrix"][...] = gene_bc_filt
We will use raw.loom
(RNA-seq) for imputation and fish.loom
(FISH) for evaluation.
Reference: