import numpy as np
import scipy.io
import pandas as pd
import loompy
import rpy2.robjects as robjects
saveRDS = robjects.r["saveRDS"]
from rpy2.robjects import pandas2ri
pandas2ri.activate()
# Convert the SPLiT-seq MATLAB export into (1) an RDS file holding the
# per-cell cluster assignment and (2) a loom file holding the raw
# gene-by-cell count matrix with gene / cell annotations.
data_path = "E:/DISC/reproducibility/data/BRAIN_SPLiT/original_data/GSM3017261_150000_CNS_nuclei.mat"
data = scipy.io.loadmat(data_path)

# Transposed so that rows line up with the "Gene" row attribute and columns
# with the per-cell column attributes passed to loompy.create below
# (presumably the stored DGE matrix is cells x genes -- confirm against the
# .mat file if the layout ever changes).
gene_bc_sparse = data["DGE"].transpose()

# Strip the space padding from the string fields loaded from the .mat file.
gene_name = pd.Series(data['genes']).str.strip(' ').values
sample_type = pd.Series(data['sample_type']).str.strip(' ').values

# `np.str` was only a deprecated alias of the builtin `str` and has been
# removed from NumPy (1.24+); using `str` directly is equivalent and keeps
# the script working on both old and new NumPy versions.
barcode_str = data["barcodes"].squeeze().astype(str)

# Cell identifiers are "Cell_<barcode>".
cell_id = pd.Series(np.repeat("Cell", barcode_str.size)).str.cat(barcode_str, sep='_').values

# Cluster assignment per cell, indexed by cell id, written out through R's
# saveRDS (the pandas -> R conversion is enabled by pandas2ri.activate()).
cell_type = pd.Series(
    pd.Series(data['cluster_assignment']).str.strip(' ').values,
    index=cell_id,
)
saveRDS(cell_type, "E:/DISC/reproducibility/data/BRAIN_SPLiT/cell_type.rds")

# Write the raw count matrix and its annotations to a loom file.
output_path = "E:/DISC/reproducibility/data/BRAIN_SPLiT/raw.loom"
row_attrs = {"Gene": gene_name}
col_attrs = {"CellID": cell_id, "SampleID": sample_type}
loompy.create(output_path, gene_bc_sparse, row_attrs, col_attrs)

# Report where the file was written and the (genes, cells) matrix shape.
print(output_path)
print(gene_bc_sparse.shape)
E:/DISC/reproducibility/data/BRAIN_SPLiT/raw.loom (26894, 156049)
Reference: