The somatosensory cortex of CD-1 mice at ages p28 and p29 was profiled by 10X, and 7,477 cells were detected (SSCORTEX). In addition, an osmFISH experiment on 4,839 cells from the somatosensory cortex, hippocampus and ventricle of a CD-1 mouse at age p22 was conducted, in which 33 genes were detected (SSCORTEX_FISH).
import numpy as np
import collections
import h5py
Six samples from mouse L2 cortex tissue were pooled; two of them were from somatosensory cortex tissue. These six samples are stored in http://loom.linnarssonlab.org/clone/Mousebrain.org.level1/L1_Cortex2.loom.
This dataset contains duplicated gene names, so we first ran a Python script in the terminal, as described in https://github.com/iyhaoo/DISC/blob/master/reproducibility/data_preparation_and_imputation/run_imputation.md.
# Read the renamed (unique gene names) Level 1 cortex loom file fully into memory.
loom_path = "E:/DISC/reproducibility/data/SSCORTEX/original_data/l1_cortex2_unique_rename.loom"
with h5py.File(loom_path, "r", libver="latest", swmr=True) as loom:
    # Expression matrix; later cells index it as [gene, cell].
    gene_bc_mat = loom["matrix"][...]
    # Row attribute: one entry per gene.
    gene_name = loom["row_attrs/Gene"][...]
    # Column attributes: one entry per cell.
    cell_id = loom["col_attrs/CellID"][...]
    sample_id = loom["col_attrs/SampleID"][...]
    tissue = loom["col_attrs/Tissue"][...]
We extract cells from somatosensory cortex tissue.
# Show how many cells come from each tissue and each sample.
print(collections.Counter(tissue))
print(collections.Counter(sample_id))

# Select the cells annotated as somatosensory cortex.
# `np.str` was only an alias of the builtin `str`; it was deprecated in
# NumPy 1.20 and removed in NumPy 1.24, so the builtin is used directly.
used_cell = np.isin(tissue.astype(str), "SScortex")
gene_bc_sscortex = gene_bc_mat[:, used_cell]
cell_id_sscortex = cell_id[used_cell]
sample_id_sscortex = sample_id[used_cell]

# Number of genes with non-zero expression in each selected cell
# (sum over axis 0, the gene axis).
express_cells_sscortex = (gene_bc_sscortex > 0).sum(0)
Counter({b'Ctx2': 10978, b'SScortex': 7477, b'Ctx1.5': 2356}) Counter({b'10X02_1': 3971, b'10X19_2': 3874, b'10X01_1': 3506, b'10X35_1': 3229, b'10X35_2': 2783, b'10X38_3': 2356, b'10X36_3': 1092})
We then save the original data and remove cells based on the number of expressed genes. In the Level 1 data, cells had already been filtered based on clustering.
# Write the unfiltered somatosensory cortex cells to a new loom (HDF5) file.
output_path = "E:/DISC/reproducibility/data/SSCORTEX/original.loom"
with h5py.File(output_path, "w") as loom:
    # Create the three empty groups alongside the matrix and attributes.
    for loom_group in ("row_graphs", "col_graphs", "layers"):
        loom.create_group(loom_group)

    # Gene names as a row attribute; cell and sample ids as column attributes.
    loom["row_attrs/Gene"] = gene_name
    loom["col_attrs/CellID"] = cell_id_sscortex
    loom["col_attrs/SampleID"] = sample_id_sscortex

    # One chunk per cell column, lightly gzip-compressed, stored as float32.
    matrix = loom.create_dataset(
        "matrix",
        shape=gene_bc_sscortex.shape,
        dtype=np.float32,
        chunks=(gene_name.size, 1),
        compression="gzip",
        compression_opts=2,
        shuffle=False,
        fletcher32=False,
    )
    matrix[...] = gene_bc_sscortex
print(output_path, gene_bc_sscortex.shape)
E:/DISC/reproducibility/data/SSCORTEX/original.loom (27998, 7477)
Here we additionally remove a few more cells based on the number of expressed genes (keeping cells with between 500 and 5,000 expressed genes) to improve data quality.
# Keep only cells whose expressed-gene count lies in [500, 5000].
cell_filter = (express_cells_sscortex >= 500) & (express_cells_sscortex <= 5000)

# Apply the same mask to the per-cell ids and to the matrix columns.
cell_id_sscortex_filt = cell_id_sscortex[cell_filter]
sample_id_sscortex_filt = sample_id_sscortex[cell_filter]
gene_bc_sscortex_filt = gene_bc_sscortex[:, cell_filter]
# Write the quality-filtered somatosensory cortex cells to a new loom (HDF5) file.
output_path = "E:/DISC/reproducibility/data/SSCORTEX/raw.loom"
with h5py.File(output_path, "w") as loom:
    # Create the three empty groups alongside the matrix and attributes.
    for loom_group in ("row_graphs", "col_graphs", "layers"):
        loom.create_group(loom_group)

    # Gene names as a row attribute; cell and sample ids as column attributes.
    loom["row_attrs/Gene"] = gene_name
    loom["col_attrs/CellID"] = cell_id_sscortex_filt
    loom["col_attrs/SampleID"] = sample_id_sscortex_filt

    # One chunk per cell column, lightly gzip-compressed, stored as float32.
    matrix = loom.create_dataset(
        "matrix",
        shape=gene_bc_sscortex_filt.shape,
        dtype=np.float32,
        chunks=(gene_name.size, 1),
        compression="gzip",
        compression_opts=2,
        shuffle=False,
        fletcher32=False,
    )
    matrix[...] = gene_bc_sscortex_filt
print(output_path, gene_bc_sscortex_filt.shape)
E:/DISC/reproducibility/data/SSCORTEX/raw.loom (27998, 7416)
We then extract somatosensory cortex cells from the osmFISH data, which can be downloaded directly from http://linnarssonlab.org/osmFISH/osmFISH_SScortex_mouse_all_cells.loom.
# Read the osmFISH loom file (all cells) fully into memory.
loom_path = "E:/DISC/reproducibility/data/SSCORTEX/original_data/osmFISH_SScortex_mouse_all_cells.loom"
with h5py.File(loom_path, "r", libver="latest", swmr=True) as f:
    gene_name = f["row_attrs/Gene"][...]
    cell_id = f["col_attrs/CellID"][...]
    # Convert region labels to `str` so they can be compared with plain
    # string literals. `np.str` was only an alias of the builtin `str`; it was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24, so the builtin is
    # used directly.
    region = f["col_attrs/Region"][...].astype(str)
    gene_bc_mat = f["matrix"][...]
# Keep every cell whose region label is none of the three dropped labels.
used_cell = np.isin(region, ["Hippocampus", "Ventricle", "Excluded"], invert=True)

# Apply the same mask to the cell ids and to the matrix columns.
cell_id_sscortex = cell_id[used_cell]
gene_bc_sscortex = gene_bc_mat[:, used_cell]
# Write the selected osmFISH cells to a new loom (HDF5) file.
output_path = "E:/DISC/reproducibility/data/SSCORTEX/fish.loom"
with h5py.File(output_path, "w") as loom:
    # Create the three empty groups alongside the matrix and attributes.
    for loom_group in ("row_graphs", "col_graphs", "layers"):
        loom.create_group(loom_group)

    # Gene names as a row attribute; cell ids as a column attribute.
    loom["row_attrs/Gene"] = gene_name
    loom["col_attrs/CellID"] = cell_id_sscortex

    # One chunk per cell column, lightly gzip-compressed, stored as float32.
    matrix = loom.create_dataset(
        "matrix",
        shape=gene_bc_sscortex.shape,
        dtype=np.float32,
        chunks=(gene_name.size, 1),
        compression="gzip",
        compression_opts=2,
        shuffle=False,
        fletcher32=False,
    )
    matrix[...] = gene_bc_sscortex
print(output_path, gene_bc_sscortex.shape)
E:/DISC/reproducibility/data/SSCORTEX/fish.loom (33, 4388)
Reference: