library(DIscBIO) FileName<-"CTCdataset" # Name of the dataset #CSV=TRUE # If the dataset has ".csv", the user shoud set CSV to TRUE CSV=FALSE # If the dataset has ".rda", the user shoud set CSV to FALSE if (CSV==TRUE){ DataSet <- read.csv(file = paste0(FileName,".csv"), sep = ",",header=T) rownames(DataSet)<-DataSet[,1] DataSet<-DataSet[,-1] } else{ load(paste0(FileName,".rda")) DataSet<-get(FileName) } cat(paste0("The ", FileName," contains:","\n","Genes: ",length(DataSet[,1]),"\n","cells: ",length(DataSet[1,]),"\n")) sc<- DISCBIO(DataSet) # The DISCBIO class is the central object storing all information generated throughout the pipeline # Estimating a value for the "mintotal" parameters # As a common practice, mintotal is set to 1000 S1<-summary(colSums(DataSet,na.rm=TRUE)) # It gives an idea about the number of reads across cells print(S1) # Estimating a value for the "minexpr" parameter S2<-summary(rowMeans(DataSet,na.rm=TRUE)) # It gives an idea about the overall expression of the genes print(S2) minexpr= S2[3] # S2[3] is referring to the median whereas S2[4] is referring to the mean # Estimating a value for the "minnumber" parameters minnumber= round(length(DataSet[1,])/10) # To be expressed in at 10% of the cells. print(minnumber) sc<-Normalizedata(sc, mintotal=1000, minexpr=minexpr, minnumber=minnumber, maxexpr=Inf, downsample=FALSE, dsn=1, rseed=17000) sc<-FinalPreprocessing(sc,GeneFlitering="ExpF",export = TRUE) # The GeneFiltering should be set to "ExpF" load("SC.RData") # Loading the "SC" object that has include the data of the k-means clustering load("Ndata.RData") # Loading the "Ndata" object and stored in the @ndata will be used to plot the expression of genes load("expdata.RData") # Loading the "expdata" object and stored in the @expdata will be used to plot the expression of genes sc<-SC # Storing the data of SC in the sc sc@ndata<-Ndata sc@expdata<-expdata ########## Removing the unneeded objects rm(Ndata) rm(expdata) rm(DataSet) rm(SC) #sc<- Clustexp(sc,cln=4,quiet=T,clustnr=6,rseed=17000) plotGap(sc) ### Plotting gap statistics ############ Plotting the clusters plottSNE(sc) sc<-pseudoTimeOrdering(sc,quiet = TRUE, export = FALSE) plotOrderTsne(sc) g='ENSG00000104413' #### Plotting the log expression of ESRP1 plotExptSNE(sc,g) g='ENSG00000251562' #### Plotting the log expression of MALAT1 plotExptSNE(sc,g)