import os, numpy, warnings
import pandas as pd
# Set R_HOME to the R installation inside the conda environment.
# NOTE(review): presumably this is so the rpy2 extension loaded below picks up
# this R; the absolute path is machine-specific -- confirm before running elsewhere.
os.environ['R_HOME'] = '/home/gdpoore/anaconda3/envs/tcgaAnalysisPythonR/lib/R'
# Suppress all Python warnings from here on.
warnings.filterwarnings('ignore')
# Ask the inline plotting backend for 'retina' figure output.
%config InlineBackend.figure_format = 'retina'
# (Re)load the rpy2 IPython extension; the cells below rely on its %%R magic.
%reload_ext rpy2.ipython
%%R
# Attach the packages used by this notebook. library() is used instead of
# require() so that a missing package stops the cell immediately rather than
# returning FALSE and failing later at the first call into that package.
library(ggplot2)
library(snm)
library(limma)
library(edgeR)
library(dplyr)
library(pvca)
library(lme4)
library(doMC)

# Register a multicore backend using every detected core.
# detectCores() may return NA when the count cannot be determined, so fall
# back to a single worker in that case.
numCores <- parallel::detectCores()
if (is.na(numCores)) {
  numCores <- 1L
}
registerDoMC(cores = numCores)
%%R
# Load the saved R objects from the validation-split RData file into the
# current R session.
# NOTE(review): the following cells use split1MetadataQC, split2MetadataQC,
# split1VbDataQC and split2VbDataQC, which presumably come from this file --
# confirm.
load("validationSplitVbDataAndMetadata.RData")
%%R
# Print the dimensions of both metadata tables, then of both data tables,
# in the same order as before (split 1 first, then split 2).
invisible(lapply(
  list(split1MetadataQC, split2MetadataQC, split1VbDataQC, split2VbDataQC),
  function(tbl) print(dim(tbl))
))
[1] 8814 42 [1] 8811 42 [1] 8814 1993 [1] 8811 1993
%%R
# Split 1: build the covariate design matrix, filter low-count features,
# TMM-normalize with edgeR, and run voom on the result.
qcMetadataS1 <- split1MetadataQC # metadataSamplesAllQCAML
qcDataS1 <- split1VbDataQC # vbDataBarnDFReconciledQCAML

# Set up design matrix (no intercept term)
covDesignNormS1 <- model.matrix(
  ~ 0 + sample_type +
    data_submitting_center_label +
    platform +
    experimental_strategy +
    tissue_source_site_label +
    portion_is_ffpe,
  data = qcMetadataS1
)
print(colnames(covDesignNormS1))

# Strip punctuation and whitespace from the column names. Distinct labels can
# collapse to the same string (e.g. "BLN - Baylor" and "BLN Baylor"), which
# makes coefficient names ambiguous, so de-duplicate afterwards. The "dup"
# separator keeps the de-duplicated names free of punctuation; names that
# were already unique are left unchanged.
colnames(covDesignNormS1) <- make.unique(
  gsub("([[:punct:]])|\\s+", "", colnames(covDesignNormS1)),
  sep = "dup"
)
print(colnames(covDesignNormS1))

# Set up counts matrix
countsS1 <- t(qcDataS1) # DGEList object from a table of counts (rows=features, columns=samples)

# model.matrix() drops rows with missing covariates by default; fail early
# with a clear message instead of a dimension error further down.
if (nrow(covDesignNormS1) != ncol(countsS1)) {
  stop(
    "Design matrix has ", nrow(covDesignNormS1),
    " rows but the counts matrix has ", ncol(countsS1),
    " samples; check qcMetadataS1 for missing covariate values.",
    call. = FALSE
  )
}

# Normalize using edgeR and then plug into voom
dgeS1 <- DGEList(counts = countsS1)
keep <- filterByExpr(dgeS1, covDesignNormS1)
# Recompute library sizes after dropping the filtered features.
dgeS1 <- dgeS1[keep, , keep.lib.sizes = FALSE]
print("Now normalizing data...")
dgeS1 <- calcNormFactors(dgeS1, method = "TMM")
print("Now applying voom on normalized data...")
# normalize.method = "none": no additional between-sample normalization is
# requested from voom beyond the TMM factors computed above.
vdgeS1 <- voom(
  dgeS1,
  design = covDesignNormS1,
  plot = TRUE,
  save.plot = TRUE,
  normalize.method = "none"
)
[1] "sample_typeSolid Tissue Normal" [2] "sample_typeAdditional - New Primary" [3] "sample_typeAdditional Metastatic" [4] "sample_typeBlood Derived Normal" [5] "sample_typeMetastatic" [6] "sample_typePrimary Tumor" [7] "sample_typeRecurrent Tumor" [8] "data_submitting_center_labelBroad Institute of MIT and Harvard" [9] "data_submitting_center_labelCanada's Michael Smith Genome Sciences Centre" [10] "data_submitting_center_labelHarvard Medical School" [11] "data_submitting_center_labelMD Anderson - Institute for Applied Cancer Science" [12] "data_submitting_center_labelMD Anderson - RPPA Core Facility (Proteomics)" [13] "data_submitting_center_labelUniversity of North Carolina" [14] "data_submitting_center_labelWashington University School of Medicine" [15] "platformHiSeq X Ten" [16] "platformIllumina GA" [17] "platformIllumina HiSeq" [18] "platformIllumina MiSeq" [19] "platformLS 454" [20] "experimental_strategyWGS" [21] "tissue_source_site_labelABS - Lahey Clinic" [22] "tissue_source_site_labelABS - Research Metrics Pakistan" [23] "tissue_source_site_labelABS IUPUI" [24] "tissue_source_site_labelAlbert Einstein Medical Center" [25] "tissue_source_site_labelAlberta Health Services" [26] "tissue_source_site_labelAsbestos Diseases Research Institute" [27] "tissue_source_site_labelAsterand" [28] "tissue_source_site_labelBarretos Cancer Hospital" [29] "tissue_source_site_labelBaylor College of Medicine" [30] "tissue_source_site_labelBC Cancer Agency" [31] "tissue_source_site_labelBLN - Baylor" [32] "tissue_source_site_labelBLN - Cleveland Clinic" [33] "tissue_source_site_labelBLN - University Of Chicago" [34] "tissue_source_site_labelBLN - University of Miami" [35] "tissue_source_site_labelBLN - UT Southwestern Medical Center at Dallas" [36] "tissue_source_site_labelBLN Baylor" [37] "tissue_source_site_labelBLN UT Southwestern Medical Center at Dallas" [38] "tissue_source_site_labelBoston Medical Center" [39] "tissue_source_site_labelBrigham and Women's Hospital" [40] 
"tissue_source_site_labelBrigham and Women's Hospital Division of Thoracic Surgery" [41] "tissue_source_site_labelBritish Columbia Cancer Agency" [42] "tissue_source_site_labelCandler" [43] "tissue_source_site_labelCase Western" [44] "tissue_source_site_labelCase Western - St Joes" [45] "tissue_source_site_labelCedars Sinai" [46] "tissue_source_site_labelCHI-Penrose Colorado" [47] "tissue_source_site_labelChristiana Care" [48] "tissue_source_site_labelChristiana Healthcare" [49] "tissue_source_site_labelCleveland Clinic" [50] "tissue_source_site_labelCleveland Clinic Foundation" [51] "tissue_source_site_labelColumbia University" [52] "tissue_source_site_labelCornell Medical College" [53] "tissue_source_site_labelCureline" [54] "tissue_source_site_labelDept of Neurosurgery at University of Heidelberg" [55] "tissue_source_site_labelDuke" [56] "tissue_source_site_labelDuke University" [57] "tissue_source_site_labelEmory University" [58] "tissue_source_site_labelEmory University - Winship Cancer Inst." 
[59] "tissue_source_site_labelErasmus MC" [60] "tissue_source_site_labelEssen" [61] "tissue_source_site_labelFondazione-Besta" [62] "tissue_source_site_labelFox Chase" [63] "tissue_source_site_labelFox Chase Cancer Center" [64] "tissue_source_site_labelFundacio Clinic per a la Recerca Biomedica" [65] "tissue_source_site_labelGarvan Institute of Medical Research" [66] "tissue_source_site_labelGlobal BioClinical - Georgia" [67] "tissue_source_site_labelGlobal Bioclinical-Moldova" [68] "tissue_source_site_labelGreater Poland Cancer Center" [69] "tissue_source_site_labelGreenville Health System" [70] "tissue_source_site_labelGreenville Health Systems" [71] "tissue_source_site_labelGundersen Lutheran" [72] "tissue_source_site_labelGundersen Lutheran Health System" [73] "tissue_source_site_labelGynecologic Oncology Group" [74] "tissue_source_site_labelHartford" [75] "tissue_source_site_labelHartford Hospital" [76] "tissue_source_site_labelHarvard" [77] "tissue_source_site_labelHarvard Beth Israel" [78] "tissue_source_site_labelHenry Ford Hospital" [79] "tissue_source_site_labelHoly Cross" [80] "tissue_source_site_labelHospital Louis Pradel" [81] "tissue_source_site_labelHuntsman Cancer Institute" [82] "tissue_source_site_labelIDI-IRCCS" [83] "tissue_source_site_labelILSbio" [84] "tissue_source_site_labelILSBIO" [85] "tissue_source_site_labelImperial College" [86] "tissue_source_site_labelIndivumed" [87] "tissue_source_site_labelInstitut Curie" [88] "tissue_source_site_labelInstitute for Medical Research" [89] "tissue_source_site_labelInstitute of Human Virology Nigeria" [90] "tissue_source_site_labelInternational Genomics Conosrtium" [91] "tissue_source_site_labelInternational Genomics Consortium" [92] "tissue_source_site_labelJohn Wayne Cancer Center" [93] "tissue_source_site_labelJohns Hopkins" [94] "tissue_source_site_labelMaine Medical Center" [95] "tissue_source_site_labelMary Bird Perkins Cancer Center - Our Lady of the Lake" [96] "tissue_source_site_labelMayo" 
[97] "tissue_source_site_labelMayo Clinic" [98] "tissue_source_site_labelMayo Clinic - Rochester" [99] "tissue_source_site_labelMayo Clinic Arizona" [100] "tissue_source_site_labelMayo Clinic Rochester" [101] "tissue_source_site_labelMD Anderson" [102] "tissue_source_site_labelMD Anderson Cancer Center" [103] "tissue_source_site_labelMedical College of Georgia" [104] "tissue_source_site_labelMedical College of Wisconsin" [105] "tissue_source_site_labelMelbourne Health" [106] "tissue_source_site_labelMemorial Sloan Kettering" [107] "tissue_source_site_labelMemorial Sloan Kettering Cancer Center" [108] "tissue_source_site_labelMichigan University" [109] "tissue_source_site_labelMilan - Italy, Fondazione IRCCS Instituto Neuroligico C. Besta" [110] "tissue_source_site_labelMoffitt Cancer Center" [111] "tissue_source_site_labelMolecular Response" [112] "tissue_source_site_labelMontefiore Medical Center" [113] "tissue_source_site_labelMount Sinai School of Medicine" [114] "tissue_source_site_labelMSKCC" [115] "tissue_source_site_labelNational Cancer Center Korea" [116] "tissue_source_site_labelNational Institutes of Health" [117] "tissue_source_site_labelNCI HRE Branch" [118] "tissue_source_site_labelNCI Urologic Oncology Branch" [119] "tissue_source_site_labelNorthwestern University" [120] "tissue_source_site_labelNot available" [121] "tissue_source_site_labelOhio State University" [122] "tissue_source_site_labelOntario Institute for Cancer Research" [123] "tissue_source_site_labelOntario Institute for Cancer Research (OICR)" [124] "tissue_source_site_labelPapworth Hospital" [125] "tissue_source_site_labelPeter MacCallum Cancer Center" [126] "tissue_source_site_labelPrince Charles Hospital" [127] "tissue_source_site_labelPrincess Margaret Hospital (Canada)" [128] "tissue_source_site_labelPROCURE Biobank" [129] "tissue_source_site_labelProteogenex, Inc" [130] "tissue_source_site_labelProvidence Portland Medical Center" [131] "tissue_source_site_labelRegina Elena National 
Cancer Institute" [132] "tissue_source_site_labelRoswell" [133] "tissue_source_site_labelRoswell Park" [134] "tissue_source_site_labelSaint Mary's Health Care" [135] "tissue_source_site_labelSapienza University of Rome" [136] "tissue_source_site_labelSingHealth" [137] "tissue_source_site_labelSpectrum Health" [138] "tissue_source_site_labelSt Joseph's Medical Center (MD)" [139] "tissue_source_site_labelSt. Joseph's Hospital (AZ)" [140] "tissue_source_site_labelSt. Joseph's Hospital Arizona" [141] "tissue_source_site_labelSt. University of Colorado Denver" [142] "tissue_source_site_labelStanford University" [143] "tissue_source_site_labelTayside Tissue Bank" [144] "tissue_source_site_labelTechnical University of Munich" [145] "tissue_source_site_labelThe Ohio State University" [146] "tissue_source_site_labelThe University of New South Wales" [147] "tissue_source_site_labelThomas Jefferson University" [148] "tissue_source_site_labelThoraxklinik" [149] "tissue_source_site_labelThoraxklinik at University Hospital Heidelberg" [150] "tissue_source_site_labelToronto Western Hospital" [151] "tissue_source_site_labelTranslational Genomics Research Institute" [152] "tissue_source_site_labelTufts Medical Center" [153] "tissue_source_site_labelUCSF" [154] "tissue_source_site_labelUNC" [155] "tissue_source_site_labelUniversity Health Network" [156] "tissue_source_site_labelUniversity Health Network, Toronto" [157] "tissue_source_site_labelUniversity Hospital Erlangen" [158] "tissue_source_site_labelUniversity Hospital Motol" [159] "tissue_source_site_labelUniversity Medical Center Hamburg-Eppendorf" [160] "tissue_source_site_labelUniversity of Alabama" [161] "tissue_source_site_labelUniversity of Arizona" [162] "tissue_source_site_labelUniversity of California San Francisco" [163] "tissue_source_site_labelUniversity of California, Davis" [164] "tissue_source_site_labelUniversity of Chicago" [165] "tissue_source_site_labelUniversity of Colorado Denver" [166] 
"tissue_source_site_labelUniversity of Florida" [167] "tissue_source_site_labelUniversity of Hawaii" [168] "tissue_source_site_labelUniversity of Iowa" [169] "tissue_source_site_labelUniversity of Kansas" [170] "tissue_source_site_labelUniversity of Kansas Medical Center" [171] "tissue_source_site_labelUniversity of Liverpool" [172] "tissue_source_site_labelUniversity of Miami" [173] "tissue_source_site_labelUniversity of Michigan" [174] "tissue_source_site_labelUniversity Of Michigan" [175] "tissue_source_site_labelUniversity of Minnesota" [176] "tissue_source_site_labelUniversity of Nebraska Medical Center (UNMC)" [177] "tissue_source_site_labelUniversity of New Mexico" [178] "tissue_source_site_labelUniversity of North Carolina" [179] "tissue_source_site_labelUniversity of Oklahoma HSC" [180] "tissue_source_site_labelUniversity of Pennsylvania" [181] "tissue_source_site_labelUniversity of Pittsburgh" [182] "tissue_source_site_labelUniversity of Puerto Rico" [183] "tissue_source_site_labelUniversity of Sao Paulo" [184] "tissue_source_site_labelUniversity of Schleswig-Holstein" [185] "tissue_source_site_labelUniversity of Sheffield" [186] "tissue_source_site_labelUniversity of Southern California" [187] "tissue_source_site_labelUniversity of Sydney" [188] "tissue_source_site_labelUniversity of Texas MD Anderson Cancer Center" [189] "tissue_source_site_labelUniversity of Ulm" [190] "tissue_source_site_labelUniversity of Utah" [191] "tissue_source_site_labelUniversity of Washington" [192] "tissue_source_site_labelValley Hospital" [193] "tissue_source_site_labelVanderbilt" [194] "tissue_source_site_labelVanderbilt University" [195] "tissue_source_site_labelWake Forest University" [196] "tissue_source_site_labelWalter Reed" [197] "tissue_source_site_labelWashington University" [198] "tissue_source_site_labelWashington University - Alabama" [199] "tissue_source_site_labelWashington University - CALGB" [200] "tissue_source_site_labelWashington University - CHUV" [201] 
"tissue_source_site_labelWashington University - Cleveland Clinic" [202] "tissue_source_site_labelWashington University - Emory" [203] "tissue_source_site_labelWashington University - Mayo Clinic" [204] "tissue_source_site_labelWashington University - NYU" [205] "tissue_source_site_labelWashington University - Rush University" [206] "tissue_source_site_labelWashington University - St. Louis" [207] "tissue_source_site_labelWashington University St. Louis" [208] "tissue_source_site_labelWills Eye Institute" [209] "tissue_source_site_labelYale" [210] "tissue_source_site_labelYale University" [211] "portion_is_ffpeYES" [1] "sampletypeSolidTissueNormal" [2] "sampletypeAdditionalNewPrimary" [3] "sampletypeAdditionalMetastatic" [4] "sampletypeBloodDerivedNormal" [5] "sampletypeMetastatic" [6] "sampletypePrimaryTumor" [7] "sampletypeRecurrentTumor" [8] "datasubmittingcenterlabelBroadInstituteofMITandHarvard" [9] "datasubmittingcenterlabelCanadasMichaelSmithGenomeSciencesCentre" [10] "datasubmittingcenterlabelHarvardMedicalSchool" [11] "datasubmittingcenterlabelMDAndersonInstituteforAppliedCancerScience" [12] "datasubmittingcenterlabelMDAndersonRPPACoreFacilityProteomics" [13] "datasubmittingcenterlabelUniversityofNorthCarolina" [14] "datasubmittingcenterlabelWashingtonUniversitySchoolofMedicine" [15] "platformHiSeqXTen" [16] "platformIlluminaGA" [17] "platformIlluminaHiSeq" [18] "platformIlluminaMiSeq" [19] "platformLS454" [20] "experimentalstrategyWGS" [21] "tissuesourcesitelabelABSLaheyClinic" [22] "tissuesourcesitelabelABSResearchMetricsPakistan" [23] "tissuesourcesitelabelABSIUPUI" [24] "tissuesourcesitelabelAlbertEinsteinMedicalCenter" [25] "tissuesourcesitelabelAlbertaHealthServices" [26] "tissuesourcesitelabelAsbestosDiseasesResearchInstitute" [27] "tissuesourcesitelabelAsterand" [28] "tissuesourcesitelabelBarretosCancerHospital" [29] "tissuesourcesitelabelBaylorCollegeofMedicine" [30] "tissuesourcesitelabelBCCancerAgency" [31] "tissuesourcesitelabelBLNBaylor" [32] 
"tissuesourcesitelabelBLNClevelandClinic" [33] "tissuesourcesitelabelBLNUniversityOfChicago" [34] "tissuesourcesitelabelBLNUniversityofMiami" [35] "tissuesourcesitelabelBLNUTSouthwesternMedicalCenteratDallas" [36] "tissuesourcesitelabelBLNBaylor" [37] "tissuesourcesitelabelBLNUTSouthwesternMedicalCenteratDallas" [38] "tissuesourcesitelabelBostonMedicalCenter" [39] "tissuesourcesitelabelBrighamandWomensHospital" [40] "tissuesourcesitelabelBrighamandWomensHospitalDivisionofThoracicSurgery" [41] "tissuesourcesitelabelBritishColumbiaCancerAgency" [42] "tissuesourcesitelabelCandler" [43] "tissuesourcesitelabelCaseWestern" [44] "tissuesourcesitelabelCaseWesternStJoes" [45] "tissuesourcesitelabelCedarsSinai" [46] "tissuesourcesitelabelCHIPenroseColorado" [47] "tissuesourcesitelabelChristianaCare" [48] "tissuesourcesitelabelChristianaHealthcare" [49] "tissuesourcesitelabelClevelandClinic" [50] "tissuesourcesitelabelClevelandClinicFoundation" [51] "tissuesourcesitelabelColumbiaUniversity" [52] "tissuesourcesitelabelCornellMedicalCollege" [53] "tissuesourcesitelabelCureline" [54] "tissuesourcesitelabelDeptofNeurosurgeryatUniversityofHeidelberg" [55] "tissuesourcesitelabelDuke" [56] "tissuesourcesitelabelDukeUniversity" [57] "tissuesourcesitelabelEmoryUniversity" [58] "tissuesourcesitelabelEmoryUniversityWinshipCancerInst" [59] "tissuesourcesitelabelErasmusMC" [60] "tissuesourcesitelabelEssen" [61] "tissuesourcesitelabelFondazioneBesta" [62] "tissuesourcesitelabelFoxChase" [63] "tissuesourcesitelabelFoxChaseCancerCenter" [64] "tissuesourcesitelabelFundacioClinicperalaRecercaBiomedica" [65] "tissuesourcesitelabelGarvanInstituteofMedicalResearch" [66] "tissuesourcesitelabelGlobalBioClinicalGeorgia" [67] "tissuesourcesitelabelGlobalBioclinicalMoldova" [68] "tissuesourcesitelabelGreaterPolandCancerCenter" [69] "tissuesourcesitelabelGreenvilleHealthSystem" [70] "tissuesourcesitelabelGreenvilleHealthSystems" [71] "tissuesourcesitelabelGundersenLutheran" [72] 
"tissuesourcesitelabelGundersenLutheranHealthSystem" [73] "tissuesourcesitelabelGynecologicOncologyGroup" [74] "tissuesourcesitelabelHartford" [75] "tissuesourcesitelabelHartfordHospital" [76] "tissuesourcesitelabelHarvard" [77] "tissuesourcesitelabelHarvardBethIsrael" [78] "tissuesourcesitelabelHenryFordHospital" [79] "tissuesourcesitelabelHolyCross" [80] "tissuesourcesitelabelHospitalLouisPradel" [81] "tissuesourcesitelabelHuntsmanCancerInstitute" [82] "tissuesourcesitelabelIDIIRCCS" [83] "tissuesourcesitelabelILSbio" [84] "tissuesourcesitelabelILSBIO" [85] "tissuesourcesitelabelImperialCollege" [86] "tissuesourcesitelabelIndivumed" [87] "tissuesourcesitelabelInstitutCurie" [88] "tissuesourcesitelabelInstituteforMedicalResearch" [89] "tissuesourcesitelabelInstituteofHumanVirologyNigeria" [90] "tissuesourcesitelabelInternationalGenomicsConosrtium" [91] "tissuesourcesitelabelInternationalGenomicsConsortium" [92] "tissuesourcesitelabelJohnWayneCancerCenter" [93] "tissuesourcesitelabelJohnsHopkins" [94] "tissuesourcesitelabelMaineMedicalCenter" [95] "tissuesourcesitelabelMaryBirdPerkinsCancerCenterOurLadyoftheLake" [96] "tissuesourcesitelabelMayo" [97] "tissuesourcesitelabelMayoClinic" [98] "tissuesourcesitelabelMayoClinicRochester" [99] "tissuesourcesitelabelMayoClinicArizona" [100] "tissuesourcesitelabelMayoClinicRochester" [101] "tissuesourcesitelabelMDAnderson" [102] "tissuesourcesitelabelMDAndersonCancerCenter" [103] "tissuesourcesitelabelMedicalCollegeofGeorgia" [104] "tissuesourcesitelabelMedicalCollegeofWisconsin" [105] "tissuesourcesitelabelMelbourneHealth" [106] "tissuesourcesitelabelMemorialSloanKettering" [107] "tissuesourcesitelabelMemorialSloanKetteringCancerCenter" [108] "tissuesourcesitelabelMichiganUniversity" [109] "tissuesourcesitelabelMilanItalyFondazioneIRCCSInstitutoNeuroligicoCBesta" [110] "tissuesourcesitelabelMoffittCancerCenter" [111] "tissuesourcesitelabelMolecularResponse" [112] "tissuesourcesitelabelMontefioreMedicalCenter" [113] 
"tissuesourcesitelabelMountSinaiSchoolofMedicine" [114] "tissuesourcesitelabelMSKCC" [115] "tissuesourcesitelabelNationalCancerCenterKorea" [116] "tissuesourcesitelabelNationalInstitutesofHealth" [117] "tissuesourcesitelabelNCIHREBranch" [118] "tissuesourcesitelabelNCIUrologicOncologyBranch" [119] "tissuesourcesitelabelNorthwesternUniversity" [120] "tissuesourcesitelabelNotavailable" [121] "tissuesourcesitelabelOhioStateUniversity" [122] "tissuesourcesitelabelOntarioInstituteforCancerResearch" [123] "tissuesourcesitelabelOntarioInstituteforCancerResearchOICR" [124] "tissuesourcesitelabelPapworthHospital" [125] "tissuesourcesitelabelPeterMacCallumCancerCenter" [126] "tissuesourcesitelabelPrinceCharlesHospital" [127] "tissuesourcesitelabelPrincessMargaretHospitalCanada" [128] "tissuesourcesitelabelPROCUREBiobank" [129] "tissuesourcesitelabelProteogenexInc" [130] "tissuesourcesitelabelProvidencePortlandMedicalCenter" [131] "tissuesourcesitelabelReginaElenaNationalCancerInstitute" [132] "tissuesourcesitelabelRoswell" [133] "tissuesourcesitelabelRoswellPark" [134] "tissuesourcesitelabelSaintMarysHealthCare" [135] "tissuesourcesitelabelSapienzaUniversityofRome" [136] "tissuesourcesitelabelSingHealth" [137] "tissuesourcesitelabelSpectrumHealth" [138] "tissuesourcesitelabelStJosephsMedicalCenterMD" [139] "tissuesourcesitelabelStJosephsHospitalAZ" [140] "tissuesourcesitelabelStJosephsHospitalArizona" [141] "tissuesourcesitelabelStUniversityofColoradoDenver" [142] "tissuesourcesitelabelStanfordUniversity" [143] "tissuesourcesitelabelTaysideTissueBank" [144] "tissuesourcesitelabelTechnicalUniversityofMunich" [145] "tissuesourcesitelabelTheOhioStateUniversity" [146] "tissuesourcesitelabelTheUniversityofNewSouthWales" [147] "tissuesourcesitelabelThomasJeffersonUniversity" [148] "tissuesourcesitelabelThoraxklinik" [149] "tissuesourcesitelabelThoraxklinikatUniversityHospitalHeidelberg" [150] "tissuesourcesitelabelTorontoWesternHospital" [151] 
"tissuesourcesitelabelTranslationalGenomicsResearchInstitute" [152] "tissuesourcesitelabelTuftsMedicalCenter" [153] "tissuesourcesitelabelUCSF" [154] "tissuesourcesitelabelUNC" [155] "tissuesourcesitelabelUniversityHealthNetwork" [156] "tissuesourcesitelabelUniversityHealthNetworkToronto" [157] "tissuesourcesitelabelUniversityHospitalErlangen" [158] "tissuesourcesitelabelUniversityHospitalMotol" [159] "tissuesourcesitelabelUniversityMedicalCenterHamburgEppendorf" [160] "tissuesourcesitelabelUniversityofAlabama" [161] "tissuesourcesitelabelUniversityofArizona" [162] "tissuesourcesitelabelUniversityofCaliforniaSanFrancisco" [163] "tissuesourcesitelabelUniversityofCaliforniaDavis" [164] "tissuesourcesitelabelUniversityofChicago" [165] "tissuesourcesitelabelUniversityofColoradoDenver" [166] "tissuesourcesitelabelUniversityofFlorida" [167] "tissuesourcesitelabelUniversityofHawaii" [168] "tissuesourcesitelabelUniversityofIowa" [169] "tissuesourcesitelabelUniversityofKansas" [170] "tissuesourcesitelabelUniversityofKansasMedicalCenter" [171] "tissuesourcesitelabelUniversityofLiverpool" [172] "tissuesourcesitelabelUniversityofMiami" [173] "tissuesourcesitelabelUniversityofMichigan" [174] "tissuesourcesitelabelUniversityOfMichigan" [175] "tissuesourcesitelabelUniversityofMinnesota" [176] "tissuesourcesitelabelUniversityofNebraskaMedicalCenterUNMC" [177] "tissuesourcesitelabelUniversityofNewMexico" [178] "tissuesourcesitelabelUniversityofNorthCarolina" [179] "tissuesourcesitelabelUniversityofOklahomaHSC" [180] "tissuesourcesitelabelUniversityofPennsylvania" [181] "tissuesourcesitelabelUniversityofPittsburgh" [182] "tissuesourcesitelabelUniversityofPuertoRico" [183] "tissuesourcesitelabelUniversityofSaoPaulo" [184] "tissuesourcesitelabelUniversityofSchleswigHolstein" [185] "tissuesourcesitelabelUniversityofSheffield" [186] "tissuesourcesitelabelUniversityofSouthernCalifornia" [187] "tissuesourcesitelabelUniversityofSydney" [188] 
"tissuesourcesitelabelUniversityofTexasMDAndersonCancerCenter" [189] "tissuesourcesitelabelUniversityofUlm" [190] "tissuesourcesitelabelUniversityofUtah" [191] "tissuesourcesitelabelUniversityofWashington" [192] "tissuesourcesitelabelValleyHospital" [193] "tissuesourcesitelabelVanderbilt" [194] "tissuesourcesitelabelVanderbiltUniversity" [195] "tissuesourcesitelabelWakeForestUniversity" [196] "tissuesourcesitelabelWalterReed" [197] "tissuesourcesitelabelWashingtonUniversity" [198] "tissuesourcesitelabelWashingtonUniversityAlabama" [199] "tissuesourcesitelabelWashingtonUniversityCALGB" [200] "tissuesourcesitelabelWashingtonUniversityCHUV" [201] "tissuesourcesitelabelWashingtonUniversityClevelandClinic" [202] "tissuesourcesitelabelWashingtonUniversityEmory" [203] "tissuesourcesitelabelWashingtonUniversityMayoClinic" [204] "tissuesourcesitelabelWashingtonUniversityNYU" [205] "tissuesourcesitelabelWashingtonUniversityRushUniversity" [206] "tissuesourcesitelabelWashingtonUniversityStLouis" [207] "tissuesourcesitelabelWashingtonUniversityStLouis" [208] "tissuesourcesitelabelWillsEyeInstitute" [209] "tissuesourcesitelabelYale" [210] "tissuesourcesitelabelYaleUniversity" [211] "portionisffpeYES" [1] "Now normalizing data..." [1] "Now applying voom on normalized data..." Coefficients not estimable: sampletypeAdditionalMetastatic tissuesourcesitelabelABSResearchMetricsPakistan tissuesourcesitelabelGlobalBioClinicalGeorgia tissuesourcesitelabelGreenvilleHealthSystems tissuesourcesitelabelSapienzaUniversityofRome tissuesourcesitelabelUniversityofColoradoDenver
%%R
# Split 2: build the covariate design matrix, filter low-count features,
# TMM-normalize with edgeR, and run voom on the result.
qcMetadataS2 <- split2MetadataQC # metadataSamplesAllQCAML
qcDataS2 <- split2VbDataQC # vbDataBarnDFReconciledQCAML

# Set up design matrix (no intercept term)
covDesignNormS2 <- model.matrix(
  ~ 0 + sample_type +
    data_submitting_center_label +
    platform +
    experimental_strategy +
    tissue_source_site_label +
    portion_is_ffpe,
  data = qcMetadataS2
)
print(colnames(covDesignNormS2))

# Strip punctuation and whitespace from the column names. Distinct labels can
# collapse to the same string (e.g. "BLN - Baylor" and "BLN Baylor"), which
# makes coefficient names ambiguous, so de-duplicate afterwards. The "dup"
# separator keeps the de-duplicated names free of punctuation; names that
# were already unique are left unchanged.
colnames(covDesignNormS2) <- make.unique(
  gsub("([[:punct:]])|\\s+", "", colnames(covDesignNormS2)),
  sep = "dup"
)
print(colnames(covDesignNormS2))

# Set up counts matrix
countsS2 <- t(qcDataS2) # DGEList object from a table of counts (rows=features, columns=samples)

# model.matrix() drops rows with missing covariates by default; fail early
# with a clear message instead of a dimension error further down.
if (nrow(covDesignNormS2) != ncol(countsS2)) {
  stop(
    "Design matrix has ", nrow(covDesignNormS2),
    " rows but the counts matrix has ", ncol(countsS2),
    " samples; check qcMetadataS2 for missing covariate values.",
    call. = FALSE
  )
}

# Normalize using edgeR and then plug into voom
dgeS2 <- DGEList(counts = countsS2)
keep <- filterByExpr(dgeS2, covDesignNormS2)
# Recompute library sizes after dropping the filtered features.
dgeS2 <- dgeS2[keep, , keep.lib.sizes = FALSE]
print("Now normalizing data...")
dgeS2 <- calcNormFactors(dgeS2, method = "TMM")
print("Now applying voom on normalized data...")
# normalize.method = "none": no additional between-sample normalization is
# requested from voom beyond the TMM factors computed above.
vdgeS2 <- voom(
  dgeS2,
  design = covDesignNormS2,
  plot = TRUE,
  save.plot = TRUE,
  normalize.method = "none"
)
[1] "sample_typeSolid Tissue Normal" [2] "sample_typeAdditional - New Primary" [3] "sample_typeAdditional Metastatic" [4] "sample_typeBlood Derived Normal" [5] "sample_typeMetastatic" [6] "sample_typePrimary Tumor" [7] "sample_typeRecurrent Tumor" [8] "data_submitting_center_labelBroad Institute of MIT and Harvard" [9] "data_submitting_center_labelCanada's Michael Smith Genome Sciences Centre" [10] "data_submitting_center_labelHarvard Medical School" [11] "data_submitting_center_labelMD Anderson - Institute for Applied Cancer Science" [12] "data_submitting_center_labelMD Anderson - RPPA Core Facility (Proteomics)" [13] "data_submitting_center_labelUniversity of North Carolina" [14] "data_submitting_center_labelWashington University School of Medicine" [15] "platformHiSeq X Ten" [16] "platformIllumina GA" [17] "platformIllumina HiSeq" [18] "platformIllumina MiSeq" [19] "platformLS 454" [20] "experimental_strategyWGS" [21] "tissue_source_site_labelABS - Lahey Clinic" [22] "tissue_source_site_labelABS - Research Metrics Pakistan" [23] "tissue_source_site_labelABS IUPUI" [24] "tissue_source_site_labelAlbert Einstein Medical Center" [25] "tissue_source_site_labelAlberta Health Services" [26] "tissue_source_site_labelAsbestos Diseases Research Institute" [27] "tissue_source_site_labelAsterand" [28] "tissue_source_site_labelBarretos Cancer Hospital" [29] "tissue_source_site_labelBaylor College of Medicine" [30] "tissue_source_site_labelBC Cancer Agency" [31] "tissue_source_site_labelBLN - Baylor" [32] "tissue_source_site_labelBLN - Cleveland Clinic" [33] "tissue_source_site_labelBLN - University Of Chicago" [34] "tissue_source_site_labelBLN - University of Miami" [35] "tissue_source_site_labelBLN - UT Southwestern Medical Center at Dallas" [36] "tissue_source_site_labelBLN Baylor" [37] "tissue_source_site_labelBLN UT Southwestern Medical Center at Dallas" [38] "tissue_source_site_labelBoston Medical Center" [39] "tissue_source_site_labelBrigham and Women's Hospital" [40] 
"tissue_source_site_labelBrigham and Women's Hospital Division of Thoracic Surgery" [41] "tissue_source_site_labelBritish Columbia Cancer Agency" [42] "tissue_source_site_labelCandler" [43] "tissue_source_site_labelCase Western" [44] "tissue_source_site_labelCase Western - St Joes" [45] "tissue_source_site_labelCedars Sinai" [46] "tissue_source_site_labelCHI-Penrose Colorado" [47] "tissue_source_site_labelChristiana Care" [48] "tissue_source_site_labelChristiana Healthcare" [49] "tissue_source_site_labelCleveland Clinic" [50] "tissue_source_site_labelCleveland Clinic Foundation" [51] "tissue_source_site_labelColumbia University" [52] "tissue_source_site_labelCornell Medical College" [53] "tissue_source_site_labelCureline" [54] "tissue_source_site_labelDept of Neurosurgery at University of Heidelberg" [55] "tissue_source_site_labelDuke" [56] "tissue_source_site_labelDuke University" [57] "tissue_source_site_labelEmory University" [58] "tissue_source_site_labelEmory University - Winship Cancer Inst." 
[59] "tissue_source_site_labelErasmus MC" [60] "tissue_source_site_labelEssen" [61] "tissue_source_site_labelFondazione-Besta" [62] "tissue_source_site_labelFox Chase" [63] "tissue_source_site_labelFox Chase Cancer Center" [64] "tissue_source_site_labelFundacio Clinic per a la Recerca Biomedica" [65] "tissue_source_site_labelGarvan Institute of Medical Research" [66] "tissue_source_site_labelGlobal BioClinical - Georgia" [67] "tissue_source_site_labelGlobal Bioclinical-Moldova" [68] "tissue_source_site_labelGreater Poland Cancer Center" [69] "tissue_source_site_labelGreenville Health System" [70] "tissue_source_site_labelGreenville Health Systems" [71] "tissue_source_site_labelGundersen Lutheran" [72] "tissue_source_site_labelGundersen Lutheran Health System" [73] "tissue_source_site_labelGynecologic Oncology Group" [74] "tissue_source_site_labelHartford" [75] "tissue_source_site_labelHartford Hospital" [76] "tissue_source_site_labelHarvard" [77] "tissue_source_site_labelHarvard Beth Israel" [78] "tissue_source_site_labelHenry Ford Hospital" [79] "tissue_source_site_labelHoly Cross" [80] "tissue_source_site_labelHospital Louis Pradel" [81] "tissue_source_site_labelHuntsman Cancer Institute" [82] "tissue_source_site_labelIDI-IRCCS" [83] "tissue_source_site_labelILSbio" [84] "tissue_source_site_labelILSBIO" [85] "tissue_source_site_labelImperial College" [86] "tissue_source_site_labelIndivumed" [87] "tissue_source_site_labelInstitut Curie" [88] "tissue_source_site_labelInstitute for Medical Research" [89] "tissue_source_site_labelInstitute of Human Virology Nigeria" [90] "tissue_source_site_labelInternational Genomics Conosrtium" [91] "tissue_source_site_labelInternational Genomics Consortium" [92] "tissue_source_site_labelJohn Wayne Cancer Center" [93] "tissue_source_site_labelJohns Hopkins" [94] "tissue_source_site_labelMaine Medical Center" [95] "tissue_source_site_labelMary Bird Perkins Cancer Center - Our Lady of the Lake" [96] "tissue_source_site_labelMayo" 
[97] "tissue_source_site_labelMayo Clinic" [98] "tissue_source_site_labelMayo Clinic - Rochester" [99] "tissue_source_site_labelMayo Clinic Arizona" [100] "tissue_source_site_labelMayo Clinic Rochester" [101] "tissue_source_site_labelMD Anderson" [102] "tissue_source_site_labelMD Anderson Cancer Center" [103] "tissue_source_site_labelMedical College of Georgia" [104] "tissue_source_site_labelMedical College of Wisconsin" [105] "tissue_source_site_labelMelbourne Health" [106] "tissue_source_site_labelMemorial Sloan Kettering" [107] "tissue_source_site_labelMemorial Sloan Kettering Cancer Center" [108] "tissue_source_site_labelMichigan University" [109] "tissue_source_site_labelMilan - Italy, Fondazione IRCCS Instituto Neuroligico C. Besta" [110] "tissue_source_site_labelMoffitt Cancer Center" [111] "tissue_source_site_labelMolecular Response" [112] "tissue_source_site_labelMontefiore Medical Center" [113] "tissue_source_site_labelMount Sinai School of Medicine" [114] "tissue_source_site_labelMSKCC" [115] "tissue_source_site_labelNational Cancer Center Korea" [116] "tissue_source_site_labelNational Institutes of Health" [117] "tissue_source_site_labelNCI HRE Branch" [118] "tissue_source_site_labelNCI Urologic Oncology Branch" [119] "tissue_source_site_labelNorthwestern University" [120] "tissue_source_site_labelNot available" [121] "tissue_source_site_labelOhio State University" [122] "tissue_source_site_labelOntario Institute for Cancer Research" [123] "tissue_source_site_labelOntario Institute for Cancer Research (OICR)" [124] "tissue_source_site_labelPapworth Hospital" [125] "tissue_source_site_labelPeter MacCallum Cancer Center" [126] "tissue_source_site_labelPrince Charles Hospital" [127] "tissue_source_site_labelPrincess Margaret Hospital (Canada)" [128] "tissue_source_site_labelPROCURE Biobank" [129] "tissue_source_site_labelProteogenex, Inc" [130] "tissue_source_site_labelProvidence Portland Medical Center" [131] "tissue_source_site_labelRegina Elena National 
Cancer Institute" [132] "tissue_source_site_labelRoswell" [133] "tissue_source_site_labelRoswell Park" [134] "tissue_source_site_labelSaint Mary's Health Care" [135] "tissue_source_site_labelSapienza University of Rome" [136] "tissue_source_site_labelSingHealth" [137] "tissue_source_site_labelSpectrum Health" [138] "tissue_source_site_labelSt Joseph's Medical Center (MD)" [139] "tissue_source_site_labelSt. Joseph's Hospital (AZ)" [140] "tissue_source_site_labelSt. Joseph's Hospital Arizona" [141] "tissue_source_site_labelSt. University of Colorado Denver" [142] "tissue_source_site_labelStanford University" [143] "tissue_source_site_labelTayside Tissue Bank" [144] "tissue_source_site_labelTechnical University of Munich" [145] "tissue_source_site_labelThe Ohio State University" [146] "tissue_source_site_labelThe University of New South Wales" [147] "tissue_source_site_labelThomas Jefferson University" [148] "tissue_source_site_labelThoraxklinik" [149] "tissue_source_site_labelThoraxklinik at University Hospital Heidelberg" [150] "tissue_source_site_labelToronto Western Hospital" [151] "tissue_source_site_labelTranslational Genomics Research Institute" [152] "tissue_source_site_labelTufts Medical Center" [153] "tissue_source_site_labelUCSF" [154] "tissue_source_site_labelUNC" [155] "tissue_source_site_labelUniversity Health Network" [156] "tissue_source_site_labelUniversity Health Network, Toronto" [157] "tissue_source_site_labelUniversity Hospital Erlangen" [158] "tissue_source_site_labelUniversity Hospital Motol" [159] "tissue_source_site_labelUniversity Medical Center Hamburg-Eppendorf" [160] "tissue_source_site_labelUniversity of Alabama" [161] "tissue_source_site_labelUniversity of Arizona" [162] "tissue_source_site_labelUniversity of California San Francisco" [163] "tissue_source_site_labelUniversity of California, Davis" [164] "tissue_source_site_labelUniversity of Chicago" [165] "tissue_source_site_labelUniversity of Colorado Denver" [166] 
"tissue_source_site_labelUniversity of Florida" [167] "tissue_source_site_labelUniversity of Hawaii" [168] "tissue_source_site_labelUniversity of Iowa" [169] "tissue_source_site_labelUniversity of Kansas" [170] "tissue_source_site_labelUniversity of Kansas Medical Center" [171] "tissue_source_site_labelUniversity of Liverpool" [172] "tissue_source_site_labelUniversity of Miami" [173] "tissue_source_site_labelUniversity of Michigan" [174] "tissue_source_site_labelUniversity Of Michigan" [175] "tissue_source_site_labelUniversity of Minnesota" [176] "tissue_source_site_labelUniversity of Nebraska Medical Center (UNMC)" [177] "tissue_source_site_labelUniversity of New Mexico" [178] "tissue_source_site_labelUniversity of North Carolina" [179] "tissue_source_site_labelUniversity of Oklahoma HSC" [180] "tissue_source_site_labelUniversity of Pennsylvania" [181] "tissue_source_site_labelUniversity of Pittsburgh" [182] "tissue_source_site_labelUniversity of Puerto Rico" [183] "tissue_source_site_labelUniversity of Sao Paulo" [184] "tissue_source_site_labelUniversity of Schleswig-Holstein" [185] "tissue_source_site_labelUniversity of Sheffield" [186] "tissue_source_site_labelUniversity of Southern California" [187] "tissue_source_site_labelUniversity of Sydney" [188] "tissue_source_site_labelUniversity of Texas MD Anderson Cancer Center" [189] "tissue_source_site_labelUniversity of Ulm" [190] "tissue_source_site_labelUniversity of Utah" [191] "tissue_source_site_labelUniversity of Washington" [192] "tissue_source_site_labelValley Hospital" [193] "tissue_source_site_labelVanderbilt" [194] "tissue_source_site_labelVanderbilt University" [195] "tissue_source_site_labelWake Forest University" [196] "tissue_source_site_labelWalter Reed" [197] "tissue_source_site_labelWashington University" [198] "tissue_source_site_labelWashington University - Alabama" [199] "tissue_source_site_labelWashington University - CALGB" [200] "tissue_source_site_labelWashington University - CHUV" [201] 
"tissue_source_site_labelWashington University - Cleveland Clinic" [202] "tissue_source_site_labelWashington University - Emory" [203] "tissue_source_site_labelWashington University - Mayo Clinic" [204] "tissue_source_site_labelWashington University - NYU" [205] "tissue_source_site_labelWashington University - Rush University" [206] "tissue_source_site_labelWashington University - St. Louis" [207] "tissue_source_site_labelWashington University St. Louis" [208] "tissue_source_site_labelWills Eye Institute" [209] "tissue_source_site_labelYale" [210] "tissue_source_site_labelYale University" [211] "portion_is_ffpeYES" [1] "sampletypeSolidTissueNormal" [2] "sampletypeAdditionalNewPrimary" [3] "sampletypeAdditionalMetastatic" [4] "sampletypeBloodDerivedNormal" [5] "sampletypeMetastatic" [6] "sampletypePrimaryTumor" [7] "sampletypeRecurrentTumor" [8] "datasubmittingcenterlabelBroadInstituteofMITandHarvard" [9] "datasubmittingcenterlabelCanadasMichaelSmithGenomeSciencesCentre" [10] "datasubmittingcenterlabelHarvardMedicalSchool" [11] "datasubmittingcenterlabelMDAndersonInstituteforAppliedCancerScience" [12] "datasubmittingcenterlabelMDAndersonRPPACoreFacilityProteomics" [13] "datasubmittingcenterlabelUniversityofNorthCarolina" [14] "datasubmittingcenterlabelWashingtonUniversitySchoolofMedicine" [15] "platformHiSeqXTen" [16] "platformIlluminaGA" [17] "platformIlluminaHiSeq" [18] "platformIlluminaMiSeq" [19] "platformLS454" [20] "experimentalstrategyWGS" [21] "tissuesourcesitelabelABSLaheyClinic" [22] "tissuesourcesitelabelABSResearchMetricsPakistan" [23] "tissuesourcesitelabelABSIUPUI" [24] "tissuesourcesitelabelAlbertEinsteinMedicalCenter" [25] "tissuesourcesitelabelAlbertaHealthServices" [26] "tissuesourcesitelabelAsbestosDiseasesResearchInstitute" [27] "tissuesourcesitelabelAsterand" [28] "tissuesourcesitelabelBarretosCancerHospital" [29] "tissuesourcesitelabelBaylorCollegeofMedicine" [30] "tissuesourcesitelabelBCCancerAgency" [31] "tissuesourcesitelabelBLNBaylor" [32] 
"tissuesourcesitelabelBLNClevelandClinic" [33] "tissuesourcesitelabelBLNUniversityOfChicago" [34] "tissuesourcesitelabelBLNUniversityofMiami" [35] "tissuesourcesitelabelBLNUTSouthwesternMedicalCenteratDallas" [36] "tissuesourcesitelabelBLNBaylor" [37] "tissuesourcesitelabelBLNUTSouthwesternMedicalCenteratDallas" [38] "tissuesourcesitelabelBostonMedicalCenter" [39] "tissuesourcesitelabelBrighamandWomensHospital" [40] "tissuesourcesitelabelBrighamandWomensHospitalDivisionofThoracicSurgery" [41] "tissuesourcesitelabelBritishColumbiaCancerAgency" [42] "tissuesourcesitelabelCandler" [43] "tissuesourcesitelabelCaseWestern" [44] "tissuesourcesitelabelCaseWesternStJoes" [45] "tissuesourcesitelabelCedarsSinai" [46] "tissuesourcesitelabelCHIPenroseColorado" [47] "tissuesourcesitelabelChristianaCare" [48] "tissuesourcesitelabelChristianaHealthcare" [49] "tissuesourcesitelabelClevelandClinic" [50] "tissuesourcesitelabelClevelandClinicFoundation" [51] "tissuesourcesitelabelColumbiaUniversity" [52] "tissuesourcesitelabelCornellMedicalCollege" [53] "tissuesourcesitelabelCureline" [54] "tissuesourcesitelabelDeptofNeurosurgeryatUniversityofHeidelberg" [55] "tissuesourcesitelabelDuke" [56] "tissuesourcesitelabelDukeUniversity" [57] "tissuesourcesitelabelEmoryUniversity" [58] "tissuesourcesitelabelEmoryUniversityWinshipCancerInst" [59] "tissuesourcesitelabelErasmusMC" [60] "tissuesourcesitelabelEssen" [61] "tissuesourcesitelabelFondazioneBesta" [62] "tissuesourcesitelabelFoxChase" [63] "tissuesourcesitelabelFoxChaseCancerCenter" [64] "tissuesourcesitelabelFundacioClinicperalaRecercaBiomedica" [65] "tissuesourcesitelabelGarvanInstituteofMedicalResearch" [66] "tissuesourcesitelabelGlobalBioClinicalGeorgia" [67] "tissuesourcesitelabelGlobalBioclinicalMoldova" [68] "tissuesourcesitelabelGreaterPolandCancerCenter" [69] "tissuesourcesitelabelGreenvilleHealthSystem" [70] "tissuesourcesitelabelGreenvilleHealthSystems" [71] "tissuesourcesitelabelGundersenLutheran" [72] 
"tissuesourcesitelabelGundersenLutheranHealthSystem" [73] "tissuesourcesitelabelGynecologicOncologyGroup" [74] "tissuesourcesitelabelHartford" [75] "tissuesourcesitelabelHartfordHospital" [76] "tissuesourcesitelabelHarvard" [77] "tissuesourcesitelabelHarvardBethIsrael" [78] "tissuesourcesitelabelHenryFordHospital" [79] "tissuesourcesitelabelHolyCross" [80] "tissuesourcesitelabelHospitalLouisPradel" [81] "tissuesourcesitelabelHuntsmanCancerInstitute" [82] "tissuesourcesitelabelIDIIRCCS" [83] "tissuesourcesitelabelILSbio" [84] "tissuesourcesitelabelILSBIO" [85] "tissuesourcesitelabelImperialCollege" [86] "tissuesourcesitelabelIndivumed" [87] "tissuesourcesitelabelInstitutCurie" [88] "tissuesourcesitelabelInstituteforMedicalResearch" [89] "tissuesourcesitelabelInstituteofHumanVirologyNigeria" [90] "tissuesourcesitelabelInternationalGenomicsConosrtium" [91] "tissuesourcesitelabelInternationalGenomicsConsortium" [92] "tissuesourcesitelabelJohnWayneCancerCenter" [93] "tissuesourcesitelabelJohnsHopkins" [94] "tissuesourcesitelabelMaineMedicalCenter" [95] "tissuesourcesitelabelMaryBirdPerkinsCancerCenterOurLadyoftheLake" [96] "tissuesourcesitelabelMayo" [97] "tissuesourcesitelabelMayoClinic" [98] "tissuesourcesitelabelMayoClinicRochester" [99] "tissuesourcesitelabelMayoClinicArizona" [100] "tissuesourcesitelabelMayoClinicRochester" [101] "tissuesourcesitelabelMDAnderson" [102] "tissuesourcesitelabelMDAndersonCancerCenter" [103] "tissuesourcesitelabelMedicalCollegeofGeorgia" [104] "tissuesourcesitelabelMedicalCollegeofWisconsin" [105] "tissuesourcesitelabelMelbourneHealth" [106] "tissuesourcesitelabelMemorialSloanKettering" [107] "tissuesourcesitelabelMemorialSloanKetteringCancerCenter" [108] "tissuesourcesitelabelMichiganUniversity" [109] "tissuesourcesitelabelMilanItalyFondazioneIRCCSInstitutoNeuroligicoCBesta" [110] "tissuesourcesitelabelMoffittCancerCenter" [111] "tissuesourcesitelabelMolecularResponse" [112] "tissuesourcesitelabelMontefioreMedicalCenter" [113] 
"tissuesourcesitelabelMountSinaiSchoolofMedicine" [114] "tissuesourcesitelabelMSKCC" [115] "tissuesourcesitelabelNationalCancerCenterKorea" [116] "tissuesourcesitelabelNationalInstitutesofHealth" [117] "tissuesourcesitelabelNCIHREBranch" [118] "tissuesourcesitelabelNCIUrologicOncologyBranch" [119] "tissuesourcesitelabelNorthwesternUniversity" [120] "tissuesourcesitelabelNotavailable" [121] "tissuesourcesitelabelOhioStateUniversity" [122] "tissuesourcesitelabelOntarioInstituteforCancerResearch" [123] "tissuesourcesitelabelOntarioInstituteforCancerResearchOICR" [124] "tissuesourcesitelabelPapworthHospital" [125] "tissuesourcesitelabelPeterMacCallumCancerCenter" [126] "tissuesourcesitelabelPrinceCharlesHospital" [127] "tissuesourcesitelabelPrincessMargaretHospitalCanada" [128] "tissuesourcesitelabelPROCUREBiobank" [129] "tissuesourcesitelabelProteogenexInc" [130] "tissuesourcesitelabelProvidencePortlandMedicalCenter" [131] "tissuesourcesitelabelReginaElenaNationalCancerInstitute" [132] "tissuesourcesitelabelRoswell" [133] "tissuesourcesitelabelRoswellPark" [134] "tissuesourcesitelabelSaintMarysHealthCare" [135] "tissuesourcesitelabelSapienzaUniversityofRome" [136] "tissuesourcesitelabelSingHealth" [137] "tissuesourcesitelabelSpectrumHealth" [138] "tissuesourcesitelabelStJosephsMedicalCenterMD" [139] "tissuesourcesitelabelStJosephsHospitalAZ" [140] "tissuesourcesitelabelStJosephsHospitalArizona" [141] "tissuesourcesitelabelStUniversityofColoradoDenver" [142] "tissuesourcesitelabelStanfordUniversity" [143] "tissuesourcesitelabelTaysideTissueBank" [144] "tissuesourcesitelabelTechnicalUniversityofMunich" [145] "tissuesourcesitelabelTheOhioStateUniversity" [146] "tissuesourcesitelabelTheUniversityofNewSouthWales" [147] "tissuesourcesitelabelThomasJeffersonUniversity" [148] "tissuesourcesitelabelThoraxklinik" [149] "tissuesourcesitelabelThoraxklinikatUniversityHospitalHeidelberg" [150] "tissuesourcesitelabelTorontoWesternHospital" [151] 
"tissuesourcesitelabelTranslationalGenomicsResearchInstitute" [152] "tissuesourcesitelabelTuftsMedicalCenter" [153] "tissuesourcesitelabelUCSF" [154] "tissuesourcesitelabelUNC" [155] "tissuesourcesitelabelUniversityHealthNetwork" [156] "tissuesourcesitelabelUniversityHealthNetworkToronto" [157] "tissuesourcesitelabelUniversityHospitalErlangen" [158] "tissuesourcesitelabelUniversityHospitalMotol" [159] "tissuesourcesitelabelUniversityMedicalCenterHamburgEppendorf" [160] "tissuesourcesitelabelUniversityofAlabama" [161] "tissuesourcesitelabelUniversityofArizona" [162] "tissuesourcesitelabelUniversityofCaliforniaSanFrancisco" [163] "tissuesourcesitelabelUniversityofCaliforniaDavis" [164] "tissuesourcesitelabelUniversityofChicago" [165] "tissuesourcesitelabelUniversityofColoradoDenver" [166] "tissuesourcesitelabelUniversityofFlorida" [167] "tissuesourcesitelabelUniversityofHawaii" [168] "tissuesourcesitelabelUniversityofIowa" [169] "tissuesourcesitelabelUniversityofKansas" [170] "tissuesourcesitelabelUniversityofKansasMedicalCenter" [171] "tissuesourcesitelabelUniversityofLiverpool" [172] "tissuesourcesitelabelUniversityofMiami" [173] "tissuesourcesitelabelUniversityofMichigan" [174] "tissuesourcesitelabelUniversityOfMichigan" [175] "tissuesourcesitelabelUniversityofMinnesota" [176] "tissuesourcesitelabelUniversityofNebraskaMedicalCenterUNMC" [177] "tissuesourcesitelabelUniversityofNewMexico" [178] "tissuesourcesitelabelUniversityofNorthCarolina" [179] "tissuesourcesitelabelUniversityofOklahomaHSC" [180] "tissuesourcesitelabelUniversityofPennsylvania" [181] "tissuesourcesitelabelUniversityofPittsburgh" [182] "tissuesourcesitelabelUniversityofPuertoRico" [183] "tissuesourcesitelabelUniversityofSaoPaulo" [184] "tissuesourcesitelabelUniversityofSchleswigHolstein" [185] "tissuesourcesitelabelUniversityofSheffield" [186] "tissuesourcesitelabelUniversityofSouthernCalifornia" [187] "tissuesourcesitelabelUniversityofSydney" [188] 
"tissuesourcesitelabelUniversityofTexasMDAndersonCancerCenter" [189] "tissuesourcesitelabelUniversityofUlm" [190] "tissuesourcesitelabelUniversityofUtah" [191] "tissuesourcesitelabelUniversityofWashington" [192] "tissuesourcesitelabelValleyHospital" [193] "tissuesourcesitelabelVanderbilt" [194] "tissuesourcesitelabelVanderbiltUniversity" [195] "tissuesourcesitelabelWakeForestUniversity" [196] "tissuesourcesitelabelWalterReed" [197] "tissuesourcesitelabelWashingtonUniversity" [198] "tissuesourcesitelabelWashingtonUniversityAlabama" [199] "tissuesourcesitelabelWashingtonUniversityCALGB" [200] "tissuesourcesitelabelWashingtonUniversityCHUV" [201] "tissuesourcesitelabelWashingtonUniversityClevelandClinic" [202] "tissuesourcesitelabelWashingtonUniversityEmory" [203] "tissuesourcesitelabelWashingtonUniversityMayoClinic" [204] "tissuesourcesitelabelWashingtonUniversityNYU" [205] "tissuesourcesitelabelWashingtonUniversityRushUniversity" [206] "tissuesourcesitelabelWashingtonUniversityStLouis" [207] "tissuesourcesitelabelWashingtonUniversityStLouis" [208] "tissuesourcesitelabelWillsEyeInstitute" [209] "tissuesourcesitelabelYale" [210] "tissuesourcesitelabelYaleUniversity" [211] "portionisffpeYES" [1] "Now normalizing data..." [1] "Now applying voom on normalized data..." Coefficients not estimable: platformIlluminaMiSeq tissuesourcesitelabelBLNBaylor tissuesourcesitelabelBLNUTSouthwesternMedicalCenteratDallas tissuesourcesitelabelBostonMedicalCenter tissuesourcesitelabelInstituteforMedicalResearch tissuesourcesitelabelJohnWayneCancerCenter tissuesourcesitelabelMaineMedicalCenter tissuesourcesitelabelUniversityofArizona tissuesourcesitelabelUniversityOfMichigan tissuesourcesitelabelValleyHospital tissuesourcesitelabelWakeForestUniversity
%%R
# Report the voom expression matrices of both splits as (samples, taxa).
# $E is stored the other way round, so the dim vector is reversed instead of
# transposing the whole matrix just to read its shape.
print(rev(dim(vdgeS1$E)))
print(rev(dim(vdgeS2$E)))
[1] 8814 1764 [1] 8811 1767
%%R
# Restrict both splits to the taxa present in both voom matrices (the recorded
# output shows the two matrices do not have the same number of taxa).
# Taxa are the rows of $E, so intersect the row names directly.
taxaIntersect <- intersect(rownames(vdgeS1$E), rownames(vdgeS2$E))
print(length(taxaIntersect))

# Subset the shared taxa first, then transpose once to samples x taxa.
# drop = FALSE keeps a proper matrix even if only a single taxon is shared;
# the previous t(E)[, taxaIntersect] form would collapse to a bare vector.
vdgeS1Filt <- t(vdgeS1$E[taxaIntersect, , drop = FALSE])
vdgeS2Filt <- t(vdgeS2$E[taxaIntersect, , drop = FALSE])
print(dim(vdgeS1Filt))
print(dim(vdgeS2Filt))
[1] 1737 [1] 8814 1737 [1] 8811 1737
%%R
# Design matrices for supervised normalization of split 1.
# Biological variable of interest: sample type. (Earlier note from the author:
# histological_diagnosis_label and disease_type were tried but cause the
# function to fail.)
bio.var.sample.typeS1 <- model.matrix(~ sample_type, data = qcMetadataS1)

# Technical covariates, later passed to snm() as adj.var.
adj.varS1 <- model.matrix(
  reformulate(c(
    "data_submitting_center_label",
    "platform",
    "experimental_strategy",
    "tissue_source_site_label",
    "portion_is_ffpe"
  )),
  data = qcMetadataS1
)

# Strip punctuation and whitespace from the column names.
# NOTE(review): this substitution can map distinct labels onto one name
# (e.g. "BLN - Baylor" and "BLN Baylor"), so the names may not be unique.
colnames(bio.var.sample.typeS1) <- gsub('([[:punct:]])|\\s+', '', colnames(bio.var.sample.typeS1))
colnames(adj.varS1) <- gsub('([[:punct:]])|\\s+', '', colnames(adj.varS1))

# Row counts of all four objects should agree (one row per sample).
print(dim(adj.varS1))
print(dim(bio.var.sample.typeS1))
print(dim(vdgeS1Filt))
print(dim(covDesignNormS1))
[1] 8814 205 [1] 8814 7 [1] 8814 1737 [1] 8814 211
%%R
# Design matrices for supervised normalization of split 2.
# Biological variable of interest: sample type. (Earlier note from the author:
# histological_diagnosis_label and disease_type were tried but cause the
# function to fail.)
bio.var.sample.typeS2 <- model.matrix(~ sample_type, data = qcMetadataS2)

# Technical covariates, later passed to snm() as adj.var.
adj.varS2 <- model.matrix(
  reformulate(c(
    "data_submitting_center_label",
    "platform",
    "experimental_strategy",
    "tissue_source_site_label",
    "portion_is_ffpe"
  )),
  data = qcMetadataS2
)

# Strip punctuation and whitespace from the column names.
# NOTE(review): this substitution can map distinct labels onto one name
# (e.g. "BLN - Baylor" and "BLN Baylor"), so the names may not be unique.
colnames(bio.var.sample.typeS2) <- gsub('([[:punct:]])|\\s+', '', colnames(bio.var.sample.typeS2))
colnames(adj.varS2) <- gsub('([[:punct:]])|\\s+', '', colnames(adj.varS2))

# Row counts of all four objects should agree (one row per sample).
print(dim(adj.varS2))
print(dim(bio.var.sample.typeS2))
print(dim(vdgeS2Filt))
print(dim(covDesignNormS2))
[1] 8811 205 [1] 8811 7 [1] 8811 1737 [1] 8811 211
%%R
# Supervised normalization of split 1: sample type is the biological variable,
# the technical covariates in adj.varS1 are fitted and removed (rm.adj = TRUE).
# vdgeS1Filt is samples x taxa, so it is transposed before being handed to snm().
snmDataObjSampleTypeWithExpStrategyFAS1 <- snm(
  raw.dat = t(vdgeS1Filt),
  bio.var = bio.var.sample.typeS1,
  adj.var = adj.varS1,
  rm.adj = TRUE,
  verbose = TRUE,
  diagnose = TRUE
)

# Transpose the normalized data back to samples x taxa and report its shape.
snmDataSampleTypeWithExpStrategyFAS1 <- t(snmDataObjSampleTypeWithExpStrategyFAS1[["norm.dat"]])
print(dim(snmDataSampleTypeWithExpStrategyFAS1))
[1] 8814 1737
%%R
# Supervised normalization of split 2: sample type is the biological variable,
# the technical covariates in adj.varS2 are fitted and removed (rm.adj = TRUE).
# vdgeS2Filt is samples x taxa, so it is transposed before being handed to snm().
snmDataObjSampleTypeWithExpStrategyFAS2 <- snm(
  raw.dat = t(vdgeS2Filt),
  bio.var = bio.var.sample.typeS2,
  adj.var = adj.varS2,
  rm.adj = TRUE,
  verbose = TRUE,
  diagnose = TRUE
)

# Transpose the normalized data back to samples x taxa and report its shape.
snmDataSampleTypeWithExpStrategyFAS2 <- t(snmDataObjSampleTypeWithExpStrategyFAS2[["norm.dat"]])
print(dim(snmDataSampleTypeWithExpStrategyFAS2))
[1] 8811 1737
%%R
# Persist the SNM-normalized matrices of both splits together with their
# metadata tables in a single .RData file.
save(
  list = c(
    "snmDataSampleTypeWithExpStrategyFAS1",
    "snmDataSampleTypeWithExpStrategyFAS2",
    "split1MetadataQC",
    "split2MetadataQC"
  ),
  file = "validationSplits_snmDataSampleTypeWithExpStrategyFAS1S2.RData"
)
%%R
#' Draw a PCA biplot with observations coloured by a grouping factor.
#'
#' @param pcaObject PCA fit handed to ggbiplot() (callers in this notebook
#'   pass the result of prcomp()).
#' @param pcChoices Principal components to plot, e.g. c(1, 2).
#' @param dataLabels Grouping vector, forwarded to ggbiplot() as `groups`.
#' @param factorString Title of the colour legend.
#' @param titleString Plot title.
#' @return The ggplot object, invisibly. The plot is also printed as a side
#'   effect, and the session-wide default theme is updated to centre titles.
pcaPlotting <- function(pcaObject, pcChoices, dataLabels, factorString, titleString) {
  # library() instead of require(): stop right here with a clear message if
  # ggbiplot is not installed, rather than failing later on an unknown function.
  library(ggbiplot)

  # Kept from the original: changes the default theme for the whole session.
  theme_update(plot.title = element_text(hjust = 0.5))

  g <- ggbiplot(
    pcaObject,
    choices = pcChoices,
    obs.scale = 1,
    var.scale = 1,
    groups = dataLabels,
    ellipse = TRUE,
    alpha = 0.2,
    circle = TRUE,
    var.axes = FALSE
  ) +
    scale_color_discrete(name = factorString) +
    theme_bw() +
    # Disabled alternative legend placement from the original:
    # theme(legend.direction = "horizontal", legend.position = "top") +
    ggtitle(titleString) +
    theme(plot.title = element_text(hjust = 0.5))

  print(g)

  # Return the plot object explicitly. Relying on print()'s return value hands
  # back the built plot data on ggplot2 < 3.0.0, which ggsave() cannot draw.
  invisible(g)
}
%%R
# PCA of the voom-normalized data before SNM correction, coloured by
# data-submitting center. $E is transposed so that prcomp() sees samples as rows.
# NOTE(review): `vdge` and `qcMetadata` are not assigned in the visible part of
# this notebook (only the S1/S2 variants are); presumably left over from the
# full-dataset analysis. Confirm they exist before re-running this cell.
unnormalizedPCAPlotFA <- pcaPlotting(pcaObject = prcomp(t(vdge$E)),
pcChoices = c(1,2),
dataLabels = qcMetadata$data_submitting_center_label,
factorString = "Batch",
titleString = "PCA w/o Batch Correction")
%%R
# PCA of the SNM-corrected data (target: sample type), coloured by
# data-submitting center.
# NOTE(review): `snmDataSampleTypeWithExpStrategyFA` and `qcMetadata` are not
# assigned in the visible part of this notebook (only the S1/S2 variants are);
# presumably left over from the full-dataset analysis. Confirm before re-running.
snmPCAPlotSampleTypeFA <- pcaPlotting(pcaObject = prcomp(snmDataSampleTypeWithExpStrategyFA),
pcChoices = c(1,2),
dataLabels = qcMetadata$data_submitting_center_label,
factorString = "Sequencing Center",
titleString = "PCA w/ SNM Correction\n(Target: Sample Type)")
# %%R
# snmPCAPlotGender <- pcaPlotting(pcaObject = prcomp(snmDataGenderWithAML),
# pcChoices = c(1,2),
# dataLabels = qcMetadata$data_submitting_center_label,
# factorString = "Sequencing Center",
# titleString = "PCA w/ SNM Correction\n(Target: Gender)")
%%R
# Write both PCA figures to PNG with identical geometry (16.2 x 5.29 in) at
# ggsave's "retina" resolution.
ggsave(
  filename = "unnormalizedPCAPlotFA_DecreasedOpacity.png",
  plot = unnormalizedPCAPlotFA,
  width = 16.2, height = 5.29, units = "in",
  dpi = "retina"
)
ggsave(
  filename = "snmPCAPlotSampleTypeFA_DecreasedOpacity.png",
  plot = snmPCAPlotSampleTypeFA,
  width = 16.2, height = 5.29, units = "in",
  dpi = "retina"
)
# save(snmDataGenderWithAML, metadataSamplesAllQCAML,
# vbDataBarnDFReconciledQCAML,
# file = "amlVbDataAndMetadataAndSNMByGender.RData")
# %%R
# snmDataObjGenderWithAML <- snm(raw.dat = vdge$E,
# bio.var = bio.var.gender,
# adj.var = adj.var,
# rm.adj=TRUE,
# verbose = TRUE,
# diagnose = TRUE)
# snmDataGenderWithAML <- t(snmDataObjGenderWithAML$norm.dat)
# print(dim(snmDataGenderWithAML))
[1] 17975 1802
%%R
# PVCA inputs.
# Relative to the extended model, variables that contribute very little, if at
# all, are left out: ethnicity, gender, reference_genome.
# NOTE(review): `metadataSamplesAllQC`, `snmDataSampleTypeWithExpStrategy` and
# `vbDataBarnDFReconciledQC` are not assigned in the visible part of this
# notebook; presumably they come from the full-dataset analysis. Confirm.
pct_threshold <- 0.8

metaPVCAExtendedFiltered <- metadataSamplesAllQC[
  ,
  c(
    "sample_type",
    "disease_type",
    "data_submitting_center_label",
    "platform",
    "experimental_strategy",
    "tissue_source_site_label",
    "portion_is_ffpe"
  )
]

# The three objects should have the same number of rows (samples).
print(dim(metaPVCAExtendedFiltered))
print(dim(snmDataSampleTypeWithExpStrategy))
print(dim(vbDataBarnDFReconciledQC))
[1] 17625 7 [1] 17625 1795 [1] 17625 1993
%%R
# PVCA on the raw count data (no voom, no SNM); the count table is transposed
# before being passed in. The result is saved to disk, then plotted.
# NOTE(review): PVCA() and PlotPVCA() are not defined in the visible part of
# this notebook; presumably they are helper functions sourced elsewhere.
pvcaVbRawNoVoomNoSNM_ExtendedFiltered_FA <- PVCA(
  counts = t(vbDataBarnDFReconciledQC),
  meta = metaPVCAExtendedFiltered,
  threshold = pct_threshold,
  inter = FALSE
)
save(
  list = "pvcaVbRawNoVoomNoSNM_ExtendedFiltered_FA",
  file = "pvcaVbRawNoVoomNoSNM_ExtendedFiltered_FA.RData"
)
PlotPVCA(pvcaVbRawNoVoomNoSNM_ExtendedFiltered_FA, "Raw count data")
%%R
# PVCA on the voom-normalized data (before SNM). The result is saved to disk,
# then plotted.
# NOTE(review): `vdge`, PVCA() and PlotPVCA() are not defined in the visible
# part of this notebook; confirm they are available before re-running.
pvcaVoomNoSNM_ExtendedFiltered_FA <- PVCA(
  counts = vdge$E,
  meta = metaPVCAExtendedFiltered,
  threshold = pct_threshold,
  inter = FALSE
)
save(
  list = "pvcaVoomNoSNM_ExtendedFiltered_FA",
  file = "pvcaVoomNoSNM_ExtendedFiltered_FA.RData"
)
PlotPVCA(pvcaVoomNoSNM_ExtendedFiltered_FA, "Voom Normalized")
%%R
# PVCA on the voom-normalized, SNM-corrected data (target: sample type). The
# corrected matrix is transposed before being passed in. The result is saved to
# disk, then plotted.
# NOTE(review): `snmDataSampleTypeWithExpStrategyFA`, PVCA() and PlotPVCA() are
# not defined in the visible part of this notebook; confirm before re-running.
pvcaSampleWithExpStrategySNM_ExtendedFiltered_FA <- PVCA(
  counts = t(snmDataSampleTypeWithExpStrategyFA),
  meta = metaPVCAExtendedFiltered,
  threshold = pct_threshold,
  inter = FALSE
)
# File name corrected from "pvcnoa..." to "pvca..." so that it matches the
# object name and the naming of the other two PVCA result files.
save(pvcaSampleWithExpStrategySNM_ExtendedFiltered_FA,
  file = "pvcaSampleWithExpStrategySNM_ExtendedFiltered_FA.RData"
)
PlotPVCA(
  pvcaSampleWithExpStrategySNM_ExtendedFiltered_FA,
  "Voom Normalized & SNM Corrected Plus Exp Strategy (Target is Sample Type)"
)
%%R
# Scratch expression; evaluates to 3 (presumably a check that the R session responds).
1 + 2
[1] 3
%%R
# library() instead of require(): stop immediately if a package is missing
# rather than failing later on an unknown function.
library(ggplot2)
library(matrixStats)

# Element-wise ratio of SNM-normalized to raw values (samples x taxa).
# NOTE(review): `snmDataSampleType`, `snmDataObjSampleType` and
# `metadataSamplesAllQC` are not assigned in the visible part of this notebook;
# presumably they come from the full-dataset analysis. Confirm before re-running.
divSNMDataSampleType <- snmDataSampleType / t(snmDataObjSampleType$raw.dat)

# Median ratio per taxon, flagged by whether the snm p-value is <= 0.05.
taxaMedians <- data.frame(
  Medians = colMedians(divSNMDataSampleType),
  Taxa = colnames(divSNMDataSampleType),
  pval = factor(ifelse(snmDataObjSampleType$pval <= 0.05,
    yes = "P-value <= 0.05", no = "P-value > 0.05"
  ))
)

# Median ratio per sample, annotated with sequencing center, sample type and
# cancer type.
sampleMedians <- data.frame(
  Medians = rowMedians(divSNMDataSampleType),
  Samples = rownames(divSNMDataSampleType),
  SeqCenter = metadataSamplesAllQC$data_submitting_center_label,
  SampleType = metadataSamplesAllQC$sample_type,
  CancerType = metadataSamplesAllQC$disease_type
)

# Per-taxon bar chart, taxa ordered by decreasing median. The x axis shows
# taxa, so it is labelled "Taxa" (it was mislabelled "Samples"); the axis title
# itself is blanked by the theme() call.
gt <- ggplot(taxaMedians, aes(x = reorder(Taxa, -Medians), y = Medians, fill = pval)) +
  geom_bar(stat = "identity") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  labs(y = "Median of Normalizing Ratios Per Taxa", x = "Taxa", fill = "ANOVA Result Per Taxa")

# Per-sample horizontal bar chart on a log10 scale, filled by cancer type.
gs <- ggplot(sampleMedians, aes(x = reorder(Samples, -Medians), y = Medians, fill = CancerType)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
  scale_y_log10() +
  labs(y = "Median of Normalizing Ratios Per Sample", x = "Samples", fill = "Cancer Type")
%%R
# Display the per-taxa plot: a bare object name as the cell's expression
# relies on auto-printing to render it.
gt
%%R
# Write the per-taxa median plot to PNG (8.5 x 6 in) at ggsave's "retina"
# resolution.
ggsave(
  filename = "snmNormMedianPerTaxaPval.png",
  plot = gt,
  width = 8.5, height = 6, units = "in",
  dpi = "retina"
)
%%R
require(pheatmap)
# Heatmap of the fitted coefficients, clustering rows and columns by
# correlation distance; labels are hidden and the figure is written to a PNG.
# NOTE(review): `snmDataSampleTypeLMFit` is not assigned in the visible part of
# this notebook; confirm it exists before re-running this cell.
pheatmap(
  snmDataSampleTypeLMFit$coefficients,
  filename = "snmLMFitCoefCorr.png",
  show_rownames = FALSE,
  show_colnames = FALSE,
  clustering_distance_rows = "correlation",
  clustering_distance_cols = "correlation"
)
# %%R
# save(snmDataObjPathStage, snmDataPathStage, metadataSamplesAllQCPath, file = "snmResultsPathBinned.RData")