#import pandas as pd
#pwd
#!ls -alth
#gdc_manifest.2018-07-11.txt
total 551M drwxr-xr-x 3 btsui users 49 Jul 11 19:15 . -rw-r--r-- 1 btsui users 22K Jul 11 19:15 MaskingGenomeWithSnp_human.ipynb drwxr-xr-x 2 btsui users 12 Jul 11 19:15 .ipynb_checkpoints -rw-r--r-- 1 btsui users 909K Jul 11 15:34 gdc_manifest.2018-07-11.txt -rw-r--r-- 1 btsui users 12K Jul 11 15:32 downloadTCGA_LGG.ipynb drwxr-xr-x 19 btsui users 24 Jul 8 09:29 .. -rw-r--r-- 1 btsui users 20K Jul 4 16:19 old_BuildReferenceWithMicrobes.ipynb -rw-r--r-- 1 btsui users 7.5K Jul 4 16:13 MergeViralAndBacterial.ipynb -rw-r--r-- 1 btsui users 50K Jul 4 14:41 MaskingGenomeWithSnp_Specie.ipynb -rw-r--r-- 1 btsui users 64K Jul 4 14:25 ParseBamReadCount_base_case.ipynb -rw-r--r-- 1 btsui users 29 Mar 2 16:19 tmp.out.100.bed -rw-r--r-- 1 btsui users 0 Mar 2 16:19 unMapped -rw-r--r-- 1 btsui users 31 Mar 2 16:19 tmp.bed -rw-r--r-- 1 btsui users 22K Mar 2 14:56 tmp.out.1.bed -rw-r--r-- 1 btsui users 23K Mar 2 14:55 tmp.out.bed prw-r--r-- 1 btsui users 0 Mar 2 10:50 pipe -rw-r--r-- 1 btsui users 780K Mar 2 10:49 complement.txt -rw-r--r-- 1 btsui users 266 Mar 2 10:49 genome -rw-r--r-- 1 btsui users 3.3M Mar 2 10:49 extracting_region.bed -rw-r--r-- 1 btsui users 2.5M Mar 1 11:25 10000.pickle.gz -rw-r--r-- 1 btsui users 2.4M Mar 1 11:25 0.pickle.gz -rw-r--r-- 1 btsui users 6.8M Jan 28 18:33 Pos_block_140700000 -rw-r--r-- 1 btsui users 5.2M Jan 28 18:28 Pos_block_231700000 -rw-r--r-- 1 btsui users 1.1K Jan 26 08:57 test.h5 -rw-r--r-- 1 btsui users 36K Jan 24 16:47 Untitled.ipynb -rw-r--r-- 1 btsui users 19K Jan 24 15:13 testOne.ipynb -rw-r--r-- 1 btsui users 128K Jan 24 15:11 TCGA_compare.alternative_allele.png -rw-r--r-- 1 btsui users 18K Jan 24 15:11 TCGA_compare.alternative_allele.pdf -rw-r--r-- 1 btsui users 124K Jan 24 15:10 TCGA_compare.png -rw-r--r-- 1 btsui users 19K Jan 24 15:10 TCGA_compare.pdf -rw-r--r-- 1 btsui users 4.5M Jan 24 14:11 tmp.tcga.txt.gz -rw-r--r-- 1 btsui users 163M Jan 3 2018 0.h5 -rw-r--r-- 1 btsui users 85M Jan 3 2018 1000.pickle.gz -rw-r--r-- 1 btsui 
users 6.5K Jan 2 2018 GenerateEmptyPicklesForEachSpecies.ipynb -rw-r--r-- 1 btsui users 385K Jan 2 2018 tmp2.pickle.gz -rw-r--r-- 1 btsui users 400K Jan 2 2018 tmp.pickle.gz -rw-r--r-- 1 btsui users 2.3M Jan 2 2018 SRR349840.txt.snp.gz -rw-r--r-- 1 btsui users 188K Jan 2 2018 SRR349840_per_fa_record_stat.txt.gz -rw-r--r-- 1 btsui users 81M Dec 30 2017 microbe.fa.gz -rw-r--r-- 1 btsui users 86M Dec 30 2017 Homo_sapiens.fa.gz -rw-r--r-- 1 btsui users 25 Dec 30 2017 Homo_sapiens.GRCh38.dna_rm.toplevel.fa.gz -rw-r--r-- 1 btsui users 302 Dec 30 2017 grch38.genome -rw-r--r-- 1 btsui users 221M Dec 30 2017 Homo_sapiens.GRCh38.dna_rm.toplevel.SNP_masked.fa.gz -rw-r--r-- 1 btsui users 7.9M Dec 30 2017 test.bed -rw-r--r-- 1 btsui users 53M Dec 29 2017 microbe.fa -rw-r--r-- 1 btsui users 5.2K Dec 28 2017 single_snp.py -rw-r--r-- 1 btsui users 0 Dec 26 2017 untitled.txt -rw-r--r-- 1 btsui users 38K Dec 26 2017 gdc_manifest.2017-12-27T02_59_36.013442.txt -rw-r--r-- 1 btsui users 180K Dec 26 2017 gdc_manifest.2017-12-27T02_43_35.959399.txt
from tqdm import tqdm
import pandas as pd
import os
import subprocess
# Work from the notebook's project directory so the relative manifest path
# below resolves.
CWD = '/cellar/users/btsui/Project/METAMAP/notebook/RapMapTest/XGS_WGS/'
os.chdir(CWD)

# Captures the third dash-separated field of a "TCGA-<x>-<pid>..." string.
# Raw string: '\w' in a normal string literal is an invalid escape sequence.
PID_PATTERN = r'TCGA-\w+-(\w+)'

# GDC manifest, tab-separated; the code below reads its 'filename' column.
gdc_meta_df = pd.read_csv('gdc_manifest.2017-12-27T02_59_36.013442.txt', sep='\t')

# Manifest rows whose filename matches "TCGA-<x>-<y>-0...".
tcgaMetaDf = gdc_meta_df[gdc_meta_df.filename.str.contains(r'TCGA-\w+-\w+-0')]

# NOTE: read_pickle executes arbitrary code from the file; only load trusted
# project files here.
brain_tcga_mut_df = pd.read_pickle(
    '/cellar/users/btsui/Project/KangZhang/NB/clinicalCleaning/../interDataDir/tcga_mutation.pickle'
)

# NOTE(review): the Hugo_Symbol == 'IDH1' filter is disabled, so this frame
# holds every row with TSS == 'LGG', not only IDH1 mutations -- confirm that
# this is intended before trusting 'idh1_mutation_status'.
# .copy() makes this an independent frame, so adding 'pid' below does not
# write into a slice of brain_tcga_mut_df (SettingWithCopyWarning).
IDH1_mutated_df = brain_tcga_mut_df[brain_tcga_mut_df.TSS == 'LGG'].copy()

# expand=False returns a Series for the single capture group regardless of
# the pandas version's default for `expand`.
IDH1_mutated_df['pid'] = IDH1_mutated_df['patient'].str.extract(PID_PATTERN, expand=False)
gdc_meta_df['pid'] = gdc_meta_df.filename.str.extract(PID_PATTERN, expand=False)

# Flag manifest rows whose pid appears in the filtered mutation table, then
# sort so flagged rows come first.
gdc_meta_df['idh1_mutation_status'] = gdc_meta_df.pid.isin(IDH1_mutated_df.pid)
gdc_meta_df = gdc_meta_df.sort_values('idh1_mutation_status', ascending=False)
/cellar/users/btsui/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:20: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
# Display the (rows, columns) shape of gdc_meta_df.
gdc_meta_df.shape
(252, 7)
!ls /nrnb/users/btsui/Data/tcga_raw_lgg/02206442-a052-4c44-a4b8-1467493df2eb
TCGA-DB-5276-01A-01D-1465_130806_SN1440_0159_BC29Y3ACXX_s_5_rg.sorted.bam TCGA-DB-5276-01A-01D-1465_130806_SN1440_0159_BC29Y3ACXX_s_5_rg.sorted.bam.bai bcf9702a-8c53-4d3b-a9bf-06d4b8c12c72_analysis.xml bcf9702a-8c53-4d3b-a9bf-06d4b8c12c72_experiment.xml bcf9702a-8c53-4d3b-a9bf-06d4b8c12c72_run.xml logs
# Directory passed to gdc-client with -d.
out_dir = '/nrnb/users/btsui/Data/tcga_raw_lgg/'
# Token file passed to gdc-client with -t.
token_dir = '/cellar/users/ramarty/tokens/gdc-user-token.2018-06-25T22_21_40.089Z.txt'
# Human-readable form of the command, used only for logging below.
gdc_cmd_fmt = 'gdc-client download -t {token_dir} -d {out_dir} {file_uuid}'

# UUIDs whose download exited non-zero, so they can be retried afterwards.
failed_uuids = []

# Iterating the 'id' column (rather than iterrows) gives tqdm a known length,
# so the progress bar can show a total and an ETA.
for file_uuid in tqdm(gdc_meta_df['id']):
    gdc_cmd = gdc_cmd_fmt.format(out_dir=out_dir, file_uuid=file_uuid, token_dir=token_dir)
    # Run with an argument list instead of a shell string: no shell parsing of
    # paths/UUIDs, and `result` is the real exit code rather than the raw
    # wait status os.system returns (e.g. 2 instead of 512).
    # Raises FileNotFoundError if gdc-client is not on PATH.
    result = subprocess.run(
        ['gdc-client', 'download', '-t', token_dir, '-d', out_dir, str(file_uuid)]
    ).returncode
    print(gdc_cmd)
    print(result)
    if result != 0:
        failed_uuids.append(file_uuid)

# Summarise failures instead of leaving them buried in the per-file output.
if failed_uuids:
    print('{} of {} downloads failed: {}'.format(
        len(failed_uuids), len(gdc_meta_df), failed_uuids))
1it [00:03, 3.78s/it]
gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ 9aa689c9-1b3c-45f1-8178-3f0b11ca5a11 0
2it [00:06, 3.21s/it]
gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ e33fa79f-3d33-4f0b-81d9-705ab9c8a19c 512
3it [00:09, 3.01s/it]
gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ 336bd48f-3c20-4a2f-a2ef-38bb94e30e11 512
4it [00:11, 2.89s/it]
gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ 550d6316-2067-4f67-ba64-a8687f3320b1 512
5it [00:13, 2.76s/it]
gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ ab6bd849-b8ca-4864-b133-72a3aa82c923 2
6it [00:16, 2.73s/it]
gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ 02206442-a052-4c44-a4b8-1467493df2eb 512
#!ls /cellar/users/btsui/../hcarter/
#!gdc-client download -t /cellar/users/ramarty/tokens/gdc-user-token.2018-06-25T22_21_40.089Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ e33fa79f-3d33-4f0b-81d9-705ab9c8a19c
WARNING: Your token file '/cellar/users/ramarty/tokens/gdc-user-token.2018-06-25T22_21_40.089Z.txt' is not properly secured. Please secure your token file by ensuring that it is not readable or writeable by anyone other than the owner of the file. On Linux: chmod 600 /cellar/users/ramarty/tokens/gdc-user-token.2018-06-25T22_21_40.089Z.txt
6% [### ] ETA: 0:20:12 ] ETA: 0:23:18
!ls -l /nrnb/users/btsui/Data/tcga_raw/
total 3328 drwxr-xr-x 3 btsui users 131072 Dec 26 2017 02206442-a052-4c44-a4b8-1467493df2eb drwxr-xr-x 3 btsui users 131072 Jan 24 12:54 2b0048e0-a062-40d2-a1e1-4bb763ea0ead drwxr-xr-x 3 btsui users 131072 Dec 26 2017 336bd48f-3c20-4a2f-a2ef-38bb94e30e11 drwxr-xr-x 3 btsui users 131072 Dec 27 2017 52ae2dd2-f573-41c6-ad1a-18b19c9eea35 drwxr-xr-x 3 btsui users 131072 Dec 26 2017 550d6316-2067-4f67-ba64-a8687f3320b1 drwxr-xr-x 3 btsui users 512 Dec 27 2017 6f5b793c-9040-4fd7-8b32-2fe33bc8c7d2 drwxr-xr-x 3 btsui users 512 Dec 27 2017 781639a0-ea42-4e90-8e48-2dd0de69143f drwxr-xr-x 3 btsui users 131072 Dec 26 2017 8013ce94-6e62-4d7f-b834-fb13d709a080 drwxr-xr-x 3 btsui users 131072 Dec 26 2017 9aa689c9-1b3c-45f1-8178-3f0b11ca5a11 drwxr-xr-x 3 btsui users 512 Jan 4 17:27 a0e6878e-0862-4b54-897e-2a5a4fb7df86 drwxr-xr-x 3 btsui users 131072 Dec 26 2017 ab6bd849-b8ca-4864-b133-72a3aa82c923 drwxr-xr-x 3 btsui users 131072 Dec 26 2017 b404ac67-1c7f-4b01-8038-7432d3d6e489 drwxr-xr-x 3 btsui users 131072 Dec 26 2017 d987c181-1947-486c-957f-c5bd782a1eba drwxr-xr-x 3 btsui users 131072 Dec 27 2017 e33fa79f-3d33-4f0b-81d9-705ab9c8a19c drwxr-xr-x 3 btsui users 131072 Dec 26 2017 ed7bb33a-0d4e-4215-b4c5-2dc1250d882b drwxr-xr-x 3 btsui users 131072 Dec 26 2017 f441e949-5e27-4235-a0db-39c77aacdb5c
# Echo the most recently built gdc-client command line.
# Function-call form: the bare `print x` statement is a SyntaxError on Python 3.
print(gdc_cmd)
gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ 8b974222-8a7a-43ee-8a1b-20e511f2fe68
gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ f7c8fa51-dedd-47b0-82c2-0a87a4c7aad3
"""
out_dir='/nrnb/users/btsui/Data/Wei/'
gdc_meta_df.cases.iloc[0]
token_dir='/cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt'
gdc_cmd_fmt='gdc-client download -t {token_dir} -d {out_dir} {file_uuid}'
for _,rowS in gdc_meta_df.iterrows():
file_uuid=rowS.loc['file_id']
gdc_cmd=gdc_cmd_fmt.format(out_dir=out_dir,file_uuid=file_uuid,token_dir=token_dir)
os.system(gdc_cmd)
"""
!gdc-client --version
v1.2.0
#!head /nrnb/data/controlled/2017_TCGA_genotypes/TCGA_all.map