Download a copy of a generic cacert.pem (a CA certificate bundle) and set PATH_TO_CACERT below to its location.
# Local path to the CA certificate bundle that is passed to `curl --cacert`
# when files are fetched over HTTPS; change this to point at your own copy.
PATH_TO_CACERT = '/cellar/users/agross/cacert.pem'
# IPython automagic `cd`: switch the working directory to the source tree
# (presumably so that `Processing.Imports` is importable -- TODO confirm).
cd ../src
/cellar/users/agross/TCGA_Code/TCGA/src
from Processing.Imports import *
from IPython.display import clear_output
line_width has been deprecated, use display.width instead (currently both are identical)
# ---------------------------------------------------------------------------
# Run configuration: where data lives, which Firehose run to load, and where
# figures are written.
# ---------------------------------------------------------------------------
OUT_PATH = '../Data'
RUN_DATE = '2014_01_15'
VERSION = 'all'
CANCER = 'HNSC'
FIGDIR = '../Figures/'

# Make sure the figure directory exists before anything tries to write to it.
if not os.path.isdir(FIGDIR):
    os.makedirs(FIGDIR)

# Locate and load the run for the configured date and version.
run_path = '{}/Firehose__{}/'.format(OUT_PATH, RUN_DATE)
run_name = 'Run_' + VERSION
run = get_run(run_path, run_name)

# Remote base URL of the per-tumor file tree on the TCGA server.
path = 'https://tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/'

# Local directory that receives the downloaded follow-up files.
out = OUT_PATH + '/Followup_R7'
if not os.path.isdir(out):
    os.makedirs(out)
# Display the cancer-type codes available in this run; the download loop
# that follows iterates over exactly this array.
run.cancers
array(['ACC', 'BLCA', 'BRCA', 'CESC', 'COAD', 'DLBC', 'ESCA', 'GBM', 'HNSC', 'KICH', 'KIRC', 'KIRP', 'LAML', 'LGG', 'LIHC', 'LUAD', 'LUSC', 'OV', 'PAAD', 'PRAD', 'READ', 'SARC', 'SKCM', 'STAD', 'THCA', 'UCEC'], dtype=object)
for cancer in run.cancers:
print cancer
try:
f = '{}{}/bcr/biotab/clin/'.format(path, cancer.lower())
files = pd.read_table(f + 'MANIFEST.txt', sep=' ', header=None)
if not os.path.isdir(out + '/' + cancer):
os.makedirs(out + '/' + cancer)
for g in files[1]:
p = f + g
o = out + '/' + cancer + '/' + g
!curl --cacert $PATH_TO_CACERT $p > $o
except:
print 'FAIL: Make sure path to cacert.pem is set!'
clear_output()
# Strip the leading "<token>_" from every downloaded file whose name contains
# 'nationwidechildrens', i.e. drop everything up to and including the first
# underscore (a name like 'a_b_c.txt' becomes 'b_c.txt').
#
# Differences from the naive version:
#   * dedicated loop names are used, so the module-level `path` (the remote
#     base URL used by the download loop) is no longer overwritten and the
#     outer loop variable is no longer shadowed by the inner one;
#   * entries of `out` that are not directories are skipped instead of making
#     os.listdir() raise;
#   * a matching name without any underscore is left alone, because the
#     stripped name would be empty.
for cancer_name in os.listdir(out):
    cancer_dir = os.path.join(out, cancer_name)
    if not os.path.isdir(cancer_dir):
        continue
    for fname in os.listdir(cancer_dir):
        if 'nationwidechildrens' not in fname:
            continue
        new_name = '_'.join(fname.split('_')[1:])
        if not new_name:
            continue
        os.rename(os.path.join(cancer_dir, fname),
                  os.path.join(cancer_dir, new_name))