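Download a copy of a generic cacert.pem (a CA certificate bundle, e.g. the one published by the curl project) and set PATH_TO_CACERT below to the location where you saved it.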
# Location of the CA certificate bundle handed to curl via --cacert when the
# clinical files are downloaded. Machine-specific: change it to your own copy.
PATH_TO_CACERT = '/cellar/users/agross/cacert.pem'
# IPython `cd` magic: switch the working directory to ../src. The relative
# path '../global_params.txt' read further down is resolved from here.
# NOTE(review): presumably this also makes the `Processing` package
# importable -- confirm.
cd ../src
/cellar/users/agross/TCGA_Code/TCGA/src
from Processing.Imports import *
from IPython.display import clear_output
# Read the global settings file into a label-indexed Series: the first
# column becomes the index and the remaining column is squeezed into values.
params = pd.read_table(
    '../global_params.txt',
    header=None,
    squeeze=True,
    index_col=0,
)

# Locate the Firehose run directory and load the matching run object.
run_path = '{}/Firehose__{}/'.format(
    params.ix['OUT_PATH'],
    params.ix['RUN_DATE'],
)
run_name = 'Run_' + params.ix['VERSION']
run = get_run(run_path, run_name)

# Remote base URL of the per-tumor directories.
path = 'https://tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/'

# Local directory that receives the downloaded follow-up files; create it
# on first use.
out = params['OUT_PATH'] + '/Followup'
if not os.path.isdir(out):
    os.makedirs(out)
for cancer in run.cancers:
print cancer
try:
f = '{}{}/bcr/biotab/clin/'.format(path, cancer.lower())
files = pd.read_table(f + 'MANIFEST.txt', sep=' ', header=None)
if not os.path.isdir(out + '/' + cancer):
os.makedirs(out + '/' + cancer)
for g in files[1]:
p = f + g
o = out + '/' + cancer + '/' + g
!curl --cacert $PATH_TO_CACERT $p > $o
except:
print 'FAIL: Make sure path to cacert.pem is set!'
clear_output()
# Shorten downloaded file names: for every file under <out>/<subdir>/ whose
# name contains 'nationwidechildrens', drop everything up to and including
# the first underscore (presumably the data-center prefix -- verify).
#
# Uses its own loop variables instead of rebinding the notebook-global
# `path`, which holds the remote base URL needed by the download cell.
for cancer_dir in os.listdir(out):
    cancer_path = os.path.join(out, cancer_dir)
    if not os.path.isdir(cancer_path):
        # Stray regular files in the output directory are not cancer folders.
        continue
    for file_name in os.listdir(cancer_path):
        if 'nationwidechildrens' not in file_name:
            continue
        new_name = '_'.join(file_name.split('_')[1:])
        if not new_name:
            # No underscore in the name: nothing sensible to rename to.
            continue
        os.rename(os.path.join(cancer_path, file_name),
                  os.path.join(cancer_path, new_name))