%pylab inline
Populating the interactive namespace from numpy and matplotlib
cd ../src
/cellar/users/agross/TCGA_Code/TCGA/src
from Processing.Imports import *
from Figures.Helpers import *
from Figures.Survival import *
import Data.Firehose as FH
# Load the global run parameters; squeeze=True collapses the single value
# column into a Series indexed by the first column of the file.
params = pd.read_table('../global_params.txt', header=None, squeeze=True,
index_col=0)
# Build the run directory from OUT_PATH and RUN_DATE and open the run version
# named by VERSION.
run_path = '{}/Firehose__{}/'.format(params.ix['OUT_PATH'], params.ix['RUN_DATE'])
run = get_run(run_path, 'Run_' + params.ix['VERSION'])
# One loaded cancer object per entry of run.cancers.
cancers = {c: run.load_cancer(c) for c in run.cancers}
from Data.ProcessClinicalDataPortal import update_clinical_object
clinical = {c: cancer.load_clinical() for c, cancer in cancers.iteritems()}
for cancer,clin in clinical.iteritems():
try:
path = params['OUT_PATH'] + '/Followup/' + cancer + '/'
clinical[cancer] = update_clinical_object(clin, path)
except:
print cancer
do_nothing = True
Data/ProcessClinicalDataPortal.py:37: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_index,col_indexer] = value instead f['vitalstatus'] = (f['daystodeath'].isnull() == True)
# Assemble the survival records of all cancers: one column per cancer code,
# then stack into a single long Series.
surv = pd.DataFrame({c: v.survival.survival for c,v in clinical.iteritems()})
surv = surv.stack()
# Map the first index level of the 'days' entries to the stacked column label
# (the cancer code).  Presumably the first level is the patient barcode --
# TODO confirm against the survival table layout.
codes = pd.Series({s[0]: s[1] for s in surv[:,'days'].index})
codes = codes.groupby(level=0).first()
codes.name = 'cancer'
# Drop the cancer level and keep the first record for every remaining
# (level 0, level 1) pair.
surv.index = surv.index.droplevel(2)
surv = surv.groupby(level=[0,1]).first()
# Keep only the entries whose 'days' value is at least 7.
surv = surv.ix[ti(surv[:,'days'] >= 7)]
# 5-year survival table, built the same way as `surv`: one column per cancer,
# stacked, cancer level dropped, first record kept per (level 0, level 1).
five_year_by_cancer = {code: clin.survival.survival_5y
                       for code, clin in clinical.iteritems()}
surv_5y = pd.DataFrame(five_year_by_cancer).stack()
surv_5y.index = surv_5y.index.droplevel(2)
surv_5y = surv_5y.groupby(level=[0, 1]).first()
# Same inclusion rule as `surv`: entries with at least 7 days in `surv`.
followed_a_week = ti(surv[:, 'days'] >= 7)
surv_5y = surv_5y.ix[followed_a_week]
def _stacked_censored_survival(years):
    """Censor every clinical object at `years` and return the stacked table.

    Reads `clinical`, `surv`, `pd` and `ti` from the notebook namespace.
    The result has the same layout and inclusion rule as `surv_5y`.
    """
    for clin in clinical.values():
        # Called only for its effect on the clinical object: the
        # survival_<years>y table is read back from the same objects below,
        # so the return value is not needed.
        clin.artificially_censor(years)
    attr = 'survival_{}y'.format(years)
    table = pd.DataFrame({code: getattr(clin.survival, attr)
                          for code, clin in clinical.iteritems()})
    table = table.stack()
    table.index = table.index.droplevel(2)
    table = table.groupby(level=[0, 1]).first()
    # Keep only entries with at least 7 days in `surv`.
    return table.ix[ti(surv[:, 'days'] >= 7)]


# Order matters: the 10-year table is extracted before re-censoring at 3 years.
surv_10y = _stacked_censored_survival(10)
surv_3y = _stacked_censored_survival(3)
# Read the merged MAF table and keep the rows whose barcode characters 13-15
# are '01A'.
all_mut = pd.read_csv(params['OUT_PATH'] + '/MAFs/mega_maf.csv', index_col=0)
all_mut = all_mut[all_mut.Tumor_Sample_Barcode.apply(lambda s: s[13:16]) == '01A']
# Tally of variant classes before any class-based filtering.
all_mut.Variant_Classification.value_counts()
Missense_Mutation 727728 Silent 287897 Nonsense_Mutation 56861 Frame_Shift_Del 47950 Splice_Site 32484 Frame_Shift_Ins 28901 RNA 26502 In_Frame_Del 11978 IGR 3229 In_Frame_Ins 2288 Nonstop_Mutation 854 5'Flank 707 Translation_Start_Site 513 3'UTR 216 Intron 166 5'UTR 31 Indel 9 De_novo_Start_InFrame 1 De_novo_Start_OutOfFrame 1 dtype: int64
# Build a gene x patient matrix of mutation counts from the merged MAF.
all_mut = pd.read_csv(params['OUT_PATH'] + '/MAFs/mega_maf.csv', index_col=0)
# Rows whose barcode characters 13-15 are '01A'.
is_01a = all_mut.Tumor_Sample_Barcode.apply(lambda s: s[13:16]) == '01A'
# Variant classes excluded from the matrix.
non_coding = ['Silent','RNA','IGR',"5'Flank", "3'UTR",'Intron',"5'UTR"]
is_kept_class = ~all_mut.Variant_Classification.isin(non_coding)
# Take an explicit copy: assigning a column on a filtered slice is what raises
# pandas' SettingWithCopyWarning.
all_mut = all_mut[is_01a & is_kept_class].copy()
# Truncate barcodes to their first 12 characters so rows group per patient.
all_mut.Tumor_Sample_Barcode = all_mut.Tumor_Sample_Barcode.map(lambda s: s[:12])
all_mut = all_mut.groupby(['Tumor_Sample_Barcode', 'Hugo_Symbol']).size()
# Genes as rows, patients as columns; absent pairs count as 0.
all_mut = all_mut.unstack().T.fillna(0)
/cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/core/generic.py:1642: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_index,col_indexer] = value instead self[name] = value
# Load mutation data only for cancers whose MAF entry in the run's sample
# matrix is positive.
mut = {c: cancer.load_data('Mutation') for c, cancer in cancers.iteritems()
if run.sample_matrix.MAF.ix[c] > 0}
# Column-bind the unstacked mutation table of every cancer; gaps become 0.
hit_mat = pd.concat([m.df.unstack() for c,m in mut.iteritems()], axis=1).fillna(0)
# Collapse duplicated column labels, keeping the first value.
hit_mat = hit_mat.groupby(axis=1, level=0).first()
# Append the mega-MAF counts, then call an entry "hit" when the sum over
# same-labelled columns is positive.
hit_mat = pd.concat([hit_mat, all_mut], axis=1)
hit_mat = hit_mat.groupby(axis=1, level=0).sum() > 0
cn_all = {}
for c in codes.unique():
try:
cn_all[c] = FH.get_gistic_gene_matrix(run.data_path, c)
except:
print c
cn_all = pd.concat(cn_all.values(), axis=1)
cn_all = cn_all.groupby(axis=1, level=0).first()
# NOTE(review): non_embargo is not used anywhere in the visible code -- the
# filter that consumed it is commented out just below.
non_embargo = ['AML','BRCA','KIRC','COAD','READ','COADREAD','SKCM','GBM',
'HNSC','LUAD','LUSC','OV','STAD','THCA','UCEC']
#codes = codes[codes.isin(non_embargo)]
# Restrict to cancer codes carried by more than 30 patients and plot 10-year
# survival by cancer.
c2 = codes[codes.isin(ti(codes.value_counts() > 30))]
survival_and_stats(c2, surv_10y, upper_lim=10, figsize=(10,12))
# Two stacked bar charts per cancer: the 'Stats' columns on top, the 5-year
# survival estimate with error bars below.
fig, axs = subplots(2,1, figsize=(8,8))
ax = axs[0]
t = get_surv_fit(surv, codes)
# Order the cancers by the ('5y Survival', 'Surv') column.
t = t.sort([('5y Survival','Surv')])
t['Stats'].plot(kind='bar', ax=ax)
prettify_ax(ax)
ax2 = axs[1]
# NOTE(review): same fit as above, recomputed from scratch.
t = get_surv_fit(surv, codes)
tt = t['5y Survival'].sort('Surv')
# Asymmetric error bars: distance from the estimate to the Lower and Upper
# columns.
b = (tt['Surv']).plot(kind='bar', ax=ax2, color='grey',
yerr=[tt.Surv-tt.Lower, tt.Upper-tt.Surv], ecolor='black')
ax2.set_ylabel('5Y Survival')
ax2.set_yticks([0, .5, 1.])
prettify_ax(ax2)
fig.tight_layout()
# 5-year survival estimate per cancer, sorted.
fys = get_surv_fit(surv, codes)['5y Survival']['Surv'].order()
# Flag patients whose cancer has a 5-year survival strictly between 0.1 and
# 0.83; cancers with a NaN estimate fail both comparisons and are not flagged.
nn = codes.isin(true_index((fys > .1) * (fys < .83)))
fys
GBM 0.06 LAML 0.23 OV 0.31 STAD 0.35 LUAD 0.35 LIHC 0.36 BLCA 0.39 HNSC 0.44 LUSC 0.45 READ 0.47 ACC 0.48 SARC 0.58 SKCM 0.61 KIRC 0.62 COAD 0.63 LGG 0.65 CESC 0.65 KIRP 0.68 UCEC 0.79 BRCA 0.81 DLBC 0.84 KICH 0.87 THCA 0.89 PRAD 0.97 ESCA NaN PAAD NaN Name: Surv, dtype: float64
# Pool age across cancers, keeping the first value per index label.
age = pd.concat([c.clinical.age for c in clinical.values()])
age = age.groupby(level=0).first()
age.name = 'age'
# Survival by age bucket: age / 15 rounded to the nearest integer.
survival_and_stats((age / 15).round(), surv_5y)
# Patients aged 85 or more versus everyone else.
survival_and_stats(age >= 85, surv_5y)
get_cox_ph(surv_5y, age >= 85, print_desc=True);
coef exp(coef) se(coef) z p feature 0.773 2.17 0.139 5.55 2.9e-08 Likelihood ratio test=24.4 on 1 df, p=7.72e-07 n= 7294, number of events= 1959
# exp(coef) and its distance to exp(coef - se), with coef=0.773 and se=0.139
# typed in by hand (they match the 'feature' row of the Cox summary printed
# for age >= 85); re-enter them if that fit changes.
exp(.773), exp(.773)-(exp(.773-.139))
(2.1662552812206535, 0.28111921870672885)
# Year of initial pathologic diagnosis; '[Discrepancy]' entries become NaN and
# are dropped after the cast to float.
year = pd.concat([c.clinical.yearofinitialpathologicdiagnosis for c in clinical.values()])
year = year.groupby(level=0).first().replace('[Discrepancy]', nan).astype(float).dropna()
# NOTE(review): the plot splits at year <= 2000 but the Cox model below uses
# year < 2000 -- confirm which cutoff is intended.
survival_and_stats(year <= 2000, surv_5y)
get_cox_ph_ms(surv_5y, year < 2000, [codes])
/cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/core/indexing.py:344: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_index,col_indexer] = value instead self.obj[item] = s
LR 5.92e-09 feature_p NaN fmla Surv(days, event) ~ cancer + cancer:feature\n hazzard NaN dtype: object
# Residual-tumor status, pooled over the cancers that record it.
r_status = pd.concat([c.clinical.residualtumor for c in clinical.values() if 'residualtumor'
in c.clinical])
# Uninformative labels become missing.  The replacement value has to be given
# explicitly: with a list and no value, Series.replace falls back to
# method='pad' and overwrites these entries with the preceding patient's
# status instead of blanking them.
r_status = r_status.replace(['[Not Evaluated]', '[Unknown]', 'RX'], nan)
r_status = r_status.str.lower()
# Keep the first value per index label.
r_status = r_status.groupby(level=0).first()
survival_and_stats(r_status.dropna(), surv_5y)
# Cox model for 'r2' versus everything else.
get_cox_ph(surv_5y, r_status=='r2', print_desc=True);
coef exp(coef) se(coef) z p feature 1.05 2.86 0.18 5.82 5.8e-09 Likelihood ratio test=25.1 on 1 df, p=5.57e-07 n= 3555, number of events= 809
# exp(coef) and its distance to exp(coef - se), with coef=1.05 and se=0.18
# typed in by hand (they match the 'feature' row of the Cox summary printed
# for r_status == 'r2'); re-enter them if that fit changes.
exp(1.05), exp(1.05)-(exp(1.05-.18))
(2.8576511180631639, 0.47074026453888695)
# Margin status, pooled over the cancers that record it; first value per
# index label, plotted against 5-year survival with missing values dropped.
margin_status = pd.concat([c.clinical.marginstatus for c in clinical.values() if 'marginstatus' in c.clinical])
margin_status = margin_status.groupby(level=0).first()
survival_and_stats(margin_status.dropna(), surv_5y)
# Pick one staging series per cancer: pathologic stage when the stage table
# has it, clinical stage otherwise; cancers with neither are skipped.
stage_by_cancer = {}
for code, clin in clinical.iteritems():
    stage_table = clin.stage
    if 'pathologicstage' in stage_table:
        stage_by_cancer[code] = stage_table.pathologicstage
    elif 'clinicalstage' in stage_table:
        stage_by_cancer[code] = stage_table.clinicalstage


def _drop_substage(label):
    """Remove the characters A, B, C, 1 and 2 from a stage label."""
    for token in ('A', 'B', 'C', '1', '2'):
        label = label.replace(token, '')
    return label


stage = pd.concat(stage_by_cancer.values())
stage = stage.groupby(level=0).first()
stage = stage.dropna().map(_drop_substage)
# Align to the patients that have an age; patients without a stage get the
# explicit category 'missing'.
stage = stage.ix[age.index].fillna('missing')
stage = stage.str.lower()
# These labels are treated as unknown and dropped.
unknown_labels = ['stage x','stage tis','i or ii nos', 'stage 0']
stage = stage.replace(unknown_labels, nan).dropna()
stage.name = 'stage'
survival_and_stats(stage.ix[true_index(nn)].dropna(), surv_5y)
# Cox model for stage IV versus everything else.
get_cox_ph(surv_5y, stage == 'stage iv', print_desc=True)
coef exp(coef) se(coef) z p feature 0.712 2.04 0.0625 11.4 0 Likelihood ratio test=110 on 1 df, p=0 n= 7264, number of events= 1952
# exp(coef) and its distance to exp(coef - se), with coef=0.712 and se=0.0625
# typed in by hand (they match the 'feature' row of the Cox summary printed
# for stage == 'stage iv'); re-enter them if that fit changes.
exp(.712), exp(.712)-(exp(.712-.0625))
(2.0380633118599061, 0.1234800138578156)
# Pathologic M status, pooled over the cancers whose stage table records it
# (the original computed this twice in a row; once is enough).
mets = pd.concat([c.stage.pathologicm for can,c in clinical.iteritems()
                  if 'pathologicm' in c.stage])
mets = mets.groupby(level=0).first()
# Strip the lowercase sub-category letters a/b/c from every label.
# NOTE(review): this also rewrites any other label containing those letters,
# and anything that is not exactly 'M1' afterwards counts as non-metastatic
# rather than missing -- confirm the loader has already blanked such labels.
mets = mets.dropna().map(lambda s: s.replace('a','').replace('b','').replace('c',''))
mets = mets.replace(['[Unknown]','MX'], nan).dropna()
mets = mets=='M1'
mets.name = 'metastasis'
survival_and_stats(mets, surv_5y)
# Joint view of M1 status and stage IV.
survival_and_stats(combine(mets, stage=='stage iv'), surv_5y)
# Extra TP53 calls from a side file; they only fill patients that have no
# value in hit_mat's TP53 row (combine_first keeps hit_mat where present).
new_p53_calls = pd.read_csv('../Extra_Data/p53_calls_pancancer.csv', header=None, index_col=0,
squeeze=True)
p53_new = (hit_mat.ix['TP53'].combine_first(new_p53_calls)) > 0
# Exclusion flags, one column each: age >= 85, metastasis, R2 residual tumor,
# stage IV, cancers outside the 5-year-survival window (nn == False), ACC.
filters = pd.concat([age >= 85, mets, r_status=='r2', stage=='stage iv', nn==False,
codes.isin(['ACC'])], 1)
# Keep patients with no flag set (a missing flag does not count as set) ...
keepers = true_index((filters > 0).sum(1) == 0)
# ... that also appear in the 5-year survival table, the TP53 calls, the
# copy-number columns and the cancer codes.
keepers = keepers.intersection(surv_5y.unstack().index)
keepers = keepers.intersection(p53_new.index).intersection(cn_all.columns)
keepers = keepers.intersection(codes.index)
# Number of distinct cancer codes among the kept patients.
codes.ix[keepers].value_counts().shape
(18,)
# 10-year survival by cancer for the kept patients, saved as a PDF.
survival_and_stats(codes.ix[keepers].order(), surv_10y, upper_lim=10, figsize=(10,10))
plt.savefig('/cellar/users/agross/figures/pancan_supp.pdf', transparent=True)
# TP53 status for every patient (p53_all) and for the kept patients only
# (p53_mut).
p53_all = p53_new > 0
p53_mut = p53_all.ix[keepers]
p53_all.name = 'TP53'
p53_mut.name = 'TP53'
survival_and_stats(p53_mut, surv_5y)
# Cox model for TP53 with cancer code, age and stage as covariates.
get_cox_ph_ms(surv_5y, p53_mut, [codes, age, stage], interactions=None)
LR 0.0994 feature_p 0.1 fmla Surv(days, event) ~ feature + cancer + age + s... hazzard 1.15 dtype: object
# Per column of cn_all: fraction of rows with a negative value (pct_del) and
# with a positive value (pct_amp); their sum is the altered fraction.
pct_del = (cn_all < 0).sum() / (1.*len(cn_all))
pct_amp = (cn_all > 0).sum() / (1.*len(cn_all))
pct_altered = pct_amp + pct_del
pct_altered.name = 'CIN_pct_altered'
# Survival by to_quants(pct_altered, q=.33) group; note this uses `surv`, not
# the 5-year table.
survival_and_stats(to_quants(pct_altered, q=.33), surv)
# 3p14.2 deletion call per column of cn_all.  (An earlier median-based
# assignment was immediately overwritten and has been dropped.)
# Fraction of rows whose first index level contains '3p14.2' that are negative.
del_3p_all = (cn_all.ix[[i for i in cn_all.index if '3p14.2' in i[0]]] < 0).mean()
# Call a deletion when more than half of those rows are negative; encode it as
# -1 (deleted) / 0 (not deleted).
del_3p_all = del_3p_all > .5
del_3p_all = del_3p_all.map({True:-1, False:0})
# Same call restricted to the kept patients.
del_3p = del_3p_all.ix[keepers].dropna()
del_3p.name = '3p_cna'
del_3p_all.name = '3p_cna'
survival_and_stats(del_3p, surv_5y)
# Cox model for the deletion with cancer code, age, stage and altered
# fraction as covariates.
get_cox_ph_ms(surv_5y, del_3p < 0, [codes, age, stage, pct_altered], interactions=None)
LR 0.0342 feature_p 0.0336 fmla Surv(days, event) ~ feature + cancer + age + s... hazzard 1.21 dtype: object
old = age >= 75
codes.name = 'codes'
fmla = 'Surv(days, event) ~ feature + strata(codes)'
fig, axs = subplots(2, 1, figsize=(3.5,3.5), sharey=True, sharex=True)
old.name = 'age_over_75'
del_3p.name = 'del_3p'
del_3p_all.name = 'del_3p'
pct_altered.name = 'CIN_pct_altered'
fmla = 'Surv(days, event) ~ del_3p + CIN_pct_altered + age + stage + codes'
fmla2 = 'Surv(days, event) ~ CIN_pct_altered + age + stage + codes'
k2 = keepers.diff(codes[codes == 'HNSC'].index)
k2 = codes[codes != 'HNSC'].index
pts = [keepers, ti(p53_mut==1), ti(p53_mut==0)]
pts = [ti(p53_mut==1).intersection(k2),
ti(p53_mut==0).intersection(k2)]
#d2 = k2.intersection(hit_mat.columns)
color_list = ['#a1d99b','#9ecae1']
for i, pt in enumerate(pts):
ax = axs[i]
m1 = get_cox_ph(surv_5y, covariates=[del_3p_all.ix[pt].dropna() < 0, pct_altered, stage, age, codes],
formula=fmla, print_desc=False, interactions=False);
m2 = get_cox_ph(surv_5y, covariates=[del_3p_all.ix[pt].dropna() < 0, pct_altered, stage, age, codes],
formula=fmla2, print_desc=False, interactions=False);
print LR_test(m1, m2)
ci = convert_robj(robjects.r.summary(m1)[7])
haz = ci['exp(coef)']
for j,h in enumerate(haz):
ax.scatter(h, j, marker='s', s=100, color=color_list[i],
edgecolors=['black'], zorder=10)
ax.plot(*zip(*((ci.iloc[j]['lower .95'],j), (ci.iloc[j]['upper .95'],j))),
lw=3, ls='-', marker='o', dash_joinstyle='bevel', color=color_list[i])
ax.axvline(1, ls='--', color='black')
ax.set_xscale('log')
ax.set_xbound(.8,1.5)
ax.set_ybound(-.5,len(ci.index) - .5)
ax.set_xticks([1])
ax.set_xticklabels([1])
ax.set_yticks(range(len(ci.index)))
ax.set_yticklabels(ci.index)
ax.set_ybound(-.5, 1.5)
prettify_ax(ax)
axs[1].set_xlabel('Hazard Ratio')
fig.tight_layout()
#fig.savefig('/cellar/users/agross/figures/fig2c_alt.pdf', transparent=True)
0.0177228968666 0.716383050501
# Joint 3p / TP53 label per patient built by combine(), restricted to the
# kept patients.
combo = combine(del_3p < 0, p53_mut>0)
combo = combo.ix[keepers].dropna()
combo.name = 'TP53 / 3p14'
# Cohort size with and without HNSC.
len(combo), len(combo.ix[codes[codes != 'HNSC'].index].dropna())
(4583, 4404)
# Overlap of the 3p deletion and TP53 calls over all patients.
venn_pandas(del_3p_all < 0, p53_all>0)
<matplotlib_venn._venn2.Venn2 instance at 0x1a4053f8>
# Size of the joint label once HNSC patients are removed from the 3p calls.
len(combine(del_3p_all.ix[del_3p_all.index.diff(true_index(codes=='HNSC'))].dropna() < 0,
p53_all>0))
7081
# Association between 3p deletion and TP53 calls, HNSC patients removed.
fisher_exact_test(del_3p_all.ix[del_3p_all.index.diff(true_index(codes=='HNSC'))].dropna() < 0,
p53_all>0)
odds_ratio 2.47e+00 p 1.96e-62 dtype: float64
# Same overlap, restricted to the kept patients without HNSC.
venn_pandas(del_3p.ix[del_3p.index.diff(true_index(codes=='HNSC'))].dropna() < 0,
p53_mut>0)
<matplotlib_venn._venn2.Venn2 instance at 0x1d75a950>
# Size of the joint label over the combo patients without HNSC.
len(combine(del_3p.ix[combo.index.diff(true_index(codes=='HNSC'))] < 0, p53_mut>0))
4404
# Association between 3p deletion and TP53 over the combo patients without
# HNSC.
fisher_exact_test(del_3p.ix[combo.index.diff(true_index(codes=='HNSC'))] < 0, p53_mut>0)
odds_ratio 2.00e+00 p 3.31e-26 dtype: float64
# Joint TP53 / 3p label over all patients (not only the kept ones).
combo_all = combine(p53_all>0, del_3p_all<0)
combo_all.name = 'TP53 / 3p14'
# Cross-tabulate the label against stage IV membership and plot, per label,
# the share of patients in stage IV.
ct = pd.crosstab(combo_all, stage[stage != 'M']=='stage iv').T
(ct.ix[True] / (1.*ct.sum())).order().plot(kind='bar');
# NOTE(review): the plotted value is a fraction (0-1), while the axis label
# says '%'.
plt.ylabel('% of Patients in Stage IV')
plt.xlabel('');
# NOTE(review): rebuilds the same labels as above, without setting the name.
combo_all = combine(p53_all>0, del_3p_all<0)
# Association between carrying both events and being stage IV.
fisher_exact_test(combo_all=='both', stage[stage != 'M']=='stage iv')
odds_ratio 2.45e+00 p 4.16e-20 dtype: float64
len(combo_all)
7444
Univariate model, all patients
# 5-year Cox model for 'both' versus every other label, no covariates.
get_cox_ph(surv_5y, combo=='both', print_desc=True);
coef exp(coef) se(coef) z p feature 0.505 1.66 0.0767 6.59 4.3e-11 Likelihood ratio test=39.5 on 1 df, p=3.21e-10 n= 4583, number of events= 955
Multivariate model, all patients
# Same comparison with cancer code, age and stage as covariates.
get_cox_ph_ms(surv_5y, combo=='both', [codes, age, stage], interactions=None)
LR 0.000351 feature_p 0.000271 fmla Surv(days, event) ~ feature + codes + age + st... hazzard 1.4 dtype: object
codes.ix[keepers].value_counts().shape
(18,)
Univariate model, HNSCC excluded
# 5-year Cox model for 'both' versus every other label, HNSC patients removed.
get_cox_ph(surv_5y, combo.ix[true_index(codes!='HNSC')].dropna()=='both', print_desc=True);
coef exp(coef) se(coef) z p feature 0.452 1.57 0.082 5.52 3.4e-08 Likelihood ratio test=27.8 on 1 df, p=1.35e-07 n= 4404, number of events= 904
# Among non-HNSC patients labelled 'TP53' or 'both': LR p-value of the Cox
# model comparing 'both' against 'TP53' alone.
cox((combo[combo.isin(['TP53','both'])].dropna()=='both').ix[true_index(codes!='HNSC')].dropna(),
surv_5y).ix['LR'].ix['p']
0.001692027203569757
# Same 'both' versus 'TP53' comparison, restricted to non-HNSC patients under
# 75; log-rank p-value.
pats = true_index(codes!='HNSC').intersection(true_index(age < 75))
log_rank((combo[combo.isin(['TP53','both'])]=='both').ix[pats].dropna(), surv_5y).ix['p']
0.0018420748458692609
# Full Cox summary for the same under-75, non-HNSC comparison.
cox((combo[combo.isin(['TP53','both'])]=='both').ix[pats].dropna(), surv_5y)
hazard exp(coef) 1.41 exp(-coef) 0.71 lower .95 1.13 upper .95 1.75 LR stat 9.48 df 1.00 p 0.00 concordance stat 0.56 se 0.01 dtype: float64
Multivariate model, HNSCC excluded
# 'both' versus every other label without HNSC, with cancer code, stage and
# age as covariates.
get_cox_ph_ms(surv_5y, (combo=='both').ix[true_index(codes!='HNSC')].dropna(),
[codes, stage, age], interactions=False)
LR 0.00507 feature_p 0.00431 fmla Surv(days, event) ~ feature + codes + stage + ... hazzard 1.33 dtype: object
# Boolean feature over non-HNSC patients labelled 'TP53' or 'both': True for
# 'both'.  pts keeps the patient index of that feature.
f = (combo[combo.isin(['TP53','both'])]=='both').ix[true_index(codes!='HNSC')].dropna()
pts = f.index
cox(f, surv_5y).ix['LR'].ix['p']
0.001692027203569757
# 3-year Cox models stratified by cancer code: with the feature term (fmla)
# and without it (fmla2), compared by a likelihood-ratio test.
fmla = 'Surv(days, event) ~ feature + age + stage + strata(codes)'
fmla2 = 'Surv(days, event)~ age + stage + strata(codes)'
# Both fits are given the same feature and covariates; only the formula
# differs.
m1 = get_cox_ph(surv_3y, f, [codes, age.ix[pts], stage], formula=fmla, print_desc=True)
m2 = get_cox_ph(surv_3y, f, [codes, age.ix[pts], stage], formula=fmla2, print_desc=True)
LR_test(m1, m2)
coef exp(coef) se(coef) z p feature 0.402 1.494 0.1308 3.07 2.1e-03 age 0.395 1.485 0.0759 5.21 1.9e-07 stagestage i -1.665 0.189 0.5991 -2.78 5.5e-03 stagestage ii -1.389 0.249 0.6003 -2.31 2.1e-02 stagestage iii -0.716 0.489 0.5959 -1.20 2.3e-01 Likelihood ratio test=69.5 on 5 df, p=1.33e-13 n= 1748, number of events= 307 coef exp(coef) se(coef) z p age 0.387 1.473 0.0756 5.12 3.0e-07 stagestage i -1.680 0.186 0.5994 -2.80 5.1e-03 stagestage ii -1.396 0.248 0.5997 -2.33 2.0e-02 stagestage iii -0.724 0.485 0.5960 -1.21 2.2e-01 Likelihood ratio test=60.1 on 4 df, p=2.7e-12 n= 1748, number of events= 307
0.0022792308568612249
# 5-year survival curves for the four joint labels, all kept patients.
cc = array(colors_th)
fig, ax = subplots(figsize=(5,3))
draw_survival_curve(combo, surv_5y, ax=ax,
colors=cc[[2,0,3,1]], ms=30, alpha=.7)
ax.get_legend().set_visible(False)
ax.set_ybound(0,1)
ax.set_xbound(0,5)
prettify_ax(ax)
# Group sizes among non-HNSC patients present in the 5-year survival table.
combo.ix[true_index(codes!='HNSC')].dropna().ix[surv_5y.unstack().index].dropna().value_counts()
neither 1955 TP53 1046 both 726 del_3p 677 dtype: int64
# Same survival curves without HNSC, with an explicit colour per label, saved
# as a PDF.
k2 = combo.index.intersection(true_index(codes!='HNSC'))
fig, ax = subplots(figsize=(4,3))
draw_survival_curve(combo.ix[k2], surv_5y, ax=ax,
colors={'both': cc[0], 'TP53': cc[1], 'del_3p': cc[2], 'neither':cc[3]},
ms=30, alpha=.7)
ax.get_legend().set_visible(False)
ax.set_ybound(0,1)
ax.set_xbound(0,5)
prettify_ax(ax)
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/fig3e.pdf', transparent=True)
# Survival summary per label with a 3-year cutoff; note this uses `surv` and
# the full combo (HNSC included).
get_surv_fit(surv, combo, time_cutoff=3)
Stats | Median Survival | 3y Survival | ||||||
---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | |
both | 823 | 246 | 4.78 | 4.11 | 5.72 | 0.62 | 0.58 | 0.67 |
del_3p | 705 | 182 | 8.12 | 6.42 | 9.48 | 0.80 | 0.76 | 0.83 |
neither | 1994 | 433 | 8.20 | 6.96 | 10.60 | 0.77 | 0.75 | 0.80 |
TP53 | 1061 | 272 | 5.56 | 4.85 | 7.17 | 0.75 | 0.72 | 0.79 |
4 rows × 8 columns
# Hazard-ratio tables for the four joint labels on 3-year survival, without
# (ci_uni) and with (ci_full) covariates.
fmla = 'Surv(days, event) ~ feature + age + stage + strata(codes)'
# Relabel the groups a-d; presumably so that 'neither' sorts first and becomes
# the reference level in R -- TODO confirm.
m = {'TP53':'c', 'both':'d', 'neither':'a', 'del_3p':'b'}
cm = combo.map(m).ix[true_index(codes != 'HNSC')]
f = get_cox_ph(surv_3y, cm, interactions=False)
# Element 7 of the R summary is used as the confidence-interval table.
ci = com.convert_robj(robjects.r.summary(f)[7])
# Assumes the rows come back in b, c, d order -- verify if the mapping changes.
ci.index = ['3p','TP53','both']
# Add a row of ones for the reference group.
n = ci.ix[0]*0 +1
n.name = 'neither'
ci = ci.append(n)
ci = ci.ix[['neither','3p', 'TP53','both']]
ci_uni = ci
f = get_cox_ph(surv_3y, cm, [codes, age, stage],
formula=fmla, interactions=False)
ci = com.convert_robj(robjects.r.summary(f)[7])
# Keep the leading rows, which are then labelled as the feature levels; the
# remaining rows are dropped.
ci = ci.ix[:3]
ci.index = ['3p','TP53','both']
n = ci.ix[0]*0 +1
n.name = 'neither'
ci = ci.append(n)
ci = ci.ix[['neither','3p', 'TP53','both']]
ci_full = ci
# Bar chart of the covariate-adjusted hazard ratios with asymmetric error
# bars from the 95% bounds and a dashed reference line at 1.
fig, ax = subplots(1,1, figsize=(3,2.5))
ci = ci_full
haz = ci['exp(coef)']
b = haz.plot(kind='bar', ax=ax, color=cc[[3,2,1,0]],
yerr=[haz - ci['lower .95'], ci['upper .95'] - haz], ecolor='black')
prettify_ax(ax)
ax.set_ylabel('hazard')
ax.set_ylim(0)
ax.axhline(y=1, ls='--', lw=3, color='black', alpha=.5)
fig.tight_layout()
# Number of patients carrying a label in cm.
cm.value_counts().sum()
4404
# Horizontal forest plot of the covariate-adjusted 3-year hazard ratios.  The
# table is reversed so that its last row is drawn at y = 0.
fig, ax = subplots(1,1, figsize=(2.3,3))
ci = ci_full.ix[::-1]
haz = ci['exp(coef)']
color_list = colors_th
for j, h in enumerate(haz):
    row = ci.iloc[j]
    group_color = color_list[j]
    # Square marker at the hazard ratio ...
    ax.scatter(h, j, marker='s', s=100, color=group_color,
               edgecolors=['black'], zorder=10)
    # ... and a horizontal bar spanning its 95% bounds.
    ax.plot((row['lower .95'], row['upper .95']), (j, j),
            lw=3, ls='-', marker='o', dash_joinstyle='bevel', color=group_color)
ax.axvline(1, ls='--', color='black')
ax.set_xscale('log')
ax.set_xbound(.7,2.)
ax.set_ybound(-.5,len(ci.index) - .5)
x_ticks = [.75, 1, 1.25, 1.5, 2]
ax.set_xticks(x_ticks)
ax.set_xticklabels(x_ticks)
ax.set_yticks(range(len(ci.index)))
ax.set_yticklabels('')
ax.set_ybound(-.5, 3.5)
prettify_ax(ax)
ax.set_xlabel('3 Year Hazard Ratio')
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/fig3f.pdf', transparent=True)
# Side-by-side hazard-ratio bars: unadjusted table on the left, adjusted table
# on the right, sharing the y axis.
fig, axs = subplots(1,2, figsize=(5,2.5), sharey=True)
for i, ci in enumerate([ci_uni, ci_full]):
    ax = axs[i]
    haz = ci['exp(coef)']
    # Asymmetric error bars from the 95% bounds.
    below = haz - ci['lower .95']
    above = ci['upper .95'] - haz
    b = haz.plot(kind='bar', ax=ax, color=cc[[3,2,1,0]],
                 yerr=[below, above], ecolor='black')
    prettify_ax(ax)
    ax.set_ylabel('hazard')
    ax.set_ylim(0)
    # Dashed reference line at a hazard ratio of 1.
    ax.axhline(y=1, ls='--', lw=3, color='black', alpha=.5)
fig.tight_layout()
from itertools import combinations
# NOTE(review): this value of c is never read -- the comprehension below
# rebinds c -- and it is taken from the previous cm, before cm is reassigned.
c = list(combinations(cm.unique(),2))[0]
cm = combo.ix[true_index(codes!='HNSC')].dropna()
# LR value of a 5-year Cox model for every pair of labels, no covariates,
# fitted on the patients carrying either label of the pair.
sig = pd.Series({c: get_cox_ph_ms(surv_5y, cm[cm.isin(c)], interactions=False)['LR']
for c in combinations(cm.unique(),2)})
sig
(TP53, both) 1.69e-03 (TP53, del_3p) 1.28e-02 (TP53, neither) 3.14e-02 (both, del_3p) 8.11e-07 (both, neither) 1.64e-07 (neither, del_3p) 6.54e-01 dtype: float64
# Pairwise label comparison on 3-year survival, no covariates.
cm = combo.ix[true_index(codes!='HNSC')].dropna()
sig = pd.Series({c: get_cox_ph_ms(surv_3y, cm[cm.isin(c)], interactions=False)['LR']
for c in combinations(cm.unique(),2)})
sig
(TP53, both) 2.24e-05 (TP53, del_3p) 1.49e-01 (TP53, neither) 9.26e-01 (both, del_3p) 2.22e-07 (both, neither) 3.85e-06 (neither, del_3p) 1.20e-01 dtype: float64
# Pairwise label comparison on 3-year survival with cancer code and age as
# covariates.
cm = combo.ix[true_index(codes!='HNSC')].dropna()
sig = pd.Series({c: get_cox_ph_ms(surv_3y, cm[cm.isin(c)], [codes, age], interactions=False)['LR']
for c in combinations(cm.unique(),2)})
sig
(TP53, both) 1.66e-03 (TP53, del_3p) 8.55e-01 (TP53, neither) 4.55e-01 (both, del_3p) 8.83e-02 (both, neither) 2.13e-04 (neither, del_3p) 6.54e-01 dtype: float64
# Pairwise label comparison on 5-year survival with cancer code, age and
# stage as covariates.
cm = combo.ix[true_index(codes!='HNSC')].dropna()
sig = pd.Series({c: get_cox_ph_ms(surv_5y, cm[cm.isin(c)], [codes, age, stage], interactions=False)['LR']
for c in combinations(cm.unique(),2)})
sig
(TP53, both) 5.96e-03 (TP53, del_3p) 4.78e-01 (TP53, neither) 8.02e-01 (both, del_3p) 3.39e-01 (both, neither) 2.31e-04 (neither, del_3p) 5.80e-01 dtype: float64
# Pairwise label comparison on 3-year survival with cancer code, age and
# stage as covariates.
cm = combo.ix[true_index(codes!='HNSC')].dropna()
sig = pd.Series({c: get_cox_ph_ms(surv_3y, cm[cm.isin(c)], [codes, age, stage], interactions=False)['LR']
for c in combinations(cm.unique(),2)})
sig
(TP53, both) 2.60e-03 (TP53, del_3p) 8.35e-01 (TP53, neither) 4.38e-01 (both, del_3p) 6.06e-02 (both, neither) 7.49e-04 (neither, del_3p) 7.94e-01 dtype: float64
sig.ix[0]
0.0026044624850724524
f = {}
for cancer in codes.ix[combo.index].unique():
try:
c = combo=='both'
c = c.ix[ti(codes==cancer)]
f[cancer] = get_cox_ph_ms(surv_3y, c, [age, old], interactions=False)
except:
print 'fail'
c = combo.ix[ti(codes=='HNSC')]
get_cox_ph_ms(surv_3y, c, [age, old], interactions=False)
LR 0.00501 feature_p NaN fmla Surv(days, event) ~ feature\n hazzard NaN dtype: object
# One row per cancer from the per-cancer fits, ordered by the LR column.
pd.DataFrame(f).T.sort('LR')
LR | feature_p | fmla | hazzard | |
---|---|---|---|---|
BRCA | 0.0017 | 0.000705 | Surv(days, event) ~ feature + age\n | 3.25 |
HNSC | 0.00453 | 0.00235 | Surv(days, event) ~ feature\n | 2.72 |
OV | 0.0124 | 0.00947 | Surv(days, event) ~ feature + age\n | 1.77 |
LUSC | 0.0237 | 0.00996 | Surv(days, event) ~ feature\n | 0.547 |
LAML | 0.0239 | 0.0118 | Surv(days, event) ~ feature + age + age_over_75\n | 3.28 |
KIRC | 0.0252 | 0.00719 | Surv(days, event) ~ feature + age\n | 4.09 |
LUAD | 0.0468 | 0.0344 | Surv(days, event) ~ feature\n | 1.67 |
LIHC | 0.155 | 0.044 | Surv(days, event) ~ feature\n | 4.99 |
LGG | 0.158 | 0.111 | Surv(days, event) ~ feature + age + age_over_75\n | 2.67 |
COAD | 0.352 | 0.299 | Surv(days, event) ~ feature\n | 1.91 |
READ | 0.455 | 0.416 | Surv(days, event) ~ feature + age\n | 2.57 |
UCEC | 0.525 | 0.511 | Surv(days, event) ~ feature + age\n | 1.39 |
SKCM | 0.932 | 0.931 | Surv(days, event) ~ feature + age\n | 1.09 |
CESC | 1 | NaN | Surv(days, event) ~ feature + age + age_over_75\n | NaN |
BLCA | 1 | 0.709 | Surv(days, event) ~ feature\n | 0.796 |
KIRP | 1 | NaN | Surv(days, event) ~ feature\n | 1 |
SARC | 1 | NaN | Surv(days, event) ~ feature\n | NaN |
STAD | 1 | 0.732 | Surv(days, event) ~ feature\n | 1.13 |
18 rows × 4 columns
# One 3-year survival panel per selected cancer, comparing only the 'TP53' and
# 'both' groups.
fig, axs = subplots(1, 7, figsize=(10,3), sharey=True)
cc = array(colors_th)
panel_cancers = ['HNSC','BRCA','LUAD', 'OV','KIRC','LGG','LUSC']
tp53_or_both = combo[combo.isin(['both','TP53'])]
for i, c in enumerate(panel_cancers):
    panel = axs[i]
    in_cancer = tp53_or_both.ix[true_index(codes==c)].dropna()
    draw_survival_curve(in_cancer, surv_3y, ax=panel,
                        colors={'both': cc[0], 'TP53': cc[1], 'del_3p': cc[2], 'neither':cc[3]})
    panel.get_legend().set_visible(False)
    panel.set_ylim(.5,1.05)
    panel.set_xticks([0,1,2,3])
    prettify_ax(panel)
    # NOTE(review): the label is anchored at y=.42, below the .5 lower limit
    # set above; matplotlib may clip data-coordinate annotations outside the
    # axes -- confirm the cancer names actually show up.
    panel.annotate(c, (2, .42))
fig.tight_layout()