# Multivariate Cox proportional-hazards analysis of the combined
# TP53-mutation / 3p14.2-deletion ("two-hit") status against clinical
# covariates, with one forest plot of hazard ratios per fitted model.
#
# Everything not defined here (mut, cn, clinical, surv, age, keepers_o,
# colors, FIGDIR, combine, ti, get_cox_ph, LR_test, convert_robj, robjects,
# subplots, prettify_ax, pd) comes from the star import below.
import NotebookImport
from Imports import *


def _plot_hazard_ratios(model, figsize, x_bound, x_ticks, row_colors=None,
                        save_as=None):
    """Draw a forest plot of hazard ratios with their 95% intervals.

    One row is drawn per entry of the confidence-interval table taken from
    the R summary of ``model``: a square marker at ``exp(coef)`` and a
    horizontal bar from ``lower .95`` to ``upper .95``, on a log x-axis with
    a dashed reference line at a hazard ratio of 1.

    Parameters
    ----------
    model : fitted model object as returned by ``get_cox_ph``.
    figsize : (width, height) passed to ``subplots``.
    x_bound : (low, high) limits for the x-axis.
    x_ticks : tick positions, also used verbatim as the tick labels.
    row_colors : optional sequence with one colour per table row, indexed
        by row position.  When omitted every row is drawn in grey.
    save_as : optional file name; when given the figure is written to
        ``FIGDIR + save_as`` with a transparent background.

    Returns
    -------
    (fig, ax, ci) : the figure, its axes and the interval table.
    """
    fig, ax = subplots(figsize=figsize)
    # NOTE(review): element 7 of the R summary is presumably the `conf.int`
    # matrix (it must expose the three columns read below) -- confirm.
    ci = convert_robj(robjects.r.summary(model)[7])
    haz = ci['exp(coef)']

    for j, h in enumerate(haz):
        color = 'grey' if row_colors is None else row_colors[j]
        row = ci.iloc[j]
        ax.scatter(h, j, marker='s', s=100, color=color,
                   edgecolors=['black'], zorder=10)
        ax.plot((row['lower .95'], row['upper .95']), (j, j), lw=3, ls='-',
                marker='o', dash_joinstyle='bevel', color=color)

    ax.axvline(1, ls='--', color='black')
    ax.set_xscale('log')
    ax.set_xbound(*x_bound)
    ax.set_ybound(-.5, len(ci.index) - .5)
    ax.set_xticks(x_ticks)
    ax.set_xticklabels(x_ticks)
    ax.set_yticks(range(len(ci.index)))
    ax.set_yticklabels(ci.index)
    ax.set_xlabel('Hazard Ratio')
    prettify_ax(ax)
    fig.tight_layout()

    if save_as is not None:
        # The keyword used to be misspelled `tranparent`, so the request for
        # a transparent background never reached matplotlib.
        fig.savefig(FIGDIR + save_as, transparent=True)
    return fig, ax, ci


# --- Genomic status -------------------------------------------------------
p53_mut = mut.features.ix['TP53'].ix[keepers_o].dropna()
del_3p = cn.features.ix['Deletion'].ix['3p14.2'].ix[0].ix[keepers_o].dropna()

# Recode the combined status to single letters; 'd' is the 'both' category.
combo = combine(p53_mut == 1, del_3p == -1)
combo = combo.map({'Lesion': 'b', 'neither': 'a', 'TP53': 'c', 'both': 'd'})
two_hit = combo == 'd'

# --- Clinical covariates --------------------------------------------------
clin_stage = clinical.processed.stage.ix[keepers_o].dropna()
clin_stage.name = 'Clinical_Stage'

# Collapse n2 and n3 into one 'n2+' level, then reduce to a boolean flag.
lymph_stage = clinical.processed.lymph_stage.replace('n2', 'n2+')
lymph_stage = lymph_stage.replace('n3', 'n2+').dropna()
lymph_stage = lymph_stage == 'n2+'
# Patients in keepers_o without a value are labelled 'nx' instead of NaN.
lymph_stage = lymph_stage.ix[keepers_o].fillna('nx')

s4 = clin_stage == 'Stage iv'
s4.name = 'Stage_IV'

drinker = clinical.processed.drinker

smoker = clinical.processed.smoker.dropna()
current_smoker = smoker == 'current smoker'
current_smoker.name = 'current_smoker'
non_smoker = smoker == 'lifelong non-smoker'
non_smoker.name = 'non_smoker'
recent_smoker = smoker.isin(['current reformed smoker for < or = 15 years',
                             'current smoker'])
recent_smoker.name = 'recent_smoker'

spread = clinical.processed.spread
spread = spread.replace({'yes': True, 'no': False})
invasion = clinical.processed.invasion
invasion = invasion.replace({'yes': True, 'no': False})
year = clinical.processed.year
old = clinical.processed.old_age

# --- Model 1: all patients, full covariate set ----------------------------
clinical_vars = pd.concat([drinker, current_smoker, non_smoker,
                           recent_smoker, spread, invasion, year, old, s4,
                           lymph_stage], axis=1).fillna(' missing')
clinical_vars = clinical_vars.ix[keepers_o].dropna()

m2 = get_cox_ph(surv, two_hit, covariates=clinical_vars, print_desc=True,
                interactions=False)
m3 = get_cox_ph(surv,
                covariates=clinical_vars[['drinker', 'recent_smoker',
                                          'spread', 'invasion', 'year',
                                          'old_age']],
                print_desc=True, interactions=False)
LR_test(m2, m3)

# Per-row colours for the forest plots, indexed by row position.
cc = [colors[4], 'grey', 'grey', '#9ecae1', '#9ecae1', 'grey', 'grey',
      '#9ecae1', '#9ecae1', 'grey', '#9ecae1', '#9ecae1']

fig, ax, ci = _plot_hazard_ratios(m2, figsize=(6, 5), x_bound=(.5, 5),
                                  x_ticks=[.5, 1, 1.5, 2, 4],
                                  row_colors=cc, save_as='mv_pb.pdf')

# --- Model 2: patients with age < 75 (drinker dropped from covariates) ----
clinical_vars = pd.concat([current_smoker, non_smoker, recent_smoker,
                           spread, invasion, year, old, s4, lymph_stage],
                          axis=1).fillna(' missing')
clinical_vars_y = clinical_vars.ix[ti(age < 75)].ix[keepers_o].dropna()
# NOTE(review): ' missing' is relabelled 'zzz', presumably so that level
# sorts last among the factor levels -- confirm.
clinical_vars_y = clinical_vars_y.replace(' missing', 'zzz')

m1 = get_cox_ph(surv, covariates=clinical_vars_y, print_desc=True,
                interactions=False)
m2 = get_cox_ph(surv, two_hit, covariates=clinical_vars_y, print_desc=True,
                interactions=False)

fig, ax, ci = _plot_hazard_ratios(m2, figsize=(6, 4), x_bound=(.5, 12),
                                  x_ticks=[.5, 1, 1.5, 2, 4, 8],
                                  row_colors=cc, save_as='mv_pa.pdf')

# --- Model 3: patients with age >= 75 (figure is not saved) ---------------
clinical_vars_y = clinical_vars.ix[ti(age >= 75)].ix[keepers_o].dropna()
clinical_vars_y = clinical_vars_y.replace(' missing', 'zzz')

m1 = get_cox_ph(surv, covariates=clinical_vars_y, print_desc=True,
                interactions=False)
m2 = get_cox_ph(surv, two_hit, covariates=clinical_vars_y, print_desc=True,
                interactions=False)

fig, ax, ci = _plot_hazard_ratios(m2, figsize=(6, 4), x_bound=(.5, 12),
                                  x_ticks=[.5, 1, 1.5, 2, 4, 8],
                                  row_colors=cc)

# --- Model 4: not lifelong non-smokers, 'Age < 75' group (not saved) ------
pts = ti(non_smoker.ix[keepers_o] != True).intersection(ti(old == 'Age < 75'))
clinical_vars_y = clinical_vars.ix[pts].fillna(' missing')

m1 = get_cox_ph(surv, covariates=clinical_vars_y, print_desc=True,
                interactions=False)
m2 = get_cox_ph(surv, two_hit, covariates=clinical_vars_y, print_desc=True,
                interactions=False)
LR_test(m2, m1)

# All rows grey here (row_colors omitted).
fig, ax, ci = _plot_hazard_ratios(m2, figsize=(6, 4), x_bound=(.5, 3.5),
                                  x_ticks=[.25, .5, 1, 1.5, 2, 4, 8, 16])
# Keep the module-level name the original script left behind, in case
# later code reads it.
haz = ci['exp(coef)']