# TP53 mutation vs. survival analysis (notebook export).
#
# Every name not defined here (mut, surv, keepers_o, clinical, FH, run,
# cancer, colors, FIGDIR, pd, nan, exp, subplots, combine, true_index, the
# survival/Cox helpers, ...) is expected to come from the project's
# ``Imports`` star-import below.
import re
from itertools import combinations

import NotebookImport
from Imports import *


def _in_l2(position):
    """Return True if *position* lies in the residue range labelled 'L2'."""
    return 163 <= position < 196


def _in_l3(position):
    """Return True if *position* lies in the residue range labelled 'L3'."""
    return 236 <= position < 252


def _load_tp53_maf():
    """Fetch the TP53 rows of the mutation table, with all fields."""
    return FH.get_submaf(run.data_path, cancer.name, ['TP53'],
                         fields='All').ix['TP53']


def _tp53_status(maf, classifier):
    """Label each kept sample 'WT', 'Non-Disruptive' or 'Disruptive'.

    *classifier* maps a Protein_Change string to a boolean.  A sample is
    'Disruptive' when at least one of its mutations is labelled 'both' by
    ``combine`` (presumably: flagged by *classifier* AND non-silent --
    confirm against ``combine``).  Samples in ``mut.df.columns`` without
    any row in *maf* become 'WT'.  The result is restricted to
    ``keepers_o``.
    """
    table = pd.concat([combine(maf.Protein_Change.map(classifier),
                               maf.is_silent == 0),
                       maf.Tumor_Sample_Barcode],
                      axis=1, keys=['status', 'barcode'])
    per_mutation = table.set_index('barcode')['status']
    # Collapse multiple mutations per sample into a 0/1 flag.
    per_sample = (per_mutation == 'both').groupby(level=0).sum().clip_upper(1.)
    labels = per_sample.ix[mut.df.columns].fillna(-1).map(
        {-1: 'WT', 0: 'Non-Disruptive', 1: 'Disruptive'})
    return labels.ix[keepers_o]


# --- TP53 mutation count vs. survival and clinical covariates --------------
p53_mut = mut.df.ix['TP53'].ix[keepers_o].dropna().astype(int)
survival_and_stats(p53_mut, surv, figsize=(5, 4), order=[2, 1, 0])
screen_feature(p53_mut > 0, kruskal_pandas, clinical.processed.T).head()

ecs = clinical.clinical.presenceofpathologicalnodalextracapsularspread
ecs.name = 'Extra Capsular Spread'
pd.crosstab(p53_mut > 0, ecs).T.plot(kind='bar', rot=15)


def get_nums(s):
    """Return every run of digits found in *s*, as a list of strings."""
    return re.findall(r'\d+', s)


def is_disruptive(v):
    """Assign a mutation record to a coarse class.

    *v* is one row of the TP53 mutation table.  Non-missense variants are
    'InDel' when the classification mentions 'Ins' or 'Del', otherwise the
    text before the first underscore of ``Variant_Classification``.
    Missense variants are 'L2', 'L3' or 'other' according to the first
    number found in ``Protein_Change``.

    NOTE: this name is re-bound further down to a boolean classifier that
    takes a Protein_Change string; this version is only used for ``dd``.
    """
    variant_class = v.Variant_Classification
    if variant_class != 'Missense_Mutation':
        if 'Ins' in variant_class or 'Del' in variant_class:
            return 'InDel'
        return variant_class.split('_')[0]

    position = int(get_nums(v.Protein_Change)[0])
    if _in_l2(position):
        return 'L2'
    if _in_l3(position):
        return 'L3'
    return 'other'


# --- Per-sample mutation class (dd) and protein change (pc) ----------------
p53 = _load_tp53_maf()
dd = p53.apply(is_disruptive, 1)
dd = dd.replace('Silent', nan).dropna()  # silent mutations are discarded
p53 = p53.ix[dd.index]

# Kept samples with no remaining TP53 mutation row are treated as wild type.
others = keepers_o.diff(p53.Tumor_Sample_Barcode.ix[dd.index]).intersection(
    mut.df.columns)
dd.index = p53.Tumor_Sample_Barcode.ix[dd.index]
dd = pd.concat([pd.Series('WT', others), dd])
dd = dd[[i in keepers_o for i in dd.index]]

pc = pd.Series(list(p53.Protein_Change), index=p53.Tumor_Sample_Barcode)
pc = pd.concat([pd.Series('WT', others), pc])
pc = pc[[i in keepers_o for i in pc.index]]

# A sample may appear several times (one row per mutation), so survival is
# re-indexed positionally to stay aligned with dd / pc row by row.
s2 = surv.unstack().ix[dd.index]
s2.index = range(len(dd))
s2 = s2.stack()

pats = pd.Series(dd.index, range(len(dd)))
dd.index = range(len(dd))
pc.index = range(len(dd))

# Series order must match `keys`: dd holds the functional class and pc the
# protein change.  (Previously pc and dd were swapped relative to the
# headers, so the two CSV columns were mislabelled.)  The header spelling
# 'Protien Change' is kept as-is so existing readers of the file still work.
df = pd.concat([pats, dd, pc, s2[:, 'days'], s2[:, 'event']],
               keys=['patient ID', 'Functional Class', 'Protien Change',
                     'Days to Death/Censoring', 'Death Indicator'],
               axis=1).sort(['patient ID'])
df = df.set_index('patient ID')
df.to_csv(FIGDIR + 'fig2b.csv')

# --- Figure 2b: survival curves by mutation class --------------------------
fig, ax = subplots(figsize=(3.5, 2.7))
c = {'WT': 'grey', 'Splice': colors[0], 'other': colors[5],
     'L3': colors[1], 'L2': colors[2], 'Nonsense': colors[3],
     'InDel': colors[4]}
draw_survival_curve(dd, s2, colors=c, ax=ax)
ax.legend().set_visible(False)
prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'fig2b.pdf', transparent=True)

survival_and_stats(dd, s2, colors=colors[:6] + ['grey'] + colors[6:],
                   figsize=(4.5, 6))
get_surv_fit_lr(s2, dd[dd != 'WT'])

# --- Cox model: hazard ratio of each class ----------------------------------
# Renaming 'WT' to 'aWT' presumably makes it sort first so that R picks it
# as the reference level -- confirm against get_cox_ph.
dd = dd.replace('WT', 'aWT')
f = get_cox_ph(s2, dd, interactions=False)
ci = convert_robj(robjects.r.summary(f)[7])
# Drop the first 7 characters of each row label (presumably the covariate
# name prefix added by R -- confirm).
ci.index = [label[7:] for label in ci.index]
n = ci.ix[0] * 0 + 1  # row of ones for the 'WT' group
n.name = 'WT'
ci = ci.append(n)

fig, ax = subplots(figsize=(7, 4))
ci = ci.sort('exp(coef)')
haz = ci['exp(coef)']
# NOTE(review): bar colors are listed by sorted rank, not looked up by class
# name, so they only match `c` above while the hazard ordering stays as is.
b = haz.plot(kind='bar', ax=ax,
             yerr=[haz - ci['lower .95'], ci['upper .95'] - haz],
             ecolor='black', rot=0,
             color=['grey', colors[5], colors[4], colors[0],
                    colors[3], colors[2], colors[1]])
prettify_ax(ax)
ax.set_ylabel('Hazard Ratio')

# Pairwise comparison between every two classes.
sig = pd.Series({pair: get_cox_ph_ms(s2, dd[dd.isin(pair)],
                                     interactions=False)['LR']
                 for pair in combinations(dd.unique(), 2)})
sig.order()

# --- Disruptive / non-disruptive classification -----------------------------
# Amino-acid property table, indexed by its second column with duplicate
# index entries collapsed to their first row.
lo = pd.read_csv('../Extra_Data/amino_acids.csv', index_col=1)
lo = lo.groupby(level=0).first()


def is_disruptive(s):
    """Return True if Protein_Change string *s* counts as disruptive.

    Changes ending in '*' are disruptive; changes ending in 'splice' or
    containing 'fs' are not.  Otherwise ``s[3:-1]`` is parsed as the
    position, and the change is disruptive only when that position is in
    the L2 or L3 range and ``lo.Polarity`` differs between ``s[2]`` and
    ``s[-1]``.  Strings that cannot be parsed, or whose residues are
    missing from ``lo``, are not disruptive.
    """
    if s.endswith('*'):
        return True
    if s.endswith('splice'):
        return False
    if 'fs' in s:
        return False
    try:
        position = int(s[3:-1])
    except ValueError:
        return False
    if not (_in_l2(position) or _in_l3(position)):
        return False
    try:
        return bool(lo.Polarity[s[2]] != lo.Polarity[s[-1]])
    except (KeyError, IndexError):
        return False


p53 = _load_tp53_maf()
status = _tp53_status(p53, is_disruptive)
survival_and_stats(status, surv, colors=colors[:6] + ['grey'] + colors[6:],
                   figsize=(7, 5))
get_surv_fit_lr(surv, status[status.isin(['Non-Disruptive', 'WT'])])


def is_disruptive_mod(s):
    """Variant of ``is_disruptive`` with a modified rule set.

    Differs from ``is_disruptive`` in two ways: changes ending in 'splice'
    are disruptive, and any parsable position in the L2 or L3 range is
    disruptive without a polarity check.
    """
    if s.endswith('*'):
        return True
    if s.endswith('splice'):
        return True
    if 'fs' in s:
        return False
    try:
        position = int(s[3:-1])
    except ValueError:
        return False
    return _in_l2(position) or _in_l3(position)


p53 = _load_tp53_maf()
status = _tp53_status(p53, is_disruptive_mod)
survival_and_stats(status, surv, colors=colors[:6] + ['grey'] + colors[6:],
                   figsize=(7, 5))
f = get_cox_ph(surv,
               status[status.isin(['Non-Disruptive', 'WT'])] == 'Non-Disruptive',
               interactions=False, print_desc=True)
# NOTE(review): hard-coded numbers, presumably copied from the Cox summary
# printed above -- confirm.
exp(.79), exp(.79) - exp(.79 - .353)

# --- Survival by individual recurrent protein change ------------------------
cc = p53.set_index('Tumor_Sample_Barcode').Protein_Change
cc = pd.concat([pd.Series('WT', others), cc])
cc = cc[cc.isin(true_index(cc.value_counts() > 5))]  # seen more than 5 times
s2 = surv.unstack().ix[cc.index]
s2.index = range(len(cc))
s2 = s2.stack()
cc.index = range(len(cc))
survival_and_stats(cc, s2, colors=['grey'] + colors, figsize=(7, 5))