"""Survival screen over molecular and clinical features.

Runs a univariate Cox screen and a covariate-adjusted Cox test for every
feature returned by the ``Screen`` object, writes two supplemental tables to
``FIGDIR``, and then follows up on individual hits (TP53 mutation, 3p14.2
deletion and their co-occurrence).

Names such as ``mut``, ``cn``, ``rna``, ``mirna``, ``clinical``, ``surv``,
``hpv``, ``old``, ``age``, ``run``, ``FIGDIR`` and the statistical/plotting
helpers are not defined here; they are expected to arrive through the star
imports below.
"""
import itertools

import NotebookImport
from Imports import *
from Processing.Screen import *


def surv_test(s, surv, cov_df):
    """Run the covariate-adjusted Cox test for a single feature.

    Parameters
    ----------
    s : feature vector; missing values are dropped before fitting.
    surv : survival data, forwarded unchanged to ``get_cox_ph_ms``.
    cov_df : covariates, forwarded unchanged to ``get_cox_ph_ms``.

    Returns
    -------
    Whatever ``get_cox_ph_ms`` returns for ``return_val='LR'`` and
    ``interactions='just_feature'``. If the fit raises, an all-NaN
    ``pd.Series`` indexed by ``['LR', 'feature_p', 'fmla', 'hazzard']`` is
    returned instead, so a row-wise ``apply`` over many features keeps going.
    """
    s = s.dropna()
    try:
        return get_cox_ph_ms(surv, s, cov_df, return_val='LR',
                             interactions='just_feature')
    except Exception:
        # Best-effort: a failed model fit yields an empty row rather than
        # aborting the whole screen. Unlike a bare ``except``, this lets
        # KeyboardInterrupt / SystemExit propagate.
        # NOTE(review): the 'hazzard' label differs from the 'hazard' column
        # used for the univariate results; left unchanged because it is a
        # runtime label.
        return pd.Series(index=['LR', 'feature_p', 'fmla', 'hazzard'])


def run_screen(screen, filters, covariates):
    """Run the univariate and covariate-adjusted survival screens.

    Parameters
    ----------
    screen : object providing ``get_patient_set(filters)`` and
        ``get_data(patients, cutoff)``.
    filters : passed to ``screen.get_patient_set`` to select patients.
    covariates : list of objects concatenated column-wise into the covariate
        frame used by the adjusted test.

    Returns
    -------
    tuple ``(res, full, univariate, keepers_o, df)`` where ``res`` is the
    head of ``full`` sorted by uncorrected p-value, ``full`` holds the
    adjusted-test formula and corrected p-values, ``univariate`` holds the
    univariate hazard table and corrected p-values, ``keepers_o`` is the
    selected patient set and ``df`` is the feature matrix.

    Notes
    -----
    Survival data is read from the module-level ``surv``; it is not a
    parameter of this function.
    """
    cov_df = pd.concat(covariates, axis=1)
    keepers_o = screen.get_patient_set(filters)
    # Threshold handed to get_data: 5% of the selected patients, at least 10.
    # Presumably a minimum event count per feature -- confirm in Screen.
    cutoff = max(np.ceil(len(keepers_o) * .05), 10)
    df = screen.get_data(keepers_o, cutoff)

    univariate = cox_screen(df, surv)
    vec = univariate.LR.p.sort_index()
    univariate = pd.concat([univariate['hazard'], corrections(vec)],
                           keys=['hazard', 'p'], axis=1)

    # Every feature goes on to the adjusted test; the q-value pre-filter
    # below is disabled.
    #hits = univariate[univariate['q_bh'] < .2].index
    hits = univariate.index
    full = df.ix[hits].apply(surv_test, args=(surv, cov_df,), axis=1)
    vec = full.LR.ix[univariate.index].sort_index()
    full = pd.concat([full[['fmla']], corrections(vec)],
                     keys=['fmla', 'p'], axis=1)

    res = full.sort([('p', 'uncorrected')]).head()
    return res, full, univariate, keepers_o, df


# --- Run the screen --------------------------------------------------------
# NOTE(review): `keepers_o` is read here before run_screen() rebinds it on the
# next line, so it has to be supplied by the star imports -- confirm.
screen = Screen(mut, cn, rna, mirna, clinical.binary_df, surv, keepers_o)
res, full, univariate, keepers_o, df = run_screen(screen, filters=[hpv],
                                                  covariates=[old, age])

# Bare expressions throughout this script are inspection output (presumably
# notebook cell displays); they have no effect when run as a plain script.
(mut.df.ix[:, keepers_o].sum(1) > 0).value_counts()
df.shape
df.groupby(level=0).size()

# Counts of mutation / rna features outside and inside run.gene_sets.
len([i for i in df.ix['mutation'].index if i not in run.gene_sets])
len([i for i in df.ix['mutation'].index if i in run.gene_sets])
len([i for i in df.ix['rna'].index if i not in run.gene_sets])
len([i for i in df.ix['rna'].index if i in run.gene_sets])

(full.p.bh_all < .1).groupby(level=0).apply(pd.value_counts).unstack()
(full.p.bh_all < .1).value_counts()

# --- Supplemental table 1: univariate + multivariate results ---------------
rr = cox_screen(df, surv)
haz = rr['hazard'][['exp(coef)', 'lower .95', 'upper .95']]
p = rr.LR.p
uni = haz.join(p)
uni['PASS'] = uni.p < .05

multi = (full.reset_index()
             .sort(['level_0', ('p', 'uncorrected')])
             .set_index(['level_0', 'level_1']))
multi.index.names = ['data_type', 'event']
multi['p'] = multi['p'].clip_upper(1.)  # cap corrected p-values at 1
multi.columns = multi.columns.droplevel(0)
multi['PASS'] = multi.bh_all < .1

f = pd.concat([uni, multi], keys=['Univariate', 'Multivariate'], axis=1)
f[('BOTH', 'PASS')] = f.Univariate.PASS & f.Multivariate.PASS
f = f.sort([('BOTH', 'PASS'), ('Multivariate', 'uncorrected')],
           ascending=[False, True])

# Display copy: underscores in event names become spaces and newlines are
# stripped from the formula text before writing the CSV.
fd = f.copy()
fd.index = pd.MultiIndex.from_tuples([(i[0], i[1].replace('_', ' '))
                                      for i in fd.index])
fd[('Multivariate', 'fmla')] = fd.Multivariate.astype(str).fmla.str.replace('\n', '')
fd.to_csv(FIGDIR + 'supplemental_table1.csv', float_format='%.2e')

(f.Univariate.PASS & f.Multivariate.PASS).value_counts()

# --- Features passing both tests -------------------------------------------
hits = full.ix[ti((f.Univariate.PASS & f.Multivariate.PASS))]
hits = hits.sort([('p', 'uncorrected')])

get_cox_ph(surv, df.ix['mutation'].ix['TP53'], print_desc=True);
# NOTE(review): the constants below look hand-copied from the model summary
# printed above -- confirm they still match.
exp(1.07), exp(1.07) - exp(1.07 - .333)
hits.ix['mutation'].ix['TP53']
hits.ix['mutation']['p'].sort('uncorrected').head(4)

get_cox_ph(surv, df.ix['cna'].ix['del_3p14.2'], print_desc=True);
# NOTE(review): same as above, constants presumably copied from the printed
# summary -- confirm.
exp(1.26), exp(1.26) - exp(1.26 - .369)
hits.ix['cna']['p'].sort('uncorrected').head(4)
hits.ix['cna']['p'].iloc[0]
survival_and_stats(df.ix['cna'].ix['del_3p14.2'], surv, figsize=(6, 4))

hits.ix['rna']['p'].sort('uncorrected').head(3)
survival_and_stats(df.ix['rna'].ix['SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES'], surv)

hits.ix['mirna']['p'].sort('uncorrected').head(4)
survival_and_stats(df.ix['mirna'].ix['hsa-mir-3170'].dropna(), surv,
                   figsize=(6, 4))

hits.ix['clinical']['p'].sort('uncorrected').head(4)

# --- Association among hits -------------------------------------------------
df = df.replace({'no': False, 'yes': True})
hit_df = df.ix[hits.index].astype(float)

# Fisher tests for pairs of hits that share the same first index level.
fet_within = pd.DataFrame({
    (a, b): fisher_exact_test(v1, v2)
    for a, v1 in hit_df.iterrows()
    for b, v2 in hit_df.iterrows()
    if a[0] == b[0] and a[1] > b[1]
}).T
fet_within.sort('p').head()

# Within each first-level group, collect the later member of every pair whose
# log2 odds ratio exceeds 4, then drop those rows from hit_df.
s = {
    b
    for i, (a, v1) in enumerate(hit_df.iterrows())
    for j, (b, v2) in enumerate(hit_df.iterrows())
    if (i < j) and a[0] == b[0]
    and np.log2(fisher_exact_test(v1, v2)['odds_ratio']) > 4
}
# Filter via a list comprehension to keep sorted order.
hit_df = hit_df.ix[[i for i in hit_df.index if i not in s]]
hit_df.groupby(level=0).size()

# --- Supplemental table 2: Fisher tests across data types -------------------
fet = {}
for dtypes in itertools.combinations(hit_df.index.levels[0], 2):
    fet[dtypes] = pd.DataFrame({
        (a[1], b[1]): fisher_exact_test(v1, v2)
        for a, v1 in hit_df.iterrows()
        for b, v2 in hit_df.iterrows()
        if a[0] != b[0]
        if a[0] == dtypes[0]
        if b[0] == dtypes[1]
    }).T
fet = pd.concat(fet)
# Reorder each index entry to (type_a, feature_a, type_b, feature_b).
fet.index = pd.MultiIndex.from_tuples([(i[0], i[2][0], i[1], i[2][1])
                                       for i in fet.index])
fet['p bonf.'] = fet['p'] * len(fet)
fet['PASS'] = (fet['p bonf.'] < .01)
fet = fet.sort(['PASS', 'p'], ascending=[0, 1])
fet.PASS.value_counts()

clinical.binary_df.ix['stage_iv'].value_counts(0)
df.shape
# NOTE(review): `combo` is only assigned further down in this script; unless
# the star imports provide it, this line raises NameError when the file is
# run top to bottom -- confirm the intended order.
pd.crosstab(combo, clinical.binary_df.ix['stage_iv'])
fet.to_csv(FIGDIR + 'supplemental_table2.csv', float_format='%.2e')

# Lowest-p row per (level 0, level 2) group of the cross-type table.
f3 = fet.groupby(level=[0, 2]).apply(lambda s: s.sort('p').head(1))
f3.index = f3.index.droplevel([0, 1])
f3['p_corrected'] = f3['p'] * len(fet)
f3.index = pd.MultiIndex.from_tuples([tuple((i.replace('_', ' ') for i in s))
                                      for s in f3.index])
f3.sort('p')

# --- Figures ----------------------------------------------------------------
fig, axs = subplots(1, 2, figsize=(10, 4))
violin_plot_pandas(df.ix['cna'].ix['del_3p14.2'],
                   rna.pathways.ix['BIOCARTA_AKAP13_PATHWAY'], ax=axs[0])
rna.loadings.ix['BIOCARTA_AKAP13_PATHWAY'].order().plot(kind='bar', ax=axs[1])
for ax in axs:
    prettify_ax(ax)

pd.crosstab(df.ix['clinical'].ix['recent_smoker'],
            df.ix['cna'].ix['del_3p14.2'])

fig, axs = subplots(1, 2, figsize=(10, 4))
violin_plot_pandas(df.ix['clinical'].ix['recent_smoker'],
                   rna.pathways.ix['SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES'],
                   ax=axs[0])
rna.loadings.ix['SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES'].order().plot(kind='bar',
                                                                    ax=axs[1])
for ax in axs:
    prettify_ax(ax)

# --- TP53 mutation and 3p14.2 deletion --------------------------------------
venn_pandas(df.ix['mutation'].ix['TP53'], df.ix['cna'].ix['del_3p14.2']);
len(keepers_o)
fisher_exact_test(df.ix['mutation'].ix['TP53'], df.ix['cna'].ix['del_3p14.2'])
fisher_exact_test(df.ix['mutation'].ix['TP53'],
                  df.ix['cna'].ix['del_3p14.2']).p * len(fet)

combo = combine(df.ix['mutation'].ix['TP53'] > 0,
                df.ix['cna'].ix['del_3p14.2'])
survival_and_stats(combo, clinical.survival.survival_5y, figsize=(6, 4))
get_surv_fit_lr(surv, combo)

two_hit = combo == 'both'
p53_mut = df.ix['mutation'].ix['TP53']
del_3p = df.ix['cna'].ix['del_3p14.2']
two_hit.name = 'TP53_3p'
survival_and_stats(two_hit, surv)

# Likelihood-ratio comparison of the combined-event model against each
# single-event model.
m1 = get_cox_ph(surv, two_hit, [age, old], interactions='None',
                print_desc=True)
m2 = get_cox_ph(surv, p53_mut, [age, old], interactions='None',
                print_desc=True)
LR_test(m1, m2, df=1)
m3 = get_cox_ph(surv, del_3p, [age, old], interactions='None',
                print_desc=True)
LR_test(m1, m3, df=1)

# Combined event tested within the patients that carry one of the two events.
cox(two_hit.ix[true_index(p53_mut==1)].ix[ti(old==False)], surv)
surv_test(two_hit.ix[true_index(p53_mut==1)], surv, [age, old])
surv_test(two_hit.ix[true_index(del_3p==1)], surv, [age, old])

# Screen the mutation features against the combined event with Fisher tests.
r = screen_feature(two_hit, fisher_exact_test, df.ix['mutation'])
r.head()
len(r)
r.ix['REACTOME_SOS_MEDIATED_SIGNALLING'].p * len(r)
r.ix['CASP8'].p * len(r)