import NotebookImport
from Imports import *
from Processing.Screen import *


def surv_test(s, surv, cov_df):
    """Fit the covariate-adjusted Cox model for a single feature vector.

    Missing values are dropped from ``s`` before it is handed to
    ``get_cox_ph_ms`` (called with ``return_val='LR'`` and
    ``interactions='just_feature'``).

    Parameters
    ----------
    s : feature values for one row of the screen; must support ``dropna``.
    surv : survival object forwarded unchanged to ``get_cox_ph_ms``.
    cov_df : covariate table forwarded unchanged to ``get_cox_ph_ms``.

    Returns
    -------
    Whatever ``get_cox_ph_ms`` returns, or -- if the fit raises -- a
    data-less ``pd.Series`` indexed by
    ``['LR', 'feature_p', 'fmla', 'hazzard']`` so that a row-wise ``apply``
    still yields a rectangular result.
    """
    s = s.dropna()
    try:
        return get_cox_ph_ms(surv, s, cov_df, return_val='LR',
                             interactions='just_feature')
    except Exception:
        # Best-effort: a failed fit for one feature must not abort the whole
        # screen.  ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit propagate so a long screen can
        # still be stopped by the user.
        # NOTE(review): the 'hazzard' spelling is kept as-is; presumably it
        # mirrors the key produced by get_cox_ph_ms -- confirm before renaming.
        return pd.Series(index=['LR','feature_p', 'fmla', 'hazzard'])


def run_screen(screen, filters, covariates):
    """Run a univariate and a covariate-adjusted survival screen.

    NOTE: this function reads the module-level name ``surv`` (it is not a
    parameter); it must be defined before the function is called.

    Parameters
    ----------
    screen : object exposing ``get_patient_set(filters)`` and
        ``get_data(patients, cutoff)``.
    filters : passed straight through to ``screen.get_patient_set``.
    covariates : sequence of pandas objects, concatenated column-wise into
        the covariate table used for the adjusted models.

    Returns
    -------
    tuple ``(res, full, univariate, keepers_o, df)`` where

    * ``res`` is the head of ``full`` sorted by ``('p', 'uncorrected')``,
    * ``full`` holds the adjusted-model formula and corrected p-values,
    * ``univariate`` holds the univariate hazard table and corrected
      p-values,
    * ``keepers_o`` is the patient set returned by the screen,
    * ``df`` is the feature table returned by ``screen.get_data``.
    """
    cov_df = pd.concat(covariates, axis=1)
    keepers_o = screen.get_patient_set(filters)
    # Cutoff handed to get_data: 5% of the patient set, but never below 10.
    cutoff = max(np.ceil(len(keepers_o) * .05), 10)
    df = screen.get_data(keepers_o, cutoff)

    # Univariate pass.
    univariate = cox_screen(df, surv)
    vec = univariate.LR.p.sort_index()
    univariate = pd.concat([univariate['hazard'], corrections(vec)],
                           keys=['hazard', 'p'], axis=1)

    # Every feature is carried into the adjusted pass; the q-value
    # pre-filter below is intentionally disabled.
    #hits = univariate[univariate['q_bh'] < .2].index
    hits = univariate.index

    # Covariate-adjusted pass, one model per feature row.
    full = df.ix[hits].apply(surv_test, args=(surv, cov_df,), axis=1)
    vec = full.LR.ix[univariate.index].sort_index()
    full = pd.concat([full[['fmla']], corrections(vec)],
                     keys=['fmla','p'], axis=1)

    # NOTE(review): this value of ``hits`` is computed but not used or
    # returned; kept so the function evaluates exactly what it did before.
    hits = true_index(full.p.bh_all.order() < .1)
    res = full.sort([('p','uncorrected')]).head()
    return res, full, univariate, keepers_o, df


# --- Define the TP53-mutation / 3p14.2-deletion "two hit" group ------------
# ``keepers_o`` here is the module-level patient set that is already in scope
# (presumably provided by the star-imports above -- TODO confirm).
p53_mut = mut.features.ix['TP53'].ix[keepers_o]
del_3p = cn.features.ix[('Deletion', '3p14.2', 'Lesion')].ix[keepers_o]
two_hit = combine(p53_mut > 0, del_3p < 0) == 'both'

screen = Screen(mut, cn, rna, mirna, clinical.binary_df, surv,
                keepers_o.intersection(ti(two_hit)))

# --- Screen 1: covariates old + age ----------------------------------------
res, full, univariate, keepers_o, df = run_screen(
    screen,
    filters=[hpv==1, two_hit==0, age>=85],
    covariates=[old, age])

# Notebook display cells: these bare expressions only show output when run
# interactively and have no effect when the file is run as a script.
(full.p.bh_all < .1).groupby(level=0).apply(pd.value_counts).unstack()
(full.p.bh_all < .1).value_counts()

muc5b = df.ix['mutation'].ix['MUC5B']>0
survival_and_stats(muc5b, surv)

# Re-run the univariate screen on the features passing the adjusted
# BH threshold, and keep those that are also univariately significant.
rr = cox_screen(df.ix[full.p.bh_all < .1], surv)
hits = full.ix[true_index(rr.LR.p < .05)]
hits = hits.sort([('p','uncorrected')])
len(hits)
hits

# --- Screen 2: adds the 'pre_2000' clinical covariate ----------------------
res, full, univariate, keepers_o, df = run_screen(
    screen,
    filters=[hpv==1, two_hit==0],
    covariates=[old, age, clinical.binary_df.ix['pre_2000']])
df.shape
df.groupby(level=0).size()

rr = cox_screen(df.ix[full.p.bh_all < .1], surv)
hits = full.ix[true_index(rr.LR.p < .05)]
hits = hits.sort([('p','uncorrected')])
len(hits)
hits.p.head(4)

survival_and_stats(df.ix['clinical'].ix['recent_smoker'].dropna(), surv)
hits.p.iloc[1]

mir548k = df.ix['mirna'].ix['hsa-mir-548k']
survival_and_stats(mir548k, surv)
survival_and_stats(df.ix['mutation'].ix['MUC5B'], surv)

# --- Build the combined univariate / multivariate supplemental table -------
rr = cox_screen(df, surv)
haz = rr['hazard'][['exp(coef)','lower .95','upper .95']]
p = rr.LR.p
uni = haz.join(p)
uni['PASS'] = uni.p < .05

# reset_index names the two index levels 'level_0' / 'level_1'; they are
# sorted on, restored as the index, and then given readable names.
multi = full.reset_index().sort(['level_0',('p','uncorrected')])
multi = multi.set_index(['level_0','level_1'])
multi.index.names = ['data_type','event']
multi['p'] = multi['p'].clip_upper(1.)
multi.columns = multi.columns.droplevel(0)
multi['PASS'] = multi.bh_all < .1

f = pd.concat([uni, multi], keys=['Univariate','Multivariate'], axis=1)
f[('BOTH','PASS')] = f.Univariate.PASS & f.Multivariate.PASS
# Rows passing both tests first, then ascending multivariate p-value.
f = f.sort([('BOTH','PASS'),('Multivariate','uncorrected')],
           ascending=[False, True])
f.to_csv(FIGDIR + 'supplemental_table2.csv', float_format='%.2e')