import NotebookImport
from Imports import *
importing IPython notebook from Imports.ipynb Populating the interactive namespace from numpy and matplotlib changing to source dirctory populating namespace with data
from Processing.Screen import *
def surv_test(s, surv, cov_df):
    """Fit the covariate-adjusted Cox model for one feature, tolerating failures.

    Missing values are dropped from ``s`` and the result of
    ``get_cox_ph_ms(surv, s, cov_df, return_val='LR',
    interactions='just_feature')`` is returned unchanged.

    If that call raises an ordinary exception, a placeholder Series with the
    index ``['LR', 'feature_p', 'fmla', 'hazzard']`` and no data is returned
    instead, so a row-wise ``DataFrame.apply`` over many features is not
    aborted by one failed fit.

    Parameters
    ----------
    s : pandas.Series
        Feature values; NaNs are removed before fitting.
    surv
        Survival data, passed straight through to ``get_cox_ph_ms``.
    cov_df
        Covariates, passed straight through to ``get_cox_ph_ms``.
    """
    s = s.dropna()
    try:
        return get_cox_ph_ms(surv, s, cov_df, return_val='LR',
                             interactions='just_feature')
    except Exception:
        # Catch Exception rather than everything: KeyboardInterrupt and
        # SystemExit must still be able to stop a long-running screen.
        # NOTE(review): the 'hazzard' label is kept as-is because callers may
        # rely on it -- confirm against the labels get_cox_ph_ms returns.
        return pd.Series(index=['LR', 'feature_p', 'fmla', 'hazzard'])
def run_screen(screen, filters, covariates):
    """Run a univariate and a covariate-adjusted survival screen.

    Both screens are run against the module-level ``surv`` object, which is
    not a parameter of this function.

    Parameters
    ----------
    screen
        Object providing ``get_patient_set(filters)`` and
        ``get_data(patients, cutoff)``.
    filters : list
        Passed to ``screen.get_patient_set`` to select the patients.
    covariates : list
        Objects concatenated column-wise (``pd.concat(..., axis=1)``) into
        the covariate frame handed to ``surv_test``.

    Returns
    -------
    tuple
        ``(res, full, univariate, keepers_o, df)`` where ``res`` is the head
        of ``full`` sorted by ``('p', 'uncorrected')``, ``full`` holds the
        adjusted-model formula and corrected p-values, ``univariate`` holds
        the ``cox_screen`` hazard table and corrected p-values,
        ``keepers_o`` is the selected patient set and ``df`` is the feature
        matrix returned by ``screen.get_data``.
    """
    cov_df = pd.concat(covariates, axis=1)
    keepers_o = screen.get_patient_set(filters)

    # Cutoff handed to screen.get_data: 5% of the selected patients
    # (rounded up), but never less than 10.
    cutoff = max(np.ceil(len(keepers_o) * .05), 10)
    df = screen.get_data(keepers_o, cutoff)

    # Univariate screen, with multiple-testing corrections on its LR p-values.
    univariate = cox_screen(df, surv)
    uni_p = univariate.LR.p.sort_index()
    univariate = pd.concat([univariate['hazard'], corrections(uni_p)],
                           keys=['hazard', 'p'], axis=1)

    # Every feature from the univariate screen is carried into the adjusted
    # model; no univariate significance pre-filter is applied.
    features = univariate.index
    full = df.ix[features].apply(surv_test, args=(surv, cov_df,), axis=1)
    full_p = full.LR.ix[univariate.index].sort_index()
    full = pd.concat([full[['fmla']], corrections(full_p)],
                     keys=['fmla', 'p'], axis=1)

    res = full.sort([('p', 'uncorrected')]).head()
    return res, full, univariate, keepers_o, df
# Per-patient TP53 mutation and 3p14.2 deletion calls, restricted to keepers_o.
p53_mut = mut.features.ix['TP53'].ix[keepers_o]
del_3p = cn.features.ix[('Deletion', '3p14.2', 'Lesion')].ix[keepers_o]
# two_hit is True where combine(...) reports 'both' -- presumably patients
# that are both TP53-mutated and 3p14.2-deleted; confirm against combine().
two_hit = combine(p53_mut > 0, del_3p < 0) == 'both'
# NOTE(review): the Screen is built with the two-hit patients as its last
# argument, while the filters below select two_hit==0 -- confirm what that
# argument means to Screen.
screen = Screen(mut, cn, rna, mirna, clinical.binary_df, surv, keepers_o.intersection(ti(two_hit)))
# First screen: filters hpv==1, two_hit==0 and age>=85; covariates old and age.
res, full, univariate, keepers_o, df = run_screen(screen, filters=[hpv==1, two_hit==0, age>=85],
covariates=[old, age])
# Per first index level of `full`, count features with bh_all below / not below 0.1.
(full.p.bh_all < .1).groupby(level=0).apply(pd.value_counts).unstack()
False | True | |
---|---|---|
clinical | 11 | 2 |
cna | 71 | NaN |
mirna | 237 | NaN |
mutation | 220 | 5 |
rna | 455 | 7 |
5 rows × 2 columns
# Overall count of features with bh_all below / not below 0.1.
(full.p.bh_all < .1).value_counts()
False 994 True 14 dtype: int64
# Binary MUC5B mutation call (feature value > 0) and its survival summary.
muc5b = df.ix['mutation'].ix['MUC5B'] > 0
survival_and_stats(muc5b, surv)

# Re-run cox_screen on the features whose bh_all is below 0.1 in `full`, keep
# those with rr.LR.p < .05, and order the result by uncorrected p-value.
rr = cox_screen(df.ix[full.p.bh_all < .1], surv)
hits = full.ix[true_index(rr.LR.p < .05)].sort([('p', 'uncorrected')])
len(hits)
6
# Display the selected hits table.
hits
fmla | p | |||||||
---|---|---|---|---|---|---|---|---|
fmla | uncorrected | bh_within | bh_all | bonf_all | bonf_within | two_step | ||
mutation | MUC5B | Surv(days, event) ~ feature + old\n | 7.73e-05 | 0.01 | 0.02 | 0.08 | 0.02 | 0.03 |
rna | BIOCARTA_PYK2_PATHWAY | Surv(days, event) ~ feature + old + feature:old\n | 1.68e-04 | 0.03 | 0.03 | 0.17 | 0.08 | 0.13 |
clinical | recent_smoker | Surv(days, event) ~ feature + old\n | 6.81e-04 | 0.00 | 0.06 | 0.69 | 0.01 | 0.02 |
pre_2000 | Surv(days, event) ~ feature\n | 6.98e-04 | 0.00 | 0.06 | 0.70 | 0.01 | 0.02 | |
mutation | BIOCARTA_PGC1A_PATHWAY | Surv(days, event) ~ feature + old + feature:old\n | 8.71e-04 | 0.04 | 0.07 | 0.88 | 0.20 | 0.20 |
rna | RAD51 | Surv(days, event) ~ feature + old + age + feat... | 1.29e-03 | 0.09 | 0.09 | 1.30 | 0.60 | 0.43 |
6 rows × 7 columns
# Second screen: same Screen object, filters hpv==1 and two_hit==0 only (no
# age filter), with the clinical 'pre_2000' row added to the covariates.
res, full, univariate, keepers_o, df = run_screen(screen, filters=[hpv==1, two_hit==0],
covariates=[old, age, clinical.binary_df.ix['pre_2000']])
# Dimensions of the feature matrix returned by the screen.
df.shape
(1008, 179)
# Number of rows of df per value of its first index level.
df.groupby(level=0).size()
clinical 13 cna 71 mirna 237 mutation 225 rna 462 dtype: int64
# Re-run cox_screen on the features whose bh_all is below 0.1 in `full`, keep
# those with rr.LR.p < .05, and order the result by uncorrected p-value.
rr = cox_screen(df.ix[full.p.bh_all < .1], surv)
hits = full.ix[true_index(rr.LR.p < .05)].sort([('p', 'uncorrected')])
len(hits)
6
# First four rows of the p-value columns of the hits table.
hits.p.head(4)
uncorrected | bh_within | bh_all | bonf_all | bonf_within | two_step | ||
---|---|---|---|---|---|---|---|
clinical | recent_smoker | 1.79e-05 | 2.33e-04 | 0.01 | 0.02 | 2.33e-04 | 0.00 |
mirna | hsa-mir-548k | 3.40e-05 | 8.05e-03 | 0.01 | 0.03 | 8.05e-03 | 0.04 |
rna | BIOCARTA_PYK2_PATHWAY | 3.15e-04 | 3.30e-02 | 0.04 | 0.32 | 1.46e-01 | 0.17 |
mutation | MUC5B | 3.71e-04 | 2.78e-02 | 0.04 | 0.37 | 8.34e-02 | 0.14 |
4 rows × 6 columns
# Survival summary for the clinical 'recent_smoker' feature, missing values dropped.
survival_and_stats(df.ix['clinical'].ix['recent_smoker'].dropna(), surv)
# p-value row of the second hit (position 1 in the sorted hits table).
hits.p.iloc[1]
uncorrected 3.40e-05 bh_within 8.05e-03 bh_all 1.14e-02 bonf_all 3.42e-02 bonf_within 8.05e-03 two_step 4.03e-02 Name: (mirna, hsa-mir-548k), dtype: float64
# Survival summary for the hsa-mir-548k row of the mirna features.
mir548k = df.ix['mirna'].ix['hsa-mir-548k']
survival_and_stats(mir548k, surv)
# Survival summary for the MUC5B mutation row, passed as stored in df
# (not thresholded with > 0 here).
survival_and_stats(df.ix['mutation'].ix['MUC5B'], surv)
# --- Build supplemental table 2: univariate and multivariate results side by side ---

# Univariate part: cox_screen over every feature in df.
rr = cox_screen(df, surv)
# Keep the exp(coef) column and its lower/upper .95 columns from the hazard table.
haz = rr['hazard'][['exp(coef)','lower .95','upper .95']]
p = rr.LR.p
uni = haz.join(p)
# Univariate PASS flag: p below 0.05.
uni['PASS'] = uni.p < .05
# Multivariate part: flatten the index of `full`, sort by the first index
# level and then by uncorrected p, and restore the two-level index.
multi = full.reset_index().sort(['level_0',('p','uncorrected')]).set_index(['level_0','level_1'])
multi.index.names = ['data_type','event']
# Cap the p-value columns at 1 (corrected values above 1 do occur, e.g. a
# bonf_all of 1.30 in the first screen's output).
multi['p'] = multi['p'].clip_upper(1.)
# Drop the top column level ('fmla' / 'p') so the columns are addressed by
# their second-level names.
multi.columns = multi.columns.droplevel(0)
# Multivariate PASS flag: bh_all below 0.1.
multi['PASS'] = multi.bh_all < .1
# Combine both parts under 'Univariate' / 'Multivariate' column groups.
f = pd.concat([uni, multi], keys=['Univariate','Multivariate'], axis=1)
# Flag features that pass both screens.
f[('BOTH','PASS')] = f.Univariate.PASS & f.Multivariate.PASS
# Features passing both come first; within each group sort by ascending
# multivariate uncorrected p.
f = f.sort([('BOTH','PASS'),('Multivariate','uncorrected')], ascending=[False, True])
# Write the table to FIGDIR, floats formatted as '%.2e'.
f.to_csv(FIGDIR + 'supplemental_table2.csv', float_format='%.2e')