import NotebookImport
from Imports import *
importing IPython notebook from Imports.ipynb Populating the interactive namespace from numpy and matplotlib changing to source dirctory
populating namespace with data
# TP53 status for the patients in keepers_o: True where the TP53 mutation
# feature is greater than zero.
p53_mut = mut.features.ix['TP53'].ix[keepers_o].dropna()
p53_mut = p53_mut > 0
p53_mut.name = 'TP53'

# First 'Deletion' / '3p14.2' row of the copy-number feature table,
# restricted to the same patients.
del_3p = cn.features.ix['Deletion'].ix['3p14.2'].ix[0]
del_3p = del_3p.ix[keepers_o].dropna()
del_3p.name = 'del_3p'

# combine() output is recoded to single letters (the four keys below are the
# labels it is expected to return); 'd' == 'both' marks patients carrying the
# TP53 mutation together with the 3p deletion.
combo = combine(p53_mut == 1, del_3p == -1).map(
    {'neither': 'a', 'del_3p': 'b', 'TP53': 'c', 'both': 'd'})
two_hit = (combo == 'd')

# Patient counts by two-hit status and clinical stage.
pd.crosstab(
    two_hit.map({False: 'TP53-3p Neg.', True: 'TP53-3p Pos.'}),
    clinical.stage.clinicalstage)
clinicalstage | stage i | stage ii | stage iii | stage iva | stage ivb | stage ivc |
---|---|---|---|---|---|---|
TP53 | ||||||
TP53-3p Neg. | 5 | 18 | 19 | 27 | 1 | 1 |
TP53-3p Pos. | 5 | 31 | 45 | 92 | 4 | 2 |
# String labels for the two-hit groups; the name is blanked (presumably so
# the bar-plot legend carries no title).
th = two_hit.map({False: 'TP53-3p Neg.', True: 'TP53-3p Pos.'})
th.name = ''

# Merge the stage IV sub-stages, then keep only the upper-cased second word
# of each label (e.g. 'stage iv' -> 'IV').
st = clinical.stage.clinicalstage.replace(
    ['stage iva', 'stage ivb', 'stage ivc'], 'stage iv')
st = st.dropna().map(lambda label: label.split()[1].upper())
st.name = 'Stage'

ct = pd.crosstab(th, st).T
feature = two_hit
assignment = st

# First panel: patient counts per stage. Remaining panels: one survival plot
# per stage group.
fig, axs = subplots(1, 5, figsize=(10, 3))
ct.plot(kind='bar', rot=0, ax=axs[0])
axs[0].legend(loc='upper left', frameon=False)
axs[0].set_ylabel('# of Patients')
for i, (l, s) in enumerate(feature.groupby(assignment)):
    ax = axs[i + 1]
    draw_survival_curve(s, surv, ax=ax,
                        title='{} = {}'.format(assignment.name, l))
    ax.get_legend().set_visible(False)
for ax in axs:
    prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'stage_breakdown.pdf')
# Compare clinical stage, recoded I-IV -> 1-4, between the two-hit groups
# using the kruskal_pandas helper.
kruskal_pandas(two_hit, st.replace({'I':1, 'II':2, 'III':3, 'IV':4}))
H 5.31 p 0.02 dtype: float64
# Fisher exact test of two-hit status against stage IV vs. any earlier stage.
fisher_exact_test(two_hit, st=='IV')
odds_ratio 1.75 p 0.05 dtype: float64
# Patient counts by two-hit status and histologic grade ('gx' still included).
pd.crosstab(two_hit.map({True: 'TP53-3p Pos.', False:'TP53-3p Neg.'}),
clinical.clinical.neoplasmhistologicgrade)
neoplasmhistologicgrade | g1 | g2 | g3 | g4 | gx |
---|---|---|---|---|---|
h | |||||
TP53-3p Neg. | 14 | 37 | 15 | 1 | 4 |
TP53-3p Pos. | 9 | 125 | 44 | 0 | 1 |
# Histologic grade with 'gx' treated as missing and dropped.
grade = clinical.clinical.neoplasmhistologicgrade
grade = grade.replace('gx', nan).dropna()
grade.name = 'grade'

# Second argument is True for patients outside g2-g4, so the odds ratio is
# for g1 relative to the higher grades.
fisher_exact_test(two_hit, grade.isin(['g2', 'g3', 'g4']) == False)
odds_ratio 2.02e-01 p 4.04e-04 dtype: float64
# Pool g3 and g4 into a single 'G3+' category and upper-case the labels.
gr = grade.replace(['g3', 'g4'], 'g3+').str.upper()
gr.name = 'Grade'

ct = pd.crosstab(gr, th)
feature = two_hit
assignment = gr

# First panel: patient counts per grade. Remaining panels: one survival plot
# per grade group.
fig, axs = subplots(1, 4, figsize=(8, 3))
ct.plot(kind='bar', rot=0, ax=axs[0])
axs[0].legend(loc='upper left', frameon=False)
axs[0].set_ylabel('# of Patients')
for i, (l, s) in enumerate(feature.groupby(assignment)):
    ax = axs[i + 1]
    draw_survival_curve(s, surv, ax=ax,
                        title='{} = {}'.format(assignment.name, l))
    ax.get_legend().set_visible(False)
for ax in axs:
    prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'grade_breakdown.pdf')
# Recode grade as 1-4 and pass both series through match_series before the
# test (presumably to restrict them to shared patients -- confirm in Imports).
a,b = match_series(two_hit, grade.replace({'g1':1, 'g2':2, 'g3':3, 'g4':4}))
kruskal_pandas(a,b)
H 3.19 p 0.07 dtype: float64
# Same comparison without the explicit match_series step; the recorded result
# is identical to the matched call, so the pre-matching appears redundant.
kruskal_pandas(two_hit, grade.replace({'g1':1, 'g2':2, 'g3':3, 'g4':4}))
H 3.19 p 0.07 dtype: float64
# Histologic grade with 'gx' treated as missing and dropped.
grade = clinical.clinical.neoplasmhistologicgrade
grade = grade.replace('gx', nan).dropna()
grade.name = 'grade'

# Among g2-g4 patients only, test two-hit status against g2 vs. g3/g4.
_g2_to_g4 = grade[grade.isin(['g2', 'g3', 'g4'])]
fisher_exact_test(two_hit, _g2_to_g4 == 'g2')
odds_ratio 1.23 p 0.60 dtype: float64
This cohort is overwhelmingly Stage IV and of high grade. This is likely due to the selection criteria for the TCGA study, which required a large amount of tissue and thus skewed the cohort towards patients with larger tumors. In general we find that patients with TP53-3p events trend towards being in stage IV (odds ratio 1.75, P = 0.05) and of higher grade (g2, g3, g4 vs. g1; odds ratio 4.9, P < .001). While this may be the case, this combination of events is still predictive of survival within these subgroups.
Tumor Subdivision
# Patient counts by two-hit label and tumor subdivision.
pd.crosstab(th,
clinical.processed.tumor_subdivision)
tumor_subdivision | larynx | oral cavity | oropharynx |
---|---|---|---|
TP53-3p Neg. | 15 | 52 | 4 |
TP53-3p Pos. | 60 | 109 | 9 |
# Bar chart of the same counts, one group of bars per tumor subdivision.
pd.crosstab(th, clinical.processed.tumor_subdivision).T.plot(kind='bar', rot=0)
<matplotlib.axes.AxesSubplot at 0xcac6490>
feature = two_hit
assignment = clinical.processed.tumor_subdivision

# Build the count table for THIS grouping. Without it, `ct` still holds the
# grade crosstab from the earlier grade figure and the first panel would show
# grade counts next to tumor-subdivision survival curves.
ct = pd.crosstab(th, assignment).T

# First panel: patient counts per subdivision. Remaining panels: one survival
# plot per subdivision group.
fig, axs = subplots(1, 4, figsize=(15, 4))
ct.plot(kind='bar', rot=0, ax=axs[0])
axs[0].legend(loc='upper left', frameon=False)
axs[0].set_ylabel('# of Patients')
for i, (l, s) in enumerate(feature.groupby(assignment)):
    draw_survival_curve(s, surv, ax=axs[i + 1],
                        title='{} = {}'.format(assignment.name, l))
    axs[i + 1].get_legend().set_visible(False)
# Same axis clean-up the stage and grade figures apply.
for ax in axs:
    prettify_ax(ax)
fig.tight_layout()
# Fisher exact test of two-hit status against larynx vs. other subdivisions.
# NOTE(review): unlike the other fisher_exact_test calls in this file, the
# first argument is the string-labelled series rather than boolean two_hit --
# confirm the helper treats both the same way.
fisher_exact_test(two_hit.map({True: 'TP53-3p Pos.', False:'TP53-3p Neg.'}),
clinical.processed.tumor_subdivision=='larynx')
odds_ratio 1.88 p 0.07 dtype: float64
Smoking Status
# Tobacco smoking history category per patient, cross-tabulated against
# two-hit status.
smoking = clinical.clinical.tobaccosmokinghistory
pd.crosstab(two_hit, smoking)
tobaccosmokinghistory | current reformed smoker for < or = 15 years | current reformed smoker for > 15 years | current smoker | lifelong non-smoker |
---|---|---|---|---|
h | ||||
False | 12 | 22 | 18 | 18 |
True | 60 | 18 | 66 | 26 |
# Survival curves for two-hit status, passing smoking history (patients with
# a missing value dropped) as the third argument.
draw_survival_curves(two_hit, surv, smoking.dropna())
/cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.14.0.dev-py2.7-linux-x86_64.egg/pandas/core/common.py:1847: RuntimeWarning: tp_compare didn't return -1 or -2 for exception sample = v[:min(3,len(v))]
# Bin age at 55 and 70. Missing ages are dropped first: NaN compares False
# against both cut-offs, so they would otherwise be counted silently in the
# youngest bin.
age_bin = age.dropna()
age_bin = 1. * (age_bin > 55) + 1. * (age_bin > 70)

# Labels mirror the strict comparisons above: a patient aged exactly 55 falls
# in the first bin. The leading spaces are kept from the original labels
# (presumably so the labels sort in age order).
age_bin = age_bin.map({0: ' <= 55', 1: ' > 55 and <= 70', 2: '> 70'})

violin_plot_pandas(two_hit, age)
draw_survival_curves(two_hit, surv, age_bin)
# Deletion loci screened for co-occurrence with TP53 mutation.
arms = ['3p14.2', '13q12.11', '13q14.2', '18q23', '18q21.2', '21q22.3']

# 'Deletion' rows of the copy-number table with the third index level dropped,
# columns restricted to keepers_o.
cnn = cn.features.copy()
cnn.index = cnn.index.droplevel(2)
cnn = cnn.ix['Deletion'].ix[:, keepers_o]

# Per locus: number of patients with a negative (deleted) value.
count = (cnn.ix[arms] < 0).sum(axis=1)
count = count.order()
count.name = 'pts. w/ deletion'

# Per locus: number of patients that combine() labels 'both', i.e. carrying
# the TP53 mutation and the deletion.
count_p53 = pd.Series(
    dict((i, sum(combine(p53_mut > 0, v < 0) == 'both'))
         for i, v in cnn.ix[arms].iterrows()),
    name='pts. w/ TP53 + deletion')

# Fisher exact screen of TP53 status against each deletion, joined with the
# two count columns.
fet = screen_feature(p53_mut, fisher_exact_test, cnn.ix[arms] < 0)
fet = fet[['odds_ratio', 'p']].join(count).join(count_p53)
fet = fet[['pts. w/ deletion', 'pts. w/ TP53 + deletion',
           'odds_ratio', 'p']]
# Cox screens of the deletion loci in three patient subsets; one result frame
# per subset is collected in r.
r = {}
_subsets = {'All': keepers_o,
            'TP53 mut.': ti(p53_mut > 0),
            'TP53 wt': ti(p53_mut == 0)}
for c, pts in _subsets.iteritems():
    _cn = cnn.ix[arms, pts]

    # Univariate screen on the binarized deletion indicator.
    cox = cox_screen(_cn < 0, surv)
    haz, p_uni = cox['hazard']['exp(coef)'], cox['LR']['p']

    # Model with the age and old covariates, one fit per locus.
    # NOTE(review): this fit receives the raw row values, whereas the
    # univariate screen above uses `< 0` -- confirm this is intended.
    p_full = pd.Series(dict(
        (i, get_cox_ph_ms(surv, v, [age, old],
                          interactions='just_feature')['LR'])
        for i, v in _cn.iterrows()))

    r[c] = pd.concat([haz, p_uni, p_full], axis=1,
                     keys=['hazard', 'p uni.', 'p full'])

# Side-by-side table, highest hazard in the full cohort first.
pd.concat(r, axis=1).sort([('All', 'hazard')])[::-1]
All | TP53 mut. | TP53 wt | |||||||
---|---|---|---|---|---|---|---|---|---|
hazard | p uni. | p full | hazard | p uni. | p full | hazard | p uni. | p full | |
3p14.2 | 3.54 | 4.32e-05 | 5.74e-06 | 2.97 | 0.01 | 0.00 | 2.29 | 0.21 | 0.06 |
18q23 | 1.13 | 5.64e-01 | 4.06e-01 | 0.79 | 0.30 | 0.34 | 5.14 | 0.02 | 0.00 |
13q12.11 | 1.13 | 5.45e-01 | 4.64e-01 | 0.82 | 0.33 | 0.33 | 3.72 | 0.05 | 0.01 |
21q22.3 | 1.08 | 7.05e-01 | 3.71e-01 | 0.90 | 0.61 | 0.94 | 1.41 | 0.62 | 1.00 |
18q21.2 | 1.05 | 8.21e-01 | 6.90e-01 | 0.75 | 0.19 | 0.20 | 3.73 | 0.05 | 0.01 |
13q14.2 | 0.90 | 6.05e-01 | 6.93e-01 | 0.67 | 0.06 | 0.06 | 3.68 | 0.05 | 0.01 |
# Number of TP53-mutated (True) and wild-type (False) patients in keepers_o.
p53_mut.ix[keepers_o].value_counts()
True 202 False 48 dtype: int64
# Display the TP53 / deletion co-occurrence table (counts, odds ratio, p).
fet
pts. w/ deletion | pts. w/ TP53 + deletion | odds_ratio | p | |
---|---|---|---|---|
3p14.2 | 205 | 179 | 6.59 | 3.56e-07 |
13q12.11 | 121 | 107 | 2.74 | 3.64e-03 |
21q22.3 | 107 | 95 | 2.66 | 5.80e-03 |
13q14.2 | 102 | 90 | 2.41 | 1.43e-02 |
18q23 | 164 | 140 | 2.26 | 1.73e-02 |
18q21.2 | 154 | 132 | 2.23 | 2.01e-02 |
# First 'Deletion' row for each of three further loci.
del_18q = cn.features.ix['Deletion'].ix['18q23'].ix[0]
del_21q = cn.features.ix['Deletion'].ix['21q22.3'].ix[0]
del_13q = cn.features.ix['Deletion'].ix['13q14.2'].ix[0]

# Survival analysis of TP53 status combined with each deletion (value < 0),
# in the original 18q, 21q, 13q order.
survival_and_stats(combine(p53_mut, del_18q < 0), surv)
survival_and_stats(combine(p53_mut, del_21q < 0), surv)
survival_and_stats(combine(p53_mut, del_13q < 0), surv)