import NotebookImport
from Imports import *
# TP53 mutation value per patient: take the 'TP53' row of the mutation
# table, restrict it to the patients in keepers_o, drop missing entries and
# cast the remaining values to int.
p53_mut = mut.df.ix['TP53'].ix[keepers_o].dropna().astype(int)
# Survival analysis grouped by the TP53 value; `order` is forwarded to the
# helper unchanged (its exact meaning is defined by survival_and_stats,
# which is not visible in this chunk).
survival_and_stats(p53_mut, surv, figsize=(5,4), order=[2,1,0])
# Screen the boolean "any TP53 mutation" vector against the processed
# clinical table using kruskal_pandas and show the first rows of the result.
screen_feature(p53_mut>0, kruskal_pandas, clinical.processed.T).head()
H | p | q | |
---|---|---|---|
spread_inferred | 7.65 | 0.01 | 0.06 |
smoker_inferred | 7.39 | 0.01 | 0.06 |
drinker_inferred | 6.45 | 0.01 | 0.07 |
invasion_inferred | 4.91 | 0.03 | 0.12 |
post_2000 | 0.69 | 0.41 | 1.00 |
5 rows × 3 columns
# Clinical annotation for pathological nodal extracapsular spread.
ecs = clinical.clinical.presenceofpathologicalnodalextracapsularspread
# Rename the series; the name is what pd.crosstab uses to label this factor.
ecs.name = 'Extra Capsular Spread'
# Cross-tabulate TP53-mutated (True/False) against spread status and draw it
# as a bar chart; the transpose (.T) puts the spread categories on the x-axis.
pd.crosstab(p53_mut>0, ecs).T.plot(kind='bar', rot=15)
<matplotlib.axes.AxesSubplot at 0xe462ed0>
It is important to note that here a patient with multiple mutations is counted multiple times.
import re as re
def get_nums(s):
    """Return every run of decimal digits in *s*, in order, as strings."""
    return re.findall(r'\d+', s)


def is_disruptive(v):
    """Assign a coarse functional class label to one TP53 MAF record.

    Parameters
    ----------
    v : record exposing ``Variant_Classification`` and ``Protein_Change``
        attributes (e.g. one row of the MAF table passed by
        ``DataFrame.apply(..., 1)``).

    Returns
    -------
    str
        * ``'InDel'`` for any non-missense class whose name contains
          ``'Ins'`` or ``'Del'``;
        * the text before the first underscore of the classification for
          every other non-missense class (e.g. ``'Nonsense'``, ``'Splice'``,
          ``'Silent'``);
        * ``'L2'`` for missense changes at residues 163-195;
        * ``'L3'`` for missense changes at residues 236-251;
        * ``'other'`` for any remaining missense change, including records
          whose ``Protein_Change`` is missing or carries no residue number.
    """
    c = v.Variant_Classification
    if c != 'Missense_Mutation':
        if 'Ins' in c or 'Del' in c:
            return 'InDel'
        return c.split('_')[0]

    # A missing annotation (NaN/None) makes re.findall raise TypeError and an
    # annotation without digits yields an empty list; neither can be placed
    # in a loop region, so both fall through to 'other' instead of crashing.
    try:
        nums = get_nums(v.Protein_Change)
    except TypeError:
        nums = []
    if not nums:
        return 'other'

    # The first number in the protein-change string is the residue position.
    aa = int(nums[0])
    if 163 <= aa < 196:
        return 'L2'
    if 236 <= aa < 252:
        return 'L3'
    return 'other'
# Fetch every TP53 record for this cancer from the MAF through the FH helper
# and keep the 'TP53' slice of the result.
p53 = FH.get_submaf(run.data_path, cancer.name, ['TP53'], fields='All').ix['TP53']
# Label each record (axis=1 -> one call per row) with is_disruptive.
dd = p53.apply(is_disruptive, 1)
# Discard records labelled 'Silent'. `nan` is not defined in this chunk;
# presumably it is provided by the star import -- verify.
dd = dd.replace('Silent',nan).dropna()
# Keep only the MAF rows that survived the filter above.
p53 = p53.ix[dd.index]
# Patients of keepers_o that have no remaining TP53 record and that are also
# columns of mut.df; they receive the 'WT' label below.
# NOTE(review): relies on the legacy Index.diff (set difference) -- assumes
# keepers_o is a pandas Index.
others = keepers_o.diff(p53.Tumor_Sample_Barcode.ix[dd.index]).intersection(mut.df.columns)
# Re-key the labels by tumor sample barcode instead of MAF row label.
dd.index = p53.Tumor_Sample_Barcode.ix[dd.index]
# Prepend one 'WT' entry per patient in `others`.
dd = pd.concat([pd.Series('WT', others), dd])
# Restrict to patients in keepers_o.
dd = dd[[i in keepers_o for i in dd.index]]
# Build the raw protein-change strings the same way (barcode-indexed,
# 'WT' for `others`, restricted to keepers_o) so pc lines up with dd.
pc = pd.Series(list(p53.Protein_Change), index=p53.Tumor_Sample_Barcode)
pc = pd.concat([pd.Series('WT', others), pc])
pc = pc[[i in keepers_o for i in pc.index]]
# Select the survival rows for the barcodes in dd.index, then replace the
# barcode index by a running number and stack back to long form.
# NOTE(review): presumably a barcode appearing several times in dd.index
# yields one survival row per occurrence, which is why the positional
# re-indexing is needed -- confirm against the .ix semantics in use.
s2 = surv.unstack().ix[dd.index]
s2.index = range(len(dd))
s2 = s2.stack()
# Remember which patient each running number refers to.
pats = pd.Series(dd.index, range(len(dd)))
# Give dd and pc the same running-number index as s2.
# pc is indexed with len(dd): this assumes pc and dd have equal length.
dd.index = range(len(dd))
pc.index = range(len(dd))
# Directory that receives the exported figure data.
figdir = '/cellar/users/agross/figures/'
# Assemble the source-data table for figure 2b, one row per entry of dd.
# The series are listed in the same order as `keys`, so that the class
# labels (dd) are exported under 'Functional Class' and the protein-change
# strings (pc) under 'Protien Change'. Previously pc and dd were passed in
# the opposite order, which swapped the contents of those two columns.
# NOTE: the header spelling 'Protien Change' is kept unchanged so existing
# readers of fig2b.csv that look the column up by name keep working.
df = pd.concat([pats, dd, pc, s2[:,'days'], s2[:,'event']],
               keys=['patient ID','Functional Class','Protien Change',
                     'Days to Death/Censoring', 'Death Indicator'],
               axis=1).sort(['patient ID'])
# Index the table by patient and write it next to the figure.
df = df.set_index('patient ID')
df.to_csv(figdir + 'fig2b.csv')
# Figure 2b: survival curves per TP53 functional class.
fig, ax = subplots(figsize=(3.5,2.7))
# Fixed colour per class label so the curves are coloured consistently.
c = {'WT': 'grey', 'Splice': colors[0], 'other': colors[5], 'L3': colors[1],
     'L2': colors[2], 'Nonsense': colors[3], 'InDel': colors[4]}
draw_survival_curve(dd, s2, colors=c, ax=ax)
# The legend is hidden on the exported panel.
ax.legend().set_visible(False)
prettify_ax(ax)
fig.tight_layout()
# Write the PDF into the same directory as the fig2b.csv export (figdir)
# rather than repeating the literal path, so the two outputs cannot drift
# apart when the directory changes.
fig.savefig(figdir + 'fig2b.pdf', transparent=True)
# Larger survival plot plus statistics for all functional classes; 'grey' is
# spliced into the palette at position 6.
survival_and_stats(dd, s2, colors=colors[:6] + ['grey'] + colors[6:], figsize=(4.5,6))
# Survival fit / log-rank statistics restricted to the mutated entries
# (everything not labelled 'WT').
get_surv_fit_lr(s2, dd[dd!='WT'])
Stats | Median Survival | 5y Survival | Log-Rank | |||||||
---|---|---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | chi2 | p | |
11.9 | 0.036 | |||||||||
other | 78 | 30 | 4 | 2.5 | NaN | 0.476 | 0.351 | 0.647 | ||
InDel | 48 | 19 | 3.53 | 1.5 | NaN | 0.421 | 0.262 | 0.676 | ||
Nonsense | 34 | 18 | 1.6 | 1.25 | NaN | 0.333 | 0.19 | 0.585 | ||
L3 | 31 | 12 | 2.16 | 1.5 | NaN | 0.229 | 0.0518 | 1 | ||
L2 | 30 | 18 | 1.08 | 0.986 | NaN | 0.251 | 0.124 | 0.512 | ||
Splice | 17 | 10 | 1.43 | 0.767 | NaN | 0.247 | 0.0842 | 0.724 |
7 rows × 10 columns
Bar Plot of Hazard Ratios for Supplement
# Relabel wild type as 'aWT'.
# NOTE(review): presumably done so that it sorts first and becomes the
# reference level of the Cox model -- confirm against get_cox_ph.
dd = dd.replace('WT', 'aWT')
f = get_cox_ph(s2, dd, interactions=False)
# Element 7 of the R summary object, converted to a pandas table; the code
# below reads its 'exp(coef)', 'lower .95' and 'upper .95' columns.
ci = convert_robj(robjects.r.summary(f)[7])
# Drop the first 7 characters of every row label.
# NOTE(review): presumably the 'feature' prefix added by R -- verify.
ci.index = map(lambda s: s[7:], ci.index)
# Add a row of ones named 'WT' (hazard ratio 1, zero-width interval) to
# stand for the reference group in the plot.
n = ci.ix[0]*0 +1
n.name = 'WT'
ci = ci.append(n)
fig, ax = subplots(figsize=(7,4))
# Order the bars by hazard ratio.
ci = ci.sort('exp(coef)')
haz = ci['exp(coef)']
# Bars with asymmetric error bars spanning the 95% interval.
# NOTE(review): the colour list is positional, so it only matches the class
# labels for the particular sort order obtained above.
b = haz.plot(kind='bar', ax=ax,
yerr=[haz - ci['lower .95'], ci['upper .95'] - haz], ecolor='black',
rot=0, color=['grey', colors[5], colors[4], colors[0], colors[3],
colors[2], colors[1]])
prettify_ax(ax)
ax.set_ylabel('Hazard Ratio')
<matplotlib.text.Text at 0x111c8c10>
P-values for Bar Comparisons
from itertools import combinations
# For every unordered pair of class labels, fit a Cox model on just the
# entries carrying one of the two labels and keep the 'LR' entry of the
# result, keyed by the pair.
sig = pd.Series({c: get_cox_ph_ms(s2, dd[dd.isin(c)], interactions=False)['LR']
for c in combinations(dd.unique(),2)})
# Show the pairs sorted by that value (legacy Series.order()).
sig.order()
(aWT, L2) 4.06e-05 (aWT, Nonsense) 1.83e-03 (aWT, Splice) 2.61e-03 (L2, other) 4.88e-03 (aWT, L3) 1.18e-02 (aWT, InDel) 1.68e-02 (aWT, other) 2.74e-02 (InDel, L2) 3.58e-02 (L3, L2) 4.35e-02 (Splice, other) 8.19e-02 (Nonsense, other) 1.26e-01 (Nonsense, L2) 1.57e-01 (InDel, Splice) 2.21e-01 (Splice, L3) 2.73e-01 (InDel, Nonsense) 3.87e-01 (L3, other) 4.57e-01 (Nonsense, Splice) 5.59e-01 (Nonsense, L3) 5.66e-01 (InDel, other) 6.00e-01 (Splice, L2) 6.11e-01 (InDel, L3) 8.93e-01 dtype: float64
# Amino-acid lookup table, indexed by the second column of the CSV.
# NOTE(review): is_disruptive below indexes lo.Polarity with single
# characters of the protein-change string, so that column presumably holds
# one-letter amino-acid codes -- verify against the file.
lo = pd.read_csv('../Extra_Data/amino_acids.csv', index_col=1)
# Collapse duplicate index entries, keeping the first row of each.
lo = lo.groupby(level=0).first()
def is_disruptive(s):
    """Return True when protein-change string *s* counts as disruptive.

    Rules, applied in order:

    * a change ending in ``'*'`` is disruptive;
    * a change ending in ``'splice'`` or containing ``'fs'`` is not;
    * otherwise the change is disruptive only if its residue number
      (``s[3:-1]``) lies in 163-195 or 236-251 *and* the ``lo.Polarity``
      entries of the reference residue (``s[2]``) and the new residue
      (``s[-1]``) differ.

    Strings whose residue number cannot be parsed, or whose residues are
    not present in ``lo.Polarity``, are treated as not disruptive.

    Parameters
    ----------
    s : str
        Protein change; the slicing assumes the ``'p.R175H'`` layout.

    Returns
    -------
    bool
    """
    if s.endswith('*'):
        return True
    if s.endswith('splice') or 'fs' in s:
        return False
    try:
        aa = int(s[3:-1])
        # Explicit interval tests instead of `range(...) + range(...)`:
        # that expression raises TypeError on Python 3, and the former bare
        # `except` swallowed it, silently returning False for every missense
        # change.
        in_loop = 163 <= aa < 196 or 236 <= aa < 252
        return bool(in_loop and lo.Polarity[s[2]] != lo.Polarity[s[-1]])
    except (ValueError, KeyError, IndexError):
        # Unparseable position or residue missing from the lookup table:
        # best-effort fallback, same result the original produced.
        return False
# Re-fetch every TP53 record for this cancer from the MAF.
p53 = FH.get_submaf(run.data_path, cancer.name, ['TP53'], fields='All').ix['TP53']
# Per record: combine "is_disruptive(Protein_Change)" with "is not silent",
# and put the result next to the tumor sample barcode.
status = pd.concat([combine(p53.Protein_Change.map(is_disruptive), p53.is_silent==0),
p53.Tumor_Sample_Barcode], axis=1,
keys=['status','barcode'])
status = status.set_index('barcode')['status']
# Per patient: 1 if any record carries the 'both' label, otherwise 0.
# NOTE(review): assumes combine() labels a row 'both' when both inputs are
# true -- confirm against its definition.
status = (status == 'both').groupby(level=0).sum().clip_upper(1.)
# Align to all patients in mut.df; patients without any TP53 record become
# -1 and are mapped to 'WT'.
# NOTE(review): a patient whose only TP53 records are silent gets 0 here and
# is therefore labelled 'Non-Disruptive' rather than 'WT' -- confirm this is
# intended.
status = status.ix[mut.df.columns].fillna(-1).map({-1:'WT',0:'Non-Disruptive',1:'Disruptive'})
status = status.ix[keepers_o]
survival_and_stats(status, surv, colors=colors[:6] + ['grey'] + colors[6:], figsize=(7,5))
# Survival fit / log-rank statistics for Non-Disruptive vs. WT only.
get_surv_fit_lr(surv, status[status.isin(['Non-Disruptive', 'WT'])])
Stats | Median Survival | 5y Survival | Log-Rank | |||||||
---|---|---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | chi2 | p | |
8.12 | 0.00437 | |||||||||
Non-Disruptive | 140 | 61 | 2.58 | 1.71 | NaN | 0.4 | 0.299 | 0.534 | ||
WT | 45 | 10 | NaN | 4.71 | NaN | 0.664 | 0.494 | 0.893 |
3 rows × 10 columns
def is_disruptive_mod(s):
    """Return True when protein-change string *s* is disruptive (modified rule).

    Rules, applied in order:

    * a change ending in ``'*'`` or in ``'splice'`` is disruptive;
    * a change containing ``'fs'`` is not;
    * otherwise the change is disruptive when its residue number
      (``s[3:-1]``) lies in 163-195 or 236-251.

    Strings whose residue number cannot be parsed are treated as not
    disruptive.

    Parameters
    ----------
    s : str
        Protein change; the slicing assumes the ``'p.R175H'`` layout.

    Returns
    -------
    bool
    """
    if s.endswith(('*', 'splice')):
        return True
    if 'fs' in s:
        return False
    try:
        aa = int(s[3:-1])
    except ValueError:
        # No single residue number between the prefix and the last character.
        return False
    # Explicit interval tests instead of `range(...) + range(...)`: that
    # expression raises TypeError on Python 3, and the former bare `except`
    # swallowed it, silently returning False for every missense change.
    return 163 <= aa < 196 or 236 <= aa < 252
# Same pipeline as the previous status computation, but classifying records
# with is_disruptive_mod instead of is_disruptive.
p53 = FH.get_submaf(run.data_path, cancer.name, ['TP53'], fields='All').ix['TP53']
# Per record: combine "is_disruptive_mod(Protein_Change)" with "is not
# silent", and put the result next to the tumor sample barcode.
status = pd.concat([combine(p53.Protein_Change.map(is_disruptive_mod), p53.is_silent==0),
p53.Tumor_Sample_Barcode], axis=1,
keys=['status','barcode'])
status = status.set_index('barcode')['status']
# Per patient: 1 if any record carries the 'both' label, otherwise 0.
# NOTE(review): assumes combine() labels a row 'both' when both inputs are
# true -- confirm against its definition.
status = (status == 'both').groupby(level=0).sum().clip_upper(1.)
# Align to all patients in mut.df; patients without any TP53 record become
# -1 and are mapped to 'WT'.
status = status.ix[mut.df.columns].fillna(-1).map({-1:'WT',0:'Non-Disruptive',1:'Disruptive'})
status = status.ix[keepers_o]
survival_and_stats(status, surv, colors=colors[:6] + ['grey'] + colors[6:], figsize=(7,5))
# Cox model on the Non-Disruptive and WT patients only, with the boolean
# "is Non-Disruptive" as the single feature; print_desc=True asks the helper
# to print its description of the fit.
f = get_cox_ph(surv, status[status.isin(['Non-Disruptive', 'WT'])]=='Non-Disruptive', interactions=False,
print_desc=True);
coef exp(coef) se(coef) z p feature 0.79 2.2 0.353 2.24 0.025 Likelihood ratio test=5.81 on 1 df, p=0.0159 n= 150, number of events= 52
# Hand calculation from the printed fit (coef = 0.79, se(coef) = 0.353):
# the hazard ratio exp(coef), and the distance from it down to
# exp(coef - se).
exp(.79), exp(.79) - exp(.79 - .353)
(2.2033964262559369, 0.65534034919959683)
# Protein-change string per TP53 record, keyed by tumor sample barcode.
cc = p53.set_index('Tumor_Sample_Barcode').Protein_Change
# Add a 'WT' entry for every patient in `others` (computed in the earlier
# functional-class cell).
cc = pd.concat([pd.Series('WT', others), cc])
# Keep only the values that occur more than 5 times.
# NOTE(review): assumes true_index returns the labels whose value is True;
# unlike the earlier cells, cc is not filtered to keepers_o here -- confirm
# this is intended.
cc = cc[cc.isin(true_index(cc.value_counts() > 5))]
# Select the survival rows for the barcodes in cc.index and switch both
# objects to a shared running-number index.
s2 = surv.unstack().ix[cc.index]
s2.index = range(len(cc))
s2 = s2.stack()
cc.index = range(len(cc))
# Survival by individual recurrent protein change, with 'grey' as the first
# colour.
survival_and_stats(cc, s2, colors=['grey'] + colors, figsize=(7,5))