Here is a general exploration of copy number, chromosomal instability, and global deletion rate within our cohort. This analysis was done after the identification of 3p deletion being a major event in the cohort, so most of the analysis revolves around understanding the specificity of 3p compared to global patterns of chromosomal gain and loss.
import NotebookImport
from Imports import *
importing IPython notebook from Imports.ipynb Populating the interactive namespace from numpy and matplotlib changing to source dirctory
populating namespace with data
rppa = cancer.load_data('RPPA')
cn_by_gene = FH.get_gistic_gene_matrix(run.data_path, cancer.name)
#cn.features.index = cn.features.index.droplevel(2)
cin = (cn_by_gene.groupby(level='Cytoband').median().abs() >= 1).sum()
cn_by_gene.index = map(lambda s: s.split('.')[0], cn_by_gene.index.get_level_values(0))
cin = (cn_by_gene.groupby(level=0).median().abs() >= 1).sum()
cn_by_gene = FH.get_gistic_gene_matrix(run.data_path, cancer.name)
cn_by_gene = cn_by_gene.ix[[i for i in cn_by_gene.index if i[2] in rna.df.index]]
pct_del = (cn_by_gene < 0).sum() / (1.*len(cn_by_gene))
pct_amp = (cn_by_gene > 0).sum() / (1.*len(cn_by_gene))
pct_altered = pct_amp + pct_del
pct_altered.name = 'CIN'
draw_survival_curves(pct_altered, surv, hpv.dropna())
violin_plot_pandas(hpv, pct_altered)
df = mut.df.ix[(mut.df.sum(1).order() > 5)]
tt = lambda a,b: kruskal_pandas(b,a)
s = screen_feature(cin, tt, df.clip_upper(1.))
s.head(10)
H | p | q | |
---|---|---|---|
A2ML1 | NaN | NaN | 1 |
ABCA1 | NaN | NaN | 1 |
ABCA10 | NaN | NaN | 1 |
ABCA12 | NaN | NaN | 1 |
ABCA13 | NaN | NaN | 1 |
ABCA2 | NaN | NaN | 1 |
ABCA4 | NaN | NaN | 1 |
ABCA6 | NaN | NaN | 1 |
ABCA9 | NaN | NaN | 1 |
ABCB1 | NaN | NaN | 1 |
df = mut.df.ix[(mut.df.sum(1).order() > 5)]
tt = lambda a,b: kruskal_pandas(b,a)
s = screen_feature(pct_altered, tt, df.clip_upper(1.))
s.head(10)
H | p | q | |
---|---|---|---|
A2ML1 | NaN | NaN | 1 |
ABCA1 | NaN | NaN | 1 |
ABCA10 | NaN | NaN | 1 |
ABCA12 | NaN | NaN | 1 |
ABCA13 | NaN | NaN | 1 |
ABCA2 | NaN | NaN | 1 |
ABCA4 | NaN | NaN | 1 |
ABCA6 | NaN | NaN | 1 |
ABCA9 | NaN | NaN | 1 |
ABCB1 | NaN | NaN | 1 |
df = cn.features.ix['Deletion']
df.index = df.index.get_level_values(0)
screen_feature(pct_altered.ix[keepers_o], rev_kruskal, df < 0).head()
H | p | q | |
---|---|---|---|
11q23.1 | 17.16 | 3.44e-05 | 0.00 |
11q14.2 | 15.45 | 8.49e-05 | 0.00 |
13q14.2 | 14.55 | 1.37e-04 | 0.00 |
9p21.3 | 8.23 | 4.11e-03 | 0.05 |
10p15.3 | 7.11 | 7.67e-03 | 0.07 |
df = cn.features.ix['Amplification']
df.index = df.index.get_level_values(0)
screen_feature(pct_altered.ix[keepers_o], tt, df > 0).head()
H | p | q | |
---|---|---|---|
7q21.3 | 6.20 | 0.01 | 0.14 |
9q34.3 | 5.96 | 0.01 | 0.14 |
5p15.33 | 5.66 | 0.02 | 0.14 |
20p12.2 | 5.33 | 0.02 | 0.14 |
9p13.3 | 4.64 | 0.03 | 0.14 |
fig, axs = subplots(1,2, figsize=(12,4))
violin_plot_pandas(cn.features.ix[('Deletion','5q11.2'), keepers_o].iloc[0],
pct_altered, ax=axs[0])
violin_plot_pandas(cn.features.ix[('Deletion','3p14.2'), keepers_o].iloc[0],
pct_altered, ax=axs[1])
del_3p = cn.features.ix[('Deletion','3p14.2')].iloc[0] < 0
del_3p = del_3p.ix[keepers_o]
del_3p.name = 'del_3p'
old.name = 'age_over_75'
Other chromosomal regions associated with 3p loss.
s = screen_feature(del_3p, fisher_exact_test, cn.features.ix['Deletion'] < 0)
s.head(10)
odds_ratio | p | q | ||
---|---|---|---|---|
3p14.2 | Lesion | inf | 1.01e-50 | 4.83e-49 |
3p14.3 | Lesion | 8976.00 | 9.28e-47 | 2.23e-45 |
3p25.3 | Lesion | 273.00 | 2.03e-34 | 3.26e-33 |
3p12.2 | Lesion | 225.17 | 4.28e-33 | 5.14e-32 |
18q23 | Lesion | 14.72 | 3.40e-13 | 3.26e-12 |
18q21.2 | Lesion | 8.26 | 4.02e-09 | 3.13e-08 |
9p23 | Lesion | 9.21 | 4.83e-09 | 3.13e-08 |
5q11.2 | Lesion | 10.85 | 5.22e-09 | 3.13e-08 |
4p16.3 | Lesion | 11.64 | 1.36e-08 | 7.26e-08 |
5q15 | Lesion | 8.99 | 4.02e-08 | 1.93e-07 |
args = [run.data_path] + [cancer.name]
f = '{}/analyses/{}/CopyNumber_Gistic2/all_lesions.conf_99.txt'.format(*args)
lesions = pd.read_table(f, index_col=0)
lesions = lesions.set_index('Descriptor').select(lambda s: s.startswith('TCGA'), 1)
lesions = lesions.groupby(lambda s: s[:12],1).first()
lesions.index = map(str.strip, lesions.index)
levels = lesions.ix['3p14.2'].ix[2].ix[keepers_o]
calls = lesions.ix['3p14.2'].ix[0].ix[keepers_o]
calls.name = 'del_3p'
#del_3p = lesions.ix['3p14.2'].ix[0] > 0
p53_mut = mut.features.ix['TP53']>0
fig, axs = subplots(1, 2, figsize=(12,4))
levels.hist(bins=50, ax=axs[0])
s = pd.DataFrame({c: log_rank(levels < c, surv) for c in levels.order()[2:-2]})
s.ix['p'].plot(ax=axs[1])
axs[0].set_xlabel('Copy Number')
axs[1].set_xlabel('Copy Number')
axs[1].set_ylabel('p-value')
axs[1].set_yscale('log')
colors = rcParams['axes.color_cycle']
f = '../Extra_Data/FH_HNSC__4_16_all_data_thresholded_by_genes.txt'
gistic = pd.read_table(f, index_col=[2, 1, 0], low_memory=False)
gistic = FH.fix_barcode_columns(gistic, tissue_code='01')
cn_by_gene = gistic
coords = pd.read_csv('../Extra_Data/gene_coords.csv', index_col=0)
#cn_by_gene = FH.get_gistic_gene_matrix(run.data_path, cancer.name)
cn_by_gene = cn_by_gene.reset_index(level=[0,1])
cn_by_gene = cn_by_gene.join(coords).set_index(['chrom','start', 'Cytoband','Locus ID'], append=True)
cn_by_gene = cn_by_gene.ix[:,keepers_o].dropna(axis=1)
ch_3 = cn_by_gene.xs('3', level='chrom')
p_arm = pd.Series(ch_3.index.get_level_values('Cytoband').map(lambda s: 'p' in s), ch_3.index)
p_arm = ch_3[p_arm].sortlevel('start')
p_arm = p_arm.ix[:-1]
#df = rna.df.xs('01', 1, 1).ix[p_arm.index.get_level_values(0)].dropna()#
#df = rna.features.ix['real'].ix[p_arm.index.get_level_values(0)].dropna()
df = rna.df.xs('01', 1, 1).ix[p_arm.index.get_level_values(0)].dropna()
#df = df.ix[screen_feature(del_3p, kruskal_pandas, df).p < .05]
df = df.ix[:,keepers_o].dropna(1)
rr = screen_feature(p53_mut, fisher_exact_test, p_arm < 0)
rr2 = screen_feature(rna.df.ix['TP53'][:,'01'], pearson_pandas, df)
rr2 = screen_feature(rna.df.ix['TP53'][:,'01'], pearson_pandas, df)
rr2 = rr2.ix[df.index].sort_index()
rr2.index = p_arm.sort_index().ix[rr2.index].index
rr2.ix['FHIT']
rho | p | q | |||
---|---|---|---|---|---|
start | Cytoband | Locus ID | |||
59710076 | 3p14.2 | 2272 | 0.27 | 1.16e-05 | 3.27e-04 |
fig = plt.figure(figsize=(8,4))
ax1 = plt.subplot2grid((7,1), (0,0), rowspan=3, colspan=1)
ax2 = plt.subplot2grid((7,1), (3,0), rowspan=3, colspan=1)
ax3 = plt.subplot2grid((7,1), (6,0), rowspan=1, colspan=1)
bands = p_arm.reset_index().groupby('Cytoband')
bands = bands.first().start.order()
starts = p_arm.index.get_level_values('start')
for i,(idx,v) in enumerate(bands.iteritems()):
try:
r = plt.Rectangle((v,0), bands.iloc[i+1]-v, 1, lw=2,
alpha=.5, fc=colors[i%2])
except:
r = plt.Rectangle((v,0), starts[-1] - v, 1, lw=2,
alpha=.5, fc=colors[i%2])
ax3.add_artist(r)
ax3.set_xticks(bands)
b = [{0:s,1:''}[(i%2)] for i,s in enumerate(bands.index)]
ax3.set_xticklabels(b, rotation=270+45, ha='left')
ax1.scatter(starts, (p_arm < -.1).sum(1), s=40, c=colors[2],
edgecolor='grey', lw=.5, alpha=.5)
ax2.scatter(rr2.index.get_level_values('start'), rr2.rho, s=40,
c=colors[4], edgecolor='grey', alpha=1)
#ax1.set_ybound(215,255)
ax1.set_ylabel('# of Patients\nwith Deletion\n\n', ha='center', va='center')
#ax2.set_ybound(-.5,4.)
ax2.set_ylabel('mRNA Survival\nAssociation\n(Log10 p)\n\n\n', ha='center', va='center')
ax3.set_yticks([])
fig.tight_layout()
for ax in [ax1, ax2, ax3]:
ax.set_xbound(starts[0],
starts[-1])
ax.set_xticks(bands)
for n, ss in exp2.ix[['FHIT']].iteritems():
ax2.annotate(n[0], (n[1], ss), xytext=(15, -5),
textcoords='offset points',
arrowprops=dict(arrowstyle="->"))
ax1.set_xticklabels('')
ax2.set_xticklabels('')
#ax2.set_yticks([2,3,4,5,6,7,8])
fig.tight_layout()
#fig.savefig(FIGDIR + 'supp_2a.pdf', transparent=True)
fig = plt.figure(figsize=(8,4))
ax1 = plt.subplot2grid((7,1), (0,0), rowspan=3, colspan=1)
ax2 = plt.subplot2grid((7,1), (3,0), rowspan=3, colspan=1)
ax3 = plt.subplot2grid((7,1), (6,0), rowspan=1, colspan=1)
bands = p_arm.reset_index().groupby('Cytoband')
bands = bands.first().start.order()
starts = p_arm.index.get_level_values('start')
for i,(idx,v) in enumerate(bands.iteritems()):
try:
r = plt.Rectangle((v,0), bands.iloc[i+1]-v, 1, lw=2,
alpha=.5, fc=colors[i%2])
except:
r = plt.Rectangle((v,0), starts[-1] - v, 1, lw=2,
alpha=.5, fc=colors[i%2])
ax3.add_artist(r)
ax3.set_xticks(bands)
b = [{0:s,1:''}[(i%2)] for i,s in enumerate(bands.index)]
ax3.set_xticklabels(b, rotation=270+45, ha='left')
ax1.scatter(starts, (p_arm < -.1).sum(1), s=40, c=colors[2],
edgecolor='grey', lw=.5, alpha=.5)
ax2.scatter(starts, rr.odds_ratio.ix[p_arm.index], s=40,
c=colors[4], edgecolor='grey', alpha=.7)
#ax1.set_ybound(215,255)
ax1.set_ylabel('# of Patients\nwith Deletion\n\n', ha='center', va='center')
#ax2.set_ybound(-.5,4.)
ax2.set_ylabel('mRNA Survival\nAssociation\n(Log10 p)\n\n\n', ha='center', va='center')
ax3.set_yticks([])
fig.tight_layout()
for ax in [ax1, ax2, ax3]:
ax.set_xbound(starts[0],
starts[-1])
ax.set_xticks(bands)
for n, ss in exp2.ix[['FHIT']].iteritems():
ax2.annotate(n[0], (n[1], ss), xytext=(15, -5),
textcoords='offset points',
arrowprops=dict(arrowstyle="->"))
ax1.set_xticklabels('')
ax2.set_xticklabels('')
ax2.set_yticks([2,3,4,5,6,7,8])
fig.tight_layout()
#fig.savefig(FIGDIR + 'supp_2a.pdf', transparent=True)
levels = lesions.ix['3p14.2'].ix[2]
calls = lesions.ix['3p14.2'].ix[0]
calls.name = 'del_3p'
cn_by_gene = FH.get_gistic_gene_matrix(run.data_path, cancer.name)
cn_by_gene = cn_by_gene.reset_index(level=[0,1])
cn_by_gene = cn_by_gene.join(coords).set_index(['chrom','start', 'Cytoband','Locus ID'], append=True)
cn_by_gene = cn_by_gene.ix[:,true_index(hpv==True)].dropna(axis=1)
ch_3 = cn_by_gene.xs('3', level='chrom')
p_arm = pd.Series(ch_3.index.get_level_values('Cytoband').map(lambda s: 'p' in s), ch_3.index)
p_arm = ch_3[p_arm].sortlevel('start')
p_arm = p_arm.ix[:-1] #bad annotation
rna_all = cancer.load_data('mRNASeq')
#df = rna_all.features.ix['real'].ix[p_arm.index.get_level_values(0)].dropna()
df = rna_all.df.xs('01', 1, 1).ix[p_arm.index.get_level_values(0)].dropna()
df = df.ix[:,true_index(hpv.dropna())]
#df = df.ix[screen_feature(del_3p, kruskal_pandas, df).p < .05]
test = lambda s: get_cox_ph_ms(surv, s, [age, old], interactions='just_first')
exp = df.apply(test, axis=1)
exp = exp.sort_index()
exp.index = p_arm.sort_index().ix[exp.index].index
exp2 = -exp.LR.map(log10)
fig = plt.figure(figsize=(8,4))
ax1 = plt.subplot2grid((7,1), (0,0), rowspan=3, colspan=1)
ax2 = plt.subplot2grid((7,1), (3,0), rowspan=3, colspan=1)
ax3 = plt.subplot2grid((7,1), (6,0), rowspan=1, colspan=1)
bands = p_arm.reset_index().groupby('Cytoband')
bands = bands.first().start.order()
starts = p_arm.index.get_level_values('start')
for i,(idx,v) in enumerate(bands.iteritems()):
try:
r = plt.Rectangle((v,0), bands.iloc[i+1]-v, 1, lw=2,
alpha=.5, fc=colors[i%2])
except:
r = plt.Rectangle((v,0), starts[-1] - v, 1, lw=2,
alpha=.5, fc=colors[i%2])
ax3.add_artist(r)
ax3.set_xticks(bands)
b = [{0:s,1:''}[(i%2)] for i,s in enumerate(bands.index)]
ax3.set_xticklabels(b, rotation=270+45, ha='left')
ax1.scatter(starts, (p_arm < -.1).sum(1), s=40, c=colors[2],
edgecolor='grey', alpha=1)
ax2.scatter(exp2.index.get_level_values('start'), exp2, s=40,
c=colors[4], edgecolor='grey', alpha=1)
#ax1.set_ybound(215,255)
ax1.set_ylabel('# of Patients\nwith Deletion\n\n', ha='center', va='center')
ax2.set_ybound(-.5,3)
ax2.set_ylabel('mRNA Survival\nAssociation\n(Log10 p)\n\n\n', ha='center', va='center')
ax3.set_yticks([])
fig.tight_layout()
for ax in [ax1, ax2, ax3]:
ax.set_xbound(starts[0],
starts[-1])
ax.set_xticks(bands)
for n, ss in exp2.order().tail(2).iteritems():
ax2.annotate(n[0], (n[1], ss), xytext=(15, -5),
textcoords='offset points',
arrowprops=dict(arrowstyle="->"))
ax1.set_xticklabels('')
ax2.set_xticklabels('')
ax2.set_yticks([0,1,2,3,4,5])
fig.tight_layout()
fig.savefig(FIGDIR + 'supp_2b.pdf', transparent=True)
t1 = screen_feature(pct_del.ix[keepers_o], rev_kruskal, mut.df.ix[mut.df.ix[:,keepers_o].sum(1) > 5]>0)
t1.head(10)
H | p | q | |
---|---|---|---|
HRAS | 22.17 | 2.49e-06 | 0.00 |
CASP8 | 21.31 | 3.91e-06 | 0.00 |
TP53 | 21.20 | 4.14e-06 | 0.00 |
FBN1 | 12.34 | 4.44e-04 | 0.13 |
COL4A4 | 11.35 | 7.54e-04 | 0.18 |
PRDM9 | 8.64 | 3.28e-03 | 0.42 |
OR8K5 | 8.25 | 4.08e-03 | 0.42 |
PON1 | 8.10 | 4.43e-03 | 0.42 |
AFF3 | 7.93 | 4.85e-03 | 0.42 |
CDH10 | 7.85 | 5.08e-03 | 0.42 |
len(t1)
1202
t1 = screen_feature(pct_altered.ix[keepers_o], rev_kruskal,
cn.features.ix['Deletion'] < 0)
t2 = screen_feature(pct_altered.ix[keepers_o], rev_kruskal,
cn.features.ix['Amplification'] > 0)
del t1['q']
t1['p_bonf'] = t1.p * len(t1)
del t2['q']
t2['p_bonf'] = t2.p * len(t2)
pd.concat([t1.head(6), t2.head(6)], keys=['Deletion', 'Amplification'])
H | p | p_bonf | |||
---|---|---|---|---|---|
Deletion | 5q11.2 | Lesion | 66.52 | 3.47e-16 | 1.66e-14 |
1p13.2 | Lesion | 59.27 | 1.38e-14 | 6.60e-13 | |
3p12.2 | Lesion | 57.34 | 3.66e-14 | 1.76e-12 | |
5q35.3 | Lesion | 57.25 | 3.83e-14 | 1.84e-12 | |
5q15 | Lesion | 56.71 | 5.06e-14 | 2.43e-12 | |
3p14.2 | Lesion | 54.49 | 1.56e-13 | 7.51e-12 | |
Amplification | 12p13.33 | Lesion | 70.64 | 4.28e-17 | 1.11e-15 |
7q21.3 | Lesion | 65.69 | 5.29e-16 | 1.37e-14 | |
17q12 | Lesion | 58.10 | 2.50e-14 | 6.49e-13 | |
2q31.2 | Lesion | 55.93 | 7.50e-14 | 1.95e-12 | |
2q11.2 | Lesion | 55.41 | 9.81e-14 | 2.55e-12 | |
12q15 | Lesion | 49.86 | 1.65e-12 | 4.29e-11 |
len(t2)
26
len(t1)
48
Chromosomal segments associated with TP53 mutation in HPV- pateints.
In text as : "While mutation of TP53 has previously been associated with chromosomal instability, we find that TP53 associates with 3p chromosomal loss far more frequently than it does with deletions in other chromosomal regions with similar fragile sites, suggesting a selective advantage of this combination of events for tumor cells."
t3 = screen_feature(p53_mut.ix[keepers_o], fisher_exact_test,
cn.features.ix['Deletion'] < 0).head(5)
t4 = screen_feature(p53_mut.ix[keepers_o], fisher_exact_test,
cn.features.ix['Amplification'] > 0).head()
pd.concat([t3, t4], keys=['Deletion', 'Amplification'])
odds_ratio | p | q | |||
---|---|---|---|---|---|
Deletion | 3p14.3 | Lesion | 6.59 | 3.56e-07 | 8.55e-06 |
3p14.2 | Lesion | 6.59 | 3.56e-07 | 8.55e-06 | |
3p25.3 | Lesion | 5.49 | 1.81e-06 | 2.89e-05 | |
3p12.2 | Lesion | 5.07 | 6.28e-06 | 7.54e-05 | |
10p15.3 | Lesion | 4.00 | 6.74e-04 | 6.47e-03 | |
Amplification | 3q26.33 | Lesion | 6.35 | 9.00e-08 | 2.34e-06 |
8q24.21 | Lesion | 4.24 | 3.98e-05 | 5.18e-04 | |
12p13.33 | Lesion | 3.21 | 2.63e-03 | 2.28e-02 | |
9p24.1 | Lesion | 0.40 | 8.88e-03 | 5.77e-02 | |
18p11.31 | Lesion | 2.61 | 1.69e-02 | 8.51e-02 |
Association of percentage of genes amplified or deleted TP53 mutation in HPV- pateints.
kruskal_pandas(p53_mut.ix[keepers_o], pct_del)
H 2.12e+01 p 4.14e-06 dtype: float64
fig, axs = subplots(2,1, figsize=(4,6))
p53_mut = mut.features.ix['TP53'].ix[keepers_o].map({0:'TP53_WT', 1:'TP53_mut'})
violin_plot_pandas(p53_mut, pct_del, ax=axs[0])
axs[0].set_ylabel('% of Genes Deleted')
violin_plot_pandas(p53_mut, pct_amp, ax=axs[1])
axs[1].set_ylabel('% of Genes Amplified')
for ax in axs:
prettify_ax(ax)
ax.set_xlabel('')
fig.tight_layout()
fig.savefig(FIGDIR + 'p53_cin_supp.pdf', transparent=True)
violin_plot_pandas(del_3p, pct_altered)
del_3p.name = 'del_3p'
pct_del.name = 'CIN'
fmla = 'Surv(days, event) ~ del_3p + CIN + age_over_75 + age'
m1 = get_cox_ph(surv, covariates=[del_3p, pct_del, old, age], formula=fmla,
print_desc=True, interactions=False);
coef exp(coef) se(coef) z p del_3p 0.5015 1.651 0.160 3.136 0.0017 CIN 0.0634 1.065 0.119 0.531 0.6000 age_over_75 0.3120 1.366 0.119 2.629 0.0086 age -0.0375 0.963 0.136 -0.276 0.7800 Likelihood ratio test=26.4 on 4 df, p=2.57e-05 n= 250, number of events= 102
fmla = 'Surv(days, event) ~ CIN + age_over_75 + age'
m2 = get_cox_ph(surv, covariates=[del_3p, pct_del, old, age], formula=fmla,
print_desc=True, interactions=False);
coef exp(coef) se(coef) z p CIN 0.2833 1.328 0.0969 2.925 0.0034 age_over_75 0.2618 1.299 0.1168 2.241 0.0250 age -0.0352 0.965 0.1329 -0.265 0.7900 Likelihood ratio test=14.3 on 3 df, p=0.00253 n= 250, number of events= 102
LR_test(m1, m2)
0.00048909784102555808
del_3p.name = 'del_3p'
p53_mut = mut.features.ix['TP53'].ix[keepers_o]
fmla = 'Surv(days, event) ~ del_3p + CIN + age_over_75 + age'
m1 = get_cox_ph(surv, covariates=[del_3p.ix[ti(p53_mut==1)], pct_del, old, age], formula=fmla,
print_desc=True, interactions=False);
coef exp(coef) se(coef) z p del_3p 0.4081 1.504 0.157 2.599 0.0094 CIN -0.0794 0.924 0.128 -0.619 0.5400 age_over_75 0.2561 1.292 0.125 2.050 0.0400 age -0.0115 0.989 0.137 -0.084 0.9300 Likelihood ratio test=13.8 on 4 df, p=0.00779 n= 202, number of events= 92
fmla = 'Surv(days, event) ~ CIN + age_over_75 + age'
m2 = get_cox_ph(surv, covariates=[del_3p.ix[ti(p53_mut==1)], pct_del, old, age], formula=fmla,
print_desc=True, interactions=False);
coef exp(coef) se(coef) z p CIN 0.0836 1.087 0.110 0.760 0.450 age_over_75 0.2179 1.243 0.123 1.778 0.075 age -0.0148 0.985 0.132 -0.112 0.910 Likelihood ratio test=4.85 on 3 df, p=0.183 n= 202, number of events= 92
LR_test(m1, m2)
0.0027052080739890017
fmla = 'Surv(days, event) ~ del_3p + CIN + age_over_75 + age'
m1 = get_cox_ph(surv, covariates=[del_3p.ix[ti(p53_mut==0)], pct_del, old, age], formula=fmla,
print_desc=True, interactions=False);
coef exp(coef) se(coef) z p del_3p -0.663 0.515 0.753 -0.881 0.38 CIN 1.435 4.201 0.559 2.569 0.01 age_over_75 0.800 2.226 0.425 1.883 0.06 age -0.546 0.579 0.559 -0.977 0.33 Likelihood ratio test=14.7 on 4 df, p=0.00529 n= 48, number of events= 10
fmla = 'Surv(days, event) ~ CIN + age_over_75 + age'
m2 = get_cox_ph(surv, covariates=[del_3p.ix[ti(p53_mut==0)], pct_del, old, age], formula=fmla,
print_desc=True, interactions=False);
coef exp(coef) se(coef) z p CIN 1.054 2.870 0.323 3.268 0.0011 age_over_75 0.896 2.451 0.418 2.143 0.0320 age -0.486 0.615 0.559 -0.869 0.3800 Likelihood ratio test=13.9 on 3 df, p=0.00305 n= 48, number of events= 10
LR_test(m1, m2)
0.36082942654866224
pct_del.name = 'CIN'
fig, axs = subplots(3, 1, figsize=(3.5,3.5), sharey=True, sharex=False)
fmla = 'Surv(days, event) ~ del_3p + CIN + age_over_75 + age'
pts = [keepers_o, ti(p53_mut==1), ti(p53_mut==0)]
color_list = ['grey', '#a1d99b','#9ecae1']
for i, pt in enumerate(pts):
ax = axs[i]
m1 = get_cox_ph(surv, covariates=[del_3p.ix[pt], pct_del, old, age], formula=fmla,
print_desc=False, interactions=False);
ci = convert_robj(robjects.r.summary(m1)[7])
ci = ci.ix[['del_3p','CIN']]
ci.index = ['3p', 'CIN']
ci = ci.ix[['CIN', '3p']]
haz = ci['exp(coef)']
for j,h in enumerate(haz):
ax.scatter(h, j, marker='s', s=100, color=color_list[i],
edgecolors=['black'], zorder=10)
ax.plot(*zip(*((ci.iloc[j]['lower .95'],j), (ci.iloc[j]['upper .95'],j))),
lw=3, ls='-', marker='o', dash_joinstyle='bevel', color=color_list[i])
ax.axvline(1, ls='--', color='black')
ax.set_xscale('log')
ax.set_ybound(-.5,1.5)
ax.set_xticks([.25,.5, .8, 1, 1.5, 2,4, 8])
ax.set_xticklabels([.25,.5, .8, 1, 1.5, 2, 4, 8])
ax.set_yticks([0,1])
ax.set_yticklabels(['CIN', '3p'])
prettify_ax(ax)
axs[0].set_xbound(.66,2.5)
axs[1].set_xbound(.66,2.5)
axs[2].set_xbound(.25,20)
axs[2].set_xticks([.5, 1, 2,4, 8])
axs[2].set_xticklabels([.5, 1, 2, 4, 8])
axs[2].set_xlabel('Hazard Ratio')
fig.tight_layout()
fig.savefig(FIGDIR + 'fig2c.pdf', transparent=True)
df1 = pd.concat([del_3p, pct_del, old, age, surv[:,'days'], surv[:,'event']],
keys=['3p','CIN','age_over_75','age', 'Days to Death/Censoring', 'Death Indicator'],
axis=1).dropna()
df1.to_csv(FIGDIR + 'fig2c_all.csv')
df1.ix[ti(p53_mut>0)].to_csv(FIGDIR + 'fig2c_TP53_mut.csv')
df1.ix[ti(p53_mut==0)].to_csv(FIGDIR + 'fig2c_TP53_WT.csv')
fig, axs = subplots(3, 1, figsize=(3.5,4.5), sharey=True, sharex=True)
fmla = 'Surv(days, event) ~ del_3p + CIN + age_over_75'
fmla2 = 'Surv(days, event) ~ CIN + age_over_75'
pts = [keepers_o, ti(p53_mut==1), ti(p53_mut==0)]
color_list = ['grey', '#a1d99b','#9ecae1']
for i, pt in enumerate(pts):
ax = axs[i]
m1 = get_cox_ph(surv, covariates=[del_3p.ix[pt], pct_del, old, age], formula=fmla,
print_desc=False, interactions=False);
m2 = get_cox_ph(surv, covariates=[del_3p.ix[pt], pct_del, old, age], formula=fmla2,
print_desc=False, interactions=False);
print LR_test(m1, m2)
ci = convert_robj(robjects.r.summary(m1)[7])
#ci.index = ['3p14.2', 'CIN', 'age > 75', 'age','year','spread']
#ci = ci.ix[['year', 'spread', 'age','age > 75', 'CIN', '3p14.2']]
haz = ci['exp(coef)']
for j,h in enumerate(haz):
ax.scatter(h, j, marker='s', s=100, color=color_list[i],
edgecolors=['black'], zorder=10)
ax.plot(*zip(*((ci.iloc[j]['lower .95'],j), (ci.iloc[j]['upper .95'],j))),
lw=3, ls='-', marker='o', dash_joinstyle='bevel', color=color_list[i])
ax.axvline(1, ls='--', color='black')
ax.set_xscale('log')
ax.set_xbound(.5,3.5)
ax.set_ybound(-.5,len(ci.index) - .5)
ax.set_xticks([.67, 1, 1.5, 2,3])
ax.set_xticklabels([.67, 1, 1.5, 2, 3])
ax.set_yticks(range(len(ci.index)))
ax.set_yticklabels(ci.index)
prettify_ax(ax)
axs[2].set_xlabel('Hazard Ratio')
fig.tight_layout()
fig.savefig(FIGDIR + 'fig2c_alt.pdf', transparent=True)
0.00049071280384 0.00269718222184 0.418877206975
Even more significant in the presence of confounding variables such as age and year of diagnosis.
year = clinical.binary_df.ix['pre_2000']
year.name = 'year'
fig, axs = subplots(3, 1, figsize=(3.5,4.5), sharey=True, sharex=True)
fmla = 'Surv(days, event) ~ del_3p + CIN + age_over_75 + age + year'
fmla2 = 'Surv(days, event) ~ CIN + age_over_75 + age + year'
pts = [keepers_o, ti(p53_mut==1), ti(p53_mut==0)]
color_list = ['grey', '#a1d99b','#9ecae1']
for i, pt in enumerate(pts):
ax = axs[i]
m1 = get_cox_ph(surv, covariates=[del_3p.ix[pt], pct_del, old, age,
year == False], formula=fmla,
print_desc=False, interactions=False);
m2 = get_cox_ph(surv, covariates=[del_3p.ix[pt], pct_del, old, age,
year == False], formula=fmla2,
print_desc=False, interactions=False);
print LR_test(m1, m2)
ci = convert_robj(robjects.r.summary(m1)[7])
#ci.index = ['3p14.2', 'CIN', 'age > 75', 'age','year','spread']
#ci = ci.ix[['year', 'spread', 'age','age > 75', 'CIN', '3p14.2']]
haz = ci['exp(coef)']
for j,h in enumerate(haz):
ax.scatter(h, j, marker='s', s=100, color=color_list[i],
edgecolors=['black'], zorder=10)
ax.plot(*zip(*((ci.iloc[j]['lower .95'],j), (ci.iloc[j]['upper .95'],j))),
lw=3, ls='-', marker='o', dash_joinstyle='bevel', color=color_list[i])
ax.axvline(1, ls='--', color='black')
ax.set_xscale('log')
ax.set_xbound(.5,3.5)
ax.set_ybound(-.5,len(ci.index) - .5)
ax.set_xticks([.67, 1, 1.5, 2,3])
ax.set_xticklabels([.67, 1, 1.5, 2, 3])
ax.set_yticks(range(len(ci.index)))
ax.set_yticklabels(ci.index)
prettify_ax(ax)
axs[2].set_xlabel('Hazard Ratio')
fig.tight_layout()
fig.savefig(FIGDIR + 'fig2c_alt.pdf', transparent=True)
0.000985887091617 0.00265660814229 0.286580014929