%pylab inline
Populating the interactive namespace from numpy and matplotlib
cd ../src
/cellar/users/agross/TCGA_Code/TCGA/src
from Processing.Imports import *
import Data.Firehose as FH
params = pd.read_table('../global_params.txt', header=None, squeeze=True,
index_col=0)
run_path = '{}/Firehose__{}/'.format(params.ix['OUT_PATH'], params.ix['RUN_DATE'])
run = get_run(run_path, 'Run_' + params.ix['VERSION'])
cancer = run.load_cancer(params.ix['CANCER'])
clinical = cancer.load_clinical()
mut = cancer.load_data('Mutation')
mut.uncompress()
cn = cancer.load_data('CN_broad')
cn.uncompress()
rna = pickle.load(open(cancer.path + '/mRNASeq/store/no_hpv.p', 'rb'))
mirna = pickle.load(open(cancer.path + '/miRNASeq/store/no_hpv.p', 'rb'))
clinical_processed = clinical.processed
clinical_processed['year'] = clinical_processed.year == 'post_2000'
hpv_inferred = clinical_processed.hpv_inferred
surv = clinical.survival.survival_5y
age = clinical.clinical.age.astype(float)
old = pd.Series(1.*(age>=75), name='old')
keepers_o = true_index(hpv_inferred == 0)
keepers_o = keepers_o.intersection(mut.features.columns)
keepers_o = keepers_o.intersection(cn.features.columns)
keepers_o = keepers_o.intersection(surv.unstack().index)
keepers_o = keepers_o.intersection(rna.features.columns)
keepers_o = keepers_o.intersection(mirna.features.columns)
keepers_o = keepers_o.intersection(true_index(age < 85))
cn_by_gene = FH.get_gistic_gene_matrix(run.data_path, cancer.name)
cn.features.index = cn.features.index.droplevel(2)
cin = ((cn.features.ix['Amplification'] > 0).sum() +
(cn.features.ix['Deletion'] > 0).sum())
cin.name = 'Chromosomal Instability'
pct_del = (cn_by_gene < 0).sum() / (1.*len(cn_by_gene))
pct_amp = (cn_by_gene > 0).sum() / (1.*len(cn_by_gene))
pct_altered = pct_amp + pct_del
pct_altered.name = 'CIN_pct_altered'
draw_survival_curves(pct_altered, surv, hpv_inferred.dropna())
violin_plot_pandas(hpv_inferred, pct_altered)
df = mut.df.ix[(mut.df.sum(1).order() > 5)]
tt = lambda a,b: kruskal_pandas(b,a)
s = screen_feature(pct_altered, tt, df.clip_upper(1.))
s.head(10)
H | p | q | |
---|---|---|---|
TP53 | 42.99 | 5.50e-11 | 9.27e-08 |
CASP8 | 23.59 | 1.19e-06 | 1.00e-03 |
HRAS | 19.55 | 9.82e-06 | 5.52e-03 |
PRDM9 | 12.99 | 3.14e-04 | 1.32e-01 |
FAM83G | 10.96 | 9.32e-04 | 3.00e-01 |
PIK3CA | 10.71 | 1.07e-03 | 3.00e-01 |
DOPEY2 | 10.34 | 1.30e-03 | 3.13e-01 |
PCLO | 9.91 | 1.64e-03 | 3.47e-01 |
SPEF2 | 9.60 | 1.95e-03 | 3.65e-01 |
SNAPC4 | 9.33 | 2.25e-03 | 3.80e-01 |
10 rows × 3 columns
df = cn.features.ix['Deletion']
df.index = df.index.get_level_values(0)
screen_feature(pct_altered, rev_kruskal, df < 0).head()
H | p | q | |
---|---|---|---|
3p12.2 | 136.59 | 1.48e-31 | 7.11e-30 |
3p14.2 | 134.35 | 4.58e-31 | 1.10e-29 |
5q11.2 | 126.61 | 2.26e-29 | 3.62e-28 |
3p14.3 | 122.84 | 1.51e-28 | 1.82e-27 |
3p25.3 | 113.45 | 1.72e-26 | 1.65e-25 |
5 rows × 3 columns
df = cn.features.ix['Amplification']
df.index = df.index.get_level_values(0)
screen_feature(pct_altered, tt, df > 0).head()
H | p | q | |
---|---|---|---|
7q21.3 | 116.09 | 4.55e-27 | 1.18e-25 |
3q26.33 | 112.63 | 2.61e-26 | 3.39e-25 |
12p13.33 | 109.82 | 1.07e-25 | 9.31e-25 |
2q11.2 | 104.18 | 1.84e-24 | 1.20e-23 |
2q31.2 | 98.44 | 3.36e-23 | 1.75e-22 |
5 rows × 3 columns
fig, axs = subplots(1,2, figsize=(12,4))
violin_plot_pandas(cn.features.ix[('Deletion','5q11.2')], pct_altered, ax=axs[0])
violin_plot_pandas(cn.features.ix[('Deletion','3p14.2')], pct_altered, ax=axs[1])
del_3p = cn.features.ix[('Deletion','3p14.2')] < 0
del_3p = del_3p.ix[keepers_o]
del_3p.name = 'del_3p'
old.name = 'age_over_75'
Other chromosomal regions associated with 3p loss.
s = screen_feature(del_3p, fisher_exact_test, cn.features.ix['Deletion'] < 0)
s.head(10)
odds_ratio | p | q | |
---|---|---|---|
3p14.2 | inf | 8.26e-51 | 3.96e-49 |
3p14.3 | 9020.00 | 7.65e-47 | 1.84e-45 |
3p25.3 | 248.18 | 8.05e-34 | 1.29e-32 |
3p12.2 | 226.33 | 3.57e-33 | 4.28e-32 |
18q23 | 14.43 | 4.86e-13 | 4.66e-12 |
18q21.2 | 8.13 | 4.84e-09 | 3.53e-08 |
9p23 | 9.09 | 5.70e-09 | 3.53e-08 |
5q11.2 | 10.73 | 5.88e-09 | 3.53e-08 |
4p16.3 | 11.52 | 1.50e-08 | 8.00e-08 |
5q15 | 8.89 | 4.12e-08 | 1.98e-07 |
10 rows × 3 columns
args = [run.data_path] + [cancer.name]
f = '{}/analyses/{}/CopyNumber_Gistic2/all_lesions.conf_99.txt'.format(*args)
lesions = pd.read_table(f, index_col=0)
lesions = lesions.set_index('Descriptor').select(lambda s: s.startswith('TCGA'), 1)
lesions = lesions.groupby(lambda s: s[:12],1).first()
lesions.index = map(str.strip, lesions.index)
levels = lesions.ix['3p14.2'].ix[2].ix[keepers_o]
calls = lesions.ix['3p14.2'].ix[0].ix[keepers_o]
calls.name = 'del_3p'
args = [run.data_path] + [cancer.name]
f = '{}/analyses/{}/CopyNumber_Gistic2/broad_values_by_arm.txt'.format(*args)
arm = pd.read_table(f, index_col=0)
arm = arm.groupby(lambda s: s[:12],1).first()
p53_mut = mut.features.ix['TP53'] > 0
fig, axs = subplots(1, 2, figsize=(12,4))
levels.hist(bins=50, ax=axs[0])
s = pd.DataFrame({c: log_rank(levels < c, surv) for c in levels.order()[2:-2]})
s.ix['p'].plot(ax=axs[1])
axs[0].set_xlabel('Copy Number')
axs[1].set_xlabel('Copy Number')
axs[1].set_ylabel('p-value')
axs[1].set_yscale('log')
colors = rcParams['axes.color_cycle']
coords = pd.read_csv('../Extra_Data/gene_coords.csv', index_col=0)
cn_by_gene = FH.get_gistic_gene_matrix(run.data_path, cancer.name)
cn_by_gene = cn_by_gene.reset_index(level=[0,1])
cn_by_gene = cn_by_gene.join(coords).set_index(['chrom','start', 'Cytoband','Locus ID'], append=True)
cn_by_gene = cn_by_gene.ix[:,keepers_o].dropna(axis=1)
ch_3 = cn_by_gene.xs('3', level='chrom')
p_arm = pd.Series(ch_3.index.get_level_values('Cytoband').map(lambda s: 'p' in s), ch_3.index)
p_arm = ch_3[p_arm].sortlevel('start')
p_arm = p_arm.ix[:-1]
#df = rna.df.xs('01', 1, 1).ix[p_arm.index.get_level_values(0)].dropna()#
#df = rna.features.ix['real'].ix[p_arm.index.get_level_values(0)].dropna()
df = rna.df.xs('01', 1, 1).ix[p_arm.index.get_level_values(0)].dropna()
#df = df.ix[screen_feature(del_3p, kruskal_pandas, df).p < .05]
df = df.ix[:,keepers_o].dropna(1)
test = lambda s: get_cox_ph_ms(surv, s, [age, old], interactions='just_first')
exp = df.apply(test, axis=1)
exp = exp.sort_index()
exp.index = p_arm.sort_index().ix[exp.index].index
exp2 = -exp.LR.map(log10)
fig = plt.figure(figsize=(8,4))
ax1 = plt.subplot2grid((7,1), (0,0), rowspan=3, colspan=1)
ax2 = plt.subplot2grid((7,1), (3,0), rowspan=3, colspan=1)
ax3 = plt.subplot2grid((7,1), (6,0), rowspan=1, colspan=1)
bands = p_arm.reset_index().groupby('Cytoband')
bands = bands.first().start.order()
starts = p_arm.index.get_level_values('start')
for i,(idx,v) in enumerate(bands.iteritems()):
try:
r = plt.Rectangle((v,0), bands.iloc[i+1]-v, 1, lw=2,
alpha=.5, fc=colors[i%2])
except:
r = plt.Rectangle((v,0), starts[-1] - v, 1, lw=2,
alpha=.5, fc=colors[i%2])
ax3.add_artist(r)
ax3.set_xticks(bands)
b = [{0:s,1:''}[(i%2)] for i,s in enumerate(bands.index)]
ax3.set_xticklabels(b, rotation=270+45, ha='left')
ax1.scatter(starts, (p_arm < -.1).sum(1), s=40, c=colors[2],
edgecolor='grey', lw=.5, alpha=.5)
ax2.scatter(exp2.index.get_level_values('start'), exp2, s=40,
c=colors[4], edgecolor='grey', alpha=.7)
#ax1.set_ybound(215,255)
ax1.set_ylabel('# of Patients\nwith Deletion\n\n', ha='center', va='center')
ax2.set_ybound(-.5,4.)
ax2.set_ylabel('mRNA Survival\nAssociation\n(Log10 p)\n\n\n', ha='center', va='center')
ax3.set_yticks([])
fig.tight_layout()
for ax in [ax1, ax2, ax3]:
ax.set_xbound(starts[0],
starts[-1])
ax.set_xticks(bands)
for n, ss in exp2.order().tail(2).iteritems():
ax2.annotate(n[0], (n[1], ss), xytext=(15, -5),
textcoords='offset points',
arrowprops=dict(arrowstyle="->"))
ax1.set_xticklabels('')
ax2.set_xticklabels('')
ax2.set_yticks([0,1,2,3,4,5])
fig.tight_layout()
#fig.savefig('/cellar/users/agross/figures/supp_2a.pdf', transparent=True)
levels = lesions.ix['3p14.2'].ix[2]
calls = lesions.ix['3p14.2'].ix[0]
calls.name = 'del_3p'
cn_by_gene = FH.get_gistic_gene_matrix(run.data_path, cancer.name)
cn_by_gene = cn_by_gene.reset_index(level=[0,1])
cn_by_gene = cn_by_gene.join(coords).set_index(['chrom','start', 'Cytoband','Locus ID'], append=True)
cn_by_gene = cn_by_gene.ix[:,true_index(hpv_inferred==True)].dropna(axis=1)
ch_3 = cn_by_gene.xs('3', level='chrom')
p_arm = pd.Series(ch_3.index.get_level_values('Cytoband').map(lambda s: 'p' in s), ch_3.index)
p_arm = ch_3[p_arm].sortlevel('start')
p_arm = p_arm.ix[:-1] #bad annotation
rna_all = cancer.load_data('mRNASeq')
#df = rna_all.features.ix['real'].ix[p_arm.index.get_level_values(0)].dropna()
df = rna_all.df.xs('01', 1, 1).ix[p_arm.index.get_level_values(0)].dropna()
df = df.ix[:,true_index(hpv_inferred.dropna())]
#df = df.ix[screen_feature(del_3p, kruskal_pandas, df).p < .05]
test = lambda s: get_cox_ph_ms(surv, s, [age, old], interactions='just_first')
exp = df.apply(test, axis=1)
exp = exp.sort_index()
exp.index = p_arm.sort_index().ix[exp.index].index
exp2 = -exp.LR.map(log10)
fig = plt.figure(figsize=(8,4))
ax1 = plt.subplot2grid((7,1), (0,0), rowspan=3, colspan=1)
ax2 = plt.subplot2grid((7,1), (3,0), rowspan=3, colspan=1)
ax3 = plt.subplot2grid((7,1), (6,0), rowspan=1, colspan=1)
bands = p_arm.reset_index().groupby('Cytoband')
bands = bands.first().start.order()
starts = p_arm.index.get_level_values('start')
for i,(idx,v) in enumerate(bands.iteritems()):
try:
r = plt.Rectangle((v,0), bands.iloc[i+1]-v, 1, lw=2,
alpha=.5, fc=colors[i%2])
except:
r = plt.Rectangle((v,0), starts[-1] - v, 1, lw=2,
alpha=.5, fc=colors[i%2])
ax3.add_artist(r)
ax3.set_xticks(bands)
b = [{0:s,1:''}[(i%2)] for i,s in enumerate(bands.index)]
ax3.set_xticklabels(b, rotation=270+45, ha='left')
ax1.scatter(starts, (p_arm < -.1).sum(1), s=40, c=colors[2],
edgecolor='grey', alpha=1)
ax2.scatter(exp2.index.get_level_values('start'), exp2, s=40,
c=colors[4], edgecolor='grey', alpha=1)
#ax1.set_ybound(215,255)
ax1.set_ylabel('# of Patients\nwith Deletion\n\n', ha='center', va='center')
ax2.set_ybound(-.5,3)
ax2.set_ylabel('mRNA Survival\nAssociation\n(Log10 p)\n\n\n', ha='center', va='center')
ax3.set_yticks([])
fig.tight_layout()
for ax in [ax1, ax2, ax3]:
ax.set_xbound(starts[0],
starts[-1])
ax.set_xticks(bands)
for n, ss in exp2.order().tail(2).iteritems():
ax2.annotate(n[0], (n[1], ss), xytext=(15, -5),
textcoords='offset points',
arrowprops=dict(arrowstyle="->"))
ax1.set_xticklabels('')
ax2.set_xticklabels('')
ax2.set_yticks([0,1,2,3,4,5,6,7])
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/supp_2b.pdf', transparent=True)
t1 = screen_feature(pct_altered.ix[keepers_o], rev_kruskal, mut.df.ix[mut.df.ix[:,keepers_o].sum(1) > 5])
t1.head(10)
H | p | q | |
---|---|---|---|
CASP8 | 22.75 | 1.85e-06 | 0.00 |
HRAS | 18.21 | 1.11e-04 | 0.07 |
TP53 | 16.86 | 2.18e-04 | 0.09 |
HERC2 | 10.02 | 1.55e-03 | 0.47 |
TGFBR2 | 11.88 | 2.63e-03 | 0.63 |
CCDC129 | 8.16 | 4.27e-03 | 0.71 |
ZFHX3 | 8.01 | 4.64e-03 | 0.71 |
COL4A4 | 7.98 | 4.73e-03 | 0.71 |
PIK3CA | 7.47 | 6.28e-03 | 0.77 |
AKAP11 | 7.40 | 6.52e-03 | 0.77 |
10 rows × 3 columns
t1 = screen_feature(pct_altered.ix[keepers_o], rev_kruskal,
cn.features.ix['Deletion'] < 0)
t2 = screen_feature(pct_altered.ix[keepers_o], rev_kruskal,
cn.features.ix['Amplification'] > 0)
del t1['q']
t1['p_bonf'] = t1.p * len(t1)
del t2['q']
t2['p_bonf'] = t2.p * len(t2)
pd.concat([t1.head(6), t2.head(6)], keys=['Deletion', 'Amplification'])
H | p | p_bonf | ||
---|---|---|---|---|
Deletion | 5q11.2 | 67.04 | 2.66e-16 | 1.28e-14 |
1p13.2 | 59.09 | 1.51e-14 | 7.24e-13 | |
5q15 | 57.26 | 3.81e-14 | 1.83e-12 | |
3p12.2 | 56.70 | 5.08e-14 | 2.44e-12 | |
5q35.3 | 56.19 | 6.58e-14 | 3.16e-12 | |
3p14.2 | 54.33 | 1.70e-13 | 8.16e-12 | |
Amplification | 12p13.33 | 70.95 | 3.67e-17 | 9.53e-16 |
7q21.3 | 66.42 | 3.65e-16 | 9.49e-15 | |
17q12 | 58.23 | 2.33e-14 | 6.06e-13 | |
2q31.2 | 54.03 | 1.97e-13 | 5.13e-12 | |
2q11.2 | 53.07 | 3.22e-13 | 8.37e-12 | |
12q15 | 50.28 | 1.33e-12 | 3.46e-11 |
12 rows × 3 columns
Chromosomal segments associated with TP53 mutation in HPV- pateints.
In text as : "While mutation of TP53 has previously been associated with chromosomal instability, we find that TP53 associates with 3p chromosomal loss far more frequently than it does with deletions in other chromosomal regions with similar fragile sites, suggesting a selective advantage of this combination of events for tumor cells."
t3 = screen_feature(p53_mut.ix[keepers_o], fisher_exact_test,
cn.features.ix['Deletion'] < 0).head(5)
t4 = screen_feature(p53_mut.ix[keepers_o], fisher_exact_test,
cn.features.ix['Amplification'] > 0).head()
pd.concat([t3, t4], keys=['Deletion', 'Amplification'])
odds_ratio | p | q | ||
---|---|---|---|---|
Deletion | 3p14.3 | 6.34 | 5.34e-07 | 1.24e-05 |
3p14.2 | 6.34 | 5.34e-07 | 1.24e-05 | |
3p25.3 | 5.73 | 7.73e-07 | 1.24e-05 | |
3p12.2 | 4.88 | 7.98e-06 | 9.58e-05 | |
10p15.3 | 4.10 | 4.17e-04 | 4.01e-03 | |
Amplification | 3q26.33 | 6.06 | 1.53e-07 | 3.98e-06 |
8q24.21 | 4.08 | 5.14e-05 | 6.68e-04 | |
12p13.33 | 3.30 | 1.68e-03 | 1.45e-02 | |
18p11.31 | 2.68 | 1.14e-02 | 7.39e-02 | |
9p24.1 | 0.41 | 1.48e-02 | 7.68e-02 |
10 rows × 3 columns
Association of percentage of genes amplified or deleted TP53 mutation in HPV- pateints.
fig, axs = subplots(2,1, figsize=(4,6))
p53_mut = mut.features.ix['TP53'].ix[keepers_o].map({0:'TP53_WT', 1:'TP53_mut'})
violin_plot_pandas(p53_mut, pct_del, ax=axs[0])
axs[0].set_ylabel('% of Genes Deleted')
violin_plot_pandas(p53_mut, pct_amp, ax=axs[1])
axs[1].set_ylabel('% of Genes Amplified')
for ax in axs:
prettify_ax(ax)
ax.set_xlabel('')
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/p53_cin_supp.pdf', transparent=True)
violin_plot_pandas(del_3p, pct_altered)
del_3p.name = 'del_3p'
fmla = 'Surv(days, event) ~ del_3p + CIN_pct_altered'
m1 = get_cox_ph(surv, covariates=[del_3p, pct_altered, old, age], formula=fmla,
print_desc=True, interactions=False);
coef exp(coef) se(coef) z p del_3p 0.4899 1.632 0.155 3.164 0.0016 CIN_pct_altered -0.0117 0.988 0.113 -0.103 0.9200 Likelihood ratio test=16.5 on 2 df, p=0.000256 n= 251, number of events= 102
fmla = 'Surv(days, event) ~ CIN_pct_altered'
m2 = get_cox_ph(surv, covariates=[del_3p, pct_altered], formula=fmla,
print_desc=True, interactions=False);
coef exp(coef) se(coef) z p CIN_pct_altered 0.187 1.21 0.0948 1.98 0.048 Likelihood ratio test=3.94 on 1 df, p=0.0471 n= 251, number of events= 102
LR_test(m1, m2)
0.00038631550656680378
del_3p.name = 'del_3p'
p53_mut = mut.features.ix['TP53'].ix[keepers_o]
fmla = 'Surv(days, event) ~ del_3p + CIN_pct_altered'
m1 = get_cox_ph(surv, covariates=[del_3p.ix[ti(p53_mut==1)], pct_altered], formula=fmla,
print_desc=True, interactions=False);
coef exp(coef) se(coef) z p del_3p 0.3788 1.460 0.154 2.463 0.014 CIN_pct_altered -0.0823 0.921 0.117 -0.701 0.480 Likelihood ratio test=8.28 on 2 df, p=0.0159 n= 202, number of events= 92
fmla = 'Surv(days, event) ~ CIN_pct_altered'
m2 = get_cox_ph(surv, covariates=[del_3p.ix[ti(p53_mut==1)], pct_altered], formula=fmla,
print_desc=True, interactions=False);
coef exp(coef) se(coef) z p CIN_pct_altered 0.0457 1.05 0.104 0.437 0.66 Likelihood ratio test=0.19 on 1 df, p=0.662 n= 202, number of events= 92
LR_test(m1, m2)
0.004454787751949814
fmla = 'Surv(days, event) ~ del_3p + CIN_pct_altered'
m1 = get_cox_ph(surv, covariates=[del_3p.ix[ti(p53_mut==0)], pct_altered], formula=fmla,
print_desc=True, interactions=False);
coef exp(coef) se(coef) z p del_3p -0.0572 0.944 0.522 -0.109 0.91 CIN_pct_altered 0.5815 1.789 0.480 1.213 0.23 Likelihood ratio test=3 on 2 df, p=0.223 n= 49, number of events= 10
fmla = 'Surv(days, event) ~ CIN_pct_altered'
m2 = get_cox_ph(surv, covariates=[del_3p.ix[ti(p53_mut==0)], pct_altered], formula=fmla,
print_desc=True, interactions=False);
coef exp(coef) se(coef) z p CIN_pct_altered 0.543 1.72 0.321 1.69 0.09 Likelihood ratio test=2.99 on 1 df, p=0.0839 n= 49, number of events= 10
LR_test(m1, m2)
0.91280932948862625
fig, axs = subplots(3, 1, figsize=(3.5,3.5), sharey=True, sharex=True)
fmla = 'Surv(days, event) ~ del_3p + CIN_pct_altered'
pts = [keepers_o, ti(p53_mut==1), ti(p53_mut==0)]
color_list = ['grey', '#a1d99b','#9ecae1']
for i, pt in enumerate(pts):
ax = axs[i]
m1 = get_cox_ph(surv, covariates=[del_3p.ix[pt], pct_altered], formula=fmla,
print_desc=False, interactions=False);
ci = convert_robj(robjects.r.summary(m1)[7])
ci.index = ['3p14.2', 'CIN']
ci = ci.ix[['CIN', '3p14.2']]
haz = ci['exp(coef)']
for j,h in enumerate(haz):
ax.scatter(h, j, marker='s', s=100, color=color_list[i],
edgecolors=['black'], zorder=10)
ax.plot(*zip(*((ci.iloc[j]['lower .95'],j), (ci.iloc[j]['upper .95'],j))),
lw=3, ls='-', marker='o', dash_joinstyle='bevel', color=color_list[i])
ax.axvline(1, ls='--', color='black')
ax.set_xscale('log')
ax.set_xbound(.6,2.5)
ax.set_ybound(-.5,1.5)
ax.set_xticks([.67, .8, 1, 1.25, 1.5, 2])
ax.set_xticklabels([.67, .8, 1, 1.25, 1.5, 2])
ax.set_yticks([0,1])
ax.set_yticklabels(['CIN', '3p14.2'])
prettify_ax(ax)
axs[2].set_xlabel('Hazard Ratio')
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/fig2c.pdf', transparent=True)
fig, axs = subplots(3, 1, figsize=(3.5,4.5), sharey=True, sharex=True)
fmla = 'Surv(days, event) ~ del_3p + CIN_pct_altered + age_over_75'
fmla2 = 'Surv(days, event) ~ CIN_pct_altered + age_over_75'
pts = [keepers_o, ti(p53_mut==1), ti(p53_mut==0)]
color_list = ['grey', '#a1d99b','#9ecae1']
for i, pt in enumerate(pts):
ax = axs[i]
m1 = get_cox_ph(surv, covariates=[del_3p.ix[pt], pct_altered, old, age,
clinical_processed.year == False,
clinical_processed.spread_inferred], formula=fmla,
print_desc=False, interactions=False);
m2 = get_cox_ph(surv, covariates=[del_3p.ix[pt], pct_altered, old, age,
clinical_processed.year == False,
clinical_processed.spread_inferred], formula=fmla2,
print_desc=False, interactions=False);
print LR_test(m1, m2)
ci = convert_robj(robjects.r.summary(m1)[7])
#ci.index = ['3p14.2', 'CIN', 'age > 75', 'age','year','spread']
#ci = ci.ix[['year', 'spread', 'age','age > 75', 'CIN', '3p14.2']]
haz = ci['exp(coef)']
for j,h in enumerate(haz):
ax.scatter(h, j, marker='s', s=100, color=color_list[i],
edgecolors=['black'], zorder=10)
ax.plot(*zip(*((ci.iloc[j]['lower .95'],j), (ci.iloc[j]['upper .95'],j))),
lw=3, ls='-', marker='o', dash_joinstyle='bevel', color=color_list[i])
ax.axvline(1, ls='--', color='black')
ax.set_xscale('log')
ax.set_xbound(.5,3.5)
ax.set_ybound(-.5,len(ci.index) - .5)
ax.set_xticks([.67, 1, 1.5, 2,3])
ax.set_xticklabels([.67, 1, 1.5, 2, 3])
ax.set_yticks(range(len(ci.index)))
ax.set_yticklabels(ci.index)
prettify_ax(ax)
axs[2].set_xlabel('Hazard Ratio')
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/fig2c_alt.pdf', transparent=True)
0.000114383181974 0.00229895545575 0.753373464534
Even more significant in the presence of confounding variables such as age and year of diagnosis.
fig, axs = subplots(3, 1, figsize=(3.5,4.5), sharey=True, sharex=True)
fmla = 'Surv(days, event) ~ del_3p + CIN_pct_altered + age_over_75 + age + year'
fmla2 = 'Surv(days, event) ~ CIN_pct_altered + age_over_75 + age + year'
pts = [keepers_o, ti(p53_mut==1), ti(p53_mut==0)]
color_list = ['grey', '#a1d99b','#9ecae1']
for i, pt in enumerate(pts):
ax = axs[i]
m1 = get_cox_ph(surv, covariates=[del_3p.ix[pt], pct_altered, old, age,
clinical_processed.year == False,
clinical_processed.spread_inferred], formula=fmla,
print_desc=False, interactions=False);
m2 = get_cox_ph(surv, covariates=[del_3p.ix[pt], pct_altered, old, age,
clinical_processed.year == False,
clinical_processed.spread_inferred], formula=fmla2,
print_desc=False, interactions=False);
print LR_test(m1, m2)
ci = convert_robj(robjects.r.summary(m1)[7])
#ci.index = ['3p14.2', 'CIN', 'age > 75', 'age','year','spread']
#ci = ci.ix[['year', 'spread', 'age','age > 75', 'CIN', '3p14.2']]
haz = ci['exp(coef)']
for j,h in enumerate(haz):
ax.scatter(h, j, marker='s', s=100, color=color_list[i],
edgecolors=['black'], zorder=10)
ax.plot(*zip(*((ci.iloc[j]['lower .95'],j), (ci.iloc[j]['upper .95'],j))),
lw=3, ls='-', marker='o', dash_joinstyle='bevel', color=color_list[i])
ax.axvline(1, ls='--', color='black')
ax.set_xscale('log')
ax.set_xbound(.5,3.5)
ax.set_ybound(-.5,len(ci.index) - .5)
ax.set_xticks([.67, 1, 1.5, 2,3])
ax.set_xticklabels([.67, 1, 1.5, 2, 3])
ax.set_yticks(range(len(ci.index)))
ax.set_yticklabels(ci.index)
prettify_ax(ax)
axs[2].set_xlabel('Hazard Ratio')
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/fig2c_alt.pdf', transparent=True)
0.000222998526521 0.00199188598036 0.794914823755