These results are used in the generation of Extended Data Figure 1.
%pylab inline
Populating the interactive namespace from numpy and matplotlib
cd ../src
/cellar/users/agross/TCGA_Code/TCGA/src
from Processing.Imports import *
params = pd.read_table('../global_params.txt', header=None, squeeze=True,
index_col=0)
run_path = '{}/Firehose__{}/'.format(params.ix['OUT_PATH'], params.ix['RUN_DATE'])
run = get_run(run_path, 'Run_' + params.ix['VERSION'])
cancer = run.load_cancer(params.ix['CANCER'])
clinical = cancer.load_clinical()
mut = cancer.load_data('Mutation')
mut.uncompress()
cn = cancer.load_data('CN_broad')
cn.uncompress()
rna = cancer.load_data('mRNASeq')
mirna = cancer.load_data('miRNASeq')
rppa = cancer.load_data('RPPA')
#meth = cancer.load_data('Methylation')
clinical_processed = clinical.processed
hpv_inferred = clinical_processed.hpv_inferred.dropna()
surv = clinical.survival.survival_5y
surv_all = clinical.survival.survival
age = clinical.clinical.age.astype(float)
old = pd.Series(1.*(age>=75), name='old')
hpv_inferred = hpv_inferred.map({True:'HPV+', False:'HPV-'})
survival_and_stats(hpv_inferred, surv, figsize=(6,4))
rcParams['font.size'] = 12
fig, ax = subplots(figsize=(5,3))
draw_survival_curve(hpv_inferred, surv, ax=ax)
ax.legend(title=False, frameon=False, loc='lower left')
prettify_ax(ax)
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/hpv_sup_a.pdf', transparent=True)
fig, ax = subplots(figsize=(3,3))
pd.crosstab(hpv_inferred, clinical.processed.tumor_subdivision).T.plot(kind='bar', ax=ax, rot=15);
ax.legend(title=False, frameon=False)
ax.set_ylabel('# of Patients')
prettify_ax(ax)
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/hpv_sup_b.pdf', transparent=True)
survival_and_stats(1.*(age >= 85) + 1.*(age >=75), surv)
fig, ax = subplots(figsize=(5,3))
draw_survival_curve(1.*(age >= 85) + 1.*(age >=75), surv, ax=ax, colors=[colors[2], colors[4], colors[0]])
ax.legend(title=False, frameon=False, loc='lower right')
prettify_ax(ax)
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/hpv_sup_c.pdf', transparent=True)
fig, ax = subplots(figsize=(3,3))
age.hist(color='grey')
prettify_ax(ax)
ax.set_ylabel('# of Patients')
ax.set_xlabel('Age in Years')
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/hpv_sup_d.pdf', transparent=True)
fisher_exact_test(hpv_inferred, clinical.clinical.gender)
odds_ratio 0.27 p 0.00 dtype: float64
pd.crosstab(hpv_inferred, clinical.clinical.gender).T.plot(kind='bar')
<matplotlib.axes.AxesSubplot at 0x5f9eb50>
violin_plot_pandas(hpv_inferred, clinical.processed.pack_years)
plt.ylim(0,150)
(0, 150)
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv_inferred, age, ax=ax)
prettify_ax(ax)
hpv_all = pd.read_csv('../Extra_Data/hpv_summary_3_20_13_distribute.csv', index_col=0)
hpv = hpv_all.Molecular_HPV.map({0:'HPV-', 1:'HPV+'})
hpv.name = 'HPV'
pd.crosstab(hpv_all.Clinical_HPV_Interpretation, hpv).T
Clinical_HPV_Interpretation | Missing | negative | positive |
---|---|---|---|
HPV | |||
HPV+ | 4 | 16 | 15 |
HPV- | 6 | 238 | 0 |
t = pd.crosstab(hpv_all.Clinical_HPV_Interpretation, hpv).T
pd.crosstab(hpv_all.Clinical_HPV_Interpretation, hpv).T.to_html
<bound method DataFrame.to_html of Clinical_HPV_Interpretation Missing negative positive HPV HPV+ 4 16 15 HPV- 6 238 0>
hpv_type = hpv_all[hpv_all.Molecular_HPV==1].maxSequencingSource
hpv_type = hpv_type.map(lambda s: s[-2:])
hpv_type.name = 'hpv_type'
survival_and_stats(hpv_type, surv, figsize=(6,4))
s = screen_feature(hpv_inferred, kruskal_pandas, rna.global_vars.T)
s.head()
H | p | q | |
---|---|---|---|
pc1 | 45.35 | 1.65e-11 | 7.08e-11 |
background | 44.95 | 2.02e-11 | 7.08e-11 |
pathway_pc1 | 36.45 | 1.57e-09 | 3.66e-09 |
filtered_pc2 | 25.62 | 4.15e-07 | 7.26e-07 |
filtered_pc1 | 22.26 | 2.38e-06 | 3.33e-06 |
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv_inferred, rna.global_vars.pc1, ax=ax)
ax.set_ylabel('mRNA PC1')
ax.set_xlabel('')
prettify_ax(ax)
s = screen_feature(hpv_inferred, kruskal_pandas, rna.features)
print s.head()
H p q (binary, UGT2B28) 90.85 1.55e-21 8.58e-18 (real, FKBP9) 79.16 5.73e-19 1.59e-15 (real, CDKN2A) 76.34 2.38e-18 4.41e-15 (real, PCNA) 73.61 9.52e-18 1.32e-14 (pathways, REACTOME_SYNTHESIS_OF_DNA) 70.67 4.21e-17 4.67e-14
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv_inferred, rna.features.ix['real'].ix['PCNA'], ax=ax)
ax.set_ylabel('PCNA mRNA Exp.')
prettify_ax(ax)
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv_inferred, rna.features.ix['real'].ix['CDKN2A'], ax=ax)
ax.set_ylabel('CDKN2A mRNA Exp.')
prettify_ax(ax)
s = screen_feature(hpv_inferred, kruskal_pandas, rna.pathways)
print s.head()
H p q REACTOME_SYNTHESIS_OF_DNA 70.67 4.21e-17 1.15e-15 REACTOME_G1_S_TRANSITION 69.73 6.80e-17 1.15e-15 REACTOME_S_PHASE 69.70 6.91e-17 1.15e-15 REACTOME_TRANSCRIPTION_COUPLED_NER 68.45 1.30e-16 1.63e-15 BIOCARTA_SRCRPTP_PATHWAY 55.37 1.00e-13 1.00e-12
violin_plot_pandas(hpv_inferred, rna.features.ix['pathways'].ix['REACTOME_SYNTHESIS_OF_DNA'])
fig, axs = subplots(1,3, figsize=(9,3))
violin_plot_pandas(hpv_inferred, rna.global_vars.pc1, ax=axs[0])
axs[0].set_ylabel('mRNA PC1')
violin_plot_pandas(hpv_inferred, rna.features.ix['real'].ix['PCNA'], ax=axs[1])
axs[1].set_ylabel('PCNA mRNA Exp.')
violin_plot_pandas(hpv_inferred, rna.features.ix['real'].ix['CDKN2A'], ax=axs[2])
axs[2].set_ylabel('CDKN2A mRNA Exp.')
for ax in axs:
ax.set_xlabel('')
prettify_ax(ax)
fig.tight_layout(w_pad=3)
s = screen_feature(hpv_inferred, kruskal_pandas, mirna.global_vars.T)
s.head()
H | p | q | |
---|---|---|---|
pc2 | 38.46 | 5.60e-10 | 2.76e-09 |
filtered_pc1 | 37.13 | 1.10e-09 | 2.76e-09 |
pc1 | 14.62 | 1.32e-04 | 2.20e-04 |
background | 4.64 | 3.13e-02 | 3.91e-02 |
filtered_pc2 | 0.01 | 9.32e-01 | 9.32e-01 |
screen_feature(hpv_inferred, kruskal_pandas, mirna.features).head()
H | p | q | |
---|---|---|---|
(real, hsa-mir-9-2) | 72.38 | 1.77e-17 | 3.13e-15 |
(real, hsa-mir-9-1) | 72.05 | 2.10e-17 | 3.13e-15 |
(real, hsa-mir-15b) | 50.75 | 1.05e-12 | 8.03e-11 |
(real, hsa-mir-106a) | 50.70 | 1.07e-12 | 8.03e-11 |
(real, hsa-mir-25) | 45.92 | 1.23e-11 | 7.35e-10 |
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv_inferred, mirna.global_vars.pc1, ax=ax)
ax.set_ylabel('miRNA PC1')
prettify_ax(ax)
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv_inferred, mirna.features.ix['real'].ix['hsa-mir-9-2'], ax=ax)
ax.set_ylabel('mir-9-2 Exp.')
prettify_ax(ax)
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv_inferred, mirna.features.ix['real'].ix['hsa-mir-15b'], ax=ax)
ax.set_ylabel('mir-15b Exp.')
prettify_ax(ax)
fig, axs = subplots(1,3, figsize=(9,3))
violin_plot_pandas(hpv_inferred, mirna.global_vars.pc1, ax=axs[0])
axs[0].set_ylabel('miRNA PC1')
violin_plot_pandas(hpv_inferred, mirna.features.ix['real'].ix['hsa-mir-9-2'], ax=axs[1])
axs[1].set_ylabel('mir-9-2 Exp.')
violin_plot_pandas(hpv_inferred, mirna.features.ix['real'].ix['hsa-mir-15b'], ax=axs[2])
axs[2].set_ylabel('mir-15b Exp.')
for ax in axs:
ax.set_xlabel('')
prettify_ax(ax)
fig.tight_layout(w_pad=3)
s = screen_feature(hpv_inferred, kruskal_pandas, rppa.df.xs('01',1,1))
s.head()
H | p | q | |
---|---|---|---|
(RB1, Rb_pS807_S811-R-V) | 20.29 | 6.67e-06 | 6.30e-04 |
(RB1, Rb-M-V) | 20.13 | 7.24e-06 | 6.30e-04 |
(CAV1, Caveolin-1-R-V) | 18.14 | 2.05e-05 | 1.19e-03 |
(IRS1, IRS1-R-V) | 16.19 | 5.73e-05 | 2.49e-03 |
(NOTCH1, Notch1-R-V) | 15.67 | 7.55e-05 | 2.63e-03 |
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv_inferred, rppa.df.ix['RB1'].ix[1].ix[:,'01'], ax=ax)
ax.set_ylabel('RB1 (Rb-M-V) Antibody')
prettify_ax(ax)
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv_inferred, rppa.df.ix['CAV1'].ix[0].ix[:,'01'], ax=ax)
ax.set_ylabel('Caveolin-1-R-V Antibody')
prettify_ax(ax)
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv_inferred, rppa.df.ix['NOTCH1'].ix[0].ix[:,'01'], ax=ax)
ax.set_ylabel('NOTCH1-R-V Antibody')
prettify_ax(ax)
fig, axs = subplots(1,3, figsize=(9,3))
violin_plot_pandas(hpv_inferred, rppa.df.ix['RB1'].ix[1].ix[:,'01'], ax=axs[0])
axs[0].set_ylabel('RB1 (Rb-M-V) Antibody')
violin_plot_pandas(hpv_inferred, rppa.df.ix['CAV1'].ix[0].ix[:,'01'], ax=axs[1])
axs[1].set_ylabel('Caveolin-1-R-V Antibody')
violin_plot_pandas(hpv_inferred, rppa.df.ix['NOTCH1'].ix[0].ix[:,'01'], ax=axs[2])
axs[2].set_ylabel('NOTCH1-R-V Antibody')
for ax in axs:
ax.set_xlabel('')
prettify_ax(ax)
fig.tight_layout(w_pad=3)
s = screen_feature(hpv_inferred, chi2_cont_test, mut.features)
s.head(6)
chi2 | p | dof | q | |
---|---|---|---|---|
TP53 | 100.90 | 9.67e-24 | 1 | 9.56e-21 |
KEGG_MAPK_SIGNALING_PATHWAY | 42.50 | 7.06e-11 | 1 | 2.53e-08 |
KEGG_CELL_CYCLE | 42.34 | 7.67e-11 | 1 | 2.53e-08 |
ZNF750 | 13.99 | 1.84e-04 | 1 | 4.54e-02 |
CDKN2A | 12.42 | 4.25e-04 | 1 | 8.42e-02 |
FAT1 | 11.56 | 6.73e-04 | 1 | 1.11e-01 |
hpv_inferred.name = 'HPV+'
venn_pandas(mut.features.ix['TP53'], hpv_inferred=='HPV+')
<matplotlib_venn._venn2.Venn2 instance at 0xc833368>
s = screen_feature(hpv_inferred, chi2_cont_test, cn.features)
s.head(10)
chi2 | p | dof | q | |
---|---|---|---|---|
(Deletion, 9p21.3, Lesion) | 50.58 | 1.14e-12 | 1 | 8.01e-11 |
(Deletion, 18q23, Lesion) | 47.16 | 6.54e-12 | 1 | 2.29e-10 |
(Deletion, 18q21.2, Lesion) | 45.53 | 1.50e-11 | 1 | 3.50e-10 |
(Deletion, 3p25.3, Lesion) | 40.61 | 1.85e-10 | 1 | 3.24e-09 |
(Deletion, 8p23.2, Lesion) | 36.38 | 1.62e-09 | 1 | 2.27e-08 |
(Amplification, 20q11.22, Lesion) | 35.15 | 2.34e-08 | 2 | 2.72e-07 |
(Deletion, 3p12.2, Lesion) | 30.11 | 4.09e-08 | 1 | 4.09e-07 |
(Amplification, 7p11.2, Lesion) | 33.54 | 5.21e-08 | 2 | 4.56e-07 |
(Deletion, 9p23, Lesion) | 27.12 | 1.91e-07 | 1 | 1.49e-06 |
(Amplification, 8q24.21, Lesion) | 28.06 | 8.05e-07 | 2 | 5.64e-06 |
hpv_inferred.name = ''
p16_del = cn.df.ix[('Deletion', '9p21.3', 'Lesion')]<0
p16_del.name = 'deletion'
p16_mut = mut.df.ix['CDKN2A']>0
p16_mut.name = 'mutation'
cp = combine(p16_del, p16_mut).replace('both', 'mutation + deletion')
cp.name = 'p16 Status'
pd.crosstab(cp, hpv_inferred).plot(kind='bar', rot=15);
fig, axs = subplots(1,2, figsize=(8,3))
hpv_inferred.name = 'HPV'
rb1 = rppa.df.ix['RB1'].ix[1].ix[:,'01']
f = combine(hpv_inferred=='HPV+', mut.df.ix['TP53']>0).map({'TP53':'TP53mut\nHPV-','neither': 'TP53wt\nHPV-',
'HPV': 'TP53wt\nHPV+'})
f = f.dropna()
f.name = 'TP53 Status'
violin_plot_pandas(f, rb1, ax=axs[0]);
violin_plot_pandas(f, rna.df.ix['TP53'].ix[:,'01'], ax=axs[1]);
axs[1].set_ylabel('TP53 Expression')
for ax in axs:
ax.set_title('')