Here we include a general analysis of HPV within the first round of TCGA patients (which eventually became the discovery cohort after filters). Most of this analysis was not used in the final paper, but it can be seen that there are very clear global differences in HPV+ and HPV- patients which would confound analysis of these patients as a combined cohort.
import NotebookImport
from Imports import *
importing IPython notebook from Imports.ipynb Populating the interactive namespace from numpy and matplotlib changing to source dirctory populating namespace with data
For the majority of the analysis we use the RNA and mRNA datasets filtered down to HPV- patients. Here we use the full datasets, so we need to reload them on top of the others (which are loaded by default in the Imports notebook).
rna = cancer.load_data('mRNASeq')
mirna = cancer.load_data('miRNASeq')
rppa = cancer.load_data('RPPA')
hpv = hpv.map({True:'HPV+', False:'HPV-'})
fig, ax = subplots(figsize=(5,3))
draw_survival_curve(hpv, surv, ax=ax)
ax.legend(title=False, frameon=False, loc='lower left')
prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'hpv_sup_a.pdf', transparent=True)
survival_stat_plot(get_surv_fit(surv, hpv))
fig, ax = subplots(figsize=(3,3))
ct = pd.crosstab(hpv, clinical.processed.tumor_subdivision)
ct.T.plot(kind='bar', ax=ax, rot=15)
ax.legend(title=False, frameon=False)
ax.set_ylabel('# of Patients')
prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'hpv_sup_b.pdf', transparent=True)
fig, ax = subplots(figsize=(5,3))
draw_survival_curve(1.*(age >= 85) + 1.*(age >=75), surv, ax=ax, colors=[colors[2], colors[4], colors[0]])
ax.legend(title=False, frameon=False, loc='lower right')
prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'hpv_sup_c.pdf', transparent=True)
survival_stat_plot(get_surv_fit(surv, 1.*(age >= 85) + 1.*(age >=75)))
fig, ax = subplots(figsize=(3,3))
age.hist(color='grey')
prettify_ax(ax)
ax.set_ylabel('# of Patients')
ax.set_xlabel('Age in Years')
fig.tight_layout()
fig.savefig(FIGDIR + 'hpv_sup_d.pdf', transparent=True)
fisher_exact_test(hpv, clinical.clinical.gender)
odds_ratio 2.28e-01 p 7.92e-04 dtype: float64
pd.crosstab(hpv, clinical.clinical.gender).T.plot(kind='bar')
<matplotlib.axes.AxesSubplot at 0xc8fabd0>
violin_plot_pandas(hpv, clinical.processed.pack_years)
plt.ylim(0,150);
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv, age, ax=ax)
prettify_ax(ax)
hpv_all = pd.read_csv('../Extra_Data/hpv_summary_3_20_13_distribute.csv', index_col=0)
hpv_s = hpv_all.Molecular_HPV.map({0:'HPV-', 1:'HPV+'})
hpv_s.name = 'HPV'
pd.crosstab(hpv_all.Clinical_HPV_Interpretation, hpv_s).T
Clinical_HPV_Interpretation | Missing | negative | positive |
---|---|---|---|
HPV | |||
HPV+ | 4 | 16 | 15 |
HPV- | 6 | 238 | 0 |
hpv_new = pd.read_table('../Extra_Data/nationwidechildrens.org_auxiliary_hnsc.txt',
skiprows=[1], index_col=0, na_values=['[Not Available]'])
hpv_type2 = hpv_new['hpv_call_1']
pd.crosstab(hpv_new['hpv_status'], mut.features.ix['TP53'])
TP53 | 0.0 | 1.0 |
---|---|---|
hpv_status | ||
Indeterminate | 0 | 2 |
Negative | 51 | 189 |
Positive | 42 | 22 |
hpv_type = hpv_all[hpv_all.Molecular_HPV==1].maxSequencingSource
hpv_type = hpv_type.map(lambda s: s[-2:])
hpv_type = 'HPV' + hpv_type
hpv_type = hpv_type.combine_first(hpv_type2).dropna()
hpv_type.name = 'hpv_type'
survival_and_stats(hpv_type, surv, figsize=(6,4))
s = screen_feature(hpv, kruskal_pandas, rna.global_vars.T)
s.head()
H | p | q | |
---|---|---|---|
pathway_pc1 | 58.62 | 1.91e-14 | 1.34e-13 |
filtered_pc2 | 47.37 | 5.88e-12 | 1.89e-11 |
pc2 | 45.96 | 1.20e-11 | 1.89e-11 |
background | 45.74 | 1.35e-11 | 1.89e-11 |
filtered_pc1 | 45.74 | 1.35e-11 | 1.89e-11 |
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv, rna.global_vars.pc1, ax=ax)
ax.set_ylabel('mRNA PC1')
ax.set_xlabel('')
prettify_ax(ax)
s = screen_feature(hpv, kruskal_pandas, rna.features)
print s.head()
H p q binary GBX1 133.13 8.48e-31 3.90e-27 C1orf14 130.12 3.85e-30 8.86e-27 ZPBP2 120.62 4.63e-28 7.10e-25 UGT2B28 108.39 2.20e-25 2.54e-22 C11orf85 99.98 1.54e-23 1.42e-20
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv, rna.features.ix['real'].ix['PCNA'], ax=ax)
ax.set_ylabel('PCNA mRNA Exp.')
prettify_ax(ax)
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv, rna.features.ix['real'].ix['CDKN2A'], ax=ax)
ax.set_ylabel('CDKN2A mRNA Exp.')
prettify_ax(ax)
s = screen_feature(hpv, kruskal_pandas, rna.pathways)
print s.head()
H p q REACTOME_S_PHASE 93.89 3.34e-22 9.35e-21 REACTOME_BASE_EXCISION_REPAIR 77.50 1.33e-18 1.86e-17 BIOCARTA_PLATELETAPP_PATHWAY 74.38 6.44e-18 6.01e-17 REACTOME_CELL_CYCLE_CHECKPOINTS 66.62 3.29e-16 1.94e-15 BIOCARTA_SRCRPTP_PATHWAY 66.52 3.46e-16 1.94e-15
violin_plot_pandas(hpv, rna.features.ix['pathways'].ix['REACTOME_S_PHASE'])
fig, axs = subplots(1,3, figsize=(9,3))
violin_plot_pandas(hpv, rna.global_vars.pc1, ax=axs[0])
axs[0].set_ylabel('mRNA PC1')
violin_plot_pandas(hpv, rna.features.ix['real'].ix['PCNA'], ax=axs[1])
axs[1].set_ylabel('PCNA mRNA Exp.')
violin_plot_pandas(hpv, rna.features.ix['real'].ix['CDKN2A'], ax=axs[2])
axs[2].set_ylabel('CDKN2A mRNA Exp.')
for ax in axs:
ax.set_xlabel('')
prettify_ax(ax)
fig.tight_layout(w_pad=3)
s = screen_feature(hpv, kruskal_pandas, mirna.global_vars.T)
s.head()
H | p | q | |
---|---|---|---|
pc2 | 49.09 | 2.44e-12 | 1.22e-11 |
filtered_pc1 | 39.06 | 4.11e-10 | 1.03e-09 |
filtered_pc2 | 21.17 | 4.20e-06 | 7.00e-06 |
pc1 | 15.40 | 8.70e-05 | 1.09e-04 |
background | 3.63 | 5.68e-02 | 5.68e-02 |
screen_feature(hpv, kruskal_pandas, mirna.features).head()
H | p | q | ||
---|---|---|---|---|
real | hsa-mir-9-2 | 86.88 | 1.16e-20 | 1.58e-18 |
hsa-mir-9-1 | 86.82 | 1.19e-20 | 1.58e-18 | |
hsa-mir-15b | 73.16 | 1.19e-17 | 1.06e-15 | |
hsa-mir-25 | 57.93 | 2.72e-14 | 1.61e-12 | |
hsa-mir-299 | 57.71 | 3.04e-14 | 1.61e-12 |
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv, mirna.global_vars.pc1, ax=ax)
ax.set_ylabel('miRNA PC1')
prettify_ax(ax)
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv, mirna.features.ix['real'].ix['hsa-mir-9-2'], ax=ax)
ax.set_ylabel('mir-9-2 Exp.')
prettify_ax(ax)
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv, mirna.features.ix['real'].ix['hsa-mir-15b'], ax=ax)
ax.set_ylabel('mir-15b Exp.')
prettify_ax(ax)
fig, axs = subplots(1,3, figsize=(9,3))
violin_plot_pandas(hpv, mirna.global_vars.pc1, ax=axs[0])
axs[0].set_ylabel('miRNA PC1')
violin_plot_pandas(hpv, mirna.features.ix['real'].ix['hsa-mir-9-2'], ax=axs[1])
axs[1].set_ylabel('mir-9-2 Exp.')
violin_plot_pandas(hpv, mirna.features.ix['real'].ix['hsa-mir-15b'], ax=axs[2])
axs[2].set_ylabel('mir-15b Exp.')
for ax in axs:
ax.set_xlabel('')
prettify_ax(ax)
fig.tight_layout(w_pad=3)
s = screen_feature(hpv, kruskal_pandas, rppa.df.xs('01',1,1))
s.head()
H | p | q | ||
---|---|---|---|---|
CDKN2A | p16_INK4a-R-C | 28.25 | 1.07e-07 | 1.70e-05 |
RB1 | Rb_pS807_S811-R-V | 19.08 | 1.25e-05 | 1.00e-03 |
CAV1 | Caveolin-1-R-V | 17.39 | 3.04e-05 | 1.57e-03 |
SYK | Syk-M-V | 16.90 | 3.93e-05 | 1.57e-03 |
CLDN7 | Claudin-7-R-V | 16.06 | 6.13e-05 | 1.96e-03 |
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv, rppa.df.ix['RB1'].ix[0][:,'01'], ax=ax)
ax.set_ylabel('RB1 (Rb-M-V) Antibody')
prettify_ax(ax)
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv, rppa.df.ix['CAV1'].ix[0][:,'01'], ax=ax)
ax.set_ylabel('Caveolin-1-R-V Antibody')
prettify_ax(ax)
fig, ax = subplots(figsize=(3,3))
violin_plot_pandas(hpv, rppa.df.ix['NOTCH1'].ix[0][:,'01'], ax=ax)
ax.set_ylabel('NOTCH1-R-V Antibody')
prettify_ax(ax)
fig, axs = subplots(1,3, figsize=(9,3))
violin_plot_pandas(hpv, rppa.df.ix['RB1'].ix[0][:,'01'], ax=axs[0])
axs[0].set_ylabel('RB1 (Rb-M-V) Antibody')
violin_plot_pandas(hpv, rppa.df.ix['CAV1'].ix[0][:,'01'], ax=axs[1])
axs[1].set_ylabel('Caveolin-1-R-V Antibody')
violin_plot_pandas(hpv, rppa.df.ix['NOTCH1'].ix[0][:,'01'], ax=axs[2])
axs[2].set_ylabel('NOTCH1-R-V Antibody')
for ax in axs:
ax.set_xlabel('')
prettify_ax(ax)
fig.tight_layout(w_pad=3)
s = screen_feature(hpv, chi2_cont_test, mut.features)
s.head(6)
chi2 | p | dof | q | |
---|---|---|---|---|
TP53 | 96.24 | 1.02e-22 | 1 | 9.91e-20 |
KEGG_CELL_CYCLE | 42.25 | 8.03e-11 | 1 | 3.92e-08 |
KEGG_MAPK_SIGNALING_PATHWAY | 33.43 | 7.37e-09 | 1 | 2.40e-06 |
CDKN2A | 12.06 | 5.16e-04 | 1 | 1.26e-01 |
FAT1 | 10.41 | 1.25e-03 | 1 | 2.45e-01 |
ZNF750 | 8.97 | 2.74e-03 | 1 | 3.83e-01 |
hpv.name = 'HPV+'
venn_pandas(mut.features.ix['TP53'], hpv == 'HPV+');
s = screen_feature(hpv, chi2_cont_test, cn.features)
s.head(10)
chi2 | p | dof | q | |||
---|---|---|---|---|---|---|
Deletion | 9p21.3 | Lesion | 60.34 | 7.99e-15 | 1 | 5.91e-13 |
3p25.3 | Lesion | 54.32 | 1.70e-13 | 1 | 4.28e-12 | |
18q23 | Lesion | 54.28 | 1.73e-13 | 1 | 4.28e-12 | |
18q21.2 | Lesion | 52.40 | 4.52e-13 | 1 | 8.37e-12 | |
8p23.2 | Lesion | 42.70 | 6.37e-11 | 1 | 9.43e-10 | |
3p12.2 | Lesion | 38.99 | 4.25e-10 | 1 | 5.25e-09 | |
9p23 | Lesion | 37.73 | 8.15e-10 | 1 | 8.61e-09 | |
Amplification | 7p11.2 | Lesion | 38.59 | 4.18e-09 | 2 | 3.86e-08 |
Deletion | 11q23.1 | Lesion | 30.88 | 2.74e-08 | 1 | 2.25e-07 |
3p14.2 | Lesion | 26.66 | 2.42e-07 | 1 | 1.79e-06 |
hpv.name = ''
p16_del = cn.df.ix[('Deletion', '9p21.3', 'Lesion')]<0
p16_del.name = 'deletion'
p16_mut = mut.df.ix['CDKN2A']>0
p16_mut.name = 'mutation'
cp = combine(p16_del, p16_mut).replace('both', 'mutation + deletion')
cp.name = 'p16 Status'
pd.crosstab(cp, hpv).plot(kind='bar', rot=15);
fig, axs = subplots(1,2, figsize=(8,3))
hpv.name = 'HPV'
rb1 = rppa.df.ix['RB1'].ix[0][:,'01']
f = combine(hpv=='HPV+', mut.df.ix['TP53']>0).map({'TP53':'TP53mut\nHPV-','neither': 'TP53wt\nHPV-',
'HPV': 'TP53wt\nHPV+'})
f = f.dropna()
f.name = 'TP53 Status'
violin_plot_pandas(f, rb1, ax=axs[0]);
violin_plot_pandas(f, rna.df.ix['TP53'][:,'01'], ax=axs[1]);
axs[1].set_ylabel('TP53 Expression')
for ax in axs:
ax.set_title('')
prettify_ax(ax)