# NotebookImport is imported only for its side effect and must come first --
# presumably it installs the hook that makes the `Imports` notebook importable
# (TODO confirm).
import NotebookImport
from Imports import *

# ---------------------------------------------------------------------------
# Data sets.  `cancer`, `hpv`, `surv`, `age`, `clinical`, `colors`, `FIGDIR`
# and the plotting / statistics helpers all arrive through the star-import.
# ---------------------------------------------------------------------------
rna = cancer.load_data('mRNASeq')
mirna = cancer.load_data('miRNASeq')
rppa = cancer.load_data('RPPA')

# Recode the boolean HPV flag into display labels used by every plot below.
hpv = hpv.map({True: 'HPV+', False: 'HPV-'})

# ---------------------------------------------------------------------------
# Supplementary panel A: survival curves split by HPV status.
# ---------------------------------------------------------------------------
fig, ax = subplots(figsize=(5, 3))
draw_survival_curve(hpv, surv, ax=ax)
ax.legend(title=False, frameon=False, loc='lower left')
prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'hpv_sup_a.pdf', transparent=True)

survival_stat_plot(get_surv_fit(surv, hpv))

# ---------------------------------------------------------------------------
# Supplementary panel B: patient counts per tumor subdivision, by HPV status.
# ---------------------------------------------------------------------------
fig, ax = subplots(figsize=(3, 3))
ct = pd.crosstab(hpv, clinical.processed.tumor_subdivision)
ct.T.plot(kind='bar', ax=ax, rot=15)
ax.legend(title=False, frameon=False)
ax.set_ylabel('# of Patients')
prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'hpv_sup_b.pdf', transparent=True)

# ---------------------------------------------------------------------------
# Supplementary panel C: survival by age stratum.
# Summing the two indicator vectors yields 0 (< 75), 1 (75-84) or 2 (>= 85).
# ---------------------------------------------------------------------------
age_strata = 1. * (age >= 85) + 1. * (age >= 75)

fig, ax = subplots(figsize=(5, 3))
draw_survival_curve(
    age_strata,
    surv,
    ax=ax,
    colors=[colors[2], colors[4], colors[0]],
)
ax.legend(title=False, frameon=False, loc='lower right')
prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'hpv_sup_c.pdf', transparent=True)

survival_stat_plot(get_surv_fit(surv, age_strata))

# ---------------------------------------------------------------------------
# Supplementary panel D: age distribution.
# No `ax` is handed to hist(); the call relies on the histogram landing on the
# current axes, i.e. the one created on the line above.
# ---------------------------------------------------------------------------
fig, ax = subplots(figsize=(3, 3))
age.hist(color='grey')
prettify_ax(ax)
ax.set_ylabel('# of Patients')
ax.set_xlabel('Age in Years')
fig.tight_layout()
fig.savefig(FIGDIR + 'hpv_sup_d.pdf', transparent=True)

# ---------------------------------------------------------------------------
# HPV status against gender, smoking history (pack years) and age.
# ---------------------------------------------------------------------------
fisher_exact_test(hpv, clinical.clinical.gender)

gender_by_hpv = pd.crosstab(hpv, clinical.clinical.gender)
gender_by_hpv.T.plot(kind='bar')

violin_plot_pandas(hpv, clinical.processed.pack_years)
plt.ylim(0, 150)

fig, ax = subplots(figsize=(3, 3))
violin_plot_pandas(hpv, age, ax=ax)
prettify_ax(ax)

# ---------------------------------------------------------------------------
# External HPV calls: molecular call vs. clinical interpretation.
# ---------------------------------------------------------------------------
hpv_all = pd.read_csv(
    '../Extra_Data/hpv_summary_3_20_13_distribute.csv',
    index_col=0,
)
hpv_s = hpv_all.Molecular_HPV.map({0: 'HPV-', 1: 'HPV+'})
hpv_s.name = 'HPV'
# Bare expression kept from the notebook: it only displays in an interactive
# session and has no effect when run as a script.
pd.crosstab(hpv_all.Clinical_HPV_Interpretation, hpv_s).T

# Auxiliary annotation table; file row 1 is skipped and '[Not Available]'
# entries are read as missing values.
hpv_new = pd.read_table(
    '../Extra_Data/nationwidechildrens.org_auxiliary_hnsc.txt',
    skiprows=[1],
    index_col=0,
    na_values=['[Not Available]'],
)
hpv_type2 = hpv_new['hpv_call_1']
# NOTE: this section relies on the pandas `.ix` indexer, which was deprecated
# in pandas 0.20 and removed in 1.0.  The selections are left untouched because
# the index layout of the underlying frames is not visible from this script;
# run it against a pandas version that still provides `.ix`.

# Bare expressions such as `s.head()` are kept from the notebook: they only
# display in an interactive session and have no effect when run as a script.
pd.crosstab(hpv_new['hpv_status'], mut.features.ix['TP53'])

# ---------------------------------------------------------------------------
# HPV type per patient.
# For molecular-positive cases, take the last two characters of the
# sequencing source and prefix them with 'HPV'; fill the remaining patients
# from the auxiliary `hpv_call_1` column, then drop whoever is still missing.
# ---------------------------------------------------------------------------
hpv_type = hpv_all[hpv_all.Molecular_HPV == 1].maxSequencingSource
hpv_type = hpv_type.map(lambda s: s[-2:])
hpv_type = 'HPV' + hpv_type
hpv_type = hpv_type.combine_first(hpv_type2).dropna()
hpv_type.name = 'hpv_type'

survival_and_stats(hpv_type, surv, figsize=(6, 4))

# ---------------------------------------------------------------------------
# mRNA expression vs. HPV status.
# ---------------------------------------------------------------------------
s = screen_feature(hpv, kruskal_pandas, rna.global_vars.T)
s.head()

fig, ax = subplots(figsize=(3, 3))
violin_plot_pandas(hpv, rna.global_vars.pc1, ax=ax)
ax.set_ylabel('mRNA PC1')
ax.set_xlabel('')
prettify_ax(ax)

s = screen_feature(hpv, kruskal_pandas, rna.features)
# print(...) with a single argument behaves identically as a Python 2 print
# statement and as the Python 3 print function.
print(s.head())

# Selected once and reused by the single-panel and the combined figures.
pcna_mrna = rna.features.ix['real'].ix['PCNA']
cdkn2a_mrna = rna.features.ix['real'].ix['CDKN2A']

fig, ax = subplots(figsize=(3, 3))
violin_plot_pandas(hpv, pcna_mrna, ax=ax)
ax.set_ylabel('PCNA mRNA Exp.')
prettify_ax(ax)

fig, ax = subplots(figsize=(3, 3))
violin_plot_pandas(hpv, cdkn2a_mrna, ax=ax)
ax.set_ylabel('CDKN2A mRNA Exp.')
prettify_ax(ax)

s = screen_feature(hpv, kruskal_pandas, rna.pathways)
print(s.head())

violin_plot_pandas(hpv, rna.features.ix['pathways'].ix['REACTOME_S_PHASE'])

# Combined three-panel mRNA figure.
fig, axs = subplots(1, 3, figsize=(9, 3))
violin_plot_pandas(hpv, rna.global_vars.pc1, ax=axs[0])
axs[0].set_ylabel('mRNA PC1')
violin_plot_pandas(hpv, pcna_mrna, ax=axs[1])
axs[1].set_ylabel('PCNA mRNA Exp.')
violin_plot_pandas(hpv, cdkn2a_mrna, ax=axs[2])
axs[2].set_ylabel('CDKN2A mRNA Exp.')
for ax in axs:
    ax.set_xlabel('')
    prettify_ax(ax)
fig.tight_layout(w_pad=3)

# ---------------------------------------------------------------------------
# miRNA expression vs. HPV status.
# ---------------------------------------------------------------------------
s = screen_feature(hpv, kruskal_pandas, mirna.global_vars.T)
s.head()

screen_feature(hpv, kruskal_pandas, mirna.features).head()

mir_9_2 = mirna.features.ix['real'].ix['hsa-mir-9-2']
mir_15b = mirna.features.ix['real'].ix['hsa-mir-15b']

fig, ax = subplots(figsize=(3, 3))
violin_plot_pandas(hpv, mirna.global_vars.pc1, ax=ax)
ax.set_ylabel('miRNA PC1')
prettify_ax(ax)

fig, ax = subplots(figsize=(3, 3))
violin_plot_pandas(hpv, mir_9_2, ax=ax)
ax.set_ylabel('mir-9-2 Exp.')
prettify_ax(ax)

fig, ax = subplots(figsize=(3, 3))
violin_plot_pandas(hpv, mir_15b, ax=ax)
ax.set_ylabel('mir-15b Exp.')
prettify_ax(ax)

# Combined three-panel miRNA figure.
fig, axs = subplots(1, 3, figsize=(9, 3))
violin_plot_pandas(hpv, mirna.global_vars.pc1, ax=axs[0])
axs[0].set_ylabel('miRNA PC1')
violin_plot_pandas(hpv, mir_9_2, ax=axs[1])
axs[1].set_ylabel('mir-9-2 Exp.')
violin_plot_pandas(hpv, mir_15b, ax=axs[2])
axs[2].set_ylabel('mir-15b Exp.')
for ax in axs:
    ax.set_xlabel('')
    prettify_ax(ax)
fig.tight_layout(w_pad=3)

# ---------------------------------------------------------------------------
# Protein (RPPA) levels vs. HPV status.
# The '01' key selects one sample-type level of the column index -- presumably
# the TCGA primary-tumor code (TODO confirm).
# ---------------------------------------------------------------------------
s = screen_feature(hpv, kruskal_pandas, rppa.df.xs('01', 1, 1))
s.head()

# For each gene take the first row under that label (`.ix[0]`), restricted to
# the '01' samples.  `rb1` is reused by the TP53 figure at the end.
rb1 = rppa.df.ix['RB1'].ix[0][:, '01']
cav1 = rppa.df.ix['CAV1'].ix[0][:, '01']
notch1 = rppa.df.ix['NOTCH1'].ix[0][:, '01']

fig, ax = subplots(figsize=(3, 3))
violin_plot_pandas(hpv, rb1, ax=ax)
ax.set_ylabel('RB1 (Rb-M-V) Antibody')
prettify_ax(ax)

fig, ax = subplots(figsize=(3, 3))
violin_plot_pandas(hpv, cav1, ax=ax)
ax.set_ylabel('Caveolin-1-R-V Antibody')
prettify_ax(ax)

fig, ax = subplots(figsize=(3, 3))
violin_plot_pandas(hpv, notch1, ax=ax)
ax.set_ylabel('NOTCH1-R-V Antibody')
prettify_ax(ax)

# Combined three-panel RPPA figure.
fig, axs = subplots(1, 3, figsize=(9, 3))
violin_plot_pandas(hpv, rb1, ax=axs[0])
axs[0].set_ylabel('RB1 (Rb-M-V) Antibody')
violin_plot_pandas(hpv, cav1, ax=axs[1])
axs[1].set_ylabel('Caveolin-1-R-V Antibody')
violin_plot_pandas(hpv, notch1, ax=axs[2])
axs[2].set_ylabel('NOTCH1-R-V Antibody')
for ax in axs:
    ax.set_xlabel('')
    prettify_ax(ax)
fig.tight_layout(w_pad=3)

# ---------------------------------------------------------------------------
# Mutation and copy-number associations with HPV status.
# `hpv.name` is reassigned several times below; the name appears to serve as
# the label picked up by the plotting / combine helpers, so the order of these
# assignments relative to the calls matters.
# ---------------------------------------------------------------------------
s = screen_feature(hpv, chi2_cont_test, mut.features)
s.head(6)

hpv.name = 'HPV+'
venn_pandas(mut.features.ix['TP53'], hpv == 'HPV+')

s = screen_feature(hpv, chi2_cont_test, cn.features)
s.head(10)

hpv.name = ''
p16_del = cn.df.ix[('Deletion', '9p21.3', 'Lesion')] < 0
p16_del.name = 'deletion'
p16_mut = mut.df.ix['CDKN2A'] > 0
p16_mut.name = 'mutation'
cp = combine(p16_del, p16_mut).replace('both', 'mutation + deletion')
cp.name = 'p16 Status'
pd.crosstab(cp, hpv).plot(kind='bar', rot=15)

# ---------------------------------------------------------------------------
# RB1 protein and TP53 expression across combined TP53 / HPV groups.
# ---------------------------------------------------------------------------
fig, axs = subplots(1, 2, figsize=(8, 3))
hpv.name = 'HPV'
# Labels missing from the mapping (e.g. 'both', which combine() emits, as the
# replace() above shows) become NaN and are removed by dropna().
f = combine(hpv == 'HPV+', mut.df.ix['TP53'] > 0).map({
    'TP53': 'TP53mut\nHPV-',
    'neither': 'TP53wt\nHPV-',
    'HPV': 'TP53wt\nHPV+',
})
f = f.dropna()
f.name = 'TP53 Status'
violin_plot_pandas(f, rb1, ax=axs[0])
violin_plot_pandas(f, rna.df.ix['TP53'][:, '01'], ax=axs[1])
axs[1].set_ylabel('TP53 Expression')
for ax in axs:
    ax.set_title('')
    prettify_ax(ax)