import NotebookImport from Imports import * pearson_pandas(rna.df.ix['TP53'], rna.df.ix['FHIT']) ax.annotate? rcParams['font.size'] = 15 def linear_regression(a,b): a,b = match_series(a,b) res = stats.linregress(a,b) return pd.Series({'slope':res[0], 'intercept':res[1], 'r-value':res[2], 'p-value':res[3], 'stderr':res[4]}) def line_me(slope, intercept, start=0, end=100, ax=None): if ax is None: ax = plt.gca() line = lambda x: slope*x + intercept ax.plot([start, end], [line(start), line(end)], lw=3, alpha=.8, color=colors[0], ls='--') def remove_leading_zero(f, places=2): f = round(f, places) if abs(f - 1) < .01: return '1.0' elif abs(f) < .01: return '' elif abs(f) > 1: f = str(f) elif f > 0: f = str(f)[1:] else: f = '-' + str(f)[2:] return f def regression_string(reg): r_value = round(reg['r-value'],2) r_value = str(r_value)[1:] if r_value != 1 else '1.0' r_value = '$r={}$'.format(r_value) slope = remove_leading_zero(reg['slope']) intercept = remove_leading_zero(reg['intercept']) if (len(intercept) != 0) and (intercept[0] != '-'): intercept = '+' + intercept line = '$y={}x {}$'.format(slope, intercept) return '\n'.join([r_value, line]) def plot_regression(x, y, ax=None): x,y = match_series(x,y) fig, ax = init_ax(ax, figsize=(5,5)) series_scatter(x,y, ax=ax, ann=None) reg = linear_regression(x, y) ax.annotate(regression_string(reg), (.5,.05), xycoords='axes fraction', size=18) line_me(reg['slope'], reg['intercept'], start=x.min(), end=x.max(), ax=ax) line_me(1, 0, start=x.min(), end=x.max(), ax=ax) xy = x.append(y) ax.set_xbound(xy.min() - 3, xy.max() + 3) ax.set_ybound(xy.min() - 3, xy.max() + 3) prettify_ax(ax) reg = linear_regression(rna.df.ix['TP53'][:,'01'], rna.df.ix['FHIT'][:,'01']) reg FH.read_rnaSeq('/ce) fig, ax = subplots(figsize=(6,4)) series_scatter(rna.df.ix['TP53'][:,'01'], rna.df.ix['FHIT'][:,'01'], ax=ax, ann=None) ax.annotate('$r=.28$', (11.5, -3), size=20) line_me(reg['slope'], reg['intercept'], 5.5, 13, ax) prettify_ax(ax) ax.set_xlabel('Log TP53 mRNA Expression') ax.set_ylabel('Log FHIT mRNA Expression') fig.tight_layout() fig.savefig('/cellar/users/agross/figures/TP53_FHIT.png', dpi=300) from Figures.MemoPlot import pathway_plot p = 'REACTOME_SOS_MEDIATED_SIGNALLING' fig = plt.figure(figsize=(6,3.7)) ax = plt.subplot2grid((1,4), (0,0), colspan=3) df = pathway_plot(mut.df.ix[run.gene_sets[p], keepers_o]>0, bar=None, ax=ax) ax = plt.subplot2grid((1,4), (0,3), colspan=1) dd = mut.df.ix[run.gene_sets[p], keepers_o] dd = dd.ix[dd.sum(1).dropna().order().index[::-1]] vec = dd.apply(lambda s: s.idxmax() if s.max() > 0 else 'WT') mm = pd.concat([vec[vec == 'WT'], vec[vec!='WT']], keys=['WT','Mut.']).unstack() mm = mm.apply(pd.value_counts, 1).fillna(0) mm = mm.ix[:,vec.value_counts().index] mm.plot(kind='bar', stacked=True, color=['grey'] + ['green'] * 10, ax=ax, lw=.5, width=.75) ax.get_legend().set_visible(False) ax.set_ylabel('Num. Patients') prettify_ax(ax) fig.tight_layout() fig.savefig(FIGDIR + 'fig1bc.pdf', transparent=True) p = 'REACTOME_SOS_MEDIATED_SIGNALLING' df = mut.df.ix[run.gene_sets[p], keepers_o].dropna().T > 0 df[p] = df.sum(1) > 0 df = df.ix[df.sum(1).order().index[::-1]] df.to_csv(FIGDIR + 'fig1bc.csv') f = rna.pathways.ix['SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES'].ix[keepers_o] f.groupby(binarize(f)).min() p = 'SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES' fig, axs = subplots(1,2, figsize=(6,3.7)) rna.loadings[p].order().plot(kind='barh', ax=axs[0], color=colors[0], width=.75) axs[0].set_xticks([-.4, -.2, 0, .2, .4]) axs[0].set_xlabel('Gene Loading') axs[0].set_ylabel('PIP3 Signaling') h = rna.pathways.ix[p].ix[keepers_o].hist(ax=axs[1], bins=15, color=colors[1]) [b.set_facecolor('grey') for b in h.patches[:-7]] axs[1].set_ylabel('Num. Patients') axs[1].set_xlabel('Consensus Pathway Value') for ax in axs: prettify_ax(ax) fig.tight_layout() fig.savefig(FIGDIR + 'fig1de.pdf', transparent=True) rna.loadings['SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES'].order().to_csv(FIGDIR + 'fig1d.csv') f = rna.pathways.ix['SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES'].ix[keepers_o] df = pd.concat([f,binarize(f)], keys=['Consensus Pathway Value','Pathway Induction Indicator'], axis=1).sort('Consensus Pathway Value', ascending=False) df.to_csv(FIGDIR + 'fig1e.csv') p53_mut = mut.features.ix['TP53'].ix[keepers_o].dropna() del_3p = cn.features.ix['Deletion'].ix['3p14.2'].ix[0].ix[keepers_o].dropna() combo = combine(p53_mut==1, del_3p==-1) combo = combo.map({'Lesion':'b', 'neither':'a', 'TP53':'c', 'both':'d'}) two_hit = combo=='d' get_surv_fit_lr(clinical.survival.survival_5y, two_hit) p53_mut = mut.features.ix['TP53'].ix[keepers_o] del_3p = cn.features.ix['Deletion'].ix['3p14.2'].ix[0].ix[keepers_o] mir_3170 = mirna.features.ix['binary'].ix['hsa-mir-3170'].ix[keepers_o] #ras = binarize(rna.pathways.ix['REACTOME_SOS_MEDIATED_SIGNALLING'].ix[keepers_o]) pip3 = binarize(rna.pathways.ix['SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES'].ix[keepers_o]) smoker = clinical.binary_df.ix['recent_smoker'].ix[keepers_o].dropna() fig, axs = subplots(1,5, figsize=(6.5, 2.5), sharey=True) var_list = [p53_mut.map({1: 'mutated',0:'wild-type'}), del_3p.map({-1: 'deleted', 0: 'wild-type'}), mir_3170.map({0: 'absent', 1: 'present'}), pip3.map({0:'low', 1:'high'}), smoker.map({1: 'yes', 0 : 'no'}) ] labels = ['TP53','3p14.2','mir-3170','PIP3 Pathway\nExpression','Recent\nSmoking'] color_list = array(colors) n_rows = len(var_list) for i,var in enumerate(var_list): ax = axs[i] surv_fit = get_surv_fit(surv, var) surv_fit = surv_fit.sort([('5y Survival', 'Surv')]) t = surv_fit['5y Survival'] b = (t['Surv']).plot(kind='bar', ax=ax, color=colors[i], yerr=[t.Surv-t.Lower, t.Upper-t.Surv], ecolor='black', alpha=.7, width=.75) bar_labels = ['{}'.format(j) for j,v in surv_fit['Stats']['# Patients'].iteritems()] counts = [ax.annotate('{}'.format(int(v)), (j, .05), ha='center') for j,v in enumerate(surv_fit['Stats']['# Patients'])] ax.set_xticklabels(bar_labels, rotation=340) ax.set_xlabel(labels[i]) prettify_ax(ax) axs[0].set_ylabel('5 Year Survival') axs[0].set_yticks([0, .5, 1.]) fig.tight_layout(w_pad=.2) fig.savefig(FIGDIR + 'fig2a.pdf', transparent=True) labels = ['TP53','3p14.2','mir-3170','PIP3 Pathway Expression','Recent Smoking', 'Days to Death/Censoring', 'Death Indicator'] df = pd.concat(var_list + [surv[:,'days'] , surv[:,'event']], keys=labels, axis=1).dropna() df.to_csv(FIGDIR + 'fig2a.csv') get_surv_fit_lr(surv, two_hit) fig, ax = subplots(figsize=(5,2.75)) c = {'a':colors_th[3], 'b': colors_th[2], 'c': colors_th[1], 'd': colors_th[0]} draw_survival_curve(combo, surv, colors=c, ax=ax) ax.legend().set_visible(False) prettify_ax(ax) fig.tight_layout() plt.savefig(FIGDIR + 'fig2e.pdf', transparent=True) survival_stat_plot(get_surv_fit(surv, combo).ix[['d','c','b','a']], colors=colors_th) get_cox_ph(surv, combo=='d', print_desc=True); exp(1.15), exp(1.15) - exp(1.15 - .28) cc = combo.map({'a': 'Neither', 'b':'3p only', 'c':'TP53 only', 'd': 'TP53 and 3p'}) df = pd.concat([p53_mut, del_3p, cc, surv[:,'days'], surv[:,'event']], keys=['TP53', '3p', 'Combination', 'Days to Death/Censoring', 'Death Indicator'], axis=1).dropna() df.to_csv(FIGDIR + 'fig2d.csv') from matplotlib_venn import venn2 fig, axs = subplots(1,2, figsize=(4,2), sharey=True) p53_mut.map({1:'Mut.',0:'WT'}).value_counts().plot(kind='bar', ax=axs[0], rot=0, alpha=.7, width=.75) axs[0].set_xlabel('TP53') axs[0].set_ylabel('# of Patients') del_3p.map({-1:'+/-',0:'+/+'}).value_counts().plot(kind='bar', ax=axs[1], rot=0, color=colors[1], alpha=.7, width=.75) axs[1].set_xlabel('3p Arm') for ax in axs: prettify_ax(ax) fig.tight_layout() fig.savefig(FIGDIR + 'p53_3p_freq.pdf') fig, ax = subplots(figsize=(2.,2.)) a,b = p53_mut == 1, del_3p==-1 b.name = ' ' venn_pandas(a,b, colors=['white','white','white'], alpha=1) venn_pandas(a,b, colors=np.array(colors_th)[[2,1,0]], alpha=.8) r = plt.Rectangle((-.65,-.6), 1.28, 1.2, fill=True, alpha=.7, zorder=-10, facecolor=colors_th[3], edgecolor='black', lw=3) ax.add_artist(r); plt.savefig(FIGDIR + 'fig2d.pdf', transparent=True) mir548k = mirna.features.ix['binary'].ix['hsa-mir-548k'] muc5b = mut.features.ix['MUC5B'] fig, axs = subplots(1,2, figsize=(7,3)) st_1 = combine(mir548k, two_hit) st_1 = st_1.map({'hsa-mir-548k':1, 'neither':1, 'TP53':2, 'both': 3}) draw_survival_curve(st_1, surv, colors={1:colors_st[2], 2:colors_st[1], 3:colors_st[0]}, ax=axs[0]) st_2 = combine(muc5b, two_hit) st_2 = st_2.map({'MUC5B':1, 'neither':1, 'TP53':2, 'both': 3}) draw_survival_curve(st_2, surv, colors={1:colors_st[2], 2:colors_st[1], 3:colors_st[0]}, ax=axs[1]) for ax in axs: prettify_ax(ax) ax.get_legend().set_visible(False) fig.tight_layout() fig.savefig(FIGDIR + 'fig4ab.pdf', transparent=True) cc = combo.map({'a': 'Neither', 'b':'3p only', 'c':'TP53 only', 'd': 'TP53 and 3p'}) df = pd.concat([p53_mut, del_3p, cc, mir548k, muc5b, surv[:,'days'], surv[:,'event']], keys=['TP53', '3p', 'Combination', 'mir548k', 'muc5b', 'Days to Death/Censoring', 'Death Indicator'], axis=1).dropna() df.to_csv(FIGDIR + 'fig3ab.csv') survival_stat_plot(get_surv_fit(surv, st_1), colors=[colors_st[2], colors_st[1], colors_st[0]]) survival_stat_plot(get_surv_fit(surv, st_2), colors=[colors_st[2], colors_st[1], colors_st[0]]) smoker = clinical.clinical.tobaccosmokinghistory pts = ti(smoker.dropna() != 'lifelong non-smoker').intersection(ti(clinical.clinical.age < 75)) pts = pts.intersection(keepers_o) fig, axs = subplots(1,3, figsize=(10,3)) c = {'a':colors_th[3], 'b': colors_th[2], 'c': colors_th[1], 'd': colors_th[0]} draw_survival_curve(combo.ix[pts], surv, colors=c, ax=axs[0]) st_1 = combine(mir548k, two_hit).ix[pts] st_1 = st_1.map({'hsa-mir-548k':1, 'neither':1, 'TP53':2, 'both': 3}) draw_survival_curve(st_1, surv, colors={1:colors_st[2], 2:colors_st[1], 3:colors_st[0]}, ax=axs[1]) st_2 = combine(muc5b, two_hit).ix[pts] st_2 = st_2.map({'MUC5B':1, 'neither':1, 'TP53':2, 'both': 3}) draw_survival_curve(st_2, surv, colors={1:colors_st[2], 2:colors_st[1], 3:colors_st[0]}, ax=axs[2]) for ax in axs: prettify_ax(ax) ax.get_legend().set_visible(False) fig.tight_layout() fig.savefig(FIGDIR + 'supp_fig4ab.pdf', transparent=True) survival_stat_plot(get_surv_fit(surv, combo.ix[pts]), colors=colors_th[::-1]) survival_stat_plot(get_surv_fit(surv, st_1), colors=[colors_st[2], colors_st[1], colors_st[0]]) survival_stat_plot(get_surv_fit(surv, st_2), colors=[colors_st[2], colors_st[1], colors_st[0]]) patients = true_index(two_hit) coords = pd.read_csv('../Extra_Data/gene_coords.csv', index_col=0) coords['start'] = coords['start'].astype(float) cn_by_gene = FH.get_gistic_gene_matrix(run.data_path, cancer.name) cn_by_gene = cn_by_gene.reset_index(level=[0,1]) cn_by_gene = cn_by_gene.join(coords).set_index(['chrom','start', 'Cytoband','Locus ID'], append=True) cn_by_gene = cn_by_gene.ix[:,patients].dropna(axis=1) ch_3 = cn_by_gene.xs('11', level='chrom') p_arm = pd.Series(ch_3.index.get_level_values('Cytoband').map(lambda s: '11q13' in s), ch_3.index) p_arm = ch_3[p_arm].sortlevel('start') p_arm = p_arm[1:] test = lambda s: get_cox_ph_ms(surv, s, [age, old, clinical.processed.year], interactions=None) Calculate survival associations for genes in this region. df = rna.df.xs('01', 1, 1).ix[p_arm.index.get_level_values(0)].dropna() df = df.ix[:,patients].dropna(1) df = df[(df == -3).sum(1) != len(df.T)] exp = df.apply(test, axis=1) exp = exp.sort_index() exp.index = p_arm.sort_index().ix[exp.index].index exp2 = -exp.LR.map(log10) mir = pd.read_table('ftp://mirbase.smith.man.ac.uk/pub/mirbase/12.0/genomes/hsa.gff', skiprows=7, header=None) mir.index = mir.apply(lambda s: s[8].split('"')[-2], 1) mir = mir[[0,3]] mir.columns = ['Chromosome', 'start'] r = p_arm.index.get_level_values('start')[[0,-1]] p_range = lambda s: (s > r[0]) * (s < r[1]) loc = mir[mir.Chromosome == '11'].sort('start') loc = loc[loc['start'].apply(p_range)] df = mirna.df.ix[loc.index].dropna().xs('01',1,1) df = df.ix[:,patients].dropna(1) df = df[(df == -3).sum(1) != len(df.T)] mir_surv = df.apply(test, axis=1) loc = loc.set_index(['Chromosome', 'start'], drop=False, append=True) mir_surv.index = loc.index mir2 = -mir_surv.LR.map(log10) fig = plt.figure(figsize=(7,4)) ax1 = plt.subplot2grid((7,1), (0,0), rowspan=3, colspan=1) ax2 = plt.subplot2grid((7,1), (3,0), rowspan=3, colspan=1) ax3 = plt.subplot2grid((7,1), (6,0), rowspan=1, colspan=1) bands = p_arm.reset_index().groupby('Cytoband') bands = bands.first().start.order() starts = p_arm.index.get_level_values('start') for i,(idx,v) in enumerate(bands.iteritems()): try: r = plt.Rectangle((v,0), bands.iloc[i+1]-v, 1, lw=2, alpha=.5, fc=colors[i%2]) except: r = plt.Rectangle((v,0), starts[-1] - v, 1, lw=2, alpha=.5, fc=colors[i%2]) ax3.add_artist(r) ax3.set_xticks(bands) b = [s for i,s in enumerate(bands.index)] ax3.set_xticklabels(b, rotation=270+45, ha='left') ax1.scatter(starts, (p_arm > 1).sum(1), s=40, c=colors[2], edgecolor='grey', alpha=1) ax2.scatter(exp2.index.get_level_values('start'), exp2, s=40, c=colors[4], edgecolor='grey', alpha=1) ax2.scatter(mir2.index.get_level_values('start'), mir2, s=40, c=colors[0], edgecolor='grey', alpha=1) #ax1.set_ybound(215,255) ax1.set_ylabel('# of Patients\nwith Amp.\n\n', ha='center', va='center') ax2.set_ybound(-.5,0) ax2.set_ylabel('mRNA/miRNA\nCox Surv.\n(Log10 p)\n\n\n', ha='center', va='center') ax3.set_yticks([]) fig.tight_layout() for ax in [ax1, ax2, ax3]: ax.set_xbound(starts[0], starts[-1]) ax.set_xticks(bands) for n, ss in mir2.order().tail(1).iteritems(): ax2.annotate(n[0], (n[2], ss), xytext=(15, -5), textcoords='offset points', arrowprops=dict(arrowstyle="->")) for n, ss in exp2.ix[['CCND1', 'FADD']].iteritems(): ax2.annotate(n[0], (n[1], ss), xytext=(15, -5), textcoords='offset points', arrowprops=dict(arrowstyle="->")) ax1.set_xticklabels('') ax2.set_xticklabels('') ax2.set_yticks([0,1,2,3,4]) fig.tight_layout() fig.savefig(FIGDIR + 'fig4c.pdf', transparent=True) (p_arm > 1).sum(1).to_csv(FIGDIR + 'fig3c_count.csv') exp2.to_csv(FIGDIR + 'fig3c_p.csv') mir2.to_csv(FIGDIR + 'fig3c_p_mir.csv') patients = true_index(two_hit) mir_level = mirna.df.ix['hsa-mir-548k'].ix[patients] mir_level.name = 'exp' cn_level = cn_by_gene.xs('MIR548K', level=0).iloc[0] cn_level.name = 'cn' mir_level.groupby(level=1).size() fig, axs = subplots(2,1, figsize=(4,5)) ax=axs[0] violin_plot_series(mir_level.clip_lower(-3), ax=ax) ax.axhline(-1, ls='--', lw=2, color='grey') ax.set_xlabel('Tissue Type') ax.set_ylabel('mir-548k Exp.') #ax.set_xticklabels(['Tumor','Normal']) prettify_ax(ax) ax = axs[1] violin_plot_pandas(cn_level, mir_level[:,'01'], order=[-1,0,1,2], ax=ax) ax.axhline(-1, ls='--', lw=2, color='grey') ax.set_xlabel('mir-548k Copy Number') ax.set_ylabel('mir-548k Exp.') prettify_ax(ax) fig.tight_layout() fig.savefig(FIGDIR + 'supp_fig4ab.pdf', tranparent=True) fig, ax = subplots(figsize=(4.5, 3.5)) cb = combine(cn_level==2, mir_level[:,'01'] > -1) draw_survival_curve(cb, surv, ax=ax) ax.legend(loc='lower left', frameon=False) prettify_ax(ax) fig.savefig(FIGDIR + 'supp_fig4c.pdf', tranparent=True) survival_stat_plot(get_surv_fit(surv, cb))