import NotebookImport
from Imports import *

# ---------------------------------------------------------------------------
# Newer MAF file -> gene x barcode table of mutation counts
# ---------------------------------------------------------------------------
f = '../Data/MAFs/PR_TCGA_HNSC_PAIR_Capture_All_Pairs_QCPASS_v4.aggregated.capture.tcga.uuid.automated.somatic.maf.txt'
mut_new = pd.read_table(f, skiprows=4, low_memory=False)

# Drop rows whose Variant_Classification is one of the listed classes.
keep = ~mut_new.Variant_Classification.isin(
    ['Silent', 'Intron', "3'UTR", "5'UTR"]
)
mut_new = mut_new[keep]

# The first 12 characters of the tumor sample barcode are used as the key.
mut_new['barcode'] = mut_new.Tumor_Sample_Barcode.map(
    lambda barcode: barcode[:12]
)

# Count rows per (barcode, Hugo_Symbol), pivot symbols into columns, fill the
# gaps with 0 and transpose so that symbols index the rows.
mut_new = (
    mut_new.groupby(['barcode', 'Hugo_Symbol'])
    .size()
    .unstack()
    .fillna(0)
    .T
)

# Slice of the previously loaded mutation matrix restricted to the labels of
# the new table.  Not referenced again in the visible code.
mut_old = mut.df.ix[mut_new.index, mut_new.columns].dropna([0, 1], how='all')

# ---------------------------------------------------------------------------
# 3p14.2 deletion status
# ---------------------------------------------------------------------------
# NOTE(review): this first value is replaced a few statements below by the
# one derived from the thresholded-by-genes table.
del_3p = cn.features.ix['Deletion'].ix['3p14.2']
del_3p.name = '3p_deletion'

f = '../Extra_Data/FH_HNSC__4_16_all_data_thresholded_by_genes.txt'
gistic = pd.read_table(f, index_col=[2, 1, 0], low_memory=False)
gistic = FH.fix_barcode_columns(gistic, tissue_code='01')

# Median over the rows selected by '3p14.2', one value per column.
del_3p = gistic.ix['3p14.2'].median(0)
del_3p.name = '3p_deletion'

# ---------------------------------------------------------------------------
# Merged mutation matrix and cohort definitions
# ---------------------------------------------------------------------------
# Values from mut.df take precedence; mut_new fills in whatever is missing.
mut_all = mut.df.combine_first(mut_new)

clinical_cohort = mut.df.columns
molecular_cohort = mut_new.columns.diff(mut.features.columns)
hpv_neg_cohort = mut_all.columns.intersection(true_index(hpv == 0))
molecular_cohort_n = molecular_cohort.intersection(hpv_neg_cohort)

# ---------------------------------------------------------------------------
# HPV vs. TP53 mutation, per cohort
# ---------------------------------------------------------------------------
cohorts = {
    'Discovery': clinical_cohort,
    'Validation': molecular_cohort,
    'All': mut_all.columns,
}
hpv.name = 'HPV'

ct = pd.concat(
    {
        label: combine(hpv, mut_all.ix['TP53'] > 0).ix[samples].value_counts()
        for label, samples in cohorts.iteritems()
    },
    axis=1
)
# Bare expression: presumably a notebook cell output; the value is not stored.
ct.ix[['neither', 'HPV', 'TP53', 'both'], ['Discovery', 'Validation', 'All']]

stats = pd.concat(
    {
        label: fisher_exact_test(
            hpv.ix[samples],
            mut_all.ix['TP53'].ix[samples] > 0
        )
        for label, samples in cohorts.iteritems()
    },
    axis=1
)
stats[['Discovery', 'Validation', 'All']]

# ---------------------------------------------------------------------------
# TP53 mutation vs. 3p deletion, per cohort
# ---------------------------------------------------------------------------
# `cohorts` is rebound here; every comparison below uses this second mapping.
cohorts = {
    'Discovery': keepers_o,
    'Validation': molecular_cohort_n,
    'HPV-': hpv_neg_cohort,
}

ct = pd.concat(
    {
        label: combine(
            mut_all.ix['TP53'].ix[samples].dropna() > 0,
            del_3p < 0
        ).value_counts()
        for label, samples in cohorts.iteritems()
    },
    axis=1
)
ct.ix[
    ['neither', '3p_deletion', 'TP53', 'both'],
    ['Discovery', 'Validation', 'HPV-']
]
ct.sum()

stats = pd.concat(
    {
        label: fisher_exact_test(
            mut_all.ix['TP53'].ix[samples] > 0,
            del_3p < 0
        )
        for label, samples in cohorts.iteritems()
    },
    axis=1
)
stats[['Discovery', 'Validation', 'HPV-']]

# ---------------------------------------------------------------------------
# Screen every copy-number feature against TP53 mutation status
# ---------------------------------------------------------------------------
# NOTE(review): this rebinds the index on the shared `cn` object; assuming
# cn.features is a plain attribute, running this statement a second time
# would try to drop a further level -- confirm before re-executing.
cn.features.index = cn.features.index.droplevel(2)

r1 = screen_feature(
    mut_all.ix['TP53'].ix[molecular_cohort_n] > 0,
    fisher_exact_test,
    cn.features.ix['Deletion'] < 0
)
r2 = screen_feature(
    mut_all.ix['TP53'].ix[molecular_cohort_n] > 0,
    fisher_exact_test,
    cn.features.ix['Amplification'] > 0
)
r3 = screen_feature(
    mut_all.ix['TP53'].ix[keepers_o] > 0,
    fisher_exact_test,
    cn.features.ix['Deletion'] < 0
)
r4 = screen_feature(
    mut_all.ix['TP53'].ix[keepers_o] > 0,
    fisher_exact_test,
    cn.features.ix['Amplification'] > 0
)

# Discovery and validation results side by side, ordered by discovery p.
v1 = pd.concat(
    [r3, r1], keys=['Discovery', 'Validation'], axis=1
).sort([('Discovery', 'p')])
v2 = pd.concat(
    [r4, r2], keys=['Discovery', 'Validation'], axis=1
).sort([('Discovery', 'p')])

# First six rows of each table, stacked under a Deletion/Amplification key.
v3 = pd.concat([v1.head(6), v2.head(6)], keys=['Deletion', 'Amplification'])
v3.columns = v3.columns.swaplevel(0, 1)
v3 = v3.sort_index(axis=1)

# Replace the 'q' columns with discovery p multiplied by the number of rows
# in the corresponding discovery screen.
del v3['q']
v3[('q', 'bonf')] = pd.concat(
    [
        v3.p.Discovery['Deletion'] * len(r3),
        v3.p.Discovery['Amplification'] * len(r4),
    ],
    keys=['Deletion', 'Amplification']
)
v3

# ---------------------------------------------------------------------------
# Combined TP53 / 3p event vs. CASP8 mutation, per cohort
# ---------------------------------------------------------------------------
combo_all = combine(mut_all.ix['TP53'] > 0, del_3p < 0)
two_hit = combo_all == 'both'
two_hit.name = 'TP53-3p'

ct = pd.concat(
    {
        label: combine(
            two_hit, mut_all.ix['CASP8'] > 0
        ).ix[samples].value_counts()
        for label, samples in cohorts.iteritems()
    },
    axis=1
)
ct.ix[
    ['neither', 'CASP8', 'TP53-3p', 'both'],
    ['Discovery', 'Validation', 'HPV-']
]

stats = pd.concat(
    {
        label: fisher_exact_test(two_hit.ix[samples], mut_all.ix['CASP8'] > 0)
        for label, samples in cohorts.iteritems()
    },
    axis=1
)
stats[['Discovery', 'Validation', 'HPV-']]

# ---------------------------------------------------------------------------
# Combined TP53 / 3p event vs. any mutation in the SOS-mediated gene set
# ---------------------------------------------------------------------------
# Same three statements as in the CASP8 section, evaluated again.
combo_all = combine(mut_all.ix['TP53'] > 0, del_3p < 0)
two_hit = combo_all == 'both'
two_hit.name = 'TP53-3p'

gs = run.gene_sets['REACTOME_SOS_MEDIATED_SIGNALLING']

# True for a column when the summed counts over the gene-set rows exceed 0.
sos1_pathway = mut_all.ix[gs].sum() > 0
sos1_pathway.name = 'SOS1 Pathway'

ct = pd.concat(
    {
        label: combine(two_hit, sos1_pathway > 0).ix[samples].value_counts()
        for label, samples in cohorts.iteritems()
    },
    axis=1
)
ct.ix[
    ['neither', 'SOS1 Pathway', 'TP53-3p', 'both'],
    ['Discovery', 'Validation', 'HPV-']
]

stats = pd.concat(
    {
        label: fisher_exact_test(two_hit.ix[samples], sos1_pathway)
        for label, samples in cohorts.iteritems()
    },
    axis=1
)
stats[['Discovery', 'Validation', 'HPV-']]