%pylab inline
Populating the interactive namespace from numpy and matplotlib
cd ../src
/cellar/users/agross/TCGA_Code/TCGA/src
from Processing.Imports import *
from Figures.Survival import survival_and_stats
from Processing.Screen import *
# Run configuration: global_params.txt is read with the first column as the
# index and squeezed, so values are looked up by key (OUT_PATH, RUN_DATE, ...).
params = pd.read_table('../global_params.txt', header=None, squeeze=True,
                       index_col=0)
run_path = '{}/Firehose__{}/'.format(params.ix['OUT_PATH'],
                                     params.ix['RUN_DATE'])
run = get_run(run_path, 'Run_' + params.ix['VERSION'])
cancer = run.load_cancer(params.ix['CANCER'])
clinical = cancer.load_clinical()

# Mutation and copy-number objects are uncompressed right after loading.
mut = cancer.load_data('Mutation')
mut.uncompress()
cn = cancer.load_data('CN_broad')
cn.uncompress()
rppa = cancer.load_data('RPPA')

# The mRNA / miRNA objects come from pickles stored under the cancer
# directory. The context managers close the file handles, which the previous
# bare open() calls never did.
# NOTE(review): pickle.load runs arbitrary code; only load trusted files.
with open(cancer.path + '/mRNASeq/store/no_hpv.p', 'rb') as handle:
    rna = pickle.load(handle)
with open(cancer.path + '/miRNASeq/store/no_hpv.p', 'rb') as handle:
    mirna = pickle.load(handle)
#meth = cancer.load_data('Methylation')

clinical_processed = clinical.processed
#clinical_processed = clinical_processed.replace('yes', 1.).replace('no', 0.)
# Clinical covariates for the screen. Each comparison below yields a boolean
# Series that is then renamed to the feature label used in clinical_df.

# NOTE(review): this overwrites the 'year' column in place with booleans, so
# re-running the cell would compare booleans against 'post_2000'.
clinical_processed['year'] = clinical_processed.year == 'post_2000'
hpv_inferred = clinical_processed.hpv_inferred.astype(int)
surv = clinical.survival.survival_5y
age = clinical.clinical.age.astype(float)
# 1.0 for patients aged 75 or older, 0.0 otherwise.
old = pd.Series(1.*(age>=75), name='old')

n0 = clinical_processed.lymph_stage == 'n0'
n0.name = 'lymph_n0'
# Fixed typo: the label was spelled 'stge iv', which would leave Stage_IV
# False for every patient.
# NOTE(review): confirm 'stage iv' is the exact label in clinical.processed.stage.
s4 = clinical_processed.stage == 'stage iv'
s4.name = 'Stage_IV'

# Tumor site indicators.
oc = clinical_processed.tumor_subdivision == 'oral cavity'
oc.name = 'oral_cavity'
lx = clinical_processed.tumor_subdivision == 'larynx'
lx.name = 'larynx'
ox = clinical_processed.tumor_subdivision == 'oropharynx'
ox.name = 'oropharynx'

# True for patients labelled 'post_2000' (see the assignment at the top).
year = clinical_processed.year
white = clinical.clinical.race == 'white'
white.name = 'race_white'
gender = clinical.clinical.gender == 'male'
gender.name = 'gender_male'

inferred = clinical_processed[['drinker_inferred', 'invasion_inferred',
                               'smoker_inferred', 'spread_inferred']]

# One column per covariate; axis is passed by keyword for readability.
clinical_df = pd.concat([n0, s4, oc, lx, ox, year, white, gender], axis=1)
clinical_df = pd.concat([clinical_df, inferred], axis=1)
from Processing.Screen import *
from Stats.Survival import *
def get_interactions(df, cov_df, surv, test):
    """Find co-occurring feature pairs and test each pair against survival.

    Steps, in order:

    1. Keep the rows of ``df`` for which ``describe()`` reports exactly two
       unique values.
    2. Drop redundant features within a data-type: for two rows whose keys
       share the same first element, the later row is discarded when the
       log2 odds ratio from ``fisher_exact_test`` exceeds 4.
    3. Run ``fisher_exact_test`` on pairs of remaining rows whose keys differ
       in their first element (one ordering per pair, chosen by comparing the
       second key element).
    4. Keep pairs with ``p < .05 / n_tests``, where ``n_tests`` counts every
       unordered pair of remaining rows, and label each kept pair 'both'
       (odds ratio > 1) or 'neither'.
    5. For every kept pair, call ``test`` on the boolean vector marking the
       patients whose ``combine`` label equals the pair's interaction label.

    NOTE(review): row keys are assumed to be (data_type, feature) tuples,
    judging from the ``[0]`` / ``[1]`` indexing -- confirm against callers.

    Parameters
    ----------
    df : DataFrame of features (rows) by patients (columns).
    cov_df : covariates, passed through to ``test`` unchanged.
    surv : survival data, passed through to ``test`` unchanged.
    test : callable invoked as ``test(vec, surv, cov_df)``.

    Returns
    -------
    (pair_table, associations) : the Fisher statistics for every tested pair
    and the transposed frame of ``test`` results for the significant pairs.
    """
    has_two_levels = df.T.describe().ix['unique'] == 2
    binary = df[has_two_levels]

    # Collect the later member of every strongly co-occurring same-type pair.
    redundant = set()
    for i, (key_a, vals_a) in enumerate(binary.iterrows()):
        for j, (key_b, vals_b) in enumerate(binary.iterrows()):
            if not (i < j):
                continue
            if key_a[0] != key_b[0]:
                continue
            odds = fisher_exact_test(vals_a, vals_b)['odds_ratio']
            if np.log2(odds) > 4:
                redundant.add(key_b)
    binary = binary.ix[binary.index.diff(redundant)]

    n_rows = len(binary)
    n_tests = (n_rows * (n_rows - 1)) / 2

    # Fisher statistics for cross-type pairs only.
    pair_stats = {}
    for key_a, vals_a in binary.iterrows():
        for key_b, vals_b in binary.iterrows():
            if key_a[0] != key_b[0] and key_a[1] > key_b[1]:
                pair_stats[(key_a, key_b)] = fisher_exact_test(vals_a, vals_b)
    pair_table = pd.DataFrame(pair_stats).T

    threshold = .05 / n_tests
    significant = pair_table[pair_table.p < threshold].sort('p')
    significant['interaction'] = significant.odds_ratio.apply(
        lambda ratio: 'both' if ratio > 1 else 'neither')

    associations = {}
    for pair, row in significant.iterrows():
        combined = combine(binary.ix[pair[0]], binary.ix[pair[1]])
        in_group = combined == row['interaction']
        associations[pair] = test(in_group, surv, cov_df)
    associations = pd.DataFrame(associations).T
    return pair_table, associations
# Keep the features whose bh_all value is below .1 in the r1 screen, then
# order them by their uncorrected p-value.
# NOTE(review): r1 is only assigned further down in this file (r1 = r), so this
# cell depends on notebook execution order; `ti` is presumably an alias of
# true_index -- confirm.
df = r1.df.ix[ti(r1.full.p.bh_all < .1)]
df = df.ix[r1.full.p.uncorrected.ix[df.index].order().index]
# Fisher test between TP53 mutation and 3p14.2 deletion.
fisher_exact_test(r1.df.ix['mutation'].ix['TP53'], r1.df.ix['cna'].ix['del_3p14.2'])
odds_ratio 6.58e+00 p 1.28e-07 dtype: float64
s, int_associations = get_interactions(df, [age, old], surv, test)
len(r1.patients)
261
v = cn.features.ix['Deletion'].ix['3p14.2'].ix['Lesion']
test(v.ix[r1.patients], surv, [old, age])
LR 8.33e-06 feature_p 0.0013 fmla Surv(days, event) ~ feature + old + age + feat... hazzard 0.0819 dtype: object
int_associations.sort('LR')
LR | feature_p | fmla | hazzard | |
---|---|---|---|---|
((mutation, TP53), (rna, KEGG_WNT_SIGNALING_PATHWAY)) | 2.69e-06 | 2.09e-05 | Surv(days, event) ~ feature + old + feature:old\n | 3.76 |
((cna, del_3p14.2), (mutation, TP53)) | 4.22e-06 | 3.14e-05 | Surv(days, event) ~ feature + old + feature:old\n | 3.65 |
((mirna, hsa-mir-3170), (cna, del_3p14.2)) | 1.03e-05 | 1.45e-05 | Surv(days, event) ~ feature + old\n | 2.37 |
((cna, del_3p14.2), (rna, P4HA1)) | 1.25e-05 | 4.33e-05 | Surv(days, event) ~ feature + old\n | 2.54 |
((mutation, TP53), (rna, ST_GA13_PATHWAY)) | 1.89e-05 | 0.000111 | Surv(days, event) ~ feature + old\n | 2.72 |
((cna, del_3p14.2), (rna, KEGG_WNT_SIGNALING_PATHWAY)) | 2.11e-05 | 0.000368 | Surv(days, event) ~ feature + age + old + feat... | 5.34 |
((cna, del_3p14.2), (rna, ST_GA13_PATHWAY)) | 2.44e-05 | 0.000142 | Surv(days, event) ~ feature + old\n | 2.76 |
((cna, del_3p14.2), (rna, REACTOME_G_BETA_GAMMA_SIGNALLING_THROUGH_PI3KGAMMA)) | 8.07e-05 | 0.000207 | Surv(days, event) ~ feature + old\n | 2.33 |
((mutation, TP53), (rna, ST_ERK1_ERK2_MAPK_PATHWAY)) | 0.000106 | 0.000327 | Surv(days, event) ~ feature + old\n | 2.36 |
((mirna, hsa-mir-548k), (rna, CDC25C)) | 0.000146 | 0.000108 | Surv(days, event) ~ feature + old\n | 2.12 |
((cna, del_3p14.2), (rna, BIOCARTA_GSK3_PATHWAY)) | 0.000172 | 0.000478 | Surv(days, event) ~ feature + old\n | 2.32 |
((clinical, smoker_inferred), (cna, del_3p14.2)) | 0.000202 | 0.000326 | Surv(days, event) ~ feature + old\n | 2.1 |
((cna, del_3p14.2), (rna, QSOX2)) | 0.000276 | 0.000419 | Surv(days, event) ~ feature + old\n | 2.06 |
((cna, del_3p14.2), (rna, MYBL2)) | 0.000414 | 0.0014 | Surv(days, event) ~ feature + age + old + feat... | 2.58 |
((mutation, TP53), (rna, REACTOME_APOPTOSIS_INDUCED_DNA_FRAGMENTATION)) | 0.000515 | 0.00119 | Surv(days, event) ~ feature + old\n | 2.17 |
((cna, del_3p14.2), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.00052 | 0.993 | Surv(days, event) ~ feature + old + feature:old\n | 4e-07 |
((cna, del_3p14.2), (rna, TROAP)) | 0.00079 | 0.00142 | Surv(days, event) ~ feature + old\n | 2.04 |
((mirna, hsa-mir-3664), (cna, del_11q14.2)) | 0.00217 | 0.000809 | Surv(days, event) ~ feature + old\n | 2.31 |
((clinical, smoker_inferred), (rna, QSOX2)) | 0.00233 | 0.0024 | Surv(days, event) ~ feature + old\n | 1.8 |
((clinical, smoker_inferred), (cna, del_11q14.2)) | 0.00269 | 0.00185 | Surv(days, event) ~ feature + old\n | 1.88 |
((clinical, smoker_inferred), (mirna, hsa-mir-3170)) | 0.00286 | 0.0026 | Surv(days, event) ~ feature + old\n | 1.79 |
((clinical, smoker_inferred), (cna, amp_7q21.3)) | 0.00446 | 0.00366 | Surv(days, event) ~ feature + old\n | 1.79 |
((mirna, hsa-mir-548k), (cna, del_11q14.2)) | 0.00748 | 0.00476 | Surv(days, event) ~ feature + old\n | 1.89 |
((cna, del_3p14.2), (mutation, BIOCARTA_MAL_PATHWAY)) | 0.015 | 0.0312 | Surv(days, event) ~ feature + old\n | 0.428 |
((cna, del_3p14.2), (rna, REACTOME_MEMBRANE_TRAFFICKING)) | 0.0173 | 0.0323 | Surv(days, event) ~ feature + old\n | 0.449 |
((cna, del_3p14.2), (rna, REACTOME_MITOCHONDRIAL_TRNA_AMINOACYLATION)) | 0.0227 | 0.0803 | Surv(days, event) ~ feature + old + feature:old\n | 0.336 |
((cna, del_3p14.2), (rna, ST_GA12_PATHWAY)) | 0.0523 | 0.0764 | Surv(days, event) ~ feature + old\n | 0.519 |
((mirna, hsa-mir-3170), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.176 | 0.187 | Surv(days, event) ~ feature + old\n | 0.738 |
((cna, del_3p14.2), (rna, REACTOME_GRB2_EVENTS_IN_EGFR_SIGNALING)) | 0.246 | 0.27 | Surv(days, event) ~ feature + old\n | 0.675 |
((clinical, smoker_inferred), (rna, REACTOME_FGFR_LIGAND_BINDING_AND_ACTIVATION)) | 0.334 | 0.296 | Surv(days, event) ~ feature + old\n | 1.71 |
((clinical, lymph_n0), (rna, ST_GA13_PATHWAY)) | 0.392 | 0.416 | Surv(days, event) ~ feature + old\n | 0.709 |
((clinical, smoker_inferred), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.406 | 0.417 | Surv(days, event) ~ feature + old\n | 0.806 |
((cna, del_11q14.2), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.763 | 0.762 | Surv(days, event) ~ feature + old\n | 1.06 |
((cna, amp_7q21.3), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.95 | 0.95 | Surv(days, event) ~ feature + old\n | 0.987 |
34 rows × 4 columns
clinical.followup.ix['TCGA-BA-5149'].T
patient.followups.followup | patient.followups.followup_2 | |
---|---|---|
additionalpharmaceuticaltherapy | no | yes |
additionalradiationtherapy | no | no |
additionalsurgerylocoregionalprocedure | NaN | NaN |
additionalsurgerymetastaticprocedure | NaN | yes |
bcrfollowupbarcode | tcga-ba-5149-f9974 | tcga-ba-5149-f34520 |
bcrfollowupuuid | bdc3c30c-d049-4e40-b525-2ba1c72910dd | 1f5c433a-6d0d-4b17-b742-5d3777ddd278 |
daystoadditionalsurgerylocoregionalprocedure | NaN | NaN |
daystoadditionalsurgerymetastaticprocedure | NaN | 409 |
daystocompletionofcurativetx | NaN | NaN |
daystodeath | NaN | 806 |
daystolastfollowup | 248 | NaN |
daystonewtumoreventadditionalsurgeryprocedure | NaN | NaN |
daystonewtumoreventafterinitialtreatment | NaN | 389 |
diseaseaftercurativetx | NaN | NaN |
followupcasereportformsubmissionreason | NaN | scheduled follow-up submission |
followuptreatmentsuccess | NaN | progressive disease |
lostfollowup | NaN | no |
methodofcurativetx | NaN | NaN |
newneoplasmeventoccurrenceanatomicsite | NaN | distant metastasis |
newneoplasmeventtype | NaN | NaN |
newneoplasmoccurrenceanatomicsitetext | NaN | lung |
newtumoreventadditionalsurgeryprocedure | NaN | NaN |
newtumoreventafterinitialtreatment | no | yes |
patientdeathreason | NaN | NaN |
personneoplasmcancerstatus | tumor free | with tumor |
primarytherapyoutcomesuccess | complete remission/response | complete remission/response |
progressiondeterminedby | NaN | NaN |
radiationtherapy | yes | yes |
smokelesstobaccouseageatquit | NaN | NaN |
smokelesstobaccouseatdiag | NaN | NaN |
smokelesstobaccouseregularly | NaN | NaN |
targetedmoleculartherapy | yes | yes |
vitalstatus | alive | dead |
form_completion | 2011-04-04 00:00:00 | 2012-08-21 00:00:00 |
34 rows × 2 columns
s.sort('p')
odds_ratio | p | |
---|---|---|
((cna, del_3p14.2), (rna, KEGG_WNT_SIGNALING_PATHWAY)) | 19.24 | 2.53e-15 |
((cna, del_3p14.2), (rna, BIOCARTA_GSK3_PATHWAY)) | 9.10 | 2.37e-10 |
((clinical, smoker_inferred), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.17 | 8.98e-10 |
((cna, del_3p14.2), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.13 | 1.83e-09 |
((cna, del_3p14.2), (rna, REACTOME_G_BETA_GAMMA_SIGNALLING_THROUGH_PI3KGAMMA)) | 7.35 | 2.92e-09 |
((cna, del_3p14.2), (rna, REACTOME_MITOCHONDRIAL_TRNA_AMINOACYLATION)) | 0.13 | 5.82e-09 |
((clinical, smoker_inferred), (cna, amp_7q21.3)) | 5.16 | 1.30e-08 |
((cna, del_3p14.2), (rna, ST_GA13_PATHWAY)) | 8.23 | 2.63e-08 |
((cna, del_3p14.2), (rna, ST_GA12_PATHWAY)) | 0.12 | 3.17e-08 |
((cna, del_3p14.2), (mutation, TP53)) | 6.58 | 1.28e-07 |
((clinical, smoker_inferred), (cna, del_3p14.2)) | 5.87 | 1.46e-07 |
((clinical, smoker_inferred), (cna, del_11q14.2)) | 4.77 | 2.29e-07 |
((clinical, smoker_inferred), (rna, QSOX2)) | 3.93 | 3.89e-07 |
((cna, del_3p14.2), (rna, REACTOME_GRB2_EVENTS_IN_EGFR_SIGNALING)) | 0.18 | 3.99e-07 |
((clinical, smoker_inferred), (mirna, hsa-mir-3170)) | 3.66 | 1.03e-06 |
((cna, del_3p14.2), (rna, REACTOME_MEMBRANE_TRAFFICKING)) | 0.18 | 1.36e-06 |
((clinical, smoker_inferred), (rna, REACTOME_FGFR_LIGAND_BINDING_AND_ACTIVATION)) | 0.15 | 1.52e-06 |
((mutation, TP53), (rna, ST_ERK1_ERK2_MAPK_PATHWAY)) | 5.00 | 1.97e-06 |
((cna, del_3p14.2), (rna, TROAP)) | 4.99 | 2.18e-06 |
((cna, del_11q14.2), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.21 | 3.10e-06 |
((mirna, hsa-mir-3664), (cna, del_11q14.2)) | 4.54 | 4.22e-06 |
((mutation, TP53), (rna, KEGG_WNT_SIGNALING_PATHWAY)) | 5.15 | 6.31e-06 |
((mirna, hsa-mir-548k), (cna, del_11q14.2)) | 3.40 | 8.17e-06 |
((mirna, hsa-mir-548k), (rna, CDC25C)) | 4.49 | 9.34e-06 |
((cna, del_3p14.2), (rna, QSOX2)) | 4.23 | 1.33e-05 |
((mutation, TP53), (rna, ST_GA13_PATHWAY)) | 5.05 | 1.68e-05 |
((cna, del_3p14.2), (rna, P4HA1)) | 4.17 | 1.78e-05 |
((mirna, hsa-mir-3170), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.29 | 1.88e-05 |
((clinical, lymph_n0), (rna, ST_GA13_PATHWAY)) | 0.22 | 2.13e-05 |
((cna, del_3p14.2), (rna, MYBL2)) | 4.19 | 2.14e-05 |
((cna, amp_7q21.3), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.27 | 2.39e-05 |
((mirna, hsa-mir-3170), (cna, del_3p14.2)) | 4.31 | 2.59e-05 |
((mutation, TP53), (rna, REACTOME_APOPTOSIS_INDUCED_DNA_FRAGMENTATION)) | 4.53 | 2.94e-05 |
((cna, del_3p14.2), (mutation, BIOCARTA_MAL_PATHWAY)) | 0.23 | 3.69e-05 |
((mirna, hsa-mir-411), (rna, P4HA1)) | 4.78 | 5.31e-05 |
((cna, del_11q14.2), (rna, REACTOME_FGFR_LIGAND_BINDING_AND_ACTIVATION)) | 0.28 | 5.77e-05 |
((clinical, spread_inferred), (rna, ST_GA13_PATHWAY)) | 9.75 | 6.11e-05 |
((mirna, hsa-mir-548k), (rna, TROAP)) | 3.74 | 6.60e-05 |
((cna, del_3p14.2), (rna, ST_ERK1_ERK2_MAPK_PATHWAY)) | 3.90 | 7.03e-05 |
((mirna, hsa-mir-548k), (rna, REACTOME_FGFR_LIGAND_BINDING_AND_ACTIVATION)) | 0.29 | 7.42e-05 |
((mirna, hsa-mir-548k), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.29 | 7.49e-05 |
((cna, del_3p14.2), (rna, AURKAPS1)) | 4.05 | 1.20e-04 |
((mirna, hsa-mir-3170), (rna, ST_ERK1_ERK2_MAPK_PATHWAY)) | 3.24 | 1.31e-04 |
((cna, amp_7q21.3), (rna, BIOCARTA_GSK3_PATHWAY)) | 3.72 | 1.31e-04 |
((mirna, hsa-mir-548k), (rna, AURKAPS1)) | 4.07 | 1.36e-04 |
((cna, del_3p14.2), (rna, REACTOME_CONVERSION_FROM_APC_CDC20_TO_APC_CDH1_IN_LATE_ANAPHASE)) | 3.52 | 1.58e-04 |
((cna, del_11q14.2), (rna, REACTOME_GRB2_EVENTS_IN_EGFR_SIGNALING)) | 0.25 | 1.83e-04 |
((clinical, spread_inferred), (rna, ST_GA12_PATHWAY)) | 0.11 | 1.84e-04 |
((mirna, hsa-mir-100), (rna, P4HA1)) | 3.32 | 1.97e-04 |
((rna, KEGG_WNT_SIGNALING_PATHWAY), (mutation, BIOCARTA_MAL_PATHWAY)) | 0.26 | 2.00e-04 |
((cna, del_3p14.2), (rna, REACTOME_APOPTOSIS_INDUCED_DNA_FRAGMENTATION)) | 3.82 | 2.14e-04 |
((clinical, smoker_inferred), (rna, REACTOME_APOPTOSIS_INDUCED_DNA_FRAGMENTATION)) | 3.32 | 2.65e-04 |
((clinical, spread_inferred), (rna, HTN3)) | 0.21 | 3.37e-04 |
((mirna, hsa-mir-3664), (rna, REACTOME_FGFR_LIGAND_BINDING_AND_ACTIVATION)) | 0.28 | 3.40e-04 |
((clinical, smoker_inferred), (rna, TROAP)) | 2.90 | 3.48e-04 |
((cna, amp_7q21.3), (rna, REACTOME_FGFR_LIGAND_BINDING_AND_ACTIVATION)) | 0.33 | 3.48e-04 |
((mirna, hsa-mir-548k), (rna, CDK1)) | 3.00 | 3.86e-04 |
((clinical, spread_inferred), (rna, P4HA1)) | 3.52 | 4.43e-04 |
((cna, del_11q14.2), (mutation, TP53)) | 4.11 | 4.66e-04 |
((rna, TROAP), (mutation, TP53)) | 3.27 | 4.89e-04 |
... | ... |
622 rows × 2 columns
int_associations.sort('LR')
LR | feature_p | fmla | hazzard | |
---|---|---|---|---|
((mutation, TP53), (rna, KEGG_WNT_SIGNALING_PATHWAY)) | 2.69e-06 | 2.09e-05 | Surv(days, event) ~ feature + old + feature:old\n | 3.76 |
((cna, del_3p14.2), (mutation, TP53)) | 4.22e-06 | 3.14e-05 | Surv(days, event) ~ feature + old + feature:old\n | 3.65 |
((cna, del_3p14.2), (mirna, hsa-mir-3170)) | 1.03e-05 | 1.45e-05 | Surv(days, event) ~ feature + old\n | 2.37 |
((cna, del_3p14.2), (rna, P4HA1)) | 1.25e-05 | 4.33e-05 | Surv(days, event) ~ feature + old\n | 2.54 |
((mutation, TP53), (rna, ST_GA13_PATHWAY)) | 1.89e-05 | 0.000111 | Surv(days, event) ~ feature + old\n | 2.72 |
((cna, del_3p14.2), (rna, KEGG_WNT_SIGNALING_PATHWAY)) | 2.11e-05 | 0.000368 | Surv(days, event) ~ feature + age + old + feat... | 5.34 |
((cna, del_3p14.2), (rna, ST_GA13_PATHWAY)) | 2.44e-05 | 0.000142 | Surv(days, event) ~ feature + old\n | 2.76 |
((cna, del_3p14.2), (rna, REACTOME_G_BETA_GAMMA_SIGNALLING_THROUGH_PI3KGAMMA)) | 8.07e-05 | 0.000207 | Surv(days, event) ~ feature + old\n | 2.33 |
((mutation, TP53), (rna, ST_ERK1_ERK2_MAPK_PATHWAY)) | 0.000106 | 0.000327 | Surv(days, event) ~ feature + old\n | 2.36 |
((mirna, hsa-mir-548k), (rna, CDC25C)) | 0.000146 | 0.000108 | Surv(days, event) ~ feature + old\n | 2.12 |
((cna, del_3p14.2), (rna, BIOCARTA_GSK3_PATHWAY)) | 0.000172 | 0.000478 | Surv(days, event) ~ feature + old\n | 2.32 |
((clinical, smoker_inferred), (cna, del_3p14.2)) | 0.000202 | 0.000326 | Surv(days, event) ~ feature + old\n | 2.1 |
((cna, del_3p14.2), (rna, QSOX2)) | 0.000276 | 0.000419 | Surv(days, event) ~ feature + old\n | 2.06 |
((cna, del_3p14.2), (rna, MYBL2)) | 0.000414 | 0.0014 | Surv(days, event) ~ feature + age + old + feat... | 2.58 |
((mutation, TP53), (rna, REACTOME_APOPTOSIS_INDUCED_DNA_FRAGMENTATION)) | 0.000515 | 0.00119 | Surv(days, event) ~ feature + old\n | 2.17 |
((cna, del_3p14.2), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.00052 | 0.993 | Surv(days, event) ~ feature + old + feature:old\n | 4e-07 |
((cna, del_3p14.2), (rna, TROAP)) | 0.00079 | 0.00142 | Surv(days, event) ~ feature + old\n | 2.04 |
((cna, del_11q14.2), (mirna, hsa-mir-3664)) | 0.00217 | 0.000809 | Surv(days, event) ~ feature + old\n | 2.31 |
((clinical, smoker_inferred), (rna, QSOX2)) | 0.00233 | 0.0024 | Surv(days, event) ~ feature + old\n | 1.8 |
((clinical, smoker_inferred), (cna, del_11q14.2)) | 0.00269 | 0.00185 | Surv(days, event) ~ feature + old\n | 1.88 |
((clinical, smoker_inferred), (mirna, hsa-mir-3170)) | 0.00286 | 0.0026 | Surv(days, event) ~ feature + old\n | 1.79 |
((clinical, smoker_inferred), (cna, amp_7q21.3)) | 0.00446 | 0.00366 | Surv(days, event) ~ feature + old\n | 1.79 |
((cna, del_11q14.2), (mirna, hsa-mir-548k)) | 0.00748 | 0.00476 | Surv(days, event) ~ feature + old\n | 1.89 |
((cna, del_3p14.2), (mutation, BIOCARTA_MAL_PATHWAY)) | 0.015 | 0.0312 | Surv(days, event) ~ feature + old\n | 0.428 |
((cna, del_3p14.2), (rna, REACTOME_MEMBRANE_TRAFFICKING)) | 0.0173 | 0.0323 | Surv(days, event) ~ feature + old\n | 0.449 |
((cna, del_3p14.2), (rna, REACTOME_MITOCHONDRIAL_TRNA_AMINOACYLATION)) | 0.0227 | 0.0803 | Surv(days, event) ~ feature + old + feature:old\n | 0.336 |
((cna, del_3p14.2), (rna, ST_GA12_PATHWAY)) | 0.0523 | 0.0764 | Surv(days, event) ~ feature + old\n | 0.519 |
((mirna, hsa-mir-3170), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.176 | 0.187 | Surv(days, event) ~ feature + old\n | 0.738 |
((cna, del_3p14.2), (rna, REACTOME_GRB2_EVENTS_IN_EGFR_SIGNALING)) | 0.246 | 0.27 | Surv(days, event) ~ feature + old\n | 0.675 |
((clinical, smoker_inferred), (rna, REACTOME_FGFR_LIGAND_BINDING_AND_ACTIVATION)) | 0.334 | 0.296 | Surv(days, event) ~ feature + old\n | 1.71 |
((clinical, lymph_n0), (rna, ST_GA13_PATHWAY)) | 0.392 | 0.416 | Surv(days, event) ~ feature + old\n | 0.709 |
((clinical, smoker_inferred), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.406 | 0.417 | Surv(days, event) ~ feature + old\n | 0.806 |
((cna, del_11q14.2), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.763 | 0.762 | Surv(days, event) ~ feature + old\n | 1.06 |
((cna, amp_7q21.3), (rna, SIG_PIP3_SIGNALING_IN_B_LYMPHOCYTES)) | 0.95 | 0.95 | Surv(days, event) ~ feature + old\n | 0.987 |
34 rows × 4 columns
def test(s, surv, cov_df):
    """Fit the covariate-adjusted Cox model for one feature vector.

    Missing values are dropped from ``s`` before it is passed to
    ``get_cox_ph_ms`` (called with ``return_val='LR'`` and
    ``interactions='just_feature'``).

    Parameters
    ----------
    s : feature values for one feature.
    surv : survival data, forwarded to ``get_cox_ph_ms``.
    cov_df : covariates, forwarded to ``get_cox_ph_ms``.

    Returns
    -------
    Whatever ``get_cox_ph_ms`` returns. If the fit raises, an empty Series
    indexed by 'LR', 'feature_p', 'fmla' and 'hazzard' is returned instead,
    so a row-wise ``apply`` over many features keeps going.
    """
    s = s.dropna()
    try:
        return get_cox_ph_ms(surv, s, cov_df, return_val='LR',
                             interactions='just_feature')
    except Exception:
        # Deliberate best-effort fallback. Exception (rather than a bare
        # except) lets KeyboardInterrupt and SystemExit propagate, so a long
        # screen can still be interrupted.
        return pd.Series(index=['LR', 'feature_p', 'fmla', 'hazzard'])
def run_screen(screen, filters, covariates, save=True):
    """Run the univariate and covariate-adjusted survival screens.

    Parameters
    ----------
    screen : object providing ``get_patient_set(filters)`` and
        ``get_data(patients, cutoff)``.
    filters : list handed to ``screen.get_patient_set``.
    covariates : list of Series, concatenated column-wise into the covariate
        frame used by the adjusted models.
    save : accepted for interface compatibility; not used by this function.

    Returns
    -------
    (res, pairs, full, univariate, keepers_o, df)
        ``res`` is ``full`` plus the pairwise 'aggregate' rows, sorted by
        uncorrected p-value. If the interaction step fails, ``res`` is only
        the first five rows of the sorted ``full`` table and ``pairs`` is an
        empty list.

    Notes
    -----
    Reads the module-level ``surv`` rather than taking it as an argument.
    """
    cov_df = pd.concat(covariates, axis=1)
    keepers_o = screen.get_patient_set(filters)
    # Threshold of 5% of the kept patients, but never fewer than 10.
    cutoff = max(np.ceil(len(keepers_o) * .05), 10)
    df = screen.get_data(keepers_o, cutoff)

    # Univariate screen, with multiple-testing corrections of the LR p-values.
    univariate = cox_screen(df, surv)
    vec = univariate.LR.p.sort_index()
    univariate = pd.concat([univariate['hazard'], corrections(vec)],
                           keys=['hazard', 'p'], axis=1)
    #hits = univariate[univariate['q_bh'] < .2].index
    hits = univariate.index

    # Covariate-adjusted model for every feature carried forward.
    full = df.ix[hits].apply(test, args=(surv, cov_df,), axis=1)
    vec = full.LR.ix[univariate.index].sort_index()
    full = pd.concat([full[['fmla']], corrections(vec)],
                     keys=['fmla', 'p'], axis=1)

    hits = true_index(full.p.bh_all.order() < .1)
    try:
        pairs, interactions = get_interactions(df.ix[hits], cov_df,
                                               surv, test)
        ii = interactions[['LR', 'fmla']]
        ii.columns = pd.MultiIndex.from_tuples([('p', 'uncorrected'),
                                                ('fmla', 'fmla')])
        ii.index = pd.MultiIndex.from_tuples([('aggregate', i)
                                              for i in ii.index])
        res = full.append(ii).sort([('p', 'uncorrected')])
    except Exception:
        # Best-effort fallback when no interactions can be computed.
        # Exception (not a bare except) keeps Ctrl-C working.
        # NOTE(review): .head() truncates the result to five rows -- confirm
        # this is intended rather than a debugging leftover.
        res = full.sort([('p', 'uncorrected')]).head()
        pairs = []
    return res, pairs, full, univariate, keepers_o, df
# Primary screen over mutation, copy-number, mRNA, miRNA and clinical features,
# adjusting for the `old` and `age` covariates.
# NOTE(review): the prose describes this cohort as HPV negative while the
# filter is hpv_inferred==1, so filters presumably mark patients to exclude --
# confirm against Screen.get_patient_set.
screen = Screen(mut, cn, rna, mirna, clinical_df)
res, pairs, full, univariate, keepers_o, df = run_screen(screen, [hpv_inferred==1], [old, age])
r = ScreenResult(res, pairs, full, univariate, keepers_o, df)
r
Screen Result: 823 events tested across 261 patients 60 events were significant above .1 FDR. 0 pairs of events were significantly overlapping. ('cna', 'del_3p14.2') was the top association with a q value of 0.00685793973844.
r.full.ix['mutation'].ix['TP53']
fmla fmla Surv(days, event) ~ feature + old\n p uncorrected 0.000302 bh_within 0.0181 bh_all 0.0171 bonf_all 0.248 bonf_within 0.0326 two_step 0.0904 Name: TP53, dtype: object
r1 = r
r.results.head()
fmla | p | |||||||
---|---|---|---|---|---|---|---|---|
fmla | uncorrected | bh_within | bh_all | bonf_all | bonf_within | two_step | ||
cna | del_3p14.2 | Surv(days, event) ~ feature + old + age + feat... | 8.33e-06 | 6.17e-04 | 0.01 | 0.01 | 6.17e-04 | 0.00 |
mirna | hsa-mir-3170 | Surv(days, event) ~ feature + old\n | 1.85e-05 | 3.99e-03 | 0.01 | 0.02 | 3.99e-03 | 0.02 |
clinical | spread_inferred | Surv(days, event) ~ feature + old\n | 3.11e-05 | 3.42e-04 | 0.01 | 0.03 | 3.42e-04 | 0.00 |
rna | ST_GA13_PATHWAY | Surv(days, event) ~ feature + old\n | 3.38e-05 | 8.39e-03 | 0.01 | 0.03 | 1.40e-02 | 0.04 |
P4HA1 | Surv(days, event) ~ feature + old\n | 4.96e-05 | 8.39e-03 | 0.01 | 0.04 | 2.05e-02 | 0.04 |
5 rows × 7 columns
Our primary screen identified 79 events that were each independently associated with patient survival (Extended Data Fig. 3).
df.groupby(level=0).size()
clinical 11 cna 74 mirna 216 mutation 108 rna 414 dtype: int64
r.full[r.full.p.bh_all < .1].groupby(level=0).size()
clinical 4 cna 8 mirna 12 mutation 4 rna 32 dtype: int64
import Data.ProcessClinical as PC
reload(PC)
<module 'Data.ProcessClinical' from 'Data/ProcessClinical.pyc'>
# Re-initialize the cancer's data and reload the clinical object and 5-year
# survival from it.
# NOTE(review): presumably done to pick up the reloaded ProcessClinical
# module -- confirm.
cancer.initialize_data(run, save=True)
clinical = cancer.load_clinical()
surv = clinical.survival.survival_5y
# Combine TP53 mutation status (> 0) with 3p14.2 deletion status; the result
# is compared against the label 'both' further down.
combo = combine(r1.df.ix['mutation'].ix['TP53'] > 0, r1.df.ix['cna'].ix['del_3p14.2'])
survival_and_stats(combo, surv, figsize=(6,4))
We show that in HPV-negative (HPV−) patients, the detrimental impact of TP53 mutation occurs only in combination with loss of chromosome 3p, leading to a marked decrease in median survival from >5 years for TP53 mutation only to 1.7 years for both events.
get_surv_fit_lr(clinical.survival.survival, combo)
Stats | Median Survival | 5y Survival | Log-Rank | |||||||
---|---|---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | chi2 | p | |
15.2 | 0.00169 | |||||||||
both | 181 | 96 | 1.81 | 1.5 | 2.7 | 0.331 | 0.249 | 0.441 | ||
del_3p14.2 | 27 | 10 | 5.48 | 2.96 | NaN | 0.629 | 0.428 | 0.926 | ||
TP53 | 25 | 7 | NaN | 4.49 | NaN | 0.564 | 0.334 | 0.951 | ||
neither | 24 | 8 | 7.44 | 4.71 | NaN | 0.646 | 0.397 | 1 |
5 rows × 10 columns
# Boolean flag: True for patients whose combined label is 'both'.
two_hit = combo == 'both'
survival_and_stats(two_hit, surv, figsize=(6,4))
The most significant association with survival was the aggregate of TP53 mutation and 3p deletion (Fig. 2c-d, HR 2.5 +/- .5).
get_cox_ph(surv, two_hit, print_desc=True);
coef exp(coef) se(coef) z p feature 0.951 2.59 0.253 3.75 0.00017 Likelihood ratio test=17 on 1 df, p=3.75e-05 n= 257, number of events= 109
# Hazard ratio exp(coef), and its distance to exp(coef - se), with the
# coefficient and standard error typed in by hand.
# NOTE(review): the get_cox_ph summary printed just above reports coef 0.951
# (exp(coef) 2.59), not 0.931 -- confirm which value is intended.
(exp(0.931)), exp(0.931) - exp(0.931 - 0.253)
(2.5370449544725853, 0.56711103258165552)
These events co-occur in 185 of 258 HPV− patients (Fig. 2c, Fisher’s Exact Test, $P < 10^{-7}$)
fisher_exact_test(df.ix['mutation'].ix['TP53'], df.ix['cna'].ix['del_3p14.2'])
odds_ratio 6.58e+00 p 1.28e-07 dtype: float64
while each event by itself has a significant association with survival, these marginal effects are driven by the large number of patients who have both events together (Fig. 2d, Extended Data Fig. 4, Permutation Test P < .006, Bonferroni Corrected P < .2).
interaction_empirical_p(df.ix['mutation'].ix['TP53'], df.ix['cna'].ix['del_3p14.2'], surv, num_perm=101)
interaction both p 0.257 dtype: object
# --- Second screen: same covariates, with two_hit==0 added to the filters.
# NOTE(review): filters presumably mark patients to exclude, which would
# restrict this screen to two-hit patients -- confirm against
# Screen.get_patient_set.
res, pairs, full, univariate, keepers_o, df = run_screen(screen, [hpv_inferred==1, two_hit==0], [old, age])
r = ScreenResult(res, pairs, full, univariate, keepers_o, df)
r
r2 = r
r.results.head()
# MUC5B mutation status among the patients kept by the second screen.
muc5b = df.ix['mutation'].ix['MUC5B'].ix[keepers_o].dropna()
survival_and_stats(muc5b, surv, figsize=(6,4))
get_cox_ph(surv, muc5b, print_desc=True);
# Hand-typed exp(coef) and exp(coef) - exp(coef - se).
# NOTE(review): 1.36 and 0.388 are presumably copied from the summary printed
# by the call above -- confirm after any re-run.
exp(1.36), exp(1.36) - exp(1.36 - 0.388)
# --- Third screen: muc5b==1 added to the filters.
res, pairs, full, univariate, keepers_o, df = run_screen(screen, [hpv_inferred==1, two_hit==0, muc5b==1], [old, age])
r = ScreenResult(res, pairs, full, univariate, keepers_o, df)
r
r.univariate.ix['clinical'].sort([('p','bonf_within')])
# NOTE(review): the 'year' feature was built as year == 'post_2000', so True
# marks post-2000 patients; the name pre_2k may be inverted -- confirm.
pre_2k = df.ix['clinical'].ix['year'].ix[keepers_o].dropna()
survival_and_stats(pre_2k, surv, figsize=(6,4))
# --- Same filters again, now with `year` added to the covariates.
res, pairs, full, univariate, keepers_o, df = run_screen(screen, [hpv_inferred==1, two_hit==0, muc5b==1],
[old, age, year])
r = ScreenResult(res, pairs, full, univariate, keepers_o, df)
r
r3 = r
res.head()
# hsa-mir-548k status among the patients kept by the last screen.
mir548 = df.ix['mirna'].ix['hsa-mir-548k'].ix[keepers_o].dropna()
survival_and_stats(mir548, surv, figsize=(6,4))
get_cox_ph(surv, mir548, print_desc=True);
# Hand-typed exp(coef) and exp(coef) - exp(coef - se), as above.
exp(.803), exp(.803) - exp(.803 - 0.231)
# Same plot restricted to patients for whom `year` is True.
survival_and_stats(mir548.ix[true_index(year)].dropna(), surv, figsize=(6,4))
# Stratification code: 1.0 where two_hit is False, 2.0 where it is True.
st = (two_hit*1.).copy() + 1
# Two-hit patients with muc5b == 1 become 4 ...
st.ix[true_index(st==2).intersection(true_index(muc5b==1))] = 4
# ... and patients still coded 2 with mir548 == 1 become 3 (4s are kept).
st.ix[true_index(st==2).intersection(true_index(mir548==1))] = 3
survival_and_stats(st.dropna(), surv, order=[4,3,2,1])
# The codes are turned into '_'-prefixed strings before fitting.
# NOTE(review): presumably so they are treated as categories -- confirm.
get_cox_ph(clinical.survival.event_free_survival_5y, '_' + st.astype(str), print_desc=True, interactions=False);
get_cox_ph(surv, '_' + st.astype(str), print_desc=True, interactions=False);
# Side-by-side table of the univariate and full results of the first screen
# (r1), sorted by the uncorrected p-value of the full model.
t = pd.concat([r1.univariate, r1.results],
keys=['univariate','full'], axis=1).sort([('full','p','uncorrected')])
tm = t.as_matrix()
# Format every column as strings with pandas' formatter.
# NOTE(review): relies on Python 2 `map` returning a list and on the private
# pd.core.format module.
t2 = array(map(lambda s: array(pd.core.format.format_array(s, None)), tm.T)).T
t3 = pd.DataFrame(t2, t.index, t.columns)
# Put the formula column back from the unformatted table, stripped of
# surrounding whitespace and of any literal backslash-n sequence.
t3[('full','fmla','fmla')] = t[('full','fmla','fmla')].apply(lambda s: s.strip().replace('\\n',''))
t3.groupby(level=0).size()
# Permutation test for the TP53 / 3p14.2 pair, using 101 permutations.
interaction_empirical_p(r1.df.ix['mutation'].ix['TP53'], r1.df.ix['cna'].ix['del_3p14.2'], surv, num_perm=101)