"""Box-plot the age at diagnosis of TCGA patients, grouped by cancer type.

The script queries the SolveBio ``TCGA/PatientInformation`` dataset once per
cancer abbreviation, collects the recorded ages at initial pathologic
diagnosis, orders the cancer types by median age, and uploads a Plotly box
plot (one box per cancer type).
"""
import sys

from solvebio import login, Dataset, Filter
import numpy as np
import plotly.plotly as py
import plotly.tools as tls
from plotly.graph_objs import Data, Layout, XAxis, YAxis, Figure, Box


def _median_age(entry):
    """Return the sort key for one ``{'cancer_type', 'ages'}`` record.

    ``np.median([])`` yields NaN (with a RuntimeWarning), and NaN keys make
    the resulting sort order ill-defined, so cancer types without any usable
    ages are given an infinite key and therefore sort last.
    """
    ages = entry['ages']
    return np.median(ages) if ages else float('inf')


# Load local SolveBio credentials
login()

tcga = Dataset.retrieve('TCGA/PatientInformation')

# Each facet entry is indexed with [0] to obtain the cancer abbreviation.
# NOTE(review): presumably limit=0 asks for every facet value rather than a
# truncated list -- confirm against the SolveBio facets documentation.
cancers = [
    facet[0]
    for facet in tcga.fields('cancer_abbreviation').facets(limit=0)['facets']
]
# Single-argument print(...) calls behave identically on Python 2 and 3.
print("Cancer types: {0}".format(','.join(cancers)))

cancer_and_age = []
print("Retrieving data for cancer type:")
for cancer in cancers:
    # Progress indicator: names are written on one line, space separated.
    # sys.stdout.write replaces the Python 2-only ``print value,`` form.
    sys.stdout.write("{0} ".format(cancer))
    sys.stdout.flush()

    # Keep this cancer type's patients, excluding rows whose age is the
    # '[Not Available]' placeholder.
    f = (
        ~Filter(age_at_initial_pathologic_diagnosis='[Not Available]')
        & Filter(cancer_abbreviation=cancer)
    )
    results = tcga.query(
        fields='age_at_initial_pathologic_diagnosis',
        filters=f,
    )
    # NOTE(review): assumes every remaining value parses with int(); any
    # other non-numeric placeholder would raise ValueError here -- confirm.
    ages = [int(r['age_at_initial_pathologic_diagnosis']) for r in results]
    cancer_and_age.append({'cancer_type': cancer, 'ages': ages})

# Terminate the progress line started inside the loop.
sys.stdout.write("\n")

# Order the boxes from the youngest to the oldest median age at diagnosis.
cancer_and_age = sorted(cancer_and_age, key=_median_age)

data = Data([
    Box(y=cancer['ages'], name=cancer['cancer_type'])
    for cancer in cancer_and_age
])
layout = Layout(
    title='Age of Diagnosis for TCGA Patients by Cancer Type',
    xaxis=XAxis(title='Cancer Type'),
    yaxis=YAxis(title='Age of Diagnosis')
)
fig = Figure(data=data, layout=layout)

# Upload the figure; auto_open=False keeps a browser tab from being opened.
plot_url = py.plot(
    fig,
    filename='age-of-diagnosis-for-tcga-patients',
    auto_open=False,
)
# Left as a bare trailing expression so a notebook can render its result.
# NOTE(review): presumably intended for IPython notebook use -- confirm.
tls.embed(plot_url)