Show the number of genes detected as present (expressed) in each tissue of the GNF Expression Atlas data.
import io
import gzip
import pandas
import requests
import numpy
import seaborn
import matplotlib.pyplot as plt
%matplotlib inline
# Load the table of BTO ids and names that appear in the GNF data
url = 'https://gist.githubusercontent.com/dhimmel/1f252b674c0c75443cc1/raw/a97c3425792f2288b0369de70e1b46018832455b/bto-terms-in-gnf.tsv'
bto_df = pandas.read_table(filepath_or_buffer=url)
# Read expression: download the gzipped table into memory and parse it.
url = 'http://het.io/disease-genes/downloads/files/expression.txt.gz'
response = requests.get(url)
# Fail loudly on an HTTP error instead of handing an error page to gzip.
response.raise_for_status()
with gzip.open(io.BytesIO(response.content)) as read_file:
    gnf_df = pandas.read_table(read_file)
# Put the values on a log10 scale; the threshold applied later is in log10 units.
gnf_df = numpy.log10(gnf_df)
# Compute expressed genes per tissue.
# A value counts as "expressed" when its log10 value is at least 1.4; summing
# the boolean frame column-wise gives one count per column (BTO id).
gnf_summary_df = (gnf_df >= 1.4).sum().reset_index()
gnf_summary_df.columns = ['bto_id', 'count']
# Right join so every tissue column in the expression data is kept, even if it
# has no matching row in bto_df.
gnf_summary_df = bto_df.merge(gnf_summary_df, how='right', on='bto_id')
# DataFrame.sort() was removed from pandas; sort_values() is its replacement.
gnf_summary_df = gnf_summary_df.sort_values('count', ascending=False)
# Plot distribution of expressed genes per tissue.
# Recent seaborn releases only honour context-scaling keys in the `rc` argument
# of set_context(), so a 'figure.figsize' entry passed there is dropped;
# set the figure size on matplotlib directly instead.
plt.rcParams['figure.figsize'] = (5, 2.5)
# 15 bins; the trailing semicolon stops the notebook from echoing hist()'s
# return value beneath the figure.
plt.hist(gnf_summary_df['count'], bins=15);
# Display the summary table: as the last expression in the cell, the notebook
# renders it as the cell output (sorted by 'count', highest first).
gnf_summary_df
| | bto_id | bto_name | cell_line | count |
---|---|---|---|---|
72 | BTO:0003335 | EBV-LCL cell | 1 | 5138 |
19 | BTO:0000725 | hematopoietic stem cell | 0 | 4996 |
28 | BTO:0000914 | natural killer cell | 0 | 4883 |
75 | BTO:0004730 | myeloid progenitor cell | 0 | 4823 |
36 | BTO:0001067 | pineal gland | 0 | 4799 |
65 | BTO:0002042 | dendritic cell | 0 | 4782 |
43 | BTO:0001175 | retina | 0 | 4631 |
70 | BTO:0002807 | prefrontal cortex | 0 | 4566 |
58 | BTO:0001379 | thyroid gland | 0 | 4534 |
24 | BTO:0000776 | B-lymphocyte | 0 | 4415 |
27 | BTO:0000876 | monocyte | 0 | 4408 |
69 | BTO:0002417 | helper T-lymphocyte | 0 | 4347 |
11 | BTO:0000289 | cytotoxic T-lymphocyte | 0 | 4298 |
44 | BTO:0001176 | endothelial cell | 0 | 4244 |
5 | BTO:0000089 | blood | 0 | 4242 |
41 | BTO:0001129 | prostate gland | 0 | 4241 |
35 | BTO:0001042 | amygdala | 0 | 4224 |
15 | BTO:0000614 | hypothalamus | 0 | 4204 |
42 | BTO:0001154 | RAJI cell | 1 | 4184 |
64 | BTO:0001561 | DAUDI cell | 1 | 4110 |
23 | BTO:0000763 | lung | 0 | 4048 |
50 | BTO:0001260 | smooth muscle | 0 | 4031 |
38 | BTO:0001078 | placenta | 0 | 3983 |
63 | BTO:0001539 | cardiac myocyte | 0 | 3975 |
37 | BTO:0001073 | hypophysis | 0 | 3952 |
71 | BTO:0002924 | NHBE cell | 1 | 3948 |
10 | BTO:0000269 | colon | 0 | 3938 |
33 | BTO:0000991 | pancreatic islet | 0 | 3932 |
16 | BTO:0000651 | small intestine | 0 | 3911 |
52 | BTO:0001279 | spinal cord | 0 | 3900 |
... | ... | ... | ... | ... |
1 | BTO:0000041 | medulla oblongata | 0 | 3463 |
9 | BTO:0000232 | cerebellum | 0 | 3462 |
8 | BTO:0000211 | caudate nucleus | 0 | 3460 |
60 | BTO:0001387 | tonsil | 0 | 3451 |
57 | BTO:0001374 | thymus | 0 | 3442 |
2 | BTO:0000045 | adrenal cortex | 0 | 3426 |
40 | BTO:0001103 | skeletal muscle | 0 | 3413 |
74 | BTO:0004238 | interstitial cell | 0 | 3396 |
47 | BTO:0001235 | seminiferous tubule | 0 | 3391 |
53 | BTO:0001325 | superior cervical ganglion | 0 | 3371 |
62 | BTO:0001424 | uterus | 0 | 3360 |
67 | BTO:0002252 | subthalamic nucleus | 0 | 3324 |
54 | BTO:0001355 | temporal lobe | 0 | 3301 |
59 | BTO:0001385 | tongue | 0 | 3290 |
30 | BTO:0000961 | olfactory bulb | 0 | 3284 |
6 | BTO:0000141 | bone marrow | 0 | 3283 |
39 | BTO:0001101 | pons | 0 | 3267 |
61 | BTO:0001388 | trachea | 0 | 3239 |
3 | BTO:0000047 | adrenal gland | 0 | 3196 |
4 | BTO:0000084 | vermiform appendix | 0 | 3146 |
32 | BTO:0000988 | pancreas | 0 | 3098 |
18 | BTO:0000671 | kidney | 0 | 3063 |
46 | BTO:0001231 | trigeminal ganglion | 0 | 2976 |
45 | BTO:0001203 | salivary gland | 0 | 2972 |
66 | BTO:0002246 | globus pallidus | 0 | 2842 |
48 | BTO:0001253 | skin | 0 | 2652 |
51 | BTO:0001264 | spinal ganglion | 0 | 2644 |
31 | BTO:0000975 | ovary | 0 | 2533 |
68 | BTO:0002320 | cardiac muscle fiber | 0 | 2469 |
49 | BTO:0001256 | parasympathetic ganglion | 0 | 2450 |
77 rows × 4 columns