import os
import csv
import re
import networkx
import pandas
import do_tools
! svn checkout svn://svn.code.sf.net/p/diseaseontology/code/trunk/ download
U download/HumanDO.obo Checked out revision 2816.
# Load the Disease Ontology OBO file and convert it to a networkx graph
# whose nodes are the term objects.
path = os.path.join('download', 'HumanDO.obo')
do = do_tools.load_do(path)
dox = do_tools.do_to_networkx(do)

# Create a table of descriptions: one row per term, holding its id, name and
# the text captured between the leading double quotes of its definition.
pattern = re.compile(r'^"(.*?)"')
rows = []
for term in dox:
    match = pattern.search(term.definition)
    # Terms whose definition does not start with a quoted string get an
    # empty description.
    description = ''
    if match:
        description = match.group(1)
    rows.append((term.id, term.name, description))

description_df = pandas.DataFrame(rows, columns=['disease_id', 'name', 'description'])
description_df = description_df.sort_values('disease_id')
description_df.to_csv('data/description.tsv', sep='\t', index=False)
description_df.head(2)
| | disease_id | name | description |
|---|---|---|---|
| 1509 | DOID:0001816 | angiosarcoma | A malignant vascular tumor that results_in rap... |
| 3043 | DOID:0002116 | pterygium | |
# Maps resource prefixes as they appear in the ontology's xrefs to the
# shorter resource names written to the output files. Prefixes that are not
# listed here are written unchanged.
xref_rename = dict(
    ICD10CM='ICD10',
    ICD9CM='ICD9',
    NCI2009_04D='NCI',
    SNOMEDCT_2010_1_31='SNOMEDCT',
    SNOMEDCT_2013_01_31='SNOMEDCT',
    UMLS_CUI='UMLS',
)
def write_xref_row(writer, doid_code, doid_name, xrefs, rename_dict):
    """Write the cross-references of one term as sorted rows.

    Each xref is split at its first colon into a resource prefix and a
    resource identifier. A prefix found in ``rename_dict`` is replaced by
    its mapped name; any other prefix is kept as-is.

    Args:
        writer: object with a ``writerows`` method (e.g. a ``csv.writer``).
        doid_code: term identifier, written as the first column.
        doid_name: term name, written as the second column.
        xrefs: iterable of ``'RESOURCE:identifier'`` strings.
        rename_dict: mapping from raw resource prefix to output name.

    Raises:
        ValueError: if an xref contains no colon.
    """
    split_xrefs = (xref.split(':', 1) for xref in xrefs)
    records = sorted(
        [doid_code, doid_name, rename_dict.get(resource, resource), resource_id]
        for resource, resource_id in split_xrefs
    )
    writer.writerows(records)
# Write every term's cross-references to two TSV files:
#   data/xrefs.tsv       - only the xrefs listed on the term itself
#   data/xrefs-prop.tsv  - the term's xrefs plus the xrefs of every node
#                          returned by networkx.ancestors(dox, term)
# Both files are opened in a single `with` statement so they are closed even
# if an error occurs part-way through. newline='' is what the csv module
# requires for files handed to csv.writer; without it an extra '\r' is written
# on platforms that translate line endings.
with open(os.path.join('data', 'xrefs.tsv'), 'w', newline='') as file_unprop, \
        open(os.path.join('data', 'xrefs-prop.tsv'), 'w', newline='') as file_prop:
    writer_unprop = csv.writer(file_unprop, delimiter='\t')
    writer_prop = csv.writer(file_prop, delimiter='\t')
    for writer in writer_unprop, writer_prop:
        writer.writerow(['doid_code', 'doid_name', 'resource', 'resource_id'])

    # NOTE(review): topological_sort_recursive appears to exist only in
    # networkx 1.x (removed in 2.0) - confirm the pinned version before
    # upgrading networkx.
    for term in networkx.topological_sort_recursive(dox, reverse=True):
        xrefs = set(term.xrefs)
        # Start from a copy so the term's own xref set is not modified.
        xrefs_prop = set(xrefs)
        for ancestor in networkx.ancestors(dox, term):
            xrefs_prop |= set(ancestor.xrefs)
        write_xref_row(writer_unprop, term.id, term.name, xrefs, xref_rename)
        write_xref_row(writer_prop, term.id, term.name, xrefs_prop, xref_rename)
# List the distinct resource names found in the unpropagated xref table.
import pandas
path = os.path.join('data', 'xrefs.tsv')
xref_df = pandas.read_csv(path, sep='\t')
set(xref_df['resource'])
{'CSP', 'CTV3', 'EFO', 'EFOpat_id', 'HP', 'ICD10', 'ICD9', 'KEGG', 'MEDDRA', 'MSH', 'MTH', 'NCI', 'NDFRT', 'OMIM', 'ORDO', 'Orphanet', 'SNOMEDCT', 'UMLS', 'WHO'}
# Create a name-to-term mapping: one row for each term's primary name and one
# row for each of its synonyms.
rows = list()
for term in dox:
    rows.append({'doid': term.id, 'name': term.name, 'type': 'name'})
    for synonym in term.synonyms:
        # synonym[0] is written as the name; synonym[1] is lowercased and
        # used as the prefix of the row's type label.
        rows.append({
            'doid': term.id,
            'name': synonym[0],
            'type': '{}-synonym'.format(synonym[1].lower()),
        })

path = os.path.join('data', 'term-names.tsv')
# newline='' is what the csv module requires for files handed to a csv writer;
# without it an extra '\r' is written on platforms that translate line endings.
with open(path, 'w', newline='') as write_file:
    writer = csv.DictWriter(write_file, delimiter='\t', fieldnames=['doid', 'name', 'type'])
    writer.writeheader()
    writer.writerows(rows)