import collections
import csv
import os
import re
import urllib.parse

import requests
# URL of a MeSH search (the path targets the 2015 MeSH data) whose `term`
# query parameter is left empty; the search term is appended to this string.
MeSH_url = 'http://www.nlm.nih.gov/cgi/mesh/2015/MB_cgi?mode=&term='
# Matches the "Unique ID" table cell in the HTML page returned by the search.
_UNIQUE_ID_PATTERN = re.compile(r'Unique ID</TH><TD colspan=1>(.+?)<')


def MeSH_match(disease):
    """Look up the unique MeSH ID for a single disease name.

    Appends the disease name as the search term to ``MeSH_url``, fetches the
    resulting page, and extracts the value of its "Unique ID" table cell.

    Args:
        disease: MeSH disease name to search for.

    Returns:
        A two-element list: the disease name exactly as given, followed by
        the unique MeSH ID found on the returned page.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server responds with an HTTP error status.
        ValueError: if the returned page contains no "Unique ID" entry.
    """
    # quote_plus turns spaces into '+' (as before) and additionally escapes
    # characters such as '&', '#' and '+' that would otherwise corrupt the
    # query string.
    url = MeSH_url + urllib.parse.quote_plus(disease)
    # Without a timeout a stalled server would hang the whole batch forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    match = _UNIQUE_ID_PATTERN.search(response.text)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            'No MeSH Unique ID found for disease {!r} at {}'.format(disease, url)
        )
    return [disease, match.group(1)]
# Print the working directory, since the input file path below is relative.
print(os.getcwd())
# Read the disease names (one per line) from the input file into your_list.
# Trailing whitespace/newlines are stripped and blank lines are skipped so
# that no empty search term is produced. The `with` block closes the file
# even if reading fails.
with open('disease_input.txt', 'r') as f:
    your_list = [name for name in (line.rstrip() for line in f) if name]
# Run MeSH_match on every disease in your_list and collect the resulting
# [name, mesh_id] pairs in a new list (output1).
output1 = [MeSH_match(disease) for disease in your_list]
# Write the MeSH disease names matched with their unique MeSH IDs to a
# tab-separated file. newline='' lets the csv module control line endings
# itself; otherwise an extra '\r' is written on platforms that use '\r\n'.
with open('disease_output.tsv', 'w', newline='') as f:
    writer = csv.writer(f, delimiter='\t')
    writer.writerow(['name', 'mesh_id'])
    writer.writerows(output1)