#!/usr/bin/env python
# coding: utf-8

# In[1]:

import collections
import csv
import os
import re
from urllib.parse import quote_plus

import requests


# In[2]:

# Base URL of a MeSH browser search with an empty search term; the
# URL-encoded disease name is appended to it to form the query.
# NOTE(review): this is a year-specific (2015) endpoint -- confirm that it is
# still being served before relying on this script.
MeSH_url = 'http://www.nlm.nih.gov/cgi/mesh/2015/MB_cgi?mode=&term='

# Captures the text that follows the "Unique ID" label in the returned page,
# up to (not including) the next "<".
_UNIQUE_ID_RE = re.compile(r'Unique ID(.+?)<')

# Seconds to wait on the server before abandoning a lookup; without a timeout
# a stalled connection would hang the whole batch indefinitely.
_REQUEST_TIMEOUT = 30


# In[3]:

def MeSH_match(disease):
    """Look up a single disease name in the MeSH browser.

    The name is URL-encoded, appended to ``MeSH_url`` and fetched; the text
    following the "Unique ID" label in the response is extracted.

    Args:
        disease: MeSH disease name to search for.

    Returns:
        A two-element list ``[disease, unique_id]``: the name exactly as
        given, followed by the text captured after "Unique ID".

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: if the response contains no "Unique ID" entry.
    """
    # quote_plus turns spaces into '+' (as before) and additionally escapes
    # characters such as '&', '#' and '+' that would corrupt the query string.
    url = MeSH_url + quote_plus(disease)

    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()

    match = _UNIQUE_ID_RE.search(response.text)
    if match is None:
        # Fail with the offending name instead of an opaque AttributeError
        # raised by calling .group() on None.
        raise ValueError(
            'no MeSH Unique ID found for {!r}'.format(disease)
        )
    return [disease, match.group(1)]


# In[5]:

# Show the working directory, since the input and output paths are relative.
print(os.getcwd())

# Read the list of diseases (one per line), dropping trailing whitespace and
# skipping blank lines so they are not sent as empty searches.
with open('disease_input.txt', 'r') as f:
    your_list = [x.rstrip() for x in f if x.strip()]

# Run MeSH_match on every disease name; each result is a [name, mesh_id] pair.
output1 = [MeSH_match(disease) for disease in your_list]


# In[6]:

# Write a tab-separated file pairing each MeSH disease name with its unique
# MeSH ID. newline='' lets the csv module control line endings itself, which
# avoids spurious blank rows on Windows.
with open('disease_output.tsv', 'w', newline='') as f:
    writer = csv.writer(f, delimiter='\t')
    writer.writerow(['name', 'mesh_id'])
    writer.writerows(output1)


# In[ ]:


# In[ ]: