#!/usr/bin/env python # coding: utf-8 # In[1]: from __future__ import print_function import csv import sys import xml.etree.ElementTree as ET import pandas as pd import requests if sys.version_info.major == 2: from StringIO import StringIO else: from io import StringIO # In[2]: def get_psiquic(service, query, full_url=False, **kwargs): kwargs['format'] = kwargs.get('format', 'tab27') if full_url: req = requests.get('%s%s' % (service, query), params=kwargs) else: server = 'http://www.ebi.ac.uk/Tools/webservices/psicquic' req = requests.get('%s/%s/%s' % (server, service, query), params=kwargs) if not req.ok: req.raise_for_status() return req.content # In[3]: def get_databases(db_xml): for service in db_xml: for elem in service: ns_clean_tag = elem.tag[elem.tag.find('}') + 1:] if ns_clean_tag == 'name': name = elem.text elif ns_clean_tag == 'active': active = False if elem.text == 'false' else True elif ns_clean_tag == 'restUrl': rest_url = elem.text elif ns_clean_tag == 'restExample': example = elem.text elif ns_clean_tag == 'organizationUrl': org_url = elem.text else: pass # there are a few more yield {'name': name, 'active': active, 'org_url': org_url, 'example': example, 'rest_url': rest_url} dbs_xml = get_psiquic('registry', 'registry', action='STATUS', format='xml') dbs_xml_parsed = ET.fromstring(dbs_xml) dbs = pd.DataFrame.from_records(get_databases(dbs_xml_parsed)) pd.options.display.max_colwidth = 100 active_dbs = dbs[dbs.active==True] active_dbs.drop(['active', 'example', 'rest_url'], 1) # In[4]: req = get_psiquic('intact/webservices/current/search/query', 'tp53', format='count') print(req) for index, db in active_dbs.iterrows(): req = get_psiquic(db['rest_url'], 'query/tp53', full_url=True, format='count') count = int(req) print('DB: %s, count: %d' % ( db['name'], count)) # In[5]: req = get_psiquic('intact/webservices/current/search/query', 'tp53', firstResult=0, maxResults=1000) answer = csv.reader(StringIO(req), delimiter='\t') db_types = set() for record in answer: db_types.add(record[0].split(':')[0]) db_types.add(record[1].split(':')[0]) print(db_types) # In[ ]: