In [1]:

from __future__ import print_function

import csv
import sys
import xml.etree.ElementTree as ET
    
import pandas as pd
import requests

if sys.version_info.major == 2:
    from StringIO import StringIO
else:
    from io import StringIO

In [2]:

def get_psiquic(service, query, full_url=False, **kwargs):
    """Issue a GET request to a PSICQUIC REST endpoint and return the raw body.

    Parameters
    ----------
    service : str
        When ``full_url`` is false, a path below the EBI PSICQUIC server.
        When ``full_url`` is true, a complete base URL; ``query`` is appended
        to it directly, with no separator inserted.
    query : str
        Final part of the request URL.
    full_url : bool, optional
        Selects how ``service`` is interpreted (see above).
    **kwargs
        Sent as URL query parameters. ``format`` defaults to ``'tab27'`` when
        not given. ``timeout`` (seconds, default 60) is consumed here and
        handed to ``requests`` instead of being sent to the server.

    Returns
    -------
    The undecoded response body (``Response.content``).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.RequestException
        For connection problems and timeouts.
    """
    # Without a timeout requests waits forever on an unresponsive server,
    # which would hang the whole notebook run.
    timeout = kwargs.pop('timeout', 60)
    kwargs.setdefault('format', 'tab27')
    if full_url:
        url = '%s%s' % (service, query)
    else:
        server = 'http://www.ebi.ac.uk/Tools/webservices/psicquic'
        url = '%s/%s/%s' % (server, service, query)
    req = requests.get(url, params=kwargs, timeout=timeout)
    # raise_for_status() is a no-op for successful responses.
    req.raise_for_status()
    return req.content

In [3]:

def get_databases(db_xml):
    """Yield one record per service element of a parsed registry document.

    Parameters
    ----------
    db_xml : iterable of elements
        Parsed XML root whose children are the service entries; each entry's
        children carry the individual properties.

    Yields
    ------
    dict
        Keys ``name``, ``active``, ``org_url``, ``example`` and ``rest_url``.
        ``active`` is ``False`` only when the element text is exactly
        ``'false'``. Any property absent from a service entry is ``None``.
    """
    # Namespace-stripped tag -> key of the yielded record (plain text fields).
    text_fields = {
        'name': 'name',
        'restUrl': 'rest_url',
        'restExample': 'example',
        'organizationUrl': 'org_url',
    }
    for service in db_xml:
        # Start from a fresh record for every service so that a missing
        # element cannot inherit the value seen in the previous service.
        record = {'name': None, 'active': None, 'org_url': None,
                  'example': None, 'rest_url': None}
        for elem in service:
            # Drop the '{namespace}' prefix ElementTree puts on tags.
            ns_clean_tag = elem.tag.split('}', 1)[-1]
            if ns_clean_tag == 'active':
                record['active'] = elem.text != 'false'
            elif ns_clean_tag in text_fields:
                record[text_fields[ns_clean_tag]] = elem.text
            # Other elements exist in the registry and are ignored here.
        yield record

# Fetch the registry status document as XML and tabulate one row per service.
dbs_xml = get_psiquic('registry', 'registry', action='STATUS', format='xml')
dbs_xml_parsed = ET.fromstring(dbs_xml)
dbs = pd.DataFrame.from_records(get_databases(dbs_xml_parsed))

pd.options.display.max_colwidth = 100
# Explicit comparison keeps only rows whose flag is exactly True.
active_dbs = dbs[dbs['active'] == True]
# `axis` must be passed by keyword: the positional form was removed in pandas 2.0.
active_dbs.drop(['active', 'example', 'rest_url'], axis=1)

Out[3]:

	name	org_url
1	BioGrid	http://www.thebiogrid.org/
2	bhf-ucl	http://www.ucl.ac.uk/functional-gene-annotation/cardiovascular/projects
3	ChEMBL	http://www.ebi.ac.uk/chembl
4	DIP	http://dip.doe-mbi.ucla.edu/
5	HPIDb	http://www.agbase.msstate.edu/hpi/main.html
6	InnateDB	http://www.innatedb.com
7	IntAct	http://www.ebi.ac.uk/intact
8	mentha	http://mentha.uniroma2.it/
9	MPIDB	http://jcvi.org/mpidb/
11	MatrixDB	http://matrixdb.ibcp.fr/
12	MINT	http://mint.bio.uniroma2.it/
13	Reactome	http://www.reactome.org/
14	Reactome-FIs	http://www.reactome.org/
16	BIND	http://www.baderlab.org
17	Interoporc	http://biodev.extra.cea.fr/interoporc
20	I2D-IMEx	http://ophid.utoronto.ca/
21	InnateDB-IMEx	http://www.innatedb.ca/
22	MolCon	http://www.molecularconnections.com
23	UniProt	http://www.uniprot.org
24	MBInfo	http://www.mechanobio.info/
25	BindingDB	http://www.bindingdb.org
26	VirHostNet	http://pbildb1.univ-lyon1.fr/virhostnet
28	Spike	None

In [4]:

# Reference count straight from the IntAct endpoint on the EBI server.
req = get_psiquic('intact/webservices/current/search/query', 'tp53', format='count')
# Parse before printing so Python 3 shows 4802 rather than b'4802'.
print(int(req))
# Ask every active registry service for its own tp53 interaction count.
for _, db in active_dbs.iterrows():
    try:
        req = get_psiquic(db['rest_url'], 'query/tp53', full_url=True, format='count')
        count = int(req)
    except (requests.RequestException, ValueError) as exc:
        # One unreachable or misbehaving service must not abort the survey.
        print('DB: %s, failed: %s' % (db['name'], exc))
        continue
    print('DB: %s, count: %d' % (db['name'], count))

4802
DB: BioGrid, count: 2375
DB: bhf-ucl, count: 14
DB: ChEMBL, count: 80
DB: DIP, count: 0
DB: HPIDb, count: 74
DB: InnateDB, count: 136
DB: IntAct, count: 4802
DB: mentha, count: 3217
DB: MPIDB, count: 0
DB: MatrixDB, count: 0
DB: MINT, count: 2158
DB: Reactome, count: 0
DB: Reactome-FIs, count: 369
DB: BIND, count: 47
DB: Interoporc, count: 0
DB: I2D-IMEx, count: 194
DB: InnateDB-IMEx, count: 5
DB: MolCon, count: 18
DB: UniProt, count: 438
DB: MBInfo, count: 0
DB: BindingDB, count: 0
DB: VirHostNet, count: 0
DB: Spike, count: 450

In [5]:

# Download the first 1000 tp53 interaction records from IntAct (tab-separated).
req = get_psiquic('intact/webservices/current/search/query', 'tp53',
                  firstResult=0, maxResults=1000)
# get_psiquic returns the undecoded body. On Python 3 that is bytes, which
# io.StringIO rejects, so decode first (UTF-8 assumed -- TODO confirm against
# the service). On Python 2 the body is already a str and is left alone.
if not isinstance(req, str):
    req = req.decode('utf-8')
answer = csv.reader(StringIO(req), delimiter='\t')
# Collect the database prefix (text before the first ':') of the first two
# columns of every record.
db_types = set()
for record in answer:
    if len(record) < 2:
        # Blank or truncated line: nothing to extract.
        continue
    db_types.update(field.split(':')[0] for field in record[:2])
print(db_types)

set(['uniprotkb', 'ddbj/embl/genbank', '-', 'ensembl', 'intact', 'chebi'])

In [ ]: