from __future__ import print_function
import csv
import sys
import xml.etree.ElementTree as ET
import pandas as pd
import requests
if sys.version_info.major == 2:
from StringIO import StringIO
else:
from io import StringIO
def get_psiquic(service, query, full_url=False, **kwargs):
    """Issue a GET request against a PSICQUIC endpoint and return the body.

    Args:
        service: When ``full_url`` is false, the service path placed between
            the EBI PSICQUIC base URL and ``query``. When ``full_url`` is
            true, a URL prefix that ``query`` is appended to with no
            separator.
        query: Final part of the URL.
        full_url: Selects how the request URL is assembled (see ``service``).
        **kwargs: Sent as URL query parameters. ``format`` is set to
            ``'tab27'`` when the caller does not supply it.

    Returns:
        The ``content`` attribute of the response (the raw body).

    Raises:
        Whatever ``Response.raise_for_status()`` raises when the response is
        not OK.
    """
    kwargs.setdefault('format', 'tab27')
    if full_url:
        url = '%s%s' % (service, query)
    else:
        server = 'http://www.ebi.ac.uk/Tools/webservices/psicquic'
        url = '%s/%s/%s' % (server, service, query)
    req = requests.get(url, params=kwargs)
    # raise_for_status() is a no-op for successful responses, so no separate
    # ``ok`` check is needed before calling it.
    req.raise_for_status()
    return req.content
def get_databases(db_xml):
    """Yield one summary dict per service element of a parsed registry.

    Args:
        db_xml: Iterable of service elements (for example the root element
            returned by ``ET.fromstring``); each service is itself iterated
            for its child elements.

    Yields:
        A dict with the keys ``name``, ``active``, ``org_url``, ``example``
        and ``rest_url``. ``active`` is ``False`` only when the element text
        is exactly ``'false'`` and ``True`` for any other text. Any field
        whose element is absent from the service is ``None``.
    """
    # Namespace-stripped child tag -> key in the yielded dict.  Other child
    # tags exist in the registry and are ignored.
    tag_to_key = {
        'name': 'name',
        'active': 'active',
        'restUrl': 'rest_url',
        'restExample': 'example',
        'organizationUrl': 'org_url',
    }
    for service in db_xml:
        # Start every service from a clean record so a missing child element
        # neither raises on the first service nor silently inherits the value
        # parsed for the previous one.
        record = {'name': None, 'active': None, 'org_url': None,
                  'example': None, 'rest_url': None}
        for elem in service:
            # Tags look like '{namespace}local'; find() returns -1 when there
            # is no namespace, which makes the slice keep the whole tag.
            ns_clean_tag = elem.tag[elem.tag.find('}') + 1:]
            key = tag_to_key.get(ns_clean_tag)
            if key is None:
                continue
            if key == 'active':
                record[key] = elem.text != 'false'
            else:
                record[key] = elem.text
        yield record
# Download the PSICQUIC registry status as XML and load one row per service
# into a DataFrame.
dbs_xml = get_psiquic('registry', 'registry', action='STATUS', format='xml')
dbs_xml_parsed = ET.fromstring(dbs_xml)
dbs = pd.DataFrame.from_records(get_databases(dbs_xml_parsed))
# Widen the column display so long URLs are not truncated.
pd.options.display.max_colwidth = 100
# Explicit comparison (rather than plain boolean indexing) so rows whose
# 'active' value is not a real boolean are simply excluded.
active_dbs = dbs[dbs.active == True]  # noqa: E712
# Notebook display expression: show only the name and organisation URL.
# 'axis' must be passed by keyword; the positional form was removed in
# pandas 2.0.
active_dbs.drop(['active', 'example', 'rest_url'], axis=1)
| | name | org_url |
|---|---|---|
1 | BioGrid | http://www.thebiogrid.org/ |
2 | bhf-ucl | http://www.ucl.ac.uk/functional-gene-annotation/cardiovascular/projects |
3 | ChEMBL | http://www.ebi.ac.uk/chembl |
4 | DIP | http://dip.doe-mbi.ucla.edu/ |
5 | HPIDb | http://www.agbase.msstate.edu/hpi/main.html |
6 | InnateDB | http://www.innatedb.com |
7 | IntAct | http://www.ebi.ac.uk/intact |
8 | mentha | http://mentha.uniroma2.it/ |
9 | MPIDB | http://jcvi.org/mpidb/ |
11 | MatrixDB | http://matrixdb.ibcp.fr/ |
12 | MINT | http://mint.bio.uniroma2.it/ |
13 | Reactome | http://www.reactome.org/ |
14 | Reactome-FIs | http://www.reactome.org/ |
16 | BIND | http://www.baderlab.org |
17 | Interoporc | http://biodev.extra.cea.fr/interoporc |
20 | I2D-IMEx | http://ophid.utoronto.ca/ |
21 | InnateDB-IMEx | http://www.innatedb.ca/ |
22 | MolCon | http://www.molecularconnections.com |
23 | UniProt | http://www.uniprot.org |
24 | MBInfo | http://www.mechanobio.info/ |
25 | BindingDB | http://www.bindingdb.org |
26 | VirHostNet | http://pbildb1.univ-lyon1.fr/virhostnet |
28 | Spike | None |
# Number of tp53 interactions reported by IntAct alone.
req = get_psiquic('intact/webservices/current/search/query', 'tp53', format='count')
# get_psiquic returns the raw response body; converting to int prints the
# bare number instead of a bytes repr on Python 3.
print(int(req))
# Ask every active service for its tp53 count.
for index, db in active_dbs.iterrows():
    try:
        req = get_psiquic(db['rest_url'], 'query/tp53', full_url=True, format='count')
        count = int(req)
    except (requests.RequestException, ValueError) as err:
        # A single unreachable service, or one that answers with something
        # other than a number, should not abort the whole survey.
        print('DB: %s, error: %s' % (db['name'], err), file=sys.stderr)
        continue
    print('DB: %s, count: %d' % (db['name'], count))
4802
DB: BioGrid, count: 2375
DB: bhf-ucl, count: 14
DB: ChEMBL, count: 80
DB: DIP, count: 0
DB: HPIDb, count: 74
DB: InnateDB, count: 136
DB: IntAct, count: 4802
DB: mentha, count: 3217
DB: MPIDB, count: 0
DB: MatrixDB, count: 0
DB: MINT, count: 2158
DB: Reactome, count: 0
DB: Reactome-FIs, count: 369
DB: BIND, count: 47
DB: Interoporc, count: 0
DB: I2D-IMEx, count: 194
DB: InnateDB-IMEx, count: 5
DB: MolCon, count: 18
DB: UniProt, count: 438
DB: MBInfo, count: 0
DB: BindingDB, count: 0
DB: VirHostNet, count: 0
DB: Spike, count: 450
# Fetch up to 1000 tp53 interactions from IntAct in the default tabular
# format and collect the identifier prefixes used in the first two columns.
req = get_psiquic('intact/webservices/current/search/query', 'tp53',
                  firstResult=0, maxResults=1000)
# get_psiquic returns the raw response body, which is bytes on Python 3, and
# io.StringIO only accepts text.  On Python 2 the body is already a str and
# is passed through untouched.
# NOTE(review): assumes the service answers in UTF-8 - confirm.
mitab_text = req if isinstance(req, str) else req.decode('utf-8')
answer = csv.reader(StringIO(mitab_text), delimiter='\t')
db_types = set()
for record in answer:
    # csv.reader yields an empty list for blank lines; skip anything that
    # lacks the two identifier columns read below.
    if len(record) < 2:
        continue
    db_types.add(record[0].split(':')[0])
    db_types.add(record[1].split(':')[0])
print(db_types)
set(['uniprotkb', 'ddbj/embl/genbank', '-', 'ensembl', 'intact', 'chebi'])