#!/usr/bin/env python
# coding: utf-8

# In[1]:

from __future__ import print_function

from collections import defaultdict
import time

import requests
import numpy as np
import matplotlib.pyplot as plt

get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:

def do_request(service, a1=None, a2=None, a3=None, **kwargs):
    """Send a GET request to the GBIF REST API and return the decoded JSON.

    The URL is ``<server>/<service>[/a1][/a2][/a3]``: path segments that are
    ``None`` are skipped, and ``kwargs`` are sent as the query string.

    Raises whatever ``Response.raise_for_status()`` raises when the server
    answers with an error status.
    """
    server = 'http://api.gbif.org/v1'
    path = ''.join('/' + part for part in (a1, a2, a3) if part is not None)
    req = requests.get('%s/%s%s' % (server, service, path),
                       params=kwargs,
                       headers={'Content-Type': 'application/json'})
    # raise_for_status() is a no-op on success, so no separate `ok` check.
    req.raise_for_status()
    return req.json()


# In[3]:

req = do_request('species', 'search', q='bear')
print(req['count'])
req['results'][0]


# In[4]:

req_short = do_request('species', 'search', q='bear', rank='family')
print(req_short['count'])
bear = req_short['results'][0]
bear


# In[5]:

def get_all_records(rec_field, service, a1=None, a2=None, a3=None, **kwargs):
    """Page through a listing endpoint and return every record found.

    Each response is expected to carry ``endOfRecords``, ``limit`` and the
    list stored under ``rec_field``. Pages are requested by increasing
    ``offset`` until the server reports ``endOfRecords``, or until 100 pages
    have been fetched (an arbitrary safety cap, so the result may be
    truncated for very large listings).
    """
    max_pages = 100  # arbitrary
    records = []
    offset = 0
    for _ in range(max_pages):
        req = do_request(service, a1=a1, a2=a2, a3=a3, offset=offset, **kwargs)
        records.extend(req[rec_field])
        if req['endOfRecords']:
            break
        time.sleep(1)  # pause between consecutive page requests
        offset += req['limit']
    return records


# In[6]:

def get_leaves(nub):
    """Return the leaf records of the taxonomic tree rooted at key ``nub``.

    Recursively fetches the ``children`` of each node. A node without
    children is a leaf. Returns ``None`` when ``nub`` itself has no children
    (the caller then treats the node as a leaf), otherwise the list of all
    leaf records below it.

    NOTE(review): assumes every child record has a 'nubKey' entry; a record
    without one raises KeyError -- confirm against the API.
    """
    recs = get_all_records('results', 'species', str(nub), 'children')
    if not recs:
        return None
    leaves = []
    for rec in recs:
        rec_leaves = get_leaves(rec['nubKey'])
        if rec_leaves is None:
            leaves.append(rec)
        else:
            leaves.extend(rec_leaves)
    return leaves


# In[7]:

records = get_all_records('results', 'species', str(bear['nubKey']), 'children')
leaves = get_leaves(bear['nubKey'])


# In[8]:

#[leaf['nameType'] for leaf in leaves]
# One output line per leaf: scientific name, rank and, when one is returned,
# the first vernacular name whose language is 'eng'.
for rec in leaves:
    print(rec['scientificName'], rec['rank'], end=' ')
    vernaculars = do_request('species', str(rec['nubKey']), 'vernacularNames',
                             language='en')['results']
    for vernacular in vernaculars:
        if vernacular['language'] == 'eng':
            print(vernacular['vernacularName'], end='')
            break
    print()


# In[9]:

basis_of_record = defaultdict(int)
country = defaultdict(int)
zero_occurrences = 0
count_extinct = 0
for rec in leaves:
    #print(rec['scientificName'], rec['rank'], rec['taxonID'])
    occurrences = get_all_records('results', 'occurrence', 'search',
                                  taxonKey=rec['nubKey'])
    for occurrence in occurrences:
        basis_of_record[occurrence['basisOfRecord']] += 1
        country[occurrence.get('country', 'NA')] += 1
        # there is also publishingCountry
    profiles = do_request('species', str(rec['nubKey']), 'speciesProfiles')['results']
    # Each leaf goes into exactly one pie category so the slices form a
    # partition and 'other' below can never become negative: extinct takes
    # precedence, then "no occurrence" -- which must count leaves WITHOUT
    # occurrence records (the test used to be inverted).
    if any(profile.get('extinct', False) for profile in profiles):
        count_extinct += 1
    elif not occurrences:
        zero_occurrences += 1


# In[10]:

# Sort both tallies by ascending count; an empty tally yields empty tuples
# instead of failing on unpacking.
if country:
    countries, obs_countries = zip(*sorted(country.items(), key=lambda x: x[1]))
else:
    countries, obs_countries = (), ()
if basis_of_record:
    basis_name, basis_cnt = zip(*sorted(basis_of_record.items(), key=lambda x: x[1]))
else:
    basis_name, basis_cnt = (), ()

# Up to ten countries with the most occurrences (fewer if fewer were seen).
top_countries = countries[-10:]
top_counts = obs_countries[-10:]
top_pos = np.arange(len(top_countries))

fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(1, 2, 1)
# Bars are centred explicitly on their tick positions so the labels line up
# whatever matplotlib's default `align` is.
ax.barh(top_pos, top_counts, align='center')
ax.set_title('Top 10 countries per occurences')
ax.set_yticks(top_pos)
ax.set_ylim(-0.5, len(top_countries) - 0.5)
ax.set_yticklabels(top_countries)
#refer metadata problems

ax = fig.add_subplot(2, 2, 2)
ax.set_title('Basis of record')
basis_pos = np.arange(len(basis_name))
ax.bar(basis_pos, basis_cnt, color='g', align='center')
# Shorten the labels so they fit under the bars.
basis_name = [x.replace('OBSERVATION', 'OBS').replace('_SPECIMEN', '')
              for x in basis_name]
ax.set_xticks(basis_pos)
ax.set_xticklabels(basis_name, size='x-small')

ax = fig.add_subplot(2, 2, 4)
other = len(leaves) - zero_occurrences - count_extinct
pie_values = [zero_occurrences, count_extinct, other]
labels = ['No occurence (%d)' % zero_occurrences,
          'Extinct (%d)' % count_extinct,
          'Other (%d)' % other]
ax.pie(pie_values, labels=labels, colors=['cyan', 'magenta', 'yellow'])
ax.set_title('Status for each species')


# In[11]:

# Distribution of observations of horribilis per year
# (probably not to be included...)

# In[12]:

# Fetch the occurrence records registered under taxon key 6163845.
horribilis = get_all_records('results', 'occurrence', 'search', taxonKey=6163845)


# In[13]:

# Tally the occurrence records by their 'year' field; records without one
# are collected under the key None and left out of the plot.
years = defaultdict(int)
for record in horribilis:
    years[record.get('year')] += 1

dated_counts = [(year, count) for year, count in years.items() if year is not None]
xs, ys = zip(*dated_counts)

# One dot per year: x is the year, y the number of records for that year.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(xs, ys, '.')


# In[ ]: