# Exploration of the Open Context JSON and Atom APIs (flattened notebook session).
#
# Bare expressions such as ``r.json().keys()`` are kept on purpose: in the
# notebook they display their value as the cell output.

from itertools import islice

import lxml
import requests
from IPython.display import HTML
from jinja2 import Template
from lxml import etree

# --- 1. Confirm the documented example returns JSON -------------------------

# using an example in the API documentation to confirm that we can get a json
# representation from the API
json_url = "http://opencontext.org/sets/Palestinian+Authority/Tell+en-Nasbeh/.json?proj=Bade+Museum"
r = requests.get(json_url)

# what are the top level keys of the response?
r.json().keys()

# --- 2. Same logic for the Asian Stoneware Jars project ---------------------

json_url = "http://opencontext.org/sets/.json?proj=Asian+Stoneware+Jars"
request = requests.get(json_url)
request_json = request.json()
results = request_json['results']
request_json.keys()

# number of results matches what is on the human UI
request_json['numFound']

# we get back the first page of 10
len(results)
results[0]

# list the URLs for the thumbnails (None for results without a 'thumbIcon')
[result.get('thumbIcon') for result in results]

# --- 3. Quick thumbnail display ---------------------------------------------

CSS = """ """

# NOTE(review): the markup inside the loop was empty in the source (it looks
# like the HTML was stripped at some point), so the template rendered nothing.
# The body below is a minimal reconstruction built on the 'thumbIcon' key used
# above -- confirm against the original notebook if richer markup is needed.
# The value is escaped because it comes from a remote API response.
IMAGES_TEMPLATE = CSS + """
{% for item in items %}{% if item.thumbIcon %}<img src="{{ item.thumbIcon|e }}">{% endif %}{% endfor %}
"""

template = Template(IMAGES_TEMPLATE)
HTML(template.render(items=results))

# --- 4. Look at the unfiltered result set -----------------------------------

url = "http://opencontext.org/sets/.json"
r = requests.get(url)
sets_json = r.json()  # parse the body once instead of once per lookup
sets_json.keys()
sets_json['numFound']
sets_json['paging']['prev']


# --- 5. Generator over all items in http://opencontext.org/sets/.json -------

def opencontext_items(url="http://opencontext.org/sets/.json"):
    """Yield every item of a paged Open Context result set.

    Fetches *url*, yields each entry of the page's ``results`` list, then
    follows the page's ``paging.next`` link. Iteration stops when a page has
    no next link (missing, ``None`` or empty).

    Args:
        url: Address of the first JSON page. Defaults to the full ``sets``
            listing, which is what the original zero-argument call fetched.

    Yields:
        The entries of each page's ``results`` list, in page order.

    Raises:
        requests.HTTPError: If a page request returns an error status.
        KeyError: If a page's JSON has no ``results`` key.
    """
    while url:
        response = requests.get(url)
        # Fail loudly on an HTTP error rather than trying to decode an error page.
        response.raise_for_status()
        page = response.json()  # parse once; used for both results and paging
        for item in page['results']:
            yield item
        # The last page may carry no usable 'next' link; a falsy value ends the loop.
        url = (page.get('paging') or {}).get('next')


# Only the first 25 items are pulled; islice stops the generator early so no
# further pages are requested.
results = list(islice(opencontext_items(), 25))
HTML(template.render(items=results))

# --- 6. Project titles from the Atom feed -----------------------------------

ATOM_NS = '{http://www.w3.org/2005/Atom}'

url = "http://opencontext.org/projects/.atom"
r = requests.get(url)

doc = etree.fromstring(r.content)
doc

# get list of titles
project_titles = [entry.find(ATOM_NS + 'title').text
                  for entry in doc.findall(ATOM_NS + 'entry')]

# %-formatting inside print(...) gives the same "N title" output on Python 2
# and Python 3; the original bare print statement was Python 2 only.
for (i, title) in enumerate(project_titles):
    print("%d %s" % (i + 1, title))