# Use an example from the API documentation to confirm that we can get a JSON representation from the API.
import requests
# Documentation example: search results for the Bade Museum project,
# requested in JSON form.
json_url = "http://opencontext.org/sets/Palestinian+Authority/Tell+en-Nasbeh/.json?proj=Bade+Museum"
# A timeout keeps the cell from hanging forever if the server never answers.
r = requests.get(json_url, timeout=30)
# Fail here on an HTTP error rather than on a confusing JSON decode error below.
r.raise_for_status()
# what are the top level keys of response?
r.json().keys()
# Now apply the same logic to the Asian Stoneware Jars project.
json_url = "http://opencontext.org/sets/.json?proj=Asian+Stoneware+Jars"
# A timeout keeps the cell from hanging forever if the server never answers.
request = requests.get(json_url, timeout=30)
# Fail here on an HTTP error rather than on a JSON/KeyError further down.
request.raise_for_status()
request_json = request.json()
results = request_json['results']
request_json.keys()
# number of results matches what is on human UI
request_json['numFound']
# we get back the first page of 10
len(results)
results[0]
# list the URLs for the thumbnails (None for a result without 'thumbIcon')
[result.get('thumbIcon') for result in results]
# do a quick display
from IPython.display import HTML
from jinja2 import Template
# Stylesheet text prepended to the rendered markup; it currently holds no rules.
CSS = """
"""
# Jinja2 template that emits one thumbnail <img> per item.  The loop body was
# empty, so rendering produced no visible output.  Items without a
# 'thumbIcon' value are skipped so no broken images are emitted.
# NOTE(review): the markup is inferred from the 'thumbIcon' key listed just
# above -- confirm it matches the intended display.
IMAGES_TEMPLATE = CSS + """
{% for item in items %}
{% if item.thumbIcon %}<img src="{{ item.thumbIcon }}" alt=""/>{% endif %}
{% endfor %}
"""
# autoescape=True: the rendered values come from a remote API response, so
# they must be HTML-escaped before being placed inside the markup.
template = Template(IMAGES_TEMPLATE, autoescape=True)
HTML(template.render(items=results))
import requests
# Unfiltered listing of all sets.
url = "http://opencontext.org/sets/.json"
# A timeout keeps the cell from hanging forever if the server never answers.
r = requests.get(url, timeout=30)
# Fail here on an HTTP error rather than on a confusing JSON decode error below.
r.raise_for_status()
# Decode the body once; every r.json() call parses the whole payload again.
sets_json = r.json()
sets_json.keys()
sets_json['numFound']
sets_json['paging']['prev']
# write a generator for all items in http://opencontext.org/sets/.json
import requests
def opencontext_items(url="http://opencontext.org/sets/.json", session=None,
                      timeout=30):
    """Lazily yield every item of a paged Open Context JSON listing.

    Pages are fetched one at a time, only when the consumer asks for more
    items, following each page's ``paging.next`` link until that link is
    missing or empty.

    Args:
        url: Address of the first page.  Defaults to the full sets listing.
        session: Optional object exposing ``get(url, timeout=...)`` (for
            example a ``requests.Session``) used to fetch the pages.  When
            omitted, ``requests.get`` is used.
        timeout: Timeout in seconds passed to every page request.

    Yields:
        Each element of every page's ``results`` list, in page order.

    Raises:
        KeyError: If a page has no ``results`` entry.
        Exception: Whatever the response's ``raise_for_status()`` raises for
            an HTTP error status.
    """
    http_get = requests.get if session is None else session.get
    while url:
        response = http_get(url, timeout=timeout)
        # Stop on an HTTP error instead of trying to decode an error page.
        response.raise_for_status()
        # Decode once per page; the payload is needed for items and paging.
        page = response.json()
        for item in page['results']:
            yield item
        # A page without paging information is treated as the last page.
        url = (page.get('paging') or {}).get('next')
from itertools import islice
# Pull only the first 25 items out of the lazy generator, then render them
# with the template built earlier.
first_items = islice(opencontext_items(), 25)
results = list(first_items)
HTML(template.render(items=results))
import requests
import lxml
from lxml import etree
# Atom feed listing the projects.
url = "http://opencontext.org/projects/.atom"
# A timeout keeps the cell from hanging forever if the server never answers.
r = requests.get(url, timeout=30)
# Fail here on an HTTP error rather than handing an error page to the XML parser.
r.raise_for_status()
# Parse the raw bytes so the parser can apply the document's own encoding.
doc = etree.fromstring(r.content)
doc
# get list of titles
# Atom namespace in the "{uri}" form used to qualify tag names.
ATOM_NS = '{http://www.w3.org/2005/Atom}'
project_titles = [
    e.find(ATOM_NS + 'title').text
    for e in doc.findall(ATOM_NS + 'entry')
]
# Number the titles from 1.  A single parenthesized argument prints the same
# text under both the Python 2 print statement and the Python 3 function.
for i, title in enumerate(project_titles, 1):
    print("%d %s" % (i, title))