A refresher on HTTP might be in order: http://mashupguide.net/1.0/html/ch06s04.xhtml#d0e10474
I like using requests even though there are several alternatives: http://lumberjaph.net/python/2012/02/17/HTTP_requests_with_python.html
http://docs.python-requests.org/en/latest/
enpkg knows about requests:
enpkg -s
yields
Name Versions Note
------------------------------------------------------------
requests 0.3.0-1
0.3.1-1
0.3.2-1
0.4.1-1
0.5.0-1
0.6.1-1
0.6.4-1
0.7.4-1
0.9.0-1
0.9.1-1
0.9.3-1
0.10.1-1
Normally, I like to use geocoder.us as an example -- see my book http://mashupguide.net/1.0/html/ch13s06.xhtml#d0e21349 -- but there has been overuse by bad actors, so the example below uses the Google Geocoding API instead.
import requests
import json
# Google Geocoding API reference:
#   https://developers.google.com/maps/documentation/geocoding/
# Ask Google to geocode a street address; the JSON reply carries lat/lng.
# The literal is split across lines only for readability -- adjacent string
# literals are concatenated into the same single URL.
url = (
    "http://maps.googleapis.com/maps/api/geocode/json"
    "?address=1600+Amphitheatre+Parkway,+Mountain+View,+CA"
    "&sensor=false"
)
r = requests.get(url)
<Response [200]>
r.status_code
200
r.headers['content-type']
'application/json; charset=UTF-8'
r.text
u'{\n "results" : [\n {\n "address_components" : [\n {\n "long_name" : "1600",\n "short_name" : "1600",\n "types" : [ "street_number" ]\n },\n {\n "long_name" : "Amphitheatre Parkway",\n "short_name" : "Amphitheatre Pkwy",\n "types" : [ "route" ]\n },\n {\n "long_name" : "Mountain View",\n "short_name" : "Mountain View",\n "types" : [ "locality", "political" ]\n },\n {\n "long_name" : "Santa Clara",\n "short_name" : "Santa Clara",\n "types" : [ "administrative_area_level_2", "political" ]\n },\n {\n "long_name" : "California",\n "short_name" : "CA",\n "types" : [ "administrative_area_level_1", "political" ]\n },\n {\n "long_name" : "United States",\n "short_name" : "US",\n "types" : [ "country", "political" ]\n },\n {\n "long_name" : "94043",\n "short_name" : "94043",\n "types" : [ "postal_code" ]\n }\n ],\n "formatted_address" : "1600 Amphitheatre Parkway, Mountain View, CA 94043, USA",\n "geometry" : {\n "location" : {\n "lat" : 37.42219410,\n "lng" : -122.08459320\n },\n "location_type" : "ROOFTOP",\n "viewport" : {\n "northeast" : {\n "lat" : 37.42354308029149,\n "lng" : -122.0832442197085\n },\n "southwest" : {\n "lat" : 37.42084511970850,\n "lng" : -122.0859421802915\n }\n }\n },\n "types" : [ "street_address" ]\n }\n ],\n "status" : "OK"\n}\n'
r.json()
{u'results': [{u'address_components': [{u'long_name': u'1600', u'short_name': u'1600', u'types': [u'street_number']}, {u'long_name': u'Amphitheatre Parkway', u'short_name': u'Amphitheatre Pkwy', u'types': [u'route']}, {u'long_name': u'Mountain View', u'short_name': u'Mountain View', u'types': [u'locality', u'political']}, {u'long_name': u'Santa Clara', u'short_name': u'Santa Clara', u'types': [u'administrative_area_level_2', u'political']}, {u'long_name': u'California', u'short_name': u'CA', u'types': [u'administrative_area_level_1', u'political']}, {u'long_name': u'United States', u'short_name': u'US', u'types': [u'country', u'political']}, {u'long_name': u'94043', u'short_name': u'94043', u'types': [u'postal_code']}], u'formatted_address': u'1600 Amphitheatre Parkway, Mountain View, CA 94043, USA', u'geometry': {u'location': {u'lat': 37.4221941, u'lng': -122.0845932}, u'location_type': u'ROOFTOP', u'viewport': {u'northeast': {u'lat': 37.42354308029149, u'lng': -122.0832442197085}, u'southwest': {u'lat': 37.4208451197085, u'lng': -122.0859421802915}}}, u'types': [u'street_address']}], u'status': u'OK'}
r.json()['results'][0]['geometry']['location']
{u'lat': 37.4221941, u'lng': -122.0845932}
import requests
from lxml.html import parse
from StringIO import StringIO
# Fetch a Berkeley online-schedule listing and print the enrollment figures
# found in the last matching <tt> element.
# (Query presumably means: Spring term, INFO department, instructor "yee".)
ry_class_url = "http://osoc.berkeley.edu/OSOC/osoc?p_term=SP&p_deptname=INFO&p_instr=yee"
r = requests.get(ry_class_url)
# Fail loudly on an HTTP error instead of silently scraping an error page.
r.raise_for_status()
doc = parse(StringIO(r.content)).getroot()
# Every <tt> nested under a <table> that is the second table among its siblings.
course_tts = doc.cssselect('table:nth-of-type(2) tt')
# "Avail Seats" contains a space, so glue it together with an underscore
# before splitting the text on single spaces.
enrollment_text = course_tts[-1].text_content().replace("Avail Seats", "Avail_Seats")
# [:-1] drops the final piece of the split (presumably an empty/trailing
# fragment -- confirm against the live page).
# The parenthesised single-argument form prints the same list under both the
# Python 2 print statement and the Python 3 print function.
print(enrollment_text.split(" ")[:-1])
['Limit:40', 'Enrolled:31', 'Waitlist:0', 'Avail_Seats:9']