%pylab --no-import-all inline
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series, Index
import pandas as pd
# check that CENSUS_KEY is defined
# `settings` must provide CENSUS_KEY; it is the default `api_key` of the
# URL helpers defined further down in this file.
import settings
# stop right here (AssertionError) if no key has been configured
assert settings.CENSUS_KEY is not None
# http://api.census.gov/data/2010/sf1/geo.html
from IPython.core.display import HTML
# build an HTML display object from an (empty) markup string
HTML("")
%%HTML
hi there
import urlparse
import urllib
from IPython.core.display import HTML
def add_census_key(url, api_key=settings.CENSUS_KEY):
    """Return `url` with its query string extended by ``key=api_key``.

    The query string of `url` is parsed into a dict, the ``key`` entry is
    set to `api_key` (replacing any ``key`` already present), and the URL
    is reassembled with the re-encoded query.
    """
    parsed = urlparse.urlparse(url)
    # parse_qs turns the query string into a dict of name -> list of values
    query = urlparse.parse_qs(parsed.query)
    query["key"] = api_key
    # doseq=True encodes each element of a list value as its own name=value
    # pair (see http://stackoverflow.com/a/10233141/7782)
    encoded_query = urllib.urlencode(query, doseq=True)
    # swap in the new query component and rebuild the URL string
    return urlparse.urlunparse(parsed._replace(query=encoded_query))
def c_url(url, title=None, api_key=settings.CENSUS_KEY):
    """Return an IPython ``HTML`` link to `url` with the Census key added.

    `url` is passed through ``add_census_key`` so the link target carries
    ``key=api_key``.  `title` is the link text; when it is None the
    original `url` (without the key) is used as the text.
    """
    url_with_key = add_census_key(url, api_key)
    if title is None:
        title = url
    # NOTE(review): the template previously contained only "{title}", so the
    # `url` keyword handed to format() was never used and no link was
    # produced.  The anchor markup is reconstructed from those keywords --
    # confirm against the original notebook.
    return HTML(
        '<a href="{url}">{title}</a>'.format(url=url_with_key, title=title)
    )
#add_census_key("http://api.census.gov/data/2010/sf1?get=P0010001&for=county:*")
# Example call: a state-level SF1 query; api_key falls back to its default,
# settings.CENSUS_KEY.
c_url("http://api.census.gov/data/2010/sf1?get=NAME,P0010001&for=state:*")
import requests
from lxml.html import parse, fromstring
# Download the Census 2010 SF1 geography page and parse it with lxml.
url = "http://api.census.gov/data/2010/sf1/geo.html"
r = requests.get(url).content
doc = fromstring(r)
# every <tr> that is a direct child of a <table>
rows = doc.xpath("//table/tr")
# first row is the header
# (`headers` is also read as a global by decorated_parse_examples)
headers = [col.text for col in rows[0].findall('th')]
# bare expression -- presumably left so the notebook echoes the header list
headers
# next rows are the census URL examples
row = rows[1]
cols = row.findall('td')
# cols[0]: Summary Level
print cols[0].text
# cols[1]: Description
print cols[1].text
from itertools import islice
from lxml.html import parse
# let's actually now decorate the urls
def decorated_parse_examples(examples, api_key=settings.CENSUS_KEY):
    """Yield a copy of each row in `examples` with its URLs turned into links.

    Each row is a dict; the entry under ``headers[2]`` (``headers`` is the
    module-level header list) is an iterable of example URL strings.  In
    the yielded copy that entry is replaced by one HTML string in which
    every URL is a link whose target carries ``key=api_key``.  The input
    rows themselves are not modified.
    """
    for row in examples:
        new_row = row.copy()
        # need to change URLs
        urls_header = headers[2]
        example_urls_col = new_row[urls_header]
        # NOTE(review): the separator literal was split across two lines (a
        # syntax error) and the template contained only "{url}", leaving the
        # `url_with_key` keyword unused.  "<br/>" and the anchor markup are
        # reconstructed from the format() keywords -- confirm against the
        # original notebook.
        new_row[urls_header] = "<br/>".join(
            '<a href="{url_with_key}">{url}</a>'.format(
                url=url,
                # forward api_key: it used to be dropped, so the default key
                # was always used regardless of the argument
                url_with_key=add_census_key(url, api_key),
            )
            for url in example_urls_col
        )
        yield new_row
def parse_urls_col(col):
    """Return the text fragments produced by ``col.itertext()`` as a list.

    `col` is any object with an ``itertext()`` method -- presumably the
    lxml ``<td>`` element holding the example URLs (see the caller).
    """
    # http://stackoverflow.com/a/15074386/7782
    return list(col.itertext())
def parse_census_examples():
    """Yield one dict per example row of the Census SF1 geography table.

    The page is parsed straight from its URL.  The text of the ``<th>``
    cells in the first table row supplies the dict keys; for every later
    row the first two ``<td>`` cells contribute their text and the third
    is expanded with ``parse_urls_col``.
    """
    page = parse("http://api.census.gov/data/2010/sf1/geo.html")
    table_rows = page.xpath("//table/tr")
    # first row is the header
    column_names = [th.text for th in table_rows[0].findall('th')]
    for tr in table_rows[1:]:
        cells = tr.findall('td')
        record = {
            column_names[0]: cells[0].text,
            column_names[1]: cells[1].text,
            column_names[2]: parse_urls_col(cells[2]),
        }
        yield record
#parsed_examples = list(islice(parse_census_examples(),None))
# parse_census_examples is a generator function: nothing is fetched until
# this object is iterated, and it can be iterated only once.
parsed_examples = parse_census_examples()
# let's redisplay the table with
from IPython.display import HTML
from jinja2 import Template
URLS_TEMPLATE= """
{{header}} | {% endfor %}
---|
{{row[header]}} | {% endfor %}