We will be working with [Freebase](http://dev.freebase.com) and OpenRefine throughout the semester. Today, I want to get us set up to use the API -- the goal today is simply for all of you to go get a Google API key and configure it.
Follow instructions at
http://wiki.freebase.com/wiki/Freebase_API#Getting_Started
to get a key. You may need to go to the Services section at https://code.google.com/apis/console/b/0/ to make sure Freebase access is turned on:
Then go to the API Access screen and click the Create New Browser Key button at the bottom of the page to get a key.
Make a CREDENTIALS.py in the same directory as your IPython notebooks to hold this key:
FREEBASE_KEY = '[INSERT_YOUR_KEY]'
You need to configure a "Key for browser apps (with referers)".
# https://dev.freebase.com/astronomy/planet?instances
# http://wiki.freebase.com/wiki/Google_API_Client_Libraries#Python
from apiclient import discovery
from apiclient import model
import json
from CREDENTIALS import FREEBASE_KEY
# Smoke test for the Freebase API key: ask Freebase for every topic typed
# /astronomy/planet, print the names, and check them against the expected list.

# The key is read from the local CREDENTIALS.py module.
DEVELOPER_KEY = FREEBASE_KEY

# NOTE(review): presumably blanking alt_param keeps the client from asking
# for its default JSON handling so that execute() hands back the raw response
# body -- which is why json.loads() is applied below. Confirm against the
# Google API client documentation.
model.JsonModel.alt_param = ""
freebase = discovery.build('freebase', 'v1', developerKey=DEVELOPER_KEY)

# The query is sent as JSON text; None is serialized to JSON null by json.dumps.
query = [{'id': None, 'name': None, 'type': '/astronomy/planet'}]
response = json.loads(freebase.mqlread(query=json.dumps(query)).execute())

planets = []
for planet in response['result']:
    # Parenthesised single-argument print produces the same output under
    # Python 2 and is also valid Python 3 (the bare statement form is not).
    print(planet['name'])
    planets.append(planet['name'])

# Sanity check that the key works and the service returned the expected names.
# The comparison is order-sensitive.
assert planets == [u'Earth',
                   u'Venus',
                   u'Mars',
                   u'Mercury',
                   u'Jupiter',
                   u'Neptune',
                   u'Saturn',
                   u'Uranus']
Earth Venus Mars Mercury Jupiter Neptune Saturn Uranus
# http://wiki.freebase.com/wiki/Google_API_Client_Libraries#Python
from itertools import islice
from apiclient import discovery
from apiclient import model
import json
from CREDENTIALS import FREEBASE_KEY
from pandas import DataFrame, Series
# Fetch the governor of every first-level division of the United States from
# Freebase and tabulate the result as a pandas DataFrame named `governors`.

DEVELOPER_KEY = FREEBASE_KEY
model.JsonModel.alt_param = ""
freebase = discovery.build('freebase', 'v1', developerKey=DEVELOPER_KEY)

# MQL query written as JSON text. Newlines are flattened to spaces before
# parsing; the parsed structure is re-serialized when the request is made.
# NOTE(review): the "to": {"optional": "forbidden"} clause presumably limits
# matches to office holders with no end date (i.e. the sitting governor) --
# confirm against the MQL reference.
query_json = """[{
"id": null,
"wiki_en:key": [{
"/type/key/namespace": "/wikipedia/en_id",
"value": null,
"optional": true
}],
"/location/administrative_division/fips_10_4_region_code": null,
"/location/administrative_division/first_level_division_of": "United States of America",
"type": "/government/governmental_jurisdiction",
"governing_officials": [{
"type": null,
"office_holder": {
"id": null,
"en:name": null,
"type": "/government/politician",
"party": [{
"party": null
}]
},
"basic_title": "Governor",
"from": null,
"to": {
"optional": "forbidden",
"value": null
}
}]
}]""".replace("\n", " ")
query = json.loads(query_json)

raw_reply = freebase.mqlread(query=json.dumps(query)).execute()
response = json.loads(raw_reply)


def _governor_row(entry):
    """Flatten one entry of the query result into a flat dict (one table row)."""
    # Only the first governing official of each entry is used.
    incumbent = entry['governing_officials'][0]['office_holder']
    return {
        'fips': entry['/location/administrative_division/fips_10_4_region_code'],
        'state': entry['id'],
        'name': incumbent['en:name'],
        'party': [membership['party'] for membership in incumbent['party']],
        'en_wikipedia_key': [key["value"] for key in entry["wiki_en:key"]],
    }


results = [_governor_row(entry) for entry in response['result']]

governors = DataFrame(results)
# Trailing expression: the notebook displays the first five rows.
governors[:5]
en_wikipedia_key | fips | name | party | state | |
---|---|---|---|---|---|
0 | [303] | US01 | Dr. Robert J. Bentley | [Republican Party] | /en/alabama |
1 | [624] | US02 | Sean Parnell | [Republican Party] | /en/alaska |
2 | [1930] | US05 | Mike Beebe | [Democratic Party] | /en/arkansas |
3 | [21883824] | US04 | Jan Brewer | [Republican Party] | /en/arizona |
4 | [5399] | US08 | John Hickenlooper | [Democratic Party] | /en/colorado |
# Select the governors whose list of party affiliations includes
# 'Republican Party' (so anyone who is, or has been, a Republican).
has_been_republican = governors["party"].apply(
    lambda parties: 'Republican Party' in parties
)
governors[has_been_republican]
en_wikipedia_key | fips | name | party | state | |
---|---|---|---|---|---|
0 | [303] | US01 | Dr. Robert J. Bentley | [Republican Party] | /en/alabama |
1 | [624] | US02 | Sean Parnell | [Republican Party] | /en/alaska |
3 | [21883824] | US04 | Jan Brewer | [Republican Party] | /en/arizona |
8 | [18933066] | US12 | Rick Scott | [Republican Party] | /en/florida |
10 | [26810748] | US19 | Terry E. Branstad | [Republican Party] | /en/iowa |
11 | [14607] | US16 | C. L. Otter | [Republican Party] | /en/idaho |
13 | [21883857] | US18 | Mike Pence | [Republican Party] | /en/indiana |
14 | [16716] | US20 | Sam Brownback | [Republican Party] | /en/kansas |
16 | [18130] | US22 | Bobby Jindal | [Republican Party] | /en/louisiana |
18 | [18859] | US26 | Rick Snyder | [Republican Party] | /en/michigan |
19 | [16949861] | US28 | Phil Bryant | [Republican Party] | /en/mississippi |
22 | [19977] | US23 | Paul LePage | [Republican Party] | /en/maine |
25 | [21216] | US32 | Brian Sandoval | [Republican Party] | /en/nevada |
26 | [21647] | US31 | Dave Heineman | [Republican Party] | /en/nebraska |
27 | [21648] | US34 | Chris Christie | [Republican Party] | /en/new_jersey |
28 | [21649] | US35 | Susana Martinez | [Republican Party, Democratic Party] | /en/new_mexico |
29 | [21650] | US37 | Pat McCrory | [Republican Party] | /en/north_carolina |
30 | [21651] | US38 | Jack Dalrymple | [Republican Party] | /en/north_dakota |
32 | [22199] | US39 | John Kasich | [Republican Party] | /en/ohio |
33 | [22489] | US40 | Mary Fallin | [Republican Party] | /en/oklahoma |
34 | [23332] | US42 | Tom Corbett | [Republican Party] | /en/pennsylvania |
35 | [25410] | US44 | Lincoln Chafee | [Republican Party, Independent] | /en/rhode_island |
36 | [26746] | US46 | Dennis Daugaard | [Republican Party] | /en/south_dakota |
37 | [27956] | US45 | Nikki Haley | [Republican Party] | /en/south_carolina |
38 | [29810] | US48 | Rick Perry | [Republican Party, Democratic Party] | /en/texas |
39 | [30395] | US47 | Bill Haslam | [Republican Party] | /en/tennessee |
40 | [31716] | US49 | Gary R. Herbert | [Republican Party] | /en/utah |
41 | [32432] | US51 | Bob McDonnell | [Republican Party, Republican Party of Virginia] | /en/virginia |
45 | [33127] | US55 | Scott Walker | [Republican Party] | /en/wisconsin |
46 | [33611] | US56 | Matt Mead | [Republican Party] | /en/wyoming |
47 | [48830] | US13 | Nathan Deal | [Republican Party, Democratic Party] | /en/georgia |
# state centroids
# http://tinyurl.com/cjuy6k3
from itertools import islice
from apiclient import discovery
from apiclient import model
import json
from CREDENTIALS import FREEBASE_KEY
from pandas import DataFrame, Series
# Fetch the geolocation of every first-level division of the United States
# from Freebase, tabulate it as the DataFrame `states`, and scatter-plot
# longitude against latitude.

DEVELOPER_KEY = FREEBASE_KEY
model.JsonModel.alt_param = ""
freebase = discovery.build('freebase', 'v1', developerKey=DEVELOPER_KEY)

# MQL query written as JSON text; newlines are flattened to spaces before
# parsing. The FIPS code is requested with [] and is stored as returned.
query_json = """
[{
"id": null,
"name": null,
"/location/administrative_division/fips_10_4_region_code": [],
"/location/administrative_division/first_level_division_of": "United States of America",
"/location/location/geolocation": {
"latitude": null,
"longitude": null
}
}]""".replace("\n", " ")
query = json.loads(query_json)

raw_reply = freebase.mqlread(query=json.dumps(query)).execute()
response = json.loads(raw_reply)


def _state_row(entry):
    """Flatten one entry of the query result into a flat dict (one table row)."""
    geo = entry['/location/location/geolocation']
    return {
        'id': entry['id'],
        'name': entry['name'],
        # Coordinates are coerced to float so they can be plotted directly.
        'latitude': float(geo['latitude']),
        'longitude': float(geo['longitude']),
        'fips': entry['/location/administrative_division/fips_10_4_region_code'],
    }


results = [_state_row(entry) for entry in response['result']]
states = DataFrame(results)

# NOTE(review): `plt` is not imported in this cell; presumably the notebook
# session provides it (e.g. pylab mode) -- confirm before running standalone.
plt.scatter(states["longitude"], states["latitude"])
<matplotlib.collections.PathCollection at 0x4e6d410>