First, you need API credentials to use the Foursquare API with reasonable rate limits.
If you already have an access token, you can use that; otherwise, register an app and use its client ID and secret for the following steps.
import foursquare
import pandas as pd
#ACCESS_TOKEN = ""
#client = foursquare.Foursquare(access_token=ACCESS_TOKEN)
CLIENT_ID = ""
CLIENT_SECRET = ""
client = foursquare.Foursquare(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
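The crawl below makes up to two API calls per venue, so a deep crawl can exhaust the hourly quota. As a minimal sketch (assuming the client library raises foursquare.RateLimitExceeded on quota errors; the helper name fetch_with_retry is ours), calls can be wrapped in a retry loop:
import time
def fetch_with_retry(request, retries=3, wait=60):
    # `request` is a zero-argument callable; on a rate-limit error,
    # sleep for `wait` seconds and retry before giving up
    for attempt in range(retries):
        try:
            return request()
        except foursquare.RateLimitExceeded:
            time.sleep(wait)
    raise RuntimeError("rate limit still exceeded after retries")
# usage: res = fetch_with_retry(lambda: client.venues(venue_id))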
# bbox = [11.109872,47.815652,12.068588,48.397136] # bounding box for Munich
# bbox = [13.088400,52.338120,13.761340,52.675499] # bounding box for Berlin
bbox = [5.866240,47.270210,15.042050,55.058140] # bounding box for Germany
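The bounding boxes are in [min_lng, min_lat, max_lng, max_lat] order. The crawl below inlines the containment check; spelled out as a (hypothetical) helper it reads:
def in_bbox(location, bbox):
    # True if the venue's coordinates fall inside the bounding box
    return (bbox[1] < location["lat"] < bbox[3] and
            bbox[0] < location["lng"] < bbox[2])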
new_crawl = [] # list of locations to be crawled
done = [] # list of crawled locations
links = [] # list of tuples that represent links between locations
venues = pd.DataFrame() # DataFrame of venue meta-data, indexed by venue id
Set seed values: the venue IDs of Marienplatz, the airport and the central station in Munich.
depth is the number of crawl iterations, i.e. how many times we follow the "next venues" links outward from the seeds.
to_crawl = ["4ade0ccef964a520246921e3", "4cbd1bfaf50e224b160503fc", "4b0674e2f964a520f4eb22e3"]
depth = 8
for i in range(depth):
    new_crawl = []
    print(f"Step {i}: {len(venues)} locations and {len(links)} links. {len(to_crawl)} venues to go.")
    for v in to_crawl:
        # fetch meta-data for the venue if we haven't stored it yet
        if v not in venues.index:
            res = client.venues(v)
            venues = pd.concat([venues, pd.DataFrame({
                "name": res["venue"]["name"],
                "users": res["venue"]["stats"]["usersCount"],
                "checkins": res["venue"]["stats"]["checkinsCount"],
                "lat": res["venue"]["location"]["lat"],
                "lng": res["venue"]["location"]["lng"]}, index=[v])])
        # follow the "next venues" links
        next_venues = client.venues.nextvenues(v)
        for nv in next_venues["nextVenues"]["items"]:
            # keep only venues inside the bounding box
            if (bbox[1] < nv["location"]["lat"] < bbox[3] and
                    bbox[0] < nv["location"]["lng"] < bbox[2]):
                if nv["id"] not in venues.index:
                    venues = pd.concat([venues, pd.DataFrame({
                        "name": nv["name"],
                        "users": nv["stats"]["usersCount"],
                        "checkins": nv["stats"]["checkinsCount"],
                        "lat": nv["location"]["lat"],
                        "lng": nv["location"]["lng"]}, index=[nv["id"]])])
                # queue for the next round unless already seen or queued
                if nv["id"] not in done and nv["id"] not in to_crawl and nv["id"] not in new_crawl:
                    new_crawl.append(nv["id"])
                links.append((v, nv["id"]))
        done.append(v)
    to_crawl = new_crawl
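A country-wide crawl at depth 8 can run for a long time, so it is worth checkpointing the crawl state between steps to be able to resume after an interruption. A sketch using only the standard library (file names are arbitrary; JSON turns the link tuples into lists on reload):
import json
def save_state(filename="crawl_state.json"):
    # persist the crawl frontier and the collected links
    with open(filename, "w") as f:
        json.dump({"to_crawl": to_crawl, "done": done, "links": links}, f)
    venues.to_csv("venues.csv")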
Before building the network, drop duplicate venue entries (keeping the latest). Then we import networkx to build the network from our crawled venues (nodes) and the links between them.
venues = venues.reset_index().drop_duplicates(subset='index', keep='last').set_index('index')
venues.head()
labels = venues["name"].to_dict()
import networkx as nx
G = nx.DiGraph()
G.add_nodes_from(venues.index)
G.add_edges_from(links)
print(G)  # summary: number of nodes and edges
Now calculate two centrality metrics, PageRank and betweenness centrality, and visualize the most important venues.
pagerank = nx.pagerank(G,alpha=0.9)
betweenness = nx.betweenness_centrality(G)
venues['pagerank'] = [pagerank[n] for n in venues.index]
venues['betweenness'] = [betweenness[n] for n in venues.index]
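Before plotting, a quick sanity check, e.g. the ten venues with the highest PageRank:
venues.sort_values('pagerank', ascending=False).head(10)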
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8, 6), dpi=150)
ax = fig.add_subplot(111)
venues.sort_values('users', inplace=True)
venues.set_index('name')[-20:].users.plot(kind='barh')
ax.set_ylabel('Location')
ax.set_xlabel('Users')
ax.set_title('Top 20 Locations by Users')
plt.show()
fig = plt.figure(figsize=(8, 6), dpi=150)
ax = fig.add_subplot(111)
venues.sort_values('checkins', inplace=True)
venues.set_index('name')[-20:].checkins.plot(kind='barh')
ax.set_ylabel('Location')
ax.set_xlabel('Checkins')
ax.set_title('Top 20 Locations by Checkins')
plt.show()
fig = plt.figure(figsize=(8, 6), dpi=150)
ax = fig.add_subplot(111)
venues.sort_values('pagerank', inplace=True)
venues.set_index('name')[-20:].pagerank.plot(kind='barh')
ax.set_ylabel('Location')
ax.set_xlabel('Pagerank')
ax.set_title('Top 20 Locations by Pagerank')
plt.show()
fig = plt.figure(figsize=(8, 6), dpi=150)
ax = fig.add_subplot(111)
venues.sort_values('betweenness', inplace=True)
venues.set_index('name')[-20:].betweenness.plot(kind='barh')
ax.set_ylabel('Location')
ax.set_xlabel('Betweenness Centrality')
ax.set_title('Top 20 Locations by Betweenness Centrality')
plt.show()
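The four bar charts differ only in the column they rank by; they could equally be produced by one small helper (a sketch; the name plot_top20 is ours):
def plot_top20(df, column, xlabel):
    # horizontal bar chart of the 20 venues with the highest value in `column`
    fig = plt.figure(figsize=(8, 6), dpi=150)
    ax = fig.add_subplot(111)
    df.sort_values(column).set_index('name')[-20:][column].plot(kind='barh', ax=ax)
    ax.set_ylabel('Location')
    ax.set_xlabel(xlabel)
    ax.set_title('Top 20 Locations by ' + xlabel)
    plt.show()
plot_top20(venues, 'betweenness', 'Betweenness Centrality')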
Visualize the network with a spring layout; node sizes are scaled by PageRank.
fig = plt.figure(figsize=(16, 9), dpi=150)
graph_pos = nx.spring_layout(G)
nodesize = [10000 * pagerank[n] for n in G.nodes()]  # scale node size by PageRank
nx.draw_networkx_nodes(G, graph_pos, node_size=nodesize, alpha=0.5, node_color='blue')
nx.draw_networkx_edges(G, graph_pos, width=1, alpha=0.3, edge_color='blue')
nx.draw_networkx_labels(G, graph_pos, labels=labels, font_size=10, font_family='Arial')
plt.axis('off')
plt.show()
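Since every venue carries coordinates, an alternative to the force-directed layout is to place each node at its geographic position, which reveals the spatial structure of the network (a sketch, reusing nodesize from above):
geo_pos = {v: (venues.loc[v, "lng"], venues.loc[v, "lat"]) for v in G.nodes()}
fig = plt.figure(figsize=(16, 9), dpi=150)
nx.draw_networkx_nodes(G, geo_pos, node_size=nodesize, alpha=0.5, node_color='blue')
nx.draw_networkx_edges(G, geo_pos, width=1, alpha=0.3, edge_color='blue')
plt.axis('off')
plt.show()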
Finally, save the network for further analysis, e.g. in Gephi.
nx.write_graphml(G, "./location_graph.graphml")
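By default only the graph structure is exported. To make the venue names and metrics available in Gephi, they can be attached as node attributes before writing (a sketch):
nx.set_node_attributes(G, venues['name'].to_dict(), 'name')
nx.set_node_attributes(G, venues['pagerank'].to_dict(), 'pagerank')
nx.set_node_attributes(G, venues['betweenness'].to_dict(), 'betweenness')
nx.write_graphml(G, "./location_graph.graphml")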