%matplotlib inline

import json

import numpy as np
import networkx as nx
import requests
from pattern import web
import matplotlib.pyplot as plt

import ast

from itertools import combinations, permutations
import operator

# set some nicer defaults for matplotlib
from matplotlib import rcParams

# Palette taken from colorbrewer2.org; every entry is an (R, G, B) triplet of floats.
dark2_colors = [
    (0.10588235294117647, 0.6196078431372549, 0.4666666666666667),
    (0.8509803921568627, 0.37254901960784315, 0.00784313725490196),
    (0.4588235294117647, 0.4392156862745098, 0.7019607843137254),
    (0.9058823529411765, 0.1607843137254902, 0.5411764705882353),
    (0.4, 0.6509803921568628, 0.11764705882352941),
    (0.9019607843137255, 0.6705882352941176, 0.00784313725490196),
    (0.6509803921568628, 0.4627450980392157, 0.11372549019607843),
    (0.4, 0.4, 0.4),
]

# Figure-wide matplotlib defaults. Each key is set through item assignment,
# one at a time and in this order.
for _rc_key, _rc_value in (
        ('figure.figsize', (10, 6)),
        ('figure.dpi', 150),
        ('axes.color_cycle', dark2_colors),
        ('lines.linewidth', 2),
        ('axes.grid', False),
        ('axes.facecolor', 'white'),
        ('font.size', 14),
        ('patch.edgecolor', 'none'),
):
    rcParams[_rc_key] = _rc_value

def remove_border(axes=None, top=False, right=False, left=True, bottom=True):
    """
    Strip plot borders and axis ticks that add nothing to the figure.

    Parameters
    ----------
    axes : matplotlib axes, optional
        Axes to modify; the current axes (``plt.gca()``) are used when omitted.
    top, right, left, bottom : bool
        Whether the border on that side is drawn. Ticks are re-enabled only
        on the sides whose flag is true.
    """
    target = axes if axes else plt.gca()

    # Spine visibility follows the keyword flags directly.
    for side, shown in (('top', top), ('right', right),
                        ('left', left), ('bottom', bottom)):
        target.spines[side].set_visible(shown)

    # Start from "no ticks anywhere" ...
    target.yaxis.set_ticks_position('none')
    target.xaxis.set_ticks_position('none')

    # ... then switch ticks back on for the requested sides. The order
    # (top, bottom, left, right) is significant: when both sides of one axis
    # are requested, the later call is the one issued last.
    for wanted, axis, enable in ((top, target.xaxis, 'tick_top'),
                                 (bottom, target.xaxis, 'tick_bottom'),
                                 (left, target.yaxis, 'tick_left'),
                                 (right, target.yaxis, 'tick_right')):
        if wanted:
            getattr(axis, enable)()

"""
Function
--------
get_senate_vote

Scrapes a single JSON page for a particular Senate vote, given by the vote number

Parameters
----------
vote : int
   The vote number to fetch
   
Returns
-------
json : dict
   The JSON-decoded dictionary for that vote
   
Examples
--------
>>> get_senate_vote(11)['bill']
{u'congress': 113,
 u'number': 325,
 u'title': u'A bill to ensure the complete and timely payment of the obligations of the United States Government until May 19, 2013, and for other purposes.',
 u'type': u'hr'}
"""
def get_senate_vote(vote, congress=101, year=1989):
    """
    Fetch and decode the JSON record for a single Senate roll-call vote.

    Parameters
    ----------
    vote : int
        The vote number to fetch.
    congress : int, optional
        Number of the Congress whose votes are queried (default 101).
    year : int, optional
        Session year used in the URL (default 1989).

    Returns
    -------
    json : dict
        The JSON-decoded dictionary for that vote.

    Raises
    ------
    ValueError
        If the response body is not valid JSON; this is how a nonexistent
        vote number is detected.
    """
    url = "https://www.govtrack.us/data/congress/{}/votes/{}/s{}/data.json".format(
        congress, year, vote)
    page = requests.get(url).text
    try:
        return json.loads(page)
    except ValueError:
        # The HTTP status is not inspected, so a missing vote shows up here
        # as a body that fails to parse as JSON.
        raise ValueError("Not a valid vote number.")


"""
Function
--------
get_all_votes

Scrapes all the Senate votes from https://www.govtrack.us/data/congress/101/votes/1989
(the location queried by get_senate_vote), and returns a list of dicts

Parameters
-----------
None

Returns
--------
vote_dicts : list of dicts
    List of JSON-parsed dicts for each senate vote
"""
def get_all_votes():
    """
    Collect every Senate vote that get_senate_vote can fetch.

    Vote numbers are tried in sequence starting at 1; the scan ends at the
    first number for which get_senate_vote raises any exception.

    Returns
    -------
    vote_dicts : list of dicts
        The JSON-parsed dict of each vote fetched before the scan ended.
    """
    collected = []
    number = 1
    while True:
        try:
            collected.append(get_senate_vote(number))
        except Exception:
            # Any failure is treated as "no more votes".
            return collected
        number += 1


# Download every vote once, up front. get_all_votes() issues one HTTP request
# per vote number, so this line performs network I/O when the script runs.
vote_data = get_all_votes()

"""
Function
--------
vote_graph

Parameters
----------
data : list of dicts
    The vote database returned from get_all_votes

Returns
-------
graph : NetworkX Graph object, with the following properties
    1. Each node in the graph is labeled using the `display_name` of a Senator (e.g., 'Lee (R-UT)')
    2. Each node has a `color` attribute set to 'red' for Republicans, 
       'blue' for Democrats, and 'black' for Independent/other parties.
    3. The edges between two nodes are weighted by the number of 
       times two senators have cast the same Yea or Nay vote
    4. Each edge also has a `difference` attribute, which is set to `1 / weight`.

Examples
--------
>>> graph = vote_graph(vote_data)
>>> graph.node['Lee (R-UT)']
{'color': 'red'}  # attributes for this senator
>>> len(graph['Lee (R-UT)']) # connections to other senators
101
>>> graph['Lee (R-UT)']['Baldwin (D-WI)']  # edge relationship between Lee and Baldwin
{'difference': 0.02, 'weight': 50}
"""
# Color names that Graphviz understands, keyed by the party letter found in a
# senator's display name.
_PARTY_COLORS = {'D': 'blue', 'R': 'red'}


def _party_color(display_name):
    """Return the node color for a display name such as 'Lee (R-UT)'."""
    # The party letter is the character right after the last '('. Reading the
    # second whitespace-separated token instead picks the wrong character
    # for multi-word surnames.
    _, paren, affiliation = display_name.rpartition('(')
    party = affiliation[:1] if paren else ''
    return _PARTY_COLORS.get(party, 'black')


def vote_graph(data):
    """
    Build an undirected graph linking senators who cast the same vote.

    Parameters
    ----------
    data : list of dicts
        The vote database returned from get_all_votes. Each dict needs a
        'votes' mapping from a vote class (such as 'Yea' or 'Nay') to a list
        of voter records that carry a 'display_name'.

    Returns
    -------
    graph : NetworkX Graph object, with the following properties
        1. Every senator appearing in any vote class is a node, labeled with
           the senator's `display_name` (e.g., 'Lee (R-UT)').
        2. Each node has a `color` attribute: 'red' for Republicans, 'blue'
           for Democrats, and 'black' for anyone else.
        3. An edge's `weight` is the number of votes in which both senators
           voted 'Yea' or both voted 'Nay'.
        4. An edge's `difference` attribute is `1.0 / weight`.
        Senators who never voted the same way are not connected.
    """
    graph = nx.Graph()

    # Every senator display name seen in any vote class; these become nodes.
    all_senators = set()
    # One dict per vote: vote class -> list of senator display names.
    roll_calls = []
    for vote in data:
        roll_call = {}
        for vote_class, voters in vote['votes'].items():
            # Entries equal to the bare string 'VP' are not senator records
            # (presumably the Vice President's vote) and are skipped.
            names = [voter['display_name'] for voter in voters if voter != 'VP']
            roll_call[vote_class] = names
            all_senators.update(names)
        roll_calls.append(roll_call)

    # Count agreements directly. Sorting the names makes every pair come out
    # in one canonical order, so a pair always maps to the same key.
    common_votes = {}
    for roll_call in roll_calls:
        for vote_class in ('Yea', 'Nay'):
            # A roll call may lack one of the two classes entirely.
            names = sorted(set(roll_call.get(vote_class, ())))
            for pair in combinations(names, 2):
                common_votes[pair] = common_votes.get(pair, 0) + 1

    for senator in all_senators:
        graph.add_node(senator, color=_party_color(senator))

    # Only pairs with at least one common vote are present, so the division
    # below can never hit zero.
    for (first, second), shared in common_votes.items():
        graph.add_edge(first, second, weight=shared, difference=1.0 / shared)

    return graph


# Build the senator agreement graph and export it for use outside this script.
votes = vote_graph(vote_data)

nx.write_gexf(votes, 'votes-101-1989.gexf')

# Seed NumPy's RNG so the spring layout comes out the same at each call.
np.random.seed(1)

# Node colors, in the same order that votes.nodes() yields the nodes.
color = [votes.node[senator]['color'] for senator in votes.nodes()]

# Determine the position of each node using a spring layout.
pos = nx.spring_layout(votes, iterations=200)

# Edges are drawn almost transparent because the graph is very dense.
nx.draw_networkx_edges(votes, pos, alpha=.05)

nx.draw_networkx_nodes(votes, pos, node_color=color)

# Labels are fully opaque. alpha must lie in [0, 1]; the previous value of 5
# is out of range.
lbls = nx.draw_networkx_labels(votes, pos, alpha=1.0, font_size=8)

# Coordinate information is meaningless here, so remove ticks and borders.
plt.xticks([])
plt.yticks([])
remove_border(left=False, bottom=False)

# Minimum spanning tree over the 'difference' edge attribute (1 / shared
# votes), so the tree keeps the links between senators who agree most often.
mst = nx.minimum_spanning_tree(votes, weight='difference')

# Seed NumPy's RNG; presumably this has no effect on the Graphviz layout
# used below - TODO confirm.
np.random.seed(1)

# A larger figure size makes the graph easier to read.
plt.figure(figsize=(25, 25))

color = [mst.node[senator]['color'] for senator in mst.nodes()]

# Use a Graphviz layout to determine node positions.
pos = nx.graphviz_layout(mst, prog='neato', args='-Goverlap=false -Gsep=+150,150')

nx.draw_networkx_edges(mst, pos, alpha=.5)

nx.draw_networkx_nodes(mst, pos, node_color=color)

# Label positions are the node positions shifted down by 95 units so the text
# is not drawn directly on the nodes. They are derived from `pos` instead of
# running the identical layout a second time.
offset_pos = dict((node, (xy[0], xy[1] - 95)) for node, xy in pos.items())

# alpha must lie in [0, 1]; 1.0 keeps the labels fully opaque.
lbls = nx.draw_networkx_labels(mst, offset_pos, alpha=1.0, font_size=12)

# Coordinate information is meaningless here, so remove ticks and borders.
plt.xticks([])
plt.yticks([])

remove_border(left=False, bottom=False)

# Closeness centrality of every senator, using each edge's 'difference'
# attribute as the edge length.
centralities = nx.closeness_centrality(votes, distance='difference')

# (senator, score) pairs, highest score first.
get_score = operator.itemgetter(1)
centrality = sorted(centralities.items(), key=get_score, reverse=True)

# Parallel lists for plotting. `scores` is a real list (not a lazy map
# object) because it is indexed when the plot limits are computed.
senators = [senator for senator, _ in centrality]
scores = [round(score, 4) for _, score in centrality]

def visualize(title, senator_lst, score_lst, bar_color=dark2_colors[0], subplot=111, xmin=0, xmax=100):
    """
    Draw a horizontal bar chart of one score per senator.

    Parameters
    ----------
    title : str
        Title placed above the chart.
    senator_lst : list
        Labels for the bars, drawn from top to bottom.
    score_lst : list
        Bar lengths, in the same order as `senator_lst`.
    bar_color : color
        Fill color of the bars.
    subplot : int
        Three-digit subplot code passed to ``plt.subplot``. A new
        10x30 figure is created when its third digit is 1.
    xmin, xmax : number
        Limits of the x axis.
    """
    bar_positions = np.arange(len(senator_lst))

    # The first panel of a grid (third digit of the code is 1) opens a new figure.
    starts_new_figure = str(subplot)[2] == '1'
    if starts_new_figure:
        plt.figure(figsize=(10, 30))

    plt.subplot(subplot)

    plt.title(title)
    plt.barh(bar_positions, score_lst, color=bar_color)

    # Write each value next to its bar; small values get a much smaller gap.
    for bar_pos, value in zip(bar_positions, score_lst):
        gap = 0.0001 if value < 0.5 else 1
        plt.annotate(str(value), xy=(value + gap, bar_pos + .5), va='center')

    # Senator names on the y axis; blank out the x tick labels.
    plt.yticks(bar_positions + .5, senator_lst)
    tick_locations = plt.xticks()[0]
    plt.xticks(tick_locations, [' '] * len(tick_locations))

    # Minimize chartjunk.
    remove_border(left=False, bottom=False)
    plt.grid(axis = 'x', color ='white', linestyle='-')

    # Inverted y limits put the first entry at the top of the chart.
    plt.ylim(bar_positions.max() + 1, bar_positions.min())
    plt.xlim(xmin, xmax)

    plt.tight_layout()

# The vote data comes from the 101st Congress (1989 session), so the chart
# titles in this cell are labeled accordingly.
visualize("Senator Centrality Scores - 101st Congress", senators, scores, xmax=scores[0]+1)


# Shortest paths between every pair of senators in the spanning tree.
# dict() is a plain copy when a dict is returned and materializes the result
# when an iterator of (node, paths) pairs is returned instead.
paths = dict(nx.all_pairs_shortest_path(mst))
senators = list(mst.nodes())

# Split on node color. Everything that is not 'red' lands in `democrats`,
# which therefore also holds the 'black' (other-party) nodes.
republicans, democrats = [], []
for senator in senators:
    if mst.node[senator]['color'] == 'red':
        republicans.append(senator)
    else:
        democrats.append(senator)

# Score = mean path size from a senator to every member of the other group.
# len(path) counts the nodes on the path, i.e. the number of edges plus one.
scores = []
for senator in senators:
    others = democrats if mst.node[senator]['color'] == 'red' else republicans
    scores.append(np.mean([len(paths[senator][other]) for other in others]))

# (senator, score) pairs, shortest average path first.
get_score = operator.itemgetter(1)
avg_lengths = sorted(zip(senators, scores), key=get_score)

# Real lists are needed here because scores[-1] is indexed below.
senators = [senator for senator, _ in avg_lengths]
scores = [round(score, 4) for _, score in avg_lengths]

visualize(
          "Senator 'Shortest Paths' Scores - 101st Congress", 
          senators, 
          scores, 
          bar_color=dark2_colors[1],  
          xmax=scores[-1] + 1
          )

"""
Function
--------
get_senate_bill

Scrape the bill data from a single JSON page, given the bill number

Parameters
-----------
bill : int
   Bill number to fetch
   
Returns
-------
A dict, parsed from the JSON

Examples
--------
>>> bill = get_senate_bill(10)
>>> bill['sponsor']
{u'district': None,
 u'name': u'Reid, Harry',
 u'state': u'NV',
 u'thomas_id': u'00952',
 u'title': u'Sen',
 u'type': u'person'}
>>> bill['short_title']
u'Agriculture Reform, Food, and Jobs Act of 2013'
"""
def get_senate_bill(bill, congress=113):
    """
    Fetch and decode the JSON record for a single Senate bill.

    Parameters
    ----------
    bill : int
        Bill number to fetch.
    congress : int, optional
        Number of the Congress whose bills are queried (default 113).

    Returns
    -------
    A dict, parsed from the JSON.

    Raises
    ------
    ValueError
        If the response body is not valid JSON; this is how a nonexistent
        bill number is detected.
    """
    url = "https://www.govtrack.us/data/congress/{}/bills/s/s{}/data.json".format(
        congress, bill)
    page = requests.get(url).text
    try:
        return json.loads(page)
    except ValueError:
        # The HTTP status is not inspected, so a missing bill shows up here
        # as a body that fails to parse as JSON.
        raise ValueError("Not a valid bill number.")


"""
Function
--------
get_all_bills

Scrape all Senate bills at http://www.govtrack.us/data/congress/113/bills/s

Parameters
----------
None

Returns
-------
A list of dicts, one for each bill
"""
def get_all_bills():
    """
    Collect every Senate bill that get_senate_bill can fetch.

    Bill numbers are tried in sequence starting at 1; the scan ends at the
    first number for which get_senate_bill raises any exception.

    Returns
    -------
    A list of dicts, one for each bill fetched before the scan ended.
    """
    collected = []
    number = 1
    while True:
        try:
            collected.append(get_senate_bill(number))
        except Exception:
            # Any failure is treated as "no more bills".
            return collected
        number += 1


# Download every bill once, up front. get_all_bills() issues one HTTP request
# per bill number, so this line performs network I/O when the script runs.
bill_list = get_all_bills()

"""
Function
--------
bill_graph

Turn the bill graph data into a NetworkX Digraph

Parameters
----------
data : list of dicts
    The data returned from get_all_bills

Returns
-------
graph : A NetworkX DiGraph, with the following properties
    * Each node is a senator. For a label, use the 'name' field 
      from the 'sponsor' and 'cosponsors' dict items
    * Each edge from A to B is assigned a weight equal to how many 
      bills are sponsored by B and co-sponsored by A
"""
def bill_graph(data):
    """
    Turn the bill sponsorship data into a NetworkX DiGraph.

    Parameters
    ----------
    data : list of dicts
        The data returned from get_all_bills. Each dict needs a 'sponsor'
        dict and a 'cosponsors' list of dicts, all carrying a 'name'.

    Returns
    -------
    graph : A NetworkX DiGraph, with the following properties
        * Each node is a senator, labeled with the 'name' field of the
          'sponsor' and 'cosponsors' entries.
        * Each edge from A to B has a `weight` equal to how many bills are
          sponsored by B and co-sponsored by A. Pairs with no such bill are
          not connected.
    """
    digraph = nx.DiGraph()

    senators = set()
    # (cosponsor, sponsor) -> number of bills linking the two in that direction.
    cosponsorships = {}
    for bill in data:
        sponsor = bill['sponsor']['name']
        senators.add(sponsor)
        for cosponsor in bill['cosponsors']:
            name = cosponsor['name']
            senators.add(name)
            if name == sponsor:
                # A sponsor listed as cosponsor of the same bill would
                # create a self-loop; ignore such entries.
                continue
            key = (name, sponsor)
            cosponsorships[key] = cosponsorships.get(key, 0) + 1

    # Add every senator, including those that end up without any edge.
    digraph.add_nodes_from(senators)

    for (cosponsor, sponsor), count in cosponsorships.items():
        digraph.add_edge(cosponsor, sponsor, weight=count)

    return digraph


# Directed cosponsorship graph and the PageRank score of every senator in it.
bills = bill_graph(bill_list)

pageranks = nx.pagerank_numpy(bills)


# visualize senator PageRank scores
# (senator, score) pairs, highest score first.
get_score = operator.itemgetter(1)
pagerank = sorted(pageranks.items(), key=get_score, reverse=True)

# Parallel lists for plotting. `scores` is a real list (not a lazy map
# object) because scores[0] is indexed below.
senators = [senator for senator, _ in pagerank]
scores = [round(score, 5) for _, score in pagerank]

visualize("Senator PageRank Scores", 
          senators, 
          scores, 
          bar_color=dark2_colors[2], 
          subplot=121, 
          xmax=scores[0]+0.001
          )

# visualize degree of each senator node
# Pair each senator with its degree in the bill graph, then order the pairs
# from highest to lowest degree.
get_degree = operator.itemgetter(1)
senator_degrees = sorted(
    [(senator, bills.degree(senator)) for senator in senators],
    key=get_degree,
    reverse=True
)

# Note: from here on `senators` and `degrees` are ordered by degree.
senators = [senator for senator, degree in senator_degrees]
degrees = [degree for senator, degree in senator_degrees]

visualize("Senator Node Degrees", 
          senators, 
          degrees, 
          bar_color=dark2_colors[3], 
          subplot=122, 
          xmax=degrees[0] + 1
          )


# visualize correlation between PageRank score and degree
plt.figure(figsize=(13, 8))

# Build both coordinate lists from the same node sequence so every point
# pairs one senator's degree with that same senator's PageRank score. The
# `degrees` and `scores` lists cannot be used here: the former has been
# re-sorted by degree while the latter is still in PageRank order.
scatter_nodes = list(bills.nodes())
scatter_degrees = [bills.degree(node) for node in scatter_nodes]
scatter_scores = [round(pageranks[node], 5) for node in scatter_nodes]
plt.scatter(scatter_degrees, scatter_scores, color='g', alpha=0.7)

plt.title("Senator PageRank Score vs. Degree - 113th Congress")
plt.xlabel("Degree")
plt.ylabel("PageRank Score")

# Anchor both axes at zero.
plt.xlim(xmin=0)
plt.ylim(ymin=0.0)

remove_border()
plt.tight_layout()


# Export the vote graph again, this time as 'votes.gexf' (the same graph was
# already written to 'votes-101-1989.gexf' right after it was built).
nx.write_gexf(votes, 'votes.gexf')

from IPython.display import Image

# Display a pre-rendered picture. Nothing in this script creates this PNG;
# presumably it was produced from the GEXF export in an external tool -
# TODO confirm.
path = 'votes-modularity-pagerank.png'
Image(path)