%matplotlib inline

import json

import numpy as np
import networkx as nx
import requests
from pattern import web
import matplotlib.pyplot as plt

# set some nicer defaults for matplotlib
from matplotlib import rcParams

#these colors come from colorbrewer2.org. Each is an RGB triplet
dark2_colors = [(0.10588235294117647, 0.6196078431372549, 0.4666666666666667),
                (0.8509803921568627, 0.37254901960784315, 0.00784313725490196),
                (0.4588235294117647, 0.4392156862745098, 0.7019607843137254),
                (0.9058823529411765, 0.1607843137254902, 0.5411764705882353),
                (0.4, 0.6509803921568628, 0.11764705882352941),
                (0.9019607843137255, 0.6705882352941176, 0.00784313725490196),
                (0.6509803921568628, 0.4627450980392157, 0.11372549019607843),
                (0.4, 0.4, 0.4)]

rcParams['figure.figsize'] = (10, 6)
rcParams['figure.dpi'] = 150
# 'axes.color_cycle' was deprecated in matplotlib 1.5 and later removed in
# favor of 'axes.prop_cycle'. Prefer the new key and fall back to the old
# one on installs that predate it, so the defaults load on either version.
try:
    from matplotlib import cycler
    rcParams['axes.prop_cycle'] = cycler('color', dark2_colors)
except (ImportError, KeyError):
    rcParams['axes.color_cycle'] = dark2_colors
rcParams['lines.linewidth'] = 2
rcParams['axes.grid'] = False
rcParams['axes.facecolor'] = 'white'
rcParams['font.size'] = 14
rcParams['patch.edgecolor'] = 'none'

def remove_border(axes=None, top=False, right=False, left=True, bottom=True):
    """
    Strip plot borders and axis ticks that are not wanted.

    Each of top/right/left/bottom says whether that side's spine is shown.
    Ticks are first turned off on both axes and then switched back on for
    every side whose flag is truthy. When `axes` is not given (or is falsy),
    the current axes from `plt.gca()` are used.
    """
    ax = axes or plt.gca()

    # show or hide each spine according to its flag
    for side, shown in (('top', top), ('right', right),
                        ('left', left), ('bottom', bottom)):
        ax.spines[side].set_visible(shown)

    # start from a clean slate: no ticks on either axis
    ax.yaxis.set_ticks_position('none')
    ax.xaxis.set_ticks_position('none')

    # restore ticks for the visible sides, in the order top, bottom, left, right
    restore = ((top, ax.xaxis.tick_top),
               (bottom, ax.xaxis.tick_bottom),
               (left, ax.yaxis.tick_left),
               (right, ax.yaxis.tick_right))
    for shown, enable_ticks in restore:
        if shown:
            enable_ticks()

"""
Function
--------
get_senate_vote

Scrapes a single JSON page for a particular Senate vote, given by the vote number

Parameters
----------
vote : int
   The vote number to fetch
   
Returns
-------
vote : dict
   The JSON-decoded dictionary for that vote
   
Examples
--------
>>> get_senate_vote(11)['bill']
{u'congress': 113,
 u'number': 325,
 u'title': u'A bill to ensure the complete and timely payment of the obligations of the United States Government until May 19, 2013, and for other purposes.',
 u'type': u'hr'}
"""
#your code here

def get_senate_vote(vote):
    """Download one Senate vote record and return it as a decoded JSON object.

    `vote` is the integer vote number; it is substituted into the govtrack
    data URL for the 113th Congress, 2013 session.
    """
    response = requests.get(
        'http://www.govtrack.us/data/congress/113/votes/2013/s%i/data.json' % vote)
    return json.loads(response.text)

"""
Function
--------
get_all_votes

Scrapes all the Senate votes from http://www.govtrack.us/data/congress/113/votes/2013,
and returns a list of dicts

Parameters
-----------
None

Returns
--------
votes : list of dicts
    List of JSON-parsed dicts for each senate vote
"""
#Your code here

def get_all_votes():
    """Scrape every Senate vote listed in the 2013 vote directory.

    The directory listing is fetched and every link whose href starts with
    's' followed by digits is treated as a Senate vote; the digits are the
    vote number. (Assumes the listing links look like 's11/' -- TODO confirm
    against the live page.)

    The vote numbers are read from the links themselves rather than assumed
    to be exactly 1..N, so gaps in the numbering, duplicated links, or
    unrelated links that merely start with 's' no longer cause requests for
    votes that do not exist.

    Returns
    -------
    votes : list of dicts
        One JSON-decoded dict per vote, in increasing vote-number order.
    """
    import re

    page = requests.get('https://www.govtrack.us/data/congress/113/votes/2013/').text
    dom = web.Element(page)

    vote_numbers = set()
    for a in dom.by_tag('a'):
        match = re.match(r's(\d+)', a.attr.get('href', ''))
        if match:
            vote_numbers.add(int(match.group(1)))

    return [get_senate_vote(number) for number in sorted(vote_numbers)]

#vote_data = get_all_votes()
# Load the cached scrape instead of hitting the network; the context manager
# closes the file as soon as the JSON has been read.
with open('vote_data.json') as vote_file:
    vote_data = json.load(vote_file)

"""
Function
--------
vote_graph

Parameters
----------
data : list of dicts
    The vote database returned from get_all_votes

Returns
-------
graph : NetworkX Graph object, with the following properties
    1. Each node in the graph is labeled using the `display_name` of a Senator (e.g., 'Lee (R-UT)')
    2. Each node has a `color` attribute set to 'r' for Republicans, 
       'b' for Democrats, and 'k' for Independent/other parties.
    3. The edges between two nodes are weighted by the number of 
       times two senators have cast the same Yea or Nay vote
    4. Each edge also has a `difference` attribute, which is set to `1 / weight`.

Examples
--------
>>> graph = vote_graph(vote_data)
>>> graph.node['Lee (R-UT)']
{'color': 'r'}  # attributes for this senator
>>> len(graph['Lee (R-UT)']) # connections to other senators
101
>>> graph['Lee (R-UT)']['Baldwin (D-WI)']  # edge relationship between Lee and Baldwin
{'difference': 0.02, 'weight': 50}
"""
#Your code here

def _color(s):
    if '(R' in s:
        return 'r'
    if '(D' in s:
        return 'b'
    return 'k'
    
def vote_graph(data):
    """Build the graph of voting agreement between senators.

    Parameters
    ----------
    data : list of dicts
        Vote records; each must have a 'votes' dict mapping a vote group
        name (e.g. 'Yea', 'Nay') to a list of dicts with a 'display_name'.

    Returns
    -------
    graph : NetworkX Graph
        * One node per display name seen in any vote group, with a `color`
          attribute computed by `_color`.
        * An edge between two senators who appeared together in the same
          'Yea' or 'Nay' group at least once; `weight` is the number of such
          votes and `difference` is `1 / weight`.
    """
    from collections import Counter
    from itertools import combinations

    senators = set(x['display_name'] for d in data
                   for vote_grp in d['votes'].values() for x in vote_grp)

    # Count, per unordered pair of senators, how often they cast the same
    # Yea/Nay vote. Names are de-duplicated and sorted within each group so
    # every pair maps to a single (smaller, larger) key and a name repeated
    # inside one group cannot pair with itself.
    agreements = Counter()
    for d in data:
        for grp in ('Yea', 'Nay'):
            names = sorted(set(x['display_name']
                               for x in d['votes'].get(grp, ())))
            agreements.update(combinations(names, 2))

    g = nx.Graph()
    for s in senators:
        # pass the attribute through add_node rather than the Graph.node
        # mapping, which newer networkx releases no longer provide
        g.add_node(s, color=_color(s))

    # every counted pair has weight >= 1, so the division is always defined
    for (s1, s2), weight in agreements.items():
        g.add_edge(s1, s2, weight=weight, difference=1. / weight)

    return g


# Build the vote-agreement graph from the loaded vote records
votes = vote_graph(vote_data)

#this makes sure draw_spring results are the same at each call
np.random.seed(1)

# one color per node, in the order votes.nodes() iterates
color = [votes.node[senator]['color'] for senator in votes.nodes()]

#determine position of each node using a spring layout
pos = nx.spring_layout(votes, iterations=200)

#plot the edges
nx.draw_networkx_edges(votes, pos, alpha = .05)

#plot the nodes
nx.draw_networkx_nodes(votes, pos, node_color=color)

#draw the labels
# alpha must lie in [0, 1]; the previous value of 5 was out of range (most
# likely a dropped decimal point) and is rejected by recent matplotlib.
lbls = nx.draw_networkx_labels(votes, pos, alpha=.5, font_size=8)

#coordinate information is meaningless here, so let's remove it
plt.xticks([])
plt.yticks([])
remove_border(left=False, bottom=False)

# Draw the minimum spanning tree of the vote graph, using the `difference`
# edge attribute as the edge cost.
plt.figure(figsize=(15, 10))
# fix the seed so the spring layout is the same on every run
np.random.seed(5)
mst = nx.minimum_spanning_tree(votes, weight='difference')
# node positions are laid out on the tree itself, not on the full graph
pos = nx.spring_layout(mst, iterations=900, k=.008, weight='difference')

# NOTE(review): this runs the spanning-tree computation a second time with
# the same arguments; presumably the edges match those of `mst` -- confirm.
mst_edges = list(nx.minimum_spanning_edges(votes, weight='difference'))

# node list and matching colors, kept in the same order
nl = votes.nodes()
c = [votes.node[n]['color'] for n in nl]
nx.draw_networkx_edges(votes, pos, edgelist=mst_edges, alpha=.2)
nx.draw_networkx_nodes(votes, pos, nodelist = nl, node_color = c, node_size=60)

# nudge every position upward in place; the nodes are already drawn, so only
# the labels drawn below are affected by the shift
for p in pos.values():
    p[1] += .02
    
nx.draw_networkx_labels(votes, pos, font_color='k', font_size=7)

plt.title("MST of Vote Disagreement", fontsize=18)
# hide the tick marks and the left/bottom borders
plt.xticks([])
plt.yticks([])
remove_border(left=False, bottom=False)

# Closeness centrality of every senator, using the `difference` edge
# attribute as the distance, then a ranking from highest to lowest.
bet = nx.closeness_centrality(votes, distance='difference')
bipartisans = sorted(bet, key=lambda name: -bet[name])

print("Highest closeness")
for senator in bipartisans[:5]:
    print("%20.20s\t%0.3f" % (senator, bet[senator]))
print("")
print("Lowest closeness")
for senator in bipartisans[-5:]:
    print("%20.20s\t%0.3f" % (senator, bet[senator]))


# Bar chart of the closeness values, sorted ascending, colored by party
plt.figure(figsize=(15, 4))
x = np.arange(len(nl))
y = np.array([bet[n] for n in nl])
c = np.array([votes.node[n]['color'] for n in nl])

# reorder heights and colors together by increasing closeness
ind = np.argsort(y)
y, c = y[ind], c[ind]

plt.bar(x, y, color=c, align='center', width=.8)

remove_border(left=None, bottom=None)
# x is 0..len(nl)-1, so x[ind] is the same sequence of indices as ind
ticks = plt.xticks(x, [nl[i] for i in ind],
                   rotation='vertical', fontsize=7)
limits = plt.xlim(-1, x[-1] + 1)

#your code here

"""
Here, we compute the mean weight for the edges that connect a Senator
to a node in the other party (we consider Independents to be Democrats
for this analysis).

This only considers how similarly a Senator votes with the other party.

The scatter plot shows that the closeness centrality and bipartisan score
correlate with each other. However, the closeness centrality judges Democrats
to be more bipartisan as a whole. Part of this is a bias due to the fact
that Democrats are the majority party in the Senate right now, so their
votes are considered more "central" due to their bigger numbers.
"""
def bipartisan_score(graph, node):
    """Return the mean edge weight between `node` and the opposing party.

    The opposing party is 'b' for a node colored 'r', and 'r' for every
    other color. Only neighbors whose `color` equals the opposing party
    contribute; the result is `np.mean` of their edge weights.
    """
    own_party = graph.node[node]['color']
    if own_party == 'r':
        opposing = 'b'
    else:
        opposing = 'r'

    cross_party_weights = []
    for neighbor, edge in graph[node].items():
        if graph.node[neighbor]['color'] == opposing:
            cross_party_weights.append(edge['weight'])
    return np.mean(cross_party_weights)

# Bipartisan score for every senator, then a ranking from most to least
bp_score = {node: bipartisan_score(votes, node) for node in votes.nodes()}
bp2 = sorted(bp_score, key=lambda x: -1 * bp_score[x])

print("Most Bipartisan")
for senator in bp2[:5]:
    print("%20.20s\t%0.3f" % (senator, bp_score[senator]))

print("")
print("Least Bipartisan")
for senator in bp2[-5:]:
    print("%20.20s\t%0.3f" % (senator, bp_score[senator]))


# Scatter of closeness centrality against bipartisan score, colored by party
senators = bp_score.keys()
x = [bet[s] for s in senators]
y = [bp_score[s] for s in senators]
c = [votes.node[s]['color'] for s in senators]

plt.scatter(x, y, 80, color=c,
            alpha=.5, edgecolor='white')
# `bet` holds the output of nx.closeness_centrality, so the axis is labelled
# as closeness (it was previously mislabelled as betweenness)
plt.xlabel("Closeness Centrality")
plt.ylabel("Bipartisan Score")
remove_border()

"""
Function
--------
get_senate_bill

Scrape the bill data from a single JSON page, given the bill number

Parameters
-----------
bill : int
   Bill number to fetch
   
Returns
-------
A dict, parsed from the JSON

Examples
--------
>>> bill = get_senate_bill(10)
>>> bill['sponsor']
{u'district': None,
 u'name': u'Reid, Harry',
 u'state': u'NV',
 u'thomas_id': u'00952',
 u'title': u'Sen',
 u'type': u'person'}
>>> bill['short_title']
u'Agriculture Reform, Food, and Jobs Act of 2013'
"""
#your code here
def get_senate_bill(bill):
    """Download one Senate bill record and return it as a decoded JSON object.

    `bill` is the integer bill number; it is substituted into the govtrack
    data URL for Senate bills of the 113th Congress.
    """
    response = requests.get(
        'http://www.govtrack.us/data/congress/113/bills/s/s%i/data.json' % bill)
    return json.loads(response.text)

"""
Function
--------
get_all_bills

Scrape all Senate bills at http://www.govtrack.us/data/congress/113/bills/s

Parameters
----------
None

Returns
-------
A list of dicts, one for each bill
"""
#your code here
def get_all_bills():
    """Scrape every Senate bill listed in the 113th Congress bill directory.

    The directory listing is fetched and every link whose href starts with
    's' followed by digits is treated as a Senate bill; the digits are the
    bill number. (Assumes the listing links look like 's10/' -- TODO confirm
    against the live page.)

    The bill numbers are read from the links themselves rather than assumed
    to be exactly 1..N, so any gap in the numbering no longer causes requests
    for bills that do not exist while skipping the highest-numbered ones.

    Returns
    -------
    A list of dicts, one per bill, in increasing bill-number order.
    """
    import re

    page = requests.get('http://www.govtrack.us/data/congress/113/bills/s/').text
    dom = web.Element(page)

    bill_numbers = set()
    for a in dom.by_tag('a'):
        match = re.match(r's(\d+)', a.attr.get('href', ''))
        if match:
            bill_numbers.add(int(match.group(1)))

    return [get_senate_bill(number) for number in sorted(bill_numbers)]

#bill_list = get_all_bills()
# Load the cached scrape instead of hitting the network; the context manager
# closes the file as soon as the JSON has been read.
with open('bill_list.json') as bill_file:
    bill_list = json.load(bill_file)

"""
Function
--------
bill_graph

Turn the bill graph data into a NetworkX Digraph

Parameters
----------
data : list of dicts
    The data returned from get_all_bills

Returns
-------
graph : A NetworkX DiGraph, with the following properties
    * Each node is a senator. For a label, use the 'name' field 
      from the 'sponsor' and 'cosponsors' dict items
    * Each edge from A to B is assigned a weight equal to how many 
      bills are sponsored by B and co-sponsored by A
"""
#Your code here

def bill_graph(data):
    """Turn bill records into a directed co-sponsorship graph.

    Parameters
    ----------
    data : list of dicts
        Bill records; each must have a 'sponsor' dict and a 'cosponsors'
        list of dicts, every one of which carries a 'name' key.

    Returns
    -------
    graph : NetworkX DiGraph
        * One node per senator name. A node's attributes are the fields of
          the sponsor/cosponsor dict in which the name was first seen.
        * An edge from A to B whose `weight` is the number of bills
          sponsored by B and co-sponsored by A.
    """
    sp = nx.DiGraph()

    for bill in data:
        sponsor_data = bill['sponsor']
        sponsor = sponsor_data['name']
        if sponsor not in sp:
            sp.add_node(sponsor, **sponsor_data)

        for cosponsor_data in bill['cosponsors']:
            cosponsor = cosponsor_data['name']
            if cosponsor not in sp:
                sp.add_node(cosponsor, **cosponsor_data)

            # bump the count on an existing edge, otherwise start it at 1
            if sp.has_edge(cosponsor, sponsor):
                sp[cosponsor][sponsor]['weight'] += 1
            else:
                sp.add_edge(cosponsor, sponsor, weight=1)

    return sp

# Build the co-sponsorship graph from the loaded bill records
bills = bill_graph(bill_list)

# PageRank of every senator, sorted from lowest to highest score
pagerank = nx.pagerank_numpy(bills)
names = np.array(list(pagerank.keys()))
vals = np.array([pagerank[n] for n in names])

ind = np.argsort(vals)
names, vals = names[ind], vals[ind]

print("Highest Scores")
# the last five entries are the largest; reverse them to print best first
for n, v in list(zip(names, vals))[-5:][::-1]:
    print("%20.20s\t%0.3f" % (n, v))

print("")
print("Lowest Scores")
for n, v in list(zip(names, vals))[:5]:
    print("%20.20s\t%0.3f" % (n, v))

# Scatter plot of each senator's degree in the bill graph against PageRank

deg = nx.degree(bills)

# both coordinate lists iterate bills.nodes(), so the points line up
plt.scatter([deg[n] for n in bills.nodes()],
            [pagerank[n] for n in bills.nodes()], 80, alpha=.8, 
            color='k', edgecolor='white')

# annotate a hand-picked set of senators; each name must be a node in `bills`
labels = ['Reid, Harry', 'Lautenberg, Frank R.', 'Menendez, Robert', 'Harkin, Tom']
for lbl in labels:
    # the text is placed slightly above the point it refers to
    plt.annotate(lbl, (deg[lbl], pagerank[lbl] + .002), fontsize=10, rotation=10)
    
plt.xlabel("Degree")
plt.ylabel("PageRank")
remove_border()

# Export the vote graph to 'votes.gexf' in GEXF format
nx.write_gexf(votes, 'votes.gexf')