%matplotlib inline import json import numpy as np import networkx as nx import requests from pattern import web import matplotlib.pyplot as plt # set some nicer defaults for matplotlib from matplotlib import rcParams #these colors come from colorbrewer2.org. Each is an RGB triplet dark2_colors = [(0.10588235294117647, 0.6196078431372549, 0.4666666666666667), (0.8509803921568627, 0.37254901960784315, 0.00784313725490196), (0.4588235294117647, 0.4392156862745098, 0.7019607843137254), (0.9058823529411765, 0.1607843137254902, 0.5411764705882353), (0.4, 0.6509803921568628, 0.11764705882352941), (0.9019607843137255, 0.6705882352941176, 0.00784313725490196), (0.6509803921568628, 0.4627450980392157, 0.11372549019607843), (0.4, 0.4, 0.4)] rcParams['figure.figsize'] = (10, 6) rcParams['figure.dpi'] = 150 rcParams['axes.color_cycle'] = dark2_colors rcParams['lines.linewidth'] = 2 rcParams['axes.grid'] = False rcParams['axes.facecolor'] = 'white' rcParams['font.size'] = 14 rcParams['patch.edgecolor'] = 'none' def remove_border(axes=None, top=False, right=False, left=True, bottom=True): """ Minimize chartjunk by stripping out unnecessary plot borders and axis ticks The top/right/left/bottom keywords toggle whether the corresponding plot border is drawn """ ax = axes or plt.gca() ax.spines['top'].set_visible(top) ax.spines['right'].set_visible(right) ax.spines['left'].set_visible(left) ax.spines['bottom'].set_visible(bottom) #turn off all ticks ax.yaxis.set_ticks_position('none') ax.xaxis.set_ticks_position('none') #now re-enable visibles if top: ax.xaxis.tick_top() if bottom: ax.xaxis.tick_bottom() if left: ax.yaxis.tick_left() if right: ax.yaxis.tick_right() """ Function -------- get_senate_vote Scrapes a single JSON page for a particular Senate vote, given by the vote number Parameters ---------- vote : int The vote number to fetch Returns ------- vote : dict The JSON-decoded dictionary for that vote Examples -------- >>> get_senate_vote(11)['bill'] {u'congress': 113, 
u'number': 325, u'title': u'A bill to ensure the complete and timely payment of the obligations of the United States Government until May 19, 2013, and for other purposes.', u'type': u'hr'} """ #your code here def get_senate_vote(vote): url = 'http://www.govtrack.us/data/congress/113/votes/2013/s%i/data.json' % vote page = requests.get(url).text return json.loads(page) """ Function -------- get_all_votes Scrapes all the Senate votes from http://www.govtrack.us/data/congress/113/votes/2013, and returns a list of dicts Parameters ----------- None Returns -------- votes : list of dicts List of JSON-parsed dicts for each senate vote """ #Your code here def get_all_votes(): page = requests.get('https://www.govtrack.us/data/congress/113/votes/2013/').text dom = web.Element(page) votes = [a.attr['href'] for a in dom.by_tag('a') if a.attr.get('href', '').startswith('s')] n_votes = len(votes) return [get_senate_vote(i) for i in range(1, n_votes + 1)] #vote_data = get_all_votes() vote_data = json.load(open('vote_data.json')) """ Function -------- vote_graph Parameters ---------- data : list of dicts The vote database returned from get_vote_data Returns ------- graph : NetworkX Graph object, with the following properties 1. Each node in the graph is labeled using the `display_name` of a Senator (e.g., 'Lee (R-UT)') 2. Each node has a `color` attribute set to 'r' for Republicans, 'b' for Democrats, and 'k' for Independent/other parties. 3. The edges between two nodes are weighted by the number of times two senators have cast the same Yea or Nay vote 4. Each edge also has a `difference` attribute, which is set to `1 / weight`. 
Examples -------- >>> graph = vote_graph(vote_data) >>> graph.node['Lee (R-UT)'] {'color': 'r'} # attributes for this senator >>> len(graph['Lee (R-UT)']) # connections to other senators 101 >>> graph['Lee (R-UT)']['Baldwin (D-WI)'] # edge relationship between Lee and Baldwin {'difference': 0.02, 'weight': 50} """ #Your code here def _color(s): if '(R' in s: return 'r' if '(D' in s: return 'b' return 'k' def vote_graph(data): senators = set(x['display_name'] for d in data for vote_grp in d['votes'].values() for x in vote_grp) weights = {s: {ss: 0 for ss in senators if ss != s} for s in senators} for d in data: for grp in ['Yea', 'Nay']: if grp not in d['votes']: continue vote_grp = d['votes'][grp] for i in range(len(vote_grp)): for j in range(i + 1, len(vote_grp)): sen1 = vote_grp[i]['display_name'] sen2 = vote_grp[j]['display_name'] weights[min(sen1, sen2)][max(sen1, sen2)] += 1 g = nx.Graph() for s in senators: g.add_node(s) g.node[s]['color'] = _color(s) for s1, neighbors in weights.items(): for s2, weight in neighbors.items(): if weight == 0: continue g.add_edge(s1, s2, weight= weight, difference = 1. 
/ weight) return g votes = vote_graph(vote_data) #this makes sure draw_spring results are the same at each call np.random.seed(1) color = [votes.node[senator]['color'] for senator in votes.nodes()] #determine position of each node using a spring layout pos = nx.spring_layout(votes, iterations=200) #plot the edges nx.draw_networkx_edges(votes, pos, alpha = .05) #plot the nodes nx.draw_networkx_nodes(votes, pos, node_color=color) #draw the labels lbls = nx.draw_networkx_labels(votes, pos, alpha=5, font_size=8) #coordinate information is meaningless here, so let's remove it plt.xticks([]) plt.yticks([]) remove_border(left=False, bottom=False) #Your code here plt.figure(figsize=(15, 10)) np.random.seed(5) mst = nx.minimum_spanning_tree(votes, weight='difference') pos = nx.spring_layout(mst, iterations=900, k=.008, weight='difference') mst_edges = list(nx.minimum_spanning_edges(votes, weight='difference')) nl = votes.nodes() c = [votes.node[n]['color'] for n in nl] nx.draw_networkx_edges(votes, pos, edgelist=mst_edges, alpha=.2) nx.draw_networkx_nodes(votes, pos, nodelist = nl, node_color = c, node_size=60) for p in pos.values(): p[1] += .02 nx.draw_networkx_labels(votes, pos, font_color='k', font_size=7) plt.title("MST of Vote Disagreement", fontsize=18) plt.xticks([]) plt.yticks([]) remove_border(left=False, bottom=False) #Your code here bet = nx.closeness_centrality(votes, distance='difference') bipartisans = sorted(bet, key=lambda x: -bet[x]) print "Highest closeness" for senator in bipartisans[:5]: print "%20.20s\t%0.3f" % (senator, bet[senator]) print print "Lowest closeness" for senator in bipartisans[-5:]: print "%20.20s\t%0.3f" % (senator, bet[senator]) plt.figure(figsize=(15, 4)) x = np.arange(len(nl)) y = np.array([bet[n] for n in nl]) c = np.array([votes.node[n]['color'] for n in nl]) ind = np.argsort(y) y = y[ind] c = c[ind] plt.bar(x, y, color=c, align='center', width=.8) remove_border(left=None, bottom=None) ticks = plt.xticks(x, [nl[i] for i in x[ind]], 
rotation='vertical', fontsize=7) limits = plt.xlim(-1, x[-1] + 1) #your code here """ Here, we compute the mean weight for the edges that connect a Senator to a node in the other party (we consider Independents to be Democrats for this analysis). This only considers how similarly a Senator votes with the other party. The scatter plot shows that the betweenness centrality and bipartisan score correlate with each other. However, the betweenness centrality judges Democrats to be more bipartisan as a whole. Part of this is a bias due to the fact that Democrats are the majority party in the Senate right now, so their votes are considered more "central" due to their bigger numbers. """ def bipartisan_score(graph, node): party = graph.node[node]['color'] other = 'r' if party != 'r' else 'b' return np.mean([v['weight'] for k, v in graph[node].items() if graph.node[k]['color'] == other]) bp_score = {node: bipartisan_score(votes, node) for node in votes.nodes()} bp2 = sorted(bp_score, key=lambda x: -1 * bp_score[x]) print "Most Bipartisan" for senator in bp2[:5]: print "%20.20s\t%0.3f" % (senator, bp_score[senator]) print print "Least Bipartisan" for senator in bp2[-5:]: print "%20.20s\t%0.3f" % (senator, bp_score[senator]) senators = bp_score.keys() x = [bet[s] for s in senators] y = [bp_score[s] for s in senators] c = [votes.node[s]['color'] for s in senators] plt.scatter(x, y, 80, color=c, alpha=.5, edgecolor='white') plt.xlabel("Betweenness Centrality") plt.ylabel("Bipartisan Score") remove_border() """ Function -------- get_senate_bill Scrape the bill data from a single JSON page, given the bill number Parameters ----------- bill : int Bill number to fetch Returns ------- A dict, parsed from the JSON Examples -------- >>> bill = get_senate_bill(10) >>> bill['sponsor'] {u'district': None, u'name': u'Reid, Harry', u'state': u'NV', u'thomas_id': u'00952', u'title': u'Sen', u'type': u'person'} >>> bill['short_title'] u'Agriculture Reform, Food, and Jobs Act of 2013' """ 
# your code here
def get_senate_bill(bill):
    """
    Scrape the bill data from a single JSON page, given the bill number

    Parameters
    -----------
    bill : int
        Bill number to fetch

    Returns
    -------
    A dict, parsed from the JSON

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status
    """
    url = 'http://www.govtrack.us/data/congress/113/bills/s/s%i/data.json' % bill
    response = requests.get(url)
    # Fail on an HTTP error here instead of handing an error page to the
    # JSON decoder, which would report a misleading parse failure.
    response.raise_for_status()
    return json.loads(response.text)


# your code here
def get_all_bills():
    """
    Function
    --------
    get_all_bills

    Scrape all Senate bills at
    http://www.govtrack.us/data/congress/113/bills/s

    Parameters
    ----------
    None

    Returns
    -------
    A list of dicts, one for each bill
    """
    response = requests.get('http://www.govtrack.us/data/congress/113/bills/s/')
    response.raise_for_status()
    dom = web.Element(response.text)

    # Links whose href starts with 's' are counted as bills. Only the count
    # is used: bills are then fetched by number 1..count, which relies on the
    # numbering being consecutive.
    links = [a.attr['href'] for a in dom.by_tag('a')
             if a.attr.get('href', '').startswith('s')]
    return [get_senate_bill(i) for i in range(1, len(links) + 1)]


# bill_list = get_all_bills()
# Load the cached copy; the context manager closes the file handle.
with open('bill_list.json') as bill_file:
    bill_list = json.load(bill_file)


# Your code here
def bill_graph(data):
    """
    Function
    --------
    bill_graph

    Turn the bill graph data into a NetworkX Digraph

    Parameters
    ----------
    data : list of dicts
        The data returned from get_all_bills

    Returns
    -------
    graph : A NetworkX DiGraph, with the following properties
        * Each node is a senator. For a label, use the 'name' field
          from the 'sponsor' and 'cosponsors' dict items
        * Each edge from A to B is assigned a weight equal to how many
          bills are sponsored by B and co-sponsored by A
    """
    sp = nx.DiGraph()

    for bill in data:
        sponsor_data = bill['sponsor']
        sponsor = sponsor_data['name']

        # A node keeps the attributes of the first record it is seen in,
        # whether that was as a sponsor or as a cosponsor.
        if sponsor not in sp:
            sp.add_node(sponsor, **sponsor_data)

        for cosponsor_data in bill['cosponsors']:
            cosponsor = cosponsor_data['name']
            if cosponsor not in sp:
                sp.add_node(cosponsor, **cosponsor_data)

            # The edge points from the cosponsor to the sponsor; bump the
            # count if the pair has been seen before.
            if sp.has_edge(cosponsor, sponsor):
                sp[cosponsor][sponsor]['weight'] += 1
            else:
                sp.add_edge(cosponsor, sponsor, weight=1)

    return sp


bills = bill_graph(bill_list)

# Your code here
pagerank = nx.pagerank_numpy(bills)

# list(...) keeps these lines working where dict.keys() and zip() return
# views/iterators instead of lists.
names = np.array(list(pagerank))
vals = np.array([pagerank[n] for n in names])

# Sort ascending by score so the extremes are at the two ends.
ind = np.argsort(vals)
names = names[ind]
vals = vals[ind]

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Highest Scores")
for n, v in list(zip(names, vals))[-5:][::-1]:
    print("%20.20s\t%0.3f" % (n, v))
print("")
print("Lowest Scores")
for n, v in list(zip(names, vals))[:5]:
    print("%20.20s\t%0.3f" % (n, v))

# Your code here
deg = nx.degree(bills)

plt.scatter([deg[n] for n in bills.nodes()],
            [pagerank[n] for n in bills.nodes()], 80, alpha=.8,
            color='k', edgecolor='white')

labels = ['Reid, Harry', 'Lautenberg, Frank R.', 'Menendez, Robert', 'Harkin, Tom']
for lbl in labels:
    # Offset the text slightly above the point so it does not cover it.
    plt.annotate(lbl, (deg[lbl], pagerank[lbl] + .002), fontsize=10, rotation=10)

plt.xlabel("Degree")
plt.ylabel("PageRank")
remove_border()

nx.write_gexf(votes, 'votes.gexf')