!pip install oauth2 !pip install unidecode %matplotlib inline from collections import defaultdict import json import numpy as np import scipy as sp import matplotlib.pyplot as plt import pandas as pd from matplotlib import rcParams import matplotlib.cm as cm import matplotlib as mpl #colorbrewer2 Dark2 qualitative color table dark2_colors = [(0.10588235294117647, 0.6196078431372549, 0.4666666666666667), (0.8509803921568627, 0.37254901960784315, 0.00784313725490196), (0.4588235294117647, 0.4392156862745098, 0.7019607843137254), (0.9058823529411765, 0.1607843137254902, 0.5411764705882353), (0.4, 0.6509803921568628, 0.11764705882352941), (0.9019607843137255, 0.6705882352941176, 0.00784313725490196), (0.6509803921568628, 0.4627450980392157, 0.11372549019607843)] rcParams['figure.figsize'] = (10, 6) rcParams['figure.dpi'] = 150 rcParams['axes.color_cycle'] = dark2_colors rcParams['lines.linewidth'] = 2 rcParams['axes.facecolor'] = 'white' rcParams['font.size'] = 14 rcParams['patch.edgecolor'] = 'white' rcParams['patch.facecolor'] = dark2_colors[0] rcParams['font.family'] = 'StixGeneral' def remove_border(axes=None, top=False, right=False, left=True, bottom=True): """ Minimize chartjunk by stripping out unnecesasry plot borders and axis ticks The top/right/left/bottom keywords toggle whether the corresponding plot border is drawn """ ax = axes or plt.gca() ax.spines['top'].set_visible(top) ax.spines['right'].set_visible(right) ax.spines['left'].set_visible(left) ax.spines['bottom'].set_visible(bottom) #turn off all ticks ax.yaxis.set_ticks_position('none') ax.xaxis.set_ticks_position('none') #now re-enable visibles if top: ax.xaxis.tick_top() if bottom: ax.xaxis.tick_bottom() if left: ax.yaxis.tick_left() if right: ax.yaxis.tick_right() pd.set_option('display.width', 500) pd.set_option('display.max_columns', 100) #Johanna #user_token = '6a516d33-786e-443c-b6e9-def654f88098' #user_secret = 'c03c49da-9dae-4b05-a2af-82e40426439f' #api_key = 'xpsswsigqw4r' #secret_key = 'aIRpJHhA8JHTRsyb' #Alex #api_key = 'g8lq60ilatfh' #secret_key = 'XEOmeklHWHtmwgoQ' #user_token = 'a8991ba6-9a27-40d7-ac6f-9280cc1dc650' #user_secret = '43a11017-c1f3-4c30-afab-43df3c39b938' #Nicolas user_token = 'd41f3e0c-6bb9-4db8-b324-25a723ff2f50' user_secret = 'fc66e892-6f92-4e15-b9a9-b0cccbec5336' api_key = 'kg7oy496e09a' secret_key = 'oLCLRNxVjt8ZY6OE' import oauth2 as oauth import urlparse def request_token(consumer): client = oauth.Client(consumer) request_token_url = 'https://api.linkedin.com/uas/oauth/requestToken?scope=r_network' resp, content = client.request(request_token_url, "POST") if resp['status'] != '200': raise Exception("Invalid response %s." % resp['status']) request_token = dict(urlparse.parse_qsl(content)) return request_token #consumer = oauth.Consumer(api_key, secret_key) #r_token = request_token(consumer) #print "Request Token: oauth_token: %s, oauth_token_secret: %s" % (r_token['oauth_token'], r_token['oauth_token_secret']) def authorize(request_token): authorize_url ='https://api.linkedin.com/uas/oauth/authorize' print "Go to the following link in your browser:" print "%s?oauth_token=%s" % (authorize_url, request_token['oauth_token']) print accepted = 'n' while accepted.lower() == 'n': accepted = raw_input('Have you authorized me? (y/n) ') oauth_verifier = raw_input('What is the PIN? ') return oauth_verifier #oauth_verifier = authorize(r_token) def access(consumer, request_token, oauth_verifier): access_token_url = 'https://api.linkedin.com/uas/oauth/accessToken' token = oauth.Token(request_token['oauth_token'], request_token['oauth_token_secret']) token.set_verifier(oauth_verifier) client = oauth.Client(consumer, token) resp, content = client.request(access_token_url, "POST") access_token = dict(urlparse.parse_qsl(content)) return access_token #a_token = access(consumer, r_token, oauth_verifier) #print a_token #print "Access Token: oauth_token = %s, oauth_token_secret = %s" % (a_token['oauth_token'], a_token['oauth_token_secret']) #print "You may now access protected resources using the access tokens above." consumer = oauth.Consumer(api_key, secret_key) r_token = request_token(consumer) print "Request Token: oauth_token: %s, oauth_token_secret: %s" % (r_token['oauth_token'], r_token['oauth_token_secret']) oauth_verifier = authorize(r_token) a_token = access(consumer, r_token, oauth_verifier) print a_token print "Access Token: oauth_token = %s, oauth_token_secret = %s" % (a_token['oauth_token'], a_token['oauth_token_secret']) print "You may now access protected resources using the access tokens above." import simplejson import codecs output_file = 'linkedIn_links.csv' my_name = 'Your Name' def linkedin_connections(): # Use your credentials to build the oauth client consumer = oauth.Consumer(key=api_key, secret=secret_key) token = oauth.Token(key=a_token['oauth_token'], secret=a_token['oauth_token_secret']) client = oauth.Client(consumer, token) # Fetch first degree connections resp, content = client.request('http://api.linkedin.com/v1/people/~/connections?format=json') results = simplejson.loads(content) # File that will store the results output = codecs.open(output_file, 'w', 'utf-8') # Loop through the 1st degree connection and see how they connect to each other for result in results["values"]: con = "%s %s" % (result["firstName"].replace(",", " "), result["lastName"].replace(",", " ")) print >>output, "%s,%s" % (my_name, con) # This is the trick, use the search API to get related connections u = "https://api.linkedin.com/v1/people/%s:(relation-to-viewer:(related-connections))?format=json" % result["id"] resp, content = client.request(u) rels = simplejson.loads(content) try: for rel in rels['relationToViewer']['relatedConnections']['values']: sec = "%s %s" % (rel["firstName"].replace(",", " "), rel["lastName"].replace(",", " ")) print >>output, "%s,%s" % (con, sec) except: pass linkedin_connections() from operator import itemgetter from unidecode import unidecode clean_output_file = 'linkedIn_links_clean.csv' def stringify(chain): # Simple utility to build the nodes labels allowed = '0123456789abcdefghijklmnopqrstuvwxyz_' c = unidecode(chain.strip().lower().replace(' ', '_')) return ''.join([letter for letter in c if letter in allowed]) def clean(f_input, f_output): output = open(f_output, 'w') # Store the edges inside a set for dedup edges = set() for line in codecs.open(f_input, 'r', 'utf-8'): from_person, to_person = line.strip().split(',') _f = stringify(from_person) _t = stringify(to_person) # Reorder the edge tuple _e = tuple(sorted((_f, _t), key=itemgetter(0, 1))) edges.add(_e) for edge in edges: print >>output, '%s,%s' % (edge[0], edge[1]) clean(output_file, clean_output_file) import csv from collections import defaultdict pairlist=[] connections=defaultdict(list) userset=set() with open('linkedIn_links_clean.csv', 'rb') as csvfile: allrows = csv.reader(csvfile, delimiter=',') for row in allrows: # if ((row[0]=='your_name') | (row[1]=='your_name')): continue # exclude yourself ? pairlist.append((row[0], row[1])) connections[row[0]].append(row[1]) connections[row[1]].append(row[0]) userset.add(row[0]) userset.add(row[1]) ## Actual algorithm starts here ## display the pagerank import networkx as nx import matplotlib.pyplot as plt import matplotlib import math g = nx.Graph() remove_me = False for user in userset: if remove_me & (user=='your_name'): continue g.add_node(user) for user in userset: if remove_me & (user=='your_name'): continue nconnec = 0 for connection in connections[user]: if remove_me & (connection=='your_name'): continue g.add_edge(user, connection, weight = 1) nconnec+=1 if remove_me & (nconnec==0): g.remove_node(user) pagerank_nx = nx.pagerank_scipy(g) color = [(min(pagerank_nx[n]*30.,1),min(pagerank_nx[n]*30.,1), min(pagerank_nx[n]*30.,1)) for n in pagerank_nx] pos = nx.spring_layout(g, iterations=100) nx.draw_networkx_edges(g, pos, width=1, alpha=0.4) nx.draw_networkx_nodes(g, pos, node_color=color, node_size=100, alpha=1, linewidths =0.5) #lbls = nx.draw_networkx_labels(g, pos) plt.show() # checks whether we have the same, or similar, pageranks sorted_pr = sorted(pagerank_nx.iteritems(), reverse=True, key=lambda (k,v): v) print sorted_pr[:10] # your number of connection print 'my degree is: ', g.degree('your_name'), '\n' # diameter = maximum nb of edges between 2 nodes = always 2 in this case print 'the graph diameter is: ',nx.diameter(g), '\n' #center : surprising ? print 'the center is: ',nx.center(g), '\n' # number of clique communities of 5 nodes print 'there are ', len(list(nx.k_clique_communities(g, 5))),'clique communities\n' # most influential ? print 'degree: ', g.degree(sorted_pr[2]),'\n' print 'shortest path between Hanspeter and a friend', nx.shortest_path(g,source='hanspeter_pfister',target='etienne_corteel'),'\n'