import json
import urllib2
import urllib
from IPython.display import display

# Query the NewsReader API for types of actors matching "scientist".
request_str = 'https://newsreader.scraperwiki.com/types_of_actors?filter=scientist&output=json'
res = json.load(urllib2.urlopen(request_str))
print res

#request_str = "https://newsreader.scraperwiki.com/summary_of_events_with_actor_type?datefilter=2010-01&uris.0=dbo:University&output=json"
#request_str = "https://newsreader.scraperwiki.com/summary_of_events_with_event_label?filter=acquisition&datefilter=2010&output=json"

# Fetch all actors of type dbo:University whose label matches "university".
request_str = "https://newsreader.scraperwiki.com/actors_of_a_type?uris.0=dbo:University&filter=university"
res = json.load(urllib2.urlopen(request_str + "&output=json"))
print res

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Build a table of universities and the number of news events each appears in.
uniToCount = {"uni": [], "count": []}
payload = res['payload']
print uniToCount
for p in payload:
    actor = p['actor'][28:]   # drop the URI prefix (first 28 characters), keep the resource name
    count = p['count']
    uniToCount["uni"].append(actor)
    uniToCount["count"].append(int(count))

df = pd.DataFrame(uniToCount)
print df
df.plot(kind='barh', x='uni', title="Number of Events in the News")

from collections import defaultdict

#str = 'https://newsreader.scraperwiki.com/event_details_filtered_by_actor?uris.0=' + actor + '&output=json'

# For each university, count how often each event label occurs,
# following the API's paginated results via the 'next page' field.
actorToLabels = {}
for p in payload:
    print "start"
    labels = defaultdict(int)
    actor = p['actor']
    print actor
    url = 'https://newsreader.scraperwiki.com/summary_of_events_with_actor?uris.0=' + actor + '&output=json'
    while url:
        res = json.load(urllib2.urlopen(url))
        #print res
        for r in res['payload']:
            labels[r['event_label']] += 1
        if 'next page' in res:
            url = res['next page']
        else:
            break
        print url
    actorToLabels[actor] = labels

print actorToLabels

# Plot the distribution of event labels for each university.
for ab in actorToLabels:
    columns = actorToLabels[ab].keys()
    row = []
    for x in columns:
        row.append(actorToLabels[ab][x])
    d = {ab[28:]: row}
    df2 = pd.DataFrame(d, index=columns)
    s = df2.sort(ab[28:], ascending=False)   # sort_values() in newer pandas
    df2.plot(kind='area')
    #print s[ab[28:]][:5]