import md5
import os
import time
import urlparse

import bokeh.charts as bc
import matplotlib.pyplot as plt
import pandas as pd
import requests
import qgrid
import seaborn as sns

# Equivalent of the `%matplotlib inline` notebook magic, spelled as a plain
# call so the file stays syntactically valid Python.
get_ipython().run_line_magic('matplotlib', 'inline')
qgrid.nbinstall()


def _character_summary(result):
    """Flatten one character record from the API into a small dict.

    `result` is one entry of resp['data']['results'] as returned by the
    characters endpoint; only the name, description, id and the four
    `available` counters are kept.
    """
    return {
        'name': result['name'],
        'description': result['description'],
        'id': result['id'],
        'comic_count': result['comics']['available'],
        'story_count': result['stories']['available'],
        'series_count': result['series']['available'],
        'event_count': result['events']['available'],
    }


# Quick and Dirty API wrapper
class Marvelous(object):
    """A microwrapper for the Marvel API.

    The key pair is taken from the constructor arguments when both are
    given; otherwise it is read from the MARVEL_PRIVATE_KEY and
    MARVEL_PUBLIC_KEY environment variables.
    """

    base_url = "http://gateway.marvel.com"

    def __init__(self, private_key=None, public_key=None):
        """Store the key pair used to sign every request."""
        if private_key and public_key:
            # Bug fix: this previously assigned the undefined name `private`,
            # which raised NameError whenever explicit keys were passed.
            self.private_key = private_key
            self.public_key = public_key
        else:
            self.private_key = os.getenv('MARVEL_PRIVATE_KEY')
            self.public_key = os.getenv('MARVEL_PUBLIC_KEY')

    def request(self, endpoint, **kwargs):
        """Make a request against the Marvel API.

        endpoint -- path joined onto `base_url`
        kwargs   -- extra query parameters; they are applied after the
                    auth parameters (ts, apikey, hash)

        Returns the decoded JSON body. Error statuses are raised by
        `raise_for_status()`.
        """
        ts = str(int(time.time()))
        # The request hash is md5(timestamp + private key + public key).
        concat = ''.join([ts, self.private_key, self.public_key])
        query_params = {
            'ts': ts,
            'apikey': self.public_key,
            'hash': md5.new(concat).hexdigest(),
        }
        query_params.update(kwargs)
        url = urlparse.urljoin(self.base_url, endpoint)
        resp = requests.get(url, params=query_params)
        # Check unconditionally: the old `!= 200` branch fell through and
        # returned None for any non-200 status that is not an error.
        resp.raise_for_status()
        return resp.json()

    def get_character(self, character_name):
        """Get the data for a given character name.

        Returns the summary dict for the first matching character.
        Raises IndexError when the API returns no results for the name.
        """
        resp = self.request('/v1/public/characters', name=character_name)
        results = resp['data']['results']
        if not results:
            # Same exception type as the bare `[0]` lookup, clearer message.
            raise IndexError(
                'no Marvel character named %r' % (character_name,))
        return _character_summary(results[0])


api = Marvelous()
api.get_character('Captain America')

cap_comics = api.request('/v1/public/characters/1009220/comics',
                         dateRange='2010-01-01,2014-11-01', limit=100)

# Let's get all of the characters in this list of comics
comic_list = cap_comics['data']['results']
character_set = {c['name']
                 for comic in comic_list
                 for c in comic['characters']['items']}

# Let's only get comic ids for comics that list more than 10 characters
comic_ids = [str(com['id']) for com in comic_list
             if com['characters']['available'] > 10]

# Marvel's API will only accept up to 10 comic ids; only the first 5 are used
joined_ids = ','.join(comic_ids[:5])
joined_ids

all_characters = api.request('/v1/public/characters', comics=joined_ids,
                             limit=50)
character_list = all_characters['data']['results']

# Same per-character summary that Marvelous.get_character returns
records = [_character_summary(character) for character in character_list]

hero_df = pd.DataFrame(records)
qgrid.show_grid(hero_df, remote_js=True)

# For quick and dirty exploratory analysis, I usually start with Pandas
# native plotting functionality
hero_df['comic_count'].plot(kind='bar')

top_10 = hero_df.sort('comic_count',
                      ascending=False)[['name', 'comic_count']][:10]
qgrid.show_grid(top_10, remote_js=True)
top_10.plot(kind='bar', x='name')

# Ok, time to create something a bit nicer
sns.factorplot('name', 'comic_count', data=top_10, kind='bar', aspect=2.0)

# Seaborn is very customizable
sns.set_style("whitegrid")
sns.factorplot('name', 'comic_count', data=top_10, kind='bar', aspect=2.5,
               palette="muted", size=7, x_order=top_10['name'].tolist())

import vincent
vincent.initialize_notebook()

bar = (vincent.Bar(top_10, columns=['comic_count'], key_on='name')
       .common_axis_properties(title_size=15)
       .axis_titles(x='Character Name', y='Total Comic Book Count')
       .colors(range_=['#6a9fb5'])
       .x_axis_properties(title_offset=10)
       .y_axis_properties(title_offset=-30))
bar

from IPython.core.display import HTML

# Use the following if running locally:
# styles = open("styles/custom.css", "r").read()

# This is for nbviewer:
# Read inside `with` so the stylesheet's file handle is closed promptly.
with open("custom.css", "r") as css_file:
    styles = css_file.read()
HTML(styles)