# Remote images (book cover, logos, Venn diagrams) wrapped in IPython Image
# objects.  The import must precede the first Image(...) call; with the
# import placed after it, running the file top to bottom raises NameError.
# NOTE(review): a bare Image(...) expression is only rendered when it is the
# last expression of a notebook cell - presumably this file was exported from
# a notebook; confirm before relying on it as a plain script.
from IPython.display import Image

Image(url='http://it-ebooks.info/images/ebooks/3/agile_data_science.jpg')

Image(url='http://www.cloudpointasia.com/var/site/storage/images/media/images/site-images/logos/citrixonline_logo_web/96233-1-eng-GB/citrixonline_logo_web.gif')

Image(url='http://www.theactivityexchange.com/images/logo_small.png')

# The Data Science Venn Diagram (1.0) - Drew Conway
Image(url='http://static.squarespace.com/static/5150aec6e4b0e340ec52710a/t/51525c33e4b0b3e0d10f77ab/1364352052403/Data_Science_VD.png?format=750w')

# Venn Diagram 2.0, Steven Geringer
# The address is passed positionally here (not via url=), exactly as before.
Image('http://2.bp.blogspot.com/-Qi-0utjhySM/UsteLrV6NyI/AAAAAAAACNQ/AdkizQfS8l8/s1600/moz-screenshot-3-729576.png')

import json
from contextlib import closing

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 has no urllib2; urlopen lives in urllib.request.  Binding it to
    # the old name keeps every `urllib2.urlopen` reference working.
    import urllib.request as urllib2

# Meetup API request for the members of the Santa-Barbara-Data-Science group,
# ordered by name.  The query string asks for offset=0 with page=150, so
# presumably at most 150 records come back - confirm against the Meetup API.
# NOTE(review): the request signature (sig_id / sig) is hard-coded in source;
# consider loading it from configuration instead of committing it.
MEMBERS_URL = (
    "http://api.meetup.com/2/members?order=name"
    "&group_urlname=Santa-Barbara-Data-Science&offset=0&format=json&page=150"
    "&sig_id=66734052&sig=f7fc02b7069092e6775332b25f01b69e21346b92"
)

# closing() releases the HTTP response even if read() or the JSON parse
# raises; previously the response object was never closed.
with closing(urllib2.urlopen(MEMBERS_URL)) as response:
    members = json.loads(response.read())

# Reduce each member record to its name and bio.  .get() substitutes an empty
# bio when a record has no 'bio' key instead of failing with KeyError.
bios = [{'name': member['name'], 'bio': member.get('bio', '')}
        for member in members['results']]
bios[-2:]

import nltk
from collections import Counter

# Tally the tokens that the part-of-speech tagger labels 'NNP' across all
# member bios.  Each bio is split with wordpunct_tokenize, tagged, and every
# matching token is counted under its str.capitalize() form.
entity_counter = Counter(
    token.capitalize()
    for member in bios
    for token, pos in nltk.pos_tag(nltk.wordpunct_tokenize(member['bio']))
    if pos == 'NNP'
)
entity_counter.most_common(5)

import matplotlib.pyplot as plt
import numpy as np

# Tokens dropped from the chart even when they rank among the most common.
_IGNORED_TOKENS = frozenset(
    ['Data', 'Hi', 'Santa', 'Barbara', 'D', 'Ph', 'My', 'Science'])

# Bar chart of the most frequent tokens in entity_counter: take the top 20,
# remove the ignored ones, and write each token above its bar.
fig = plt.figure(figsize=(15, 9))
ax = fig.add_subplot(111)
ax.set_xticks([])

names, counts = zip(*[(name, count)
                      for name, count in entity_counter.most_common(20)
                      if name not in _IGNORED_TOKENS])
x_pos = np.arange(len(counts))

# The -0.4 shift positions each bar's LEFT edge, which centres a default-width
# (0.8) bar on its integer position.  align='edge' is stated explicitly
# because matplotlib 2.0 changed the default alignment to 'center', which
# would otherwise move every bar away from its label.
plt.bar(x_pos - .4, counts, align='edge', color='#eeefff')

# Raise longer labels further so the rotated text starts above the bar.
for x, y, label in zip(x_pos, counts, names):
    plt.annotate(label, (x + 0.1, y + len(label) / 2.2),
                 ha='center', rotation=70, size='xx-large')

from itertools import product

# Topic co-occurrence counts.  `nodes` maps a topic name to the number of
# times it is listed by members; `edges` maps a (name_a, name_b) pair to the
# number of members listing both.  Only pairs with name_a > name_b are kept,
# so each unordered pair is counted once and a topic is never paired with
# itself.
nodes = Counter()
edges = Counter()

for member in members['results']:
    topic_names = [topic['name'] for topic in member['topics']]
    nodes.update(topic_names)
    edges.update(
        (first, second)
        for first, second in product(topic_names, repeat=2)
        if first > second
    )

nodes.most_common(10)

import networkx as nx

# Build and draw the co-occurrence graph for the 20 most common topics.
# Each node stores its topic count and each edge stores how many members
# listed both of its topics.
top_topics = nodes.most_common(20)

g = nx.Graph()
node_names = set()

for name, count in top_topics:
    g.add_node(name, count=count)
    node_names.add(name)

# items() (rather than iteritems()) works on both Python 2 and Python 3.
# Edges touching a topic outside the top 20 are skipped.
for edge, count in edges.items():
    if edge[0] in node_names and edge[1] in node_names:
        g.add_edge(edge[0], edge[1], weight=count)

# Put each word of a multi-word topic on its own line in the node label.
labels = {name: name.replace(' ', '\n') for name, _ in top_topics}

fig = plt.figure(figsize=(17, 10))
ax = fig.add_subplot(111)
pos = nx.spring_layout(g, k=4.9, scale=1000.0)

# nodes(data=True) / edges(data=True) replace nodes_iter / edges_iter, which
# NetworkX 2.0 removed; the replacements also exist in NetworkX 1.x.
# Node size grows with the square of the topic count.
nx.draw_networkx_nodes(
    g, pos,
    node_size=[7 * (d['count'] ** 2) for _, d in g.nodes(data=True)],
    alpha=0.8, node_color='#eeefff')
nx.draw_networkx_labels(g, pos, labels, font_size=18)
# Edge width grows with the square of the co-occurrence count.
nx.draw_networkx_edges(
    g, pos,
    width=[(d['weight'] / 10.0) ** 2 for _, _, d in g.edges(data=True)],
    alpha=0.5, edge_color='g')
ax.set_xticks([])
# Trailing ';' kept from the original: presumably it stops a notebook from
# echoing the return value of the final call.
ax.set_yticks([]);