#!/usr/bin/env python
# coding: utf-8

# # [matta](https://github.com/carnby/matta) - view and scaffold d3.js visualizations in IPython notebooks
#
# ## basic examples
#
# By [@carnby](https://twitter.com/carnby).
#
# This notebook showcases the basic matta visualizations, as well as their usage.
#
# Note that the `init_javascript` call is not needed when running on a local server, provided the javascript code has been added to your IPython profile.

# In[1]:

import pandas as pd
import networkx as nx
import matta
import json
import requests
from networkx.readwrite import json_graph

# we do this to load the required libraries when viewing on NBViewer
matta.init_javascript(path='https://rawgit.com/carnby/matta/master/matta/libs')


# ### Wordclouds
#
# Wordclouds are implemented using the [d3.layout.cloud layout by Jason Davies](http://www.jasondavies.com/wordcloud/). They work with bags of words. The python `Counter` class is perfect for this purpose.

# In[2]:

hamlet = requests.get('http://www.gutenberg.org/cache/epub/2265/pg2265.txt').text
hamlet[0:100]


# In[3]:

import re
from collections import Counter

# findall(r'\w+') yields the same tokens as splitting on r'[\W]+', minus the
# empty strings that split() emits when the text starts or ends with a
# non-word character (those would otherwise be counted as a "word").
words = re.findall(r'\w+', hamlet.lower())
counts = Counter(words)


# In[4]:

# list(counts.items()) instead of counts.iteritems(): iteritems() only exists
# on Python 2, whereas items() wrapped in list() works on both 2 and 3.
df = pd.DataFrame.from_records(list(counts.items()), columns=['word', 'frequency'])
df.sort_values(['frequency'], ascending=False, inplace=True)
df.head()


# In[5]:

# Only the 500 most frequent words are drawn.
matta.wordcloud(dataframe=df.head(500), text='word', font_size='frequency',
                typeface='Helvetica', font_weight='bold',
                font_color={'value': 'frequency', 'palette': 'cubehelix', 'scale': 'threshold'})


# ### Treemaps
#
# Treemaps use the [Treemap Layout from d3.js](https://github.com/mbostock/d3/wiki/Treemap-Layout). They work with trees, which we construct through [`networkx.DiGraph`](http://networkx.github.io/documentation/networkx-1.9.1/reference/classes.digraph.html).
# In[6]:

flare_data = requests.get('https://gist.githubusercontent.com/mbostock/4063582/raw/a05a94858375bd0ae023f6950a2b13fac5127637/flare.json').json()


# In[7]:

flare_data['name']


# In[8]:

tree = nx.DiGraph()


def add_node(node):
    """Copy `node` and all of its descendants into the module-level `tree`.

    `node` is a dict with a 'name' key and, optionally, a 'size' value and a
    'children' list of dicts of the same shape. Each visited dict becomes one
    graph node carrying 'name' (and 'size' when present) as attributes, with
    an edge from every parent to each of its children.

    Returns the integer id assigned to `node`.
    """
    # Ids are handed out sequentially; the node is added before recursing so
    # that children see an updated node count and get fresh ids.
    node_id = tree.number_of_nodes() + 1

    # Attributes are passed at creation time rather than set afterwards via
    # `tree.node[...]`, which is not available in current networkx releases.
    attributes = {'name': node['name']}
    if 'size' in node:
        attributes['size'] = node['size']
    tree.add_node(node_id, **attributes)

    for child in node.get('children', ()):
        child_id = add_node(child)
        tree.add_edge(node_id, child_id)

    return node_id


root = add_node(flare_data)
# treemap requires this attribute
tree.graph['root'] = root


# In[9]:

nx.is_arborescence(tree)


# In[10]:

import seaborn as sns


# In[11]:

matta.treemap(tree=tree, node_value='size', node_label='name',
              node_color={'value': 'parent.name', 'scale': 'ordinal',
                          'palette': sns.husl_palette(15, l=.4, s=.9)})


# ### Sankey
#
# Sankey or flow diagrams use the [Sankey plugin by Mike Bostock](http://bost.ocks.org/mike/sankey/). They work with digraphs, just like treemaps. Note that graphs with loops are not supported.

# In[12]:

sankey_data = requests.get('http://bost.ocks.org/mike/sankey/energy.json')


# In[13]:

sankey_graph = json_graph.node_link_graph(json.loads(sankey_data.text), directed=True)


# In[14]:

# Peek at the first node and the first edge. next(iter(...)) replaces the
# Python 2 / networkx 1.x only `.nodes_iter(...).next()` spelling.
next(iter(sankey_graph.nodes(data=True))), next(iter(sankey_graph.edges(data=True)))


# In[22]:

matta.flow(graph=sankey_graph, node_label='name', link_weight='value',
           node_color='indigo', node_width=12, node_padding=13,
           link_color={'value': 'value', 'palette': 'Greys', 'scale': 'threshold'},
           link_opacity=0.8)


# ### Parallel Coordinates
#
# Parallel Coordinates are based on the [code by Jason Davies](http://bl.ocks.org/jasondavies/1341281). They work with [`pandas.DataFrame`](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe).
# In[23]:

df = pd.read_csv('http://bl.ocks.org/jasondavies/raw/1341281/cars.csv', index_col='name')
df.head()


# In[24]:

matta.parcoords(dataframe=df)


# ### Parallel Sets

# In[25]:

df = pd.read_csv('https://www.jasondavies.com/parallel-sets/titanic.csv')
df.head()


# In[27]:

matta.parsets(dataframe=df, columns=['Survived', 'Sex', 'Age', 'Class'])


# ### Graph
#
# Graphs from [`networkx.DiGraph`](http://networkx.github.io/documentation/networkx-1.9.1/reference/classes.digraph.html) are visualized using the [Force Layout in d3.js](https://github.com/mbostock/d3/wiki/Force-Layout).

# In[28]:

graph = nx.davis_southern_women_graph()


# In[29]:

# Give every node a color (by its 'bipartite' flag) and a size (its degree).
# nodes(data=True) replaces nodes_iter(data=True), and the attributes are
# written to the yielded attribute dict instead of going through `graph.node`;
# neither `nodes_iter` nor `graph.node` exists in current networkx releases.
# NOTE(review): relies on nodes(data=True) yielding the graph's own attribute
# dicts (not copies) -- confirm against the installed networkx version.
for node_id, attributes in graph.nodes(data=True):
    attributes['color'] = 'purple' if attributes['bipartite'] else 'green'
    attributes['size'] = graph.degree(node_id)


# In[30]:

matta.force(graph=graph, link_distance=100, height=600, node_ratio='size',
            node_color={'value': 'bipartite', 'scale': 'ordinal', 'palette': 'Set2'})


# In[ ]: