# You'll need graphviz and networkx to draw the graphs
import pandas as pd
import itertools
import networkx as nx
import pygraphviz as pgv
from IPython.core.display import Image

# I ran
#   history | grep " git " | awk '{print $1 " " $3}' | grep -v '|' > history.txt
# to generate this history file.
# Each line is "<history row number> <git subcommand>".
df = pd.read_csv('./history.txt', sep=' ', header=None,
                 names=["row", "command"], index_col="row")


def dot_draw(G, prog="circo"):
    """Render a networkx graph with Graphviz and return it as an IPython image.

    Args:
        G: The networkx graph to draw. Node/edge attributes (for example
            ``penwidth``) are passed through to Graphviz.
        prog: Name of the Graphviz layout program to use (default "circo").

    Returns:
        An ``IPython`` ``Image`` holding the rendered PNG.
    """
    # Convert directly to a pygraphviz AGraph and render in memory instead of
    # round-tripping through fixed /tmp files (which collide between runs and
    # relied on the removed top-level ``nx.write_dot``).
    agraph = nx.nx_agraph.to_agraph(G)
    png_bytes = agraph.draw(format="png", prog=prog)
    return Image(data=png_bytes, format="png")


def create_graph(pair_counts, scale=16.0):
    """Build a directed graph of command transitions.

    Args:
        pair_counts: DataFrame indexed by ``(from, to)`` with a ``count``
            column giving how often ``to`` directly followed ``from``.
        scale: Divisor applied to each count to obtain the edge ``penwidth``.

    Returns:
        A ``networkx.DiGraph`` with one edge per ``(from, to)`` pair.
    """
    G = nx.DiGraph()
    # Iterate the column itself so ``count`` is a scalar, not a one-element
    # row Series (implicit float() of a Series is deprecated in pandas).
    for (frm, to), count in pair_counts['count'].items():
        G.add_edge(frm, to, penwidth=float(count) / scale)
    return G


# Pair every command with the one that follows it in the history file.
rows = df.index.values
commands = df['command'].values
pairs = pd.DataFrame({
    'dist': rows[1:] - rows[:-1],
    'from': commands[:-1],
    'to': commands[1:],
})

# Keep only pairs whose history row numbers are consecutive, i.e. no other
# (filtered-out) command ran in between.
close_pairs = pairs[pairs['dist'] == 1]
close_pairs[:3]  # notebook cell output: peek at the first few pairs

# Count how often each (from, to) transition occurs, most frequent first.
pair_counts = close_pairs.groupby(['from', 'to']).size().to_frame('count')
pair_counts = pair_counts.sort_values('count', ascending=False)
pair_counts[:20].sort_index()  # notebook cell output: top 20 transitions

# Graph of the 20 most frequent transitions.
G = create_graph(pair_counts[:20])
dot_draw(G)

# Graph of every transition seen at least 3 times.
G = create_graph(pair_counts[pair_counts['count'] >= 3])
dot_draw(G, prog="circo")