Can we extract workflows from our shell history files? Maybe! Skip to the end for the graph.
By Julia Evans: http://jvns.ca, http://twitter.com/b0rk
I wrote a blog post about this
# You'll need graphviz, pygraphviz and networkx to draw the graphs (plus pandas and IPython)
import pandas as pd
import itertools
import networkx as nx
import pygraphviz as pgv
from IPython.core.display import Image
# I ran
# history | grep " git " | awk '{print $1 " " $3}' | grep -v '|' > history.txt
# to generate this history file
# Load the space-separated history dump. The first field becomes the index
# ("row") and the second field is kept as the "command" column.
df = pd.read_csv(
    './history.txt',
    sep=' ',
    header=None,
    names=["row", "command"],
    index_col="row",
)
def dot_draw(G, prog="circo"):
    """Render a networkx graph with Graphviz and return it as an IPython image.

    Parameters
    ----------
    G : networkx graph
        Graph to draw; node/edge attributes (e.g. ``penwidth``) are passed
        through to Graphviz.
    prog : str, optional
        Graphviz layout program to use (default ``"circo"``).

    Returns
    -------
    IPython.core.display.Image
        PNG rendering of the graph, displayable inline in a notebook.
    """
    # Imported here by full module path: the top-level ``nx.write_dot`` alias
    # used previously no longer exists in current networkx releases.
    from networkx.drawing.nx_agraph import to_agraph

    # Convert straight to a pygraphviz AGraph instead of writing a .dot file
    # and reading it back (the old code also read the file into an unused
    # variable).
    agraph = to_agraph(G)

    # With no path, draw() returns the rendered bytes, so no fixed /tmp file
    # names are needed and concurrent runs cannot clobber each other.
    png_bytes = agraph.draw(format="png", prog=prog)
    return Image(data=png_bytes, format="png")
def create_graph(pair_counts, scale=16):
    """Build a directed graph whose edge widths reflect transition counts.

    Parameters
    ----------
    pair_counts : pandas.DataFrame
        Frame indexed by ``(from, to)`` pairs whose first column holds the
        number of times that transition was observed.
    scale : number, optional
        Divisor applied to each count to obtain the Graphviz ``penwidth``
        of the edge (default 16, the value originally hard-coded).

    Returns
    -------
    networkx.DiGraph
        One edge per ``(from, to)`` pair with a ``penwidth`` attribute.
    """
    G = nx.DiGraph()
    # Iterate over scalar counts rather than iterrows(): iterrows() yields a
    # one-element Series per row, and float(Series) is deprecated in pandas.
    counts = pair_counts.iloc[:, 0]
    for (frm, to), count in zip(pair_counts.index, counts):
        G.add_edge(frm, to, penwidth=float(count) / scale)
    return G
# Pair every command with the one that follows it in the history file.
# 'dist' is the gap between the two history row numbers.
pairs = pd.DataFrame(
    {
        'dist': df.index[1:].values - df.index[:-1].values,
        'from': df['command'].values[:-1],
        'to': df['command'].values[1:],
    },
    columns=['dist', 'from', 'to'],
)
# Keep only pairs whose history rows are directly adjacent (gap of 1).
close_pairs = pairs[pairs['dist'] == 1]
close_pairs.head(3)
| | dist | from | to |
|---|---|---|---|
| 0 | 1 | add | commit |
| 1 | 1 | commit | commit |
| 3 | 1 | add | commit |
# Count how often each (from, to) transition occurs among adjacent commands;
# the aggregated 'dist' column then holds the group size, so rename it.
pair_counts = close_pairs.groupby(['from', 'to']).aggregate(len).rename(columns={'dist': 'count'})
# DataFrame.sort() was removed from pandas; sort_values() is its replacement.
pair_counts = pair_counts.sort_values('count', ascending=False)
# Show the 20 most frequent transitions, ordered by (from, to) for readability.
pair_counts[:20].sort_index()
| from | to | count |
|---|---|---|
| add | add | 20 |
| add | commit | 121 |
| add | status | 15 |
| checkout | checkout | 12 |
| checkout | status | 23 |
| commit | commit | 61 |
| commit | push | 66 |
| commit | status | 35 |
| diff | add | 17 |
| diff | checkout | 15 |
| diff | commit | 90 |
| diff | diff | 12 |
| diff | status | 25 |
| push | status | 22 |
| rm | commit | 11 |
| status | add | 41 |
| status | checkout | 25 |
| status | commit | 46 |
| status | diff | 119 |
| status | push | 12 |
# Graph of the 20 most frequent transitions (pair_counts is sorted by
# descending count, so these are its first 20 rows).
G = create_graph(pair_counts.head(20))
dot_draw(G)
# Graph of every transition that was seen at least three times.
G = create_graph(pair_counts.loc[pair_counts['count'] >= 3])
dot_draw(G, prog="circo")