# Let's start to interact with workbench. Please note there is NO specific
# client for workbench: just use the ZeroRPC Python, Node.js, or CLI interfaces.
import zerorpc

# Guarded so that merely importing this file does not open a connection.
# Names assigned here are still module-level globals when run as a script
# or pasted into a notebook. print(...) with a single argument behaves the
# same on Python 2 and Python 3.
if __name__ == '__main__':
    c = zerorpc.Client(timeout=120)
    c.connect("tcp://127.0.0.1:4242")

    # I forgot what stuff I can do with workbench
    print(c.help())

    # Grab a range of pcaps in workbench.
    # NOTE(review): the original notes say "last 100 MegaBytes worth" but the
    # call passes 50 -- confirm the intended window size.
    pcap_md5s = c.get_sample_window('pcap', 50)
    print('Number of PCAPs %d' % len(pcap_md5s))

    # Workbench lets you store sample sets
    pcap_set = c.store_sample_set(pcap_md5s)

    # Now give us an HTTP graph of all the activities within that window of
    # PCAPs. Workbench also has DNS and CONN graphs, but for now we're just
    # interested in HTTP. The return value is not used here.
    c.work_request('pcap_http_graph', pcap_set)

    # We can also ask workbench for a python dictionary of all the info from
    # this set of PCAPs, because sometimes visualizations are useful and
    # sometimes organized data is useful.
    output = c.work_request('view_pcap_details', pcap_set)['view_pcap_details']
    # Printed explicitly: a bare `output` expression only displays inside a
    # notebook cell.
    print(output)

# Critical Code: Transition from Bro logs to Pandas Dataframes.
# The next step populates one dataframe per Bro log from workbench's
# stream_sample() calls. The author's notes describe the transfer as
# streaming client/server generators and zero-copy.
import pandas as pd


def response_bytes_frame(http_log):
    """Return the time / MIME type / body length columns of *http_log*.

    The result is an independent copy, so assigning to it neither touches
    *http_log* nor triggers pandas' SettingWithCopyWarning. Its
    'response_body_len' column is cast to int.
    """
    frame = http_log[['time', 'resp_mime_types', 'response_body_len']].copy()
    frame['response_body_len'] = frame['response_body_len'].astype(int)
    return frame


def bytes_per_mime_type(response_bytes):
    """Pivot *response_bytes* into summed body lengths per time and MIME type.

    Rows are the distinct 'time' values, columns are the distinct
    'resp_mime_types' values, and each cell is the sum of
    'response_body_len' for that pair (NaN where the pair never occurs).
    """
    # index=/columns= replace the rows=/cols= keywords that current pandas no
    # longer accepts; aggfunc is given by name rather than as the builtin.
    return pd.pivot_table(response_bytes, index='time',
                          values='response_body_len',
                          columns=['resp_mime_types'], aggfunc='sum')


# Guarded so the helpers above can be imported without a workbench server.
# Relies on `c` (the workbench client) and `output` (its pcap details,
# including the 'bro_logs' entry) already being defined.
if __name__ == '__main__':
    # One DataFrame per Bro log, each built from the records that
    # c.stream_sample() yields for that log.
    dataframes = {name: pd.DataFrame(c.stream_sample(bro_log))
                  for name, bro_log in output['bro_logs'].items()}
    http_log = dataframes['http_log']

    # Now we group by host and show the different response mime types for
    # each host: summed response_body_len per
    # (id.orig_h, host, id.resp_h, resp_mime_types).
    group_host = http_log.groupby(
        ['id.orig_h', 'host', 'id.resp_h', 'resp_mime_types']
    )[['response_body_len']].sum()
    print(group_host.head(100))

    # Same sum, but keyed by (host, id.resp_h, resp_mime_types, uri), i.e.
    # broken down per URI instead of per id.orig_h.
    group_host = http_log.groupby(
        ['host', 'id.resp_h', 'resp_mime_types', 'uri']
    )[['response_body_len']].sum()
    print(group_host.head(50))

    # Look at Weird logs
    print(dataframes['weird_log'].head(20))

    # Convert the 'ts' field (interpreted as seconds) to real datetimes
    http_log['time'] = pd.to_datetime(http_log['ts'], unit='s')
    print(http_log['time'].head())

    # Explore pivoting and resampling
    response_bytes = response_bytes_frame(http_log)
    print(response_bytes.head())
    pivot = bytes_per_mime_type(response_bytes)
    # .resample(rule).sum() is the current form of resample(rule, how='sum')
    sampled_bytes = pivot.resample('1min').sum()
    print(sampled_bytes.head())

    # Plotting defaults
    import matplotlib.pyplot as plt
    try:
        # Notebook equivalent of `%matplotlib inline`; get_ipython only
        # exists when running under IPython.
        get_ipython().run_line_magic('matplotlib', 'inline')  # noqa: F821
    except NameError:
        pass  # plain Python: keep matplotlib's default backend
    plt.rcParams['font.size'] = 12.0
    plt.rcParams['figure.figsize'] = 12.0, 8.0

    # Let's plot it!
    sampled_bytes.plot()
    # Needed for the figure to appear when run as a plain script.
    plt.show()