# Let's start interacting with workbench. Please note there is NO specific client for workbench;
# just use the ZeroRPC Python, Node.js, or CLI interfaces.
import zerorpc
# Create a ZeroRPC client (timeout=120) and connect it to the workbench
# endpoint at tcp://127.0.0.1:4242.
c = zerorpc.Client(timeout=120)
c.connect("tcp://127.0.0.1:4242")

# Print workbench's own help text as a reminder of what it can do
print(c.help())

# Grab a window of pcap samples from workbench. The second argument (50) is
# the window size -- presumably in MegaBytes; confirm against the
# get_sample_window() help text.
pcap_md5s = c.get_sample_window('pcap', 50)
print('Number of PCAPs %d' % len(pcap_md5s))

# Store that list in workbench as a sample set; the value returned here is
# what the work requests below are run against.
pcap_set = c.store_sample_set(pcap_md5s)

# Request an HTTP graph of all the activity within that set of PCAPs.
# Workbench also has DNS and CONN graphs, but for now we're just interested in HTTP.
c.work_request('pcap_http_graph', pcap_set)

# Sometimes visualizations are useful and sometimes organized data is useful,
# so also ask workbench for a Python dictionary of the details for the same
# set of PCAPs and keep only the 'view_pcap_details' entry of the response.
output = c.work_request('view_pcap_details', pcap_set)
output = output['view_pcap_details']
output

# Critical Code: Transition from Bro logs to Pandas Dataframes
# Build one DataFrame per entry in output['bro_logs'], keyed by log name; each
# frame is populated from whatever c.stream_sample() yields for that log.
# .items() is used rather than the Python-2-only .iteritems() so this
# statement runs unchanged on both Python 2 and Python 3.
import pandas as pd
dataframes = {name: pd.DataFrame(c.stream_sample(bro_log))
              for name, bro_log in output['bro_logs'].items()}

# Total response_body_len for every (id.orig_h, host, id.resp_h,
# resp_mime_types) combination in the HTTP log, which shows the different
# response mime types seen for each host.
group_host = (
    dataframes['http_log']
    .groupby(['id.orig_h', 'host', 'id.resp_h', 'resp_mime_types'])[['response_body_len']]
    .sum()
)
group_host.head(100)

# Same total, this time keyed by (host, id.resp_h, resp_mime_types, uri):
# the originating address is dropped and the result is broken out per URI.
group_host = (
    dataframes['http_log']
    .groupby(['host', 'id.resp_h', 'resp_mime_types', 'uri'])[['response_body_len']]
    .sum()
)
group_host.head(50)

# Peek at the first 20 rows of the Weird log
dataframes['weird_log'].head(20)

# Add a 'time' column to the HTTP log: the 'ts' values converted to pandas
# datetimes, with the numbers interpreted as seconds (unit='s')
dataframes['http_log']['time'] = pd.to_datetime(
    dataframes['http_log']['ts'], unit='s')
dataframes['http_log']['time'].head()

# Explore pivoting and resampling
# Take an explicit copy of the three columns we need. Assigning a column on a
# frame obtained by selecting from another frame makes pandas emit
# SettingWithCopyWarning; the copy makes it unambiguous that only
# response_bytes is modified and dataframes['http_log'] is left alone.
response_bytes = dataframes['http_log'][['time', 'resp_mime_types', 'response_body_len']].copy()
response_bytes['response_body_len'] = response_bytes['response_body_len'].astype(int)
print(response_bytes.head())

# Pivot so each row is a 'time' value, each column a resp_mime_types value and
# each cell the summed response_body_len. pivot_table's rows=/cols= keywords
# were renamed to index=/columns= and later removed, so the current names are
# used here.
pivot = pd.pivot_table(response_bytes, index='time', values='response_body_len',
                       columns=['resp_mime_types'], aggfunc=sum)

# Sum the pivoted values into 1-minute bins. The resample(..., how='sum')
# keyword form was deprecated and later removed; the method-chained form below
# requires pandas >= 0.18.
sampled_bytes = pivot.resample('1Min').sum()
sampled_bytes.head()

# Plotting defaults
import matplotlib.pyplot as plt
# The next line is an IPython/Jupyter magic, not plain Python syntax: it only
# works when this code is run inside IPython or a notebook.
%matplotlib inline
# NOTE(review): the rcParams overrides are applied after the magic; activating
# the inline backend may set its own rc defaults, so keep this order -- confirm.
plt.rcParams['font.size'] = 12.0
plt.rcParams['figure.figsize'] = 12.0, 8.0

# Let's plot it! Draws the per-minute resampled frame with the DataFrame's
# default plot() settings.
sampled_bytes.plot()