# Let's start to interact with workbench. Please note there is NO specific
# client for workbench -- just use the ZeroRPC Python, Node.js, or CLI
# interfaces.
import zerorpc

import pandas as pd  # used below to build dataframes from the streamed logs

# Connect to the local workbench server.
c = zerorpc.Client()
c.connect("tcp://127.0.0.1:4242")

# Load in the PCAP file and register it with workbench; store_sample returns
# the sample's md5, which is the handle for all later work requests.
filename = '../data/pcap/kitchen_boss.pcap'
with open(filename, 'rb') as f:
    pcap_md5 = c.store_sample(f.read(), filename, 'pcap')

# Run the Bro Network Security Monitor on the PCAP (or set of PCAPs) we just
# loaded. We could run several requests here: 'pcap_bro', 'view_pcap' or
# 'view_pcap_details' -- workbench is super granular and it's easy to try
# them all and add your own as well.
output = c.work_request('view_pcap_details', pcap_md5)['view_pcap_details']
output

# We'll grab the md5s for those files and do some kewl stuff with them later.
# A set comprehension de-duplicates directly (idiomatic form of list(set([...]))).
file_md5s = list({item['md5'] for item in output['extracted_files']})
pe_md5 = '4410133f571476f2e76e29e61767b557'
file_md5s

# Grab the Bro logs that we want; stream_sample returns generators so nothing
# is materialized until we consume them.
dns_log = c.stream_sample(output['bro_logs']['dns_log'])
http_log = c.stream_sample(output['bro_logs']['http_log'])
files_log = c.stream_sample(output['bro_logs']['files_log'])
dns_log

# Okay, take the generators returned by stream_sample and efficiently create
# dataframes -- LIKE BUTTER I TELL YOU!
# Build dataframes straight from the stream_sample generators.
dns_df = pd.DataFrame(dns_log)
http_df = pd.DataFrame(http_log)
files_df = pd.DataFrame(files_log)
files_df.head()
dns_df.head()
dns_df[['query', 'answers', 'qtype_name']]

# Now we group by host and show the different response mime types for each host.
group_host = http_df.groupby(
    ['host', 'uid', 'resp_mime_types', 'uri'])[['response_body_len']].sum()
group_host.head(10)

# Get meta-data for each of the extracted files from the PCAP.
# list(...) materializes the generator for display (instead of the
# identity comprehension [view for view in file_views]).
file_views = c.batch_work_request('meta_deep', {'md5_list': file_md5s})
list(file_views)

# VirusTotal queries (as of 4-20-2014).
vt_output = c.batch_work_request('vt_query', {'md5_list': file_md5s})
list(vt_output)

# Well, VirusTotal only found two of the files (SWF and JAR). The SWF has
# zero positives (we're going to take that with a grain of salt). The PDF
# and PE files don't even show up. So we'll take a closer look at the SWF
# and PE file with some of the workers in workbench.
swf_view = c.work_request('swf_meta', '16cf037b8c8caad6759afc8c309de0f9')
swf_view
# Reuse the pe_md5 variable defined earlier rather than repeating the literal.
pe_view = c.work_request('pe_indicators', pe_md5)
pe_view

graph = c.work_request('pcap_graph', pcap_md5)
graph

# Let's look at the timing of the DNS requests and the file downloads.
# Make a new column in both dataframes with a proper datetime stamp
# (Bro 'ts' is epoch seconds).
dns_df['time'] = pd.to_datetime(dns_df['ts'], unit='s')
files_df['time'] = pd.to_datetime(files_df['ts'], unit='s')

# Now make time the new index for both dataframes.
dns_df.set_index(['time'], inplace=True)
files_df.set_index(['time'], inplace=True)

# Narrow to the extracted files and the domains of interest.
interesting_files = files_df[files_df['md5'].isin(file_md5s)]
domains = ['kitchenboss.com.au', 'www.kitchenboss.com.au',
           'p22x62n0yr63872e-qh6.focondteavrt.ru',
           '2496128308-6.focondteavrt.ru', '92.194.4.142.in-addr.arpa']
interesting_dns = dns_df[dns_df['query'].isin(domains)]

# Merge DNS lookups and file downloads into one time-indexed view.
all_time = pd.concat([interesting_dns[['query', 'answers', 'qtype_name']],
                      interesting_files[['md5', 'mime_type', 'tx_hosts']]])
all_time.sort_index(inplace=True)
all_time