# Let's start to interact with workbench. Please note there is NO specific
# client to workbench; just use the ZeroRPC Python, Node.js, or CLI interfaces.
#
# NOTE: this is an IPython notebook session exported as a script. It talks to a
# live server on tcp://127.0.0.1:4242 and is meant to be run from the
# `notebooks` directory (all relative paths below assume that). Bare
# expressions such as `output` are notebook cells whose value IPython displays;
# the `get_ipython()` calls are the plain-Python spelling of the `!cmd` and
# `%magic` cells and are only available when running under IPython.
import os
import shutil

import matplotlib.pyplot as plt
import pandas as pd
import zerorpc

# Locations relative to the notebooks directory this session runs from.
SAMPLE_DIR = '../data/pe/bad'
WORKERS_DIR = '../server/workers'

c = zerorpc.Client()
c.connect("tcp://127.0.0.1:4242")

# I forgot what stuff I can do with workbench
print(c.help())

# Now let's get information about the dynamically loaded workers (your site
# may have many more!). Next to each worker name is the list of dependencies
# that worker has declared.
print(c.help_workers())

# Let's get the information about the meta worker
print(c.help_worker('meta'))

# Okay let's load up a file, and see what this silly meta thing gives back
filename = os.path.join(SAMPLE_DIR, '9e42ff1e6f75ae3e60b24e48367c8f26')
with open(filename, 'rb') as f:
    my_md5 = c.store_sample(f.read(), filename, 'exe')
output = c.work_request('meta', my_md5)
output

# Pfff... my meta data worker will be WAY better!
# Err.. okay I'll just copy the meta worker file and see what happens.
# Note: obviously you'd just go to the shell and `cp meta.py my_meta.py`;
# here the copy is done from Python with a path relative to the notebooks
# directory, so no machine-specific absolute path (or cd round trip) is needed.
shutil.copy(os.path.join(WORKERS_DIR, 'meta.py'),
            os.path.join(WORKERS_DIR, 'my_meta.py'))

# Okay just 'cause I'm feeling crazy let's look at help_workers again
print(c.help_workers())

# My mind must be playing tricks, let's see if I can run my worker
output = c.work_request('my_meta', my_md5)
output

# I've been around software... testing, server integration, test coverage all
# that stuff is a complete PITA, heck I spend half my time doing that..
# there's no way all that just happened.
get_ipython().system('./runtests')  # notebook cell was: !./runtests

# You sir are on some sort of needle drug... so you're saying that all the new
# functionality that I just typed in is already available on the server?
# Help too?
print(c.help_worker('my_meta'))
output = c.work_request('my_meta', my_md5)
output

# Run my new code
output = c.work_request('my_meta', my_md5)
output

# So let's do a more complicated worker just for hammering home what's
# happening..
# Workbench uses Directed Acyclic Graphs to pipeline workers together, it
# recursively satisfies dependencies with aggressive caching, shallow memory
# copies and gevent based co-operative processes on the server side.
# Basically six slices of awesome...
output = c.work_request('view', my_md5)
output

# Yeah but I want to run my new worker on LOTS of samples and I
# want to put the results into a Pandas dataframe and run some
# statistics, and do some Machine Learning and kewl plots!

# This is just throwing files at Workbench (could be pdfs, swfs, pcap,
# memory_images, etc)
file_list = [os.path.join(SAMPLE_DIR, child) for child in os.listdir(SAMPLE_DIR)]
working_set = []
for sample_path in file_list:
    # listdir also returns sub-directories; open() would fail on those.
    if not os.path.isfile(sample_path):
        continue
    with open(sample_path, 'rb') as f:
        working_set.append(c.store_sample(f.read(), sample_path, 'exe'))

# Now just run a batch request against all the samples we just threw in
results = c.batch_work_request('my_meta', {'md5_list': working_set})
results

# Now toss that client-server generator into a dataframe (zero-copy and efficient)
df_meta = pd.DataFrame(results)
df_meta.head()

# Plotting defaults
get_ipython().run_line_magic('matplotlib', 'inline')  # cell was: %matplotlib inline
plt.rcParams['font.size'] = 12.0
plt.rcParams['figure.figsize'] = (18.0, 8.0)

# Plot stuff (yes this is a silly plot but it's just an example :)
df_meta.boxplot(column='entropy', by='packed')
plt.xlabel('Packed')
plt.ylabel('Entropy')
plt.title('Entropy of Sample')
plt.suptitle('')

# Groupby and Statistics (yes silly again but just an example)
df_meta.groupby('packed').describe()