# Interacting with workbench. Note there is NO specific client for workbench:
# just use the ZeroRPC Python, Node.js, or CLI interfaces.
import os

import zerorpc

c = zerorpc.Client()
c.connect("tcp://127.0.0.1:4242")

# Forgot what we can do with workbench? Ask it.
print(c.help())
print(c.help_basic())

# STEP 1: get the list of commands from workbench.
print(c.help_commands())

# STEP 2: get the information on a specific command, 'store_sample'.
print(c.help_command('store_sample'))

# STEP 3: information about the dynamically loaded workers (your site may
# have many more!). Next to each worker name is the list of dependencies
# that worker has declared.
print(c.help_workers())

# STEP 4: information about the 'meta' worker.
print(c.help_worker('meta'))

# STEP 5: when we load up a file, we get its md5 back.
filename = '../data/pe/bad/0cb9aa6fb9c4aa3afad7a303e21ac0f3'
with open(filename, 'rb') as f:
    my_md5 = c.store_sample(f.read(), filename, 'exe')
print(my_md5)

# Let's see what view_pe does, then give it a try.
print(c.help_worker('view_pe'))
print(c.work_request('view_pe', my_md5))

# That worker needed the output of pe_features and pe_indicators -- so what
# happened? The worker has a dependency list and workbench recursively
# satisfies that dependency list. This is powerful because when we're
# interested in one particular analysis we just want to get the darn thing
# without having to worry about a bunch of details.

# Well, let's throw in a bunch of files!
file_list = [os.path.join('../data/pe/bad', child)
             for child in os.listdir('../data/pe/bad')]
working_set = []
for filename in file_list:
    with open(filename, 'rb') as f:
        md5 = c.store_sample(f.read(), filename, 'exe')
        working_set.append(md5)
print(working_set[:5])

# Okay, we just pushed in a bunch of files; now we can extract features,
# look at indicators, peids, and yara sigs!
# Let's just randomly pick one sample to understand the details; we'll look
# at running all of them a bit later.
import pandas as pd
import numpy as np

print(c.work_request('pe_features', working_set[0]))
print(c.work_request('pe_indicators', working_set[0]))

# Now we rip the peid on all the PE files.
output = c.batch_work_request('pe_peid', {'md5_list': working_set})
print(output)

# At this granularity it opens up a new world.
df = pd.DataFrame(output)
print(df.head(10))

# Get a breakdown of the PEID matches.
df['match'] = [str(match) for match in df['match_list']]
print(df['match'].value_counts())

# Now we do the same thing for yara sigs.
output = c.batch_work_request('yara_sigs', {'md5_list': working_set})
print(output)

# Pop it into a dataframe with one line of code.
df_yara = pd.DataFrame(output)
print(df_yara.head(10))

# The yara output is a bit more detailed, so we're going to carve it up a bit:
# keep only rows with at least one match, then count matches per md5.
df_yara['match'] = [str(match.keys()) if match.keys() else np.nan
                    for match in df_yara['matches']]
df_yara = df_yara.dropna()
df_yara['count'] = 1
print(df_yara.groupby(['match', 'md5']).sum())

# Alright, now that we have an overview of matches, we drill down again.
print(c.work_request('yara_sigs', 'b681485cb9e0cad73ee85b9274c0d3c2'))