"""Workbench generator-pipeline demo.

Stores a set of PE files on a workbench server over ZeroRPC, pulls back
their imported symbols, and then chains two client-side generators so that
only the samples with communication-related imports are sent through the
'pe_peid' worker.
"""
import os

# Name fragments that mark an imported symbol as communication related.
# Matching is by substring ('send' also matches e.g. 'sendto').
# NOTE(review): the fragments are all lower-case, so this presumably relies on
# the pe_features worker lower-casing symbol names -- confirm.
COMM_IMPORT_NAMES = ['accept', 'bind', 'connect', 'connectnamedpipe',
                     'gethostbyname', 'gethostname', 'inet_addr', 'recv',
                     'send']


# Client generators
def comm_imports(import_info_list):
    """Yield the md5 of every sample that imports a communication symbol.

    Args:
        import_info_list: iterable of dicts, each with an 'md5' key and an
            'imported_symbols' key holding an iterable of symbol-name strings.

    Yields:
        The 'md5' value of each entry for which at least one imported symbol
        contains one of COMM_IMPORT_NAMES as a substring.  Entries are
        consumed lazily, one per matching md5 requested.
    """
    for import_info in import_info_list:
        md5 = import_info['md5']
        import_symbols = import_info['imported_symbols']
        if any(comm in sym
               for comm in COMM_IMPORT_NAMES
               for sym in import_symbols):
            yield md5


def peid_sigs(md5_list):
    """Yield the 'pe_peid' work_request result for each md5 in md5_list.

    Uses the module-level ZeroRPC client ``c``; one request is issued per md5,
    and only when the consumer asks for the next item.
    """
    for md5 in md5_list:
        yield c.work_request('pe_peid', md5)


if __name__ == '__main__':
    # Let's start to interact with workbench. Please note there is NO specific
    # client to workbench; just use the ZeroRPC Python, Node.js, or CLI
    # interfaces.
    # zerorpc is imported here (not at the top of the file) so the generator
    # helpers above can be imported without zerorpc or a running server.
    import zerorpc
    c = zerorpc.Client()
    c.connect("tcp://127.0.0.1:4242")

    # Load in 100 PE Files
    file_list = [os.path.join('../data/pe/bad', child)
                 for child in os.listdir('../data/pe/bad')]
    file_list += [os.path.join('../data/pe/good', child)
                  for child in os.listdir('../data/pe/good')]
    md5_list = []
    for filename in file_list:
        with open(filename, 'rb') as f:
            md5_list.append(c.store_sample(f.read(), filename, 'exe'))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Files loaded: %d' % len(md5_list))
    md5_list[:5]  # leftover notebook display expression; no-op in a script

    # Compute pe_features on all files of type pe, just pull back the sparse
    # features
    imports = c.batch_work_request(
        'pe_features',
        {'md5_list': md5_list,
         'subkeys': ['md5', 'sparse_features.imported_symbols']})
    imports  # leftover notebook display expression; no-op in a script

    # First we're going to filter PE Files, only getting ones with
    # communication related imports (comm_imports), then feed those md5s to
    # the peid worker (peid_sigs).

    # Now and only now will our generator pipeline unwind. The work will get
    # pulled from the workbench server and ONLY what needs to get processed
    # based on our generator filters will get processed.
    # Note: Out of 100 PE Files, only 19 matched our filter, so only 19 will
    # get pulled through the peid worker. Imagine a generator pipeline that
    # ended in a dynamic sandbox.. super handy to downselect first.
    peid_results = peid_sigs(comm_imports(imports))
    for peid_info in peid_results:
        print(peid_info)