#!/usr/bin/env python
# coding: utf-8

# # LocalOsquery Data Provider
# https://msticpy.readthedocs.io/en/v1.1.0/data_acquisition/DataProviders.html#using-local-data-the-localdata-provider

# ## Imports

# In[1]:


# Check we are running Python 3.6 or later.
import sys

MIN_REQ_PYTHON = (3, 6)
if sys.version_info < MIN_REQ_PYTHON:
    print('Check the Kernel->Change Kernel menu and ensure that Python 3.6')
    print('or later is selected as the active kernel.')
    # %-formatting is kept on purpose: an f-string anywhere in this file would
    # be a SyntaxError on interpreters older than 3.6, so the friendly message
    # would never be shown.
    sys.exit("Python %s.%s or later is required.\n" % MIN_REQ_PYTHON)

# imports
import json
import yaml
import msticpy.nbtools as nbtools

# data library imports
from msticpy.data.data_providers import QueryProvider
import msticpy.nbtools as mas  # NOTE: same module as `nbtools` above, second alias

print('Imports Complete')


# ## Variables

# In[2]:


# Directory with osqueryd.results.log or other *.log files.
# Tested with a single file (osqueryd.results.log) and with two files
# (osqueryd.results.log + osqueryd.snapshots.log).
datadir = "/path/to/var/log/osquery"
# Directory with the queries yaml file.
query_path = "/path/to"


# ## Load Data

# In[3]:


# Specify path to look for data files
data_path = datadir
qry_prov = QueryProvider(
    "LocalOsquery",
    data_paths=[data_path],
    query_paths=[query_path],
)


# NOTE: get_ipython() is neither defined nor imported in this file; it is
# supplied by the IPython/Jupyter runtime, so the timed cells below have to be
# run under IPython rather than the plain `python` interpreter.

# In[4]:


get_ipython().run_cell_magic('time', '', '# Show the schema of the data files read in\n# Slow for log file ~1MB\nprint(qry_prov.schema)\n')


# In[5]:


print(json.dumps(qry_prov.schema, indent=2))


# In[6]:


qry_prov.list_queries()


# In[7]:


get_ipython().run_cell_magic('time', '', 'df_fim = qry_prov.linux.fim()\ndf_fim.head(1)\n')


# In[8]:


get_ipython().run_cell_magic('time', '', 'df_process = qry_prov.linux.processes()\ndf_process.head(1)\n')


# In[9]:


get_ipython().run_cell_magic('time', '', 'df_outbound_conn = qry_prov.linux.outbound_connections()\ndf_outbound_conn.head(1)\n')


# In[ ]:


# ## Analysis examples

# In[10]:


# https://msticpy.readthedocs.io/en/latest/visualization/ProcessTree.html
from msticpy.vis import process_tree
from msticpy.transform.proc_tree_builder import OSQUERY_EVENT_SCH


# In[28]:


# Build the process tree with the osquery event schema.
# NOTE(review): p_tree_lx is not referenced again in this section; the plot
# below is given a slice of the raw df_process instead - confirm this is
# intended.
p_tree_lx = process_tree.build_process_tree(df_process, schema=OSQUERY_EVENT_SCH)


# In[29]:


# partial tree - 10 processes only
process_tree.plot_process_tree(data=df_process[50:60], legend_col="columns_name")


# In[ ]:


# In[12]:


# FIXME: the schema is correct above but not here - the time columns are not
# datetime64. Inspect the dtypes to check.
df_fim.dtypes


# In[ ]:


df_fim.mp_plot.timeline(
    title="FIM by action",
    # group_by="columns.action",
    # group_by="columns.username",
    group_by="columns_target_path",
    source_columns=["columns_username", "columns_action", "columns_category", "columns_target_path"],
    time_column="columns_time",
    legend="left",
    height=200,
)


# In[ ]:


# In[ ]:


df_outbound_conn.mp_plot.matrix(
    x="columns_name",
    y="columns_remote_address",
    title="Process name vs remote address Interaction",
)


# In[ ]: