#!/usr/bin/env python
# coding: utf-8

# # Data Viewer
#
# This notebook demonstrates the use of the DataViewer control.
#
# It provides some basic features that let you browse pandas DataFrames more easily:
# - Scrollable data viewer taking fixed amount of output cell space
# - Sorting data by column
# - Column selection
# - Data filtering

# #### Read in some data to demonstrate

# In[1]:


from msticpy.vis.data_viewer import DataViewer
import pandas as pd

# NOTE: `infer_datetime_format` is intentionally not passed. It is deprecated
# since pandas 2.0 (a strict version of it is the default and passing it only
# emits a warning); `parse_dates` alone converts the TimeGenerated column.
data = pd.read_csv(
    "./data/processes_on_host.csv",
    index_col=0,
    parse_dates=["TimeGenerated"],
)


# ## Use the DataViewer to display a DataFrame

# In[2]:


DataViewer(data)


# ## Specify an initial set of columns

# In[3]:


columns = [
    "Account",
    "EventID",
    "TimeGenerated",
    "Computer",
    "NewProcessName",
    "CommandLine",
    "ParentProcessName",
]
DataViewer(data, selected_cols=columns)


# ## Use "Choose columns" to select which columns to display
#
# The right side list contains the available columns in the DataFrame,
# the left side is the list of columns to display.
#
# Use the Add/Remove buttons to add or remove columns from the selected set.
# You can select multiple columns using Ctrl+Click or Shift+Click (the
# former selects or deselects an item for each click, the latter selects
# a range of items between the last item selected and the currently-clicked
# item).
#
# Click on Apply columns to update the data view.

# In[4]:


viewer = DataViewer(data, selected_cols=columns)
# We're opening the "Choose columns" drop-down programmatically
# Just click on the small arrow to the left of "Choose columns" to open this
viewer.accordion.selected_index = 0
viewer


# # Filtering the data
#
# You can apply multiple filters - each filter is additive, i.e. each is logically ANDed with the others.
#
# The "Filter data" drop down shows the following controls:
#
# Filter expression editor
# - *Column selector* drop-down - which column you
#   want the filter to apply to
# - *Not* checkbox - invert the logic of the filter (for this filter item only)
# - *Operator* drop-down - the available operators are different for string
#   and non-string (numeric and dates)
# - *Expression* text box - type in the expression that you want to match
# - **Add filter** - adds the current filter items as a new filter expression to **Current filters**
# - **Update filter** - overwrites the selected filter in **Current filters** with the current filter expression
#
# Current filters
# - Select the filter expression you want to operate on from the **Filters** list
# - **Delete filter** deletes the selected item
# - **Clear all filters** removes all filter expressions
# - **Apply filter** - applies the filter items to the data and updates the display

# In[5]:


viewer = DataViewer(data, selected_cols=columns)
# manually add a filter
# Each entry maps a display label to a 4-tuple whose items line up with the
# filter editor controls described above: column, Not flag, operator, expression.
sample_filter = {
    "ParentProcessName contains 'cmd'": ("ParentProcessName", False, "contains", "cmd"),
    "CommandLine contains 'script'": ("CommandLine", False, "contains", "script"),
}
viewer.import_filters(sample_filter)

# We're opening the "Filter data" drop-down programmatically
# Just click on the small arrow to the left of "Filter data" to open this
viewer.accordion.selected_index = 1
viewer


# In[6]:


viewer.filters


# ## Advanced querying with filter **query** operator
#
# The **query** operator lets you type in a pandas query expression.
#
# > Note, the selected column is not relevant for this operator since you specify the column name
# > within the query expression. You can select any column name.
#
# See this documentation for the [syntax of the pandas `query` method](https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#the-query-method)

# In[7]:


viewer = DataViewer(data, selected_cols=columns)

# The query text is defined once and reused for both the filter label and the
# filter expression so the two cannot drift apart.
query_expr = (
    "ParentProcessName.str.contains('cmd') and "
    "(CommandLine.str.contains('cacls') or CommandLine.str.contains('script'))"
)
sample_q_filter = {
    f"EventID query '{query_expr}'": ("EventID", False, "query", query_expr),
}
viewer.import_filters(sample_q_filter)

# We're opening the "Filter data" drop-down programmatically
# Just click on the small arrow to the left of "Filter data" to open this
viewer.accordion.selected_index = 1
viewer


# # Accessing the filtered data
#
# Use the `filtered_data` property of the DataViewer to retrieve
# a DataFrame corresponding to the current column and row filtering.
#
# > Note column sorting is not captured in this data.

# In[9]:


viewer.filtered_data


# # DataViewer Help

# In[10]:


help(DataViewer)


# In[11]:


import tabulate

# Render the currently filtered rows as a reStructuredText table.
print(
    tabulate.tabulate(
        viewer.filtered_data,
        tablefmt="rst",
        showindex=False,
        headers="keys",
    )
)