#!/usr/bin/env python
# coding: utf-8

# # What's new in MSTICPy 2.0?
#
# - Project reorganization
# - Import simplification:
#   - mp.attributes
#   - init_notebook imports - Entities & Pivots
# - pandas accessors
#   - Time Series pandas accessors
# - Threat Intel Lookup Async
# - DataFrame to Graph
# - Folium map
# - Sentinel Workspace configuration
# - Pivoting and Data Providers

# ## V2.0.0 Docs available online
# https://msticpy.readthedocs.io/en/release-msticpy-v2.0.0/

# ## Imports simplified
#
# New "house style" - shortening msticpy to "mp"

# In[86]:

import msticpy as mp

mp.init_notebook()


# In[ ]:

# Many classes and functions are available directly from "mp"


# In[89]:

qry_prov = mp.QueryProvider("MSSentinel")
ti = mp.TILookup()
mp.check_version()
mp.search("sentinel")


# `init_notebook` imports many items into the notebook namespace:
# - standard packages such as pandas, numpy, ipywidgets
# - MSTICPy modules and classes such as **Entities**

# In[92]:

# NOTE: `IpAddress` is not imported explicitly - it is one of the entity
# classes injected into the notebook namespace by `mp.init_notebook()` above.
ip = "145.1.10.17"
IpAddress.whois(ip)


# In[93]:

IpAddress.util.geoloc(ip)


# ### Pandas accessors - more and better organized.
#
# Most dataframe-related functionality available through:
# - df.mp - transforms, data processing
# - df.mp_plot - plotting functions: timeline, process tree, etc.
# `df.mp_timeseries` - is a separate accessor since it requires non-core
# dependencies such as `statsmodels`

# In[94]:

proc_df = pd.read_csv("data/processes_on_host.csv", index_col=0)
proc_df.head(3)


# In[95]:

proc_df.mp.ioc_extract(columns="CommandLine", ioc_types=["ipv4", "url"])


# In[96]:

help(proc_df.mp_plot.timeline)


# In[97]:

proc_df.mp_plot.timeline(group_by="SubjectUserName", source_columns=["CommandLine"])


# In[98]:

proc_df.mp_plot.process_tree(legend_col="SubjectUserName")


# ## Time Series improvements
#
# Previously - minimal code
# (note: `display_timeseries_anomolies` is the actual - misspelled - name
# of the pre-2.0 API function, reproduced verbatim)
# ```python
# from msticpy.nbtools.timeseries import display_timeseries_anomolies
# from msticpy.analysis.timeseries import timeseries_anomalies_stl
#
# ts_data = pd.read_csv("data/TimeSeriesDemo.csv", parse_dates=["TimeGenerated"])
# ts_data = ts_data[["TimeGenerated", "TotalBytesSent"]]
# ts_data = ts_data.set_index("TimeGenerated")
#
# ts_df = timeseries_anomalies_stl(ts_data)
# display_timeseries_anomolies(ts_df)
# ```

# In[100]:

from msticpy.analysis import timeseries

ts_data = pd.read_csv("data/TimeSeriesDemo.csv", parse_dates=["TimeGenerated"])
ts_data.mp_timeseries.analyze(
    time_column="TimeGenerated", data_column="TotalBytesSent"
).mp_timeseries.plot(y="TotalBytesSent")


# ## Threat Intel Lookup
#
# Previously, when using multiple providers, indicators were
# sent to each provider in sequence. A large number of indicators
# caused the notebook to appear to have hung.
#
# V 2.0:
#
# - sends requests to each provider asynchronously.
# - displays progress bar

# In[102]:

iocs = ["162.244.80.235", "185.141.63.120", "82.118.21.1", "85.93.88.165"]
ti_lookup = mp.TILookup()
ti_lookup.lookup_iocs(iocs)


# ## DataFrame to Graph
#
# New in V2.0 - builds on previous Alert and Incident graph modules
# but in generic form

# In[ ]:

proc_df.mp.to_graph


# In[103]:

nxg = proc_df.mp.to_graph(
    source_col="SubjectUserName",
    target_col="NewProcessName",
    source_attrs=["SubjectDomainName", "SubjectLogonId"],
    target_attrs=["CommandLine", "ParentProcessName"],
    edge_attrs=["TimeGenerated"],
)


# In[104]:

import networkx as nx

nx.draw(nxg)


# In[105]:

proc_df.mp_plot.network(
    source_col="SubjectUserName",
    target_col="NewProcessName",
    source_attrs=["SubjectDomainName", "SubjectLogonId"],
    target_attrs=["CommandLine", "ParentProcessName"],
    edge_attrs=["TimeGenerated"],
    font_size=7,
)


# ## Folium Map
#
# Previous code
#
# ```python
# # Create an IP Geolookup class
# iplocation = GeoLiteLookup()
#
# def format_ip_entity(row, ip_col):
#     ip_entity = entities.IpAddress(Address=row[ip_col])
#     iplocation.lookup_ip(ip_entity=ip_entity)
#     ip_entity.AdditionalData["protocol"] = row.L7Protocol
#     if "severity" in row:
#         ip_entity.AdditionalData["threat severity"] = row["severity"]
#     if "Details" in row:
#         ip_entity.AdditionalData["threat details"] = row["Details"]
#     return ip_entity
#
# ips_out = list(selected_out.apply(lambda x: format_ip_entity(x, "dest"), axis=1))
# ips_in = list(selected_in.apply(lambda x: format_ip_entity(x, "source"), axis=1))
# ips_threats = list(ti_ip_results.apply(lambda x: format_ip_entity(x, "Ioc"), axis=1))
#
# icon_props = {"color": "green"}
# for ips in host_entity.public_ips:
#     ips.AdditionalData["host"] = host_entity.HostName
# folium_map.add_ip_cluster(ip_entities=host_entity.public_ips, **icon_props)
# icon_props = {"color": "blue"}
# folium_map.add_ip_cluster(ip_entities=ips_out, **icon_props)
# icon_props = {"color": "purple"}
# folium_map.add_ip_cluster(ip_entities=ips_in, **icon_props)
# icon_props = {"color": "red"}
# folium_map.add_ip_cluster(ip_entities=ips_threats, **icon_props)
#
# display(folium_map)
# ```

# ### New code

# In[106]:

# read in a DataFrame from a csv file
geo_loc_df = (
    pd.read_csv("data/ip_locs.csv", index_col=0)
    # We need to remove any NaN values
    .dropna(subset=["Latitude", "Longitude", "IpAddress"])
)
display(geo_loc_df.head(5))


# In[107]:

geo_loc_df.mp_plot.folium_map(ip_column="IpAddress")


# In[108]:

geo_loc_df.mp_plot.folium_map(
    lat_column="Latitude", long_column="Longitude", zoom_start=10
)


# ### More complex data to display

# In[109]:

# Create some data to display
data_df = pd.DataFrame(
    {
        "Status": ["Home", "Office", "Vacation"] * (len(geo_loc_df) // 3),
        "Friendliness": ["Warm", "Cold", "Medium"] * (len(geo_loc_df) // 3),
        "Flavor": ["Chocolate", "Cinnamon", "Mango"] * (len(geo_loc_df) // 3),
        "SpiceLevel": [1, 2, 3] * (len(geo_loc_df) // 3),
    }
)
geo_loc_data_df = pd.concat([geo_loc_df, data_df], axis=1).dropna(subset=["IpAddress"])
geo_loc_data_df.head(3)


# In[110]:

geo_loc_data_df.mp_plot.folium_map(
    ip_column="IpAddress",
    layer_column="CountryName",
    tooltip_columns=["Status", "Flavor"],
    popup_columns=["Friendliness", "SpiceLevel", "Status", "Flavor"],
    zoom_start=2,
)


# In[111]:

icon_map = {
    "US": {
        "color": "green",
        "icon": "flash",
    },
    "GB": {
        "color": "purple",
        "icon": "flash",
    },
    "default": {
        "color": "blue",
        "icon": "info-sign",
    },
}
geo_loc_df.mp_plot.folium_map(
    # NOTE(review): was ip_column="AllExtIPs" - geo_loc_df is built/filtered on
    # its "IpAddress" column above and every other call uses "IpAddress";
    # "AllExtIPs" looks like a copy-paste from a different dataset. Confirm
    # against data/ip_locs.csv.
    ip_column="IpAddress",
    icon_column="CountryCode",
    icon_map=icon_map,
    zoom_start=2,
)


# ## Sentinel Workspace Configuration

# In[112]:

mp.MpConfigEdit()


# ## Pivot Support for multiple providers
#
# - No need to import or initialize Pivot
# - Cleaned up TI pivots
# - Removed conflicting "shortcut" data query functions
# - Data-provider pivot functions added dynamically on "connect"

# In[113]:

mp.pivot.browse()


# In[ ]:

qry_prov = mp.QueryProvider("MSSentinel")
qry_prov2 = mp.QueryProvider("MSSentinel")
qry_prov.connect(workspace="Default")


# In[ ]:

mp.pivot.browse()
# In[ ]:

qry_prov.connect(workspace="CyberSecuritySOC")


# In[ ]:

mp.pivot.browse()


# In[ ]:


# In[ ]:

# Illustration of how a V1 query name maps to its pivot equivalent.
# (This mapping notation is not executable Python - kept as a comment.)
# qry_prov.WindowsSecurity.list_host_logons() -> Host.MSSentinel.wevt_logons()


# ### Pivot settings
#
# - UseV1QueryNames - use query naming pattern used in V1
# - UseQueryFamily - use the "DataFamily" name defined in the query template
#   rather than the table name (or shortened mnemonic)
# ```python
# qry_prov.WindowsSecurity.list_host_logons() -> Host.MSSentinel.wevt_logons()
#
# with UseQueryFamily: True
# qry_prov.WindowsSecurity.list_host_logons() -> Host.MSSentinel.WindowsSecurity_logons()
# ```
# - UseQueryProviderTimeSpans - by default, all query providers use the pivot timespan
# ```python
# mp.pivot.timespan
# ```
# Setting this to True lets queries from different providers use the timespans
# set for each provider.
#
# ```yaml
# ....
# Pivots:
#   UseV1QueryNames: False
#   UseQueryFamily: False
#   UseQueryProviderTimeSpans: False
# ```

# ## Restructure
#
# | folder    | description                                                                  | previously         |
# |-----------|------------------------------------------------------------------------------|--------------------|
# | analysis  | Data analysis functions - timeseries, anomalies, clustering                  | analysis           |
# | auth      | authentication and secrets management                                        | common             |
# | common    | commonly used utilities and definitions (e.g. exceptions)                    | -                  |
# | config    | configuration and settings                                                   | -                  |
# | context   | enrichment modules; some may need subfolders - e.g. tiproviders, vtlookup    | sectools           |
# | data      | data acquisition/queries (most Azure/Sentinel funcs moved to context)        | -                  |
# | datamodel | entities, soc, pivot core functions                                          | -                  |
# | init      | package loading and initialization - nbinit, pivot creation modules          | nbtools, datamodel |
# | transform | simple data processing - decoding, reformatting, schema change, process tree | sectools           |
# | vis       | visualization modules including browsers                                     | nbtools            |
# | nbwidgets | nb widgets modules                                                           | nbtools/nbwidgets  |
#
# `sectools` and `nbtools` still exist but are mostly redirector modules.
# E.g.
# ```python
# from msticpy.sectools.geoip import GeoLiteLookup
# ```
# still works but has a deprecation warning.