#!/usr/bin/env python
# coding: utf-8

# # What's new in MSTICPy 2.0?
#
# - Project reorganization
# - Import simplification:
#   - mp.attributes
#   - init_notebook imports - Entities & Pivots
# - pandas accessors
#   - Time Series pandas accessors
# - Threat Intel Lookup Async
# - DataFrame to Graph
# - Folium map
# - Sentinel Workspace configuration
# - Pivoting and Data Providers

# ## V2.0.0 Docs available online
# https://msticpy.readthedocs.io/en/release-msticpy-v2.0.0/

# ## Imports simplified
#
# New "house style" - shortening msticpy to "mp"

# In[86]:

import msticpy as mp

mp.init_notebook()


# In[ ]:

# Many classes and functions are available directly from "mp"


# In[89]:

qry_prov = mp.QueryProvider("MSSentinel")
ti = mp.TILookup()
mp.check_version()
mp.search("sentinel")


# `init_notebook` imports many items into the notebook namespace:
# - standard packages such as pandas, numpy, ipywidgets
# - MSTICPy modules and classes such as **Entities**

# In[92]:

# NOTE: `IpAddress` is not imported explicitly - it is one of the entity
# classes injected into the notebook namespace by `mp.init_notebook()` above.
ip = "145.1.10.17"
IpAddress.whois(ip)


# In[93]:

IpAddress.util.geoloc(ip)


# ### Pandas accessors - more and better organized.
#
# Most dataframe-related functionality available through:
# - df.mp - transforms, data processing
# - df.mp_plot - plotting functions: timeline, process tree, etc.
# `df.mp_timeseries` - is a separate accessor since it requires non-core
# dependencies such as `statsmodels`

# In[94]:

proc_df = pd.read_csv("data/processes_on_host.csv", index_col=0)
proc_df.head(3)


# In[95]:

proc_df.mp.ioc_extract(columns="CommandLine", ioc_types=["ipv4", "url"])


# In[96]:

help(proc_df.mp_plot.timeline)


# In[97]:

proc_df.mp_plot.timeline(group_by="SubjectUserName", source_columns=["CommandLine"])


# In[98]:

proc_df.mp_plot.process_tree(legend_col="SubjectUserName")


# ## Time Series improvements
#
# Previously - minimal code
# (note: `display_timeseries_anomolies` is the actual - misspelled - name
# of the pre-2.0 API function, reproduced verbatim)
# ```python
# from msticpy.nbtools.timeseries import display_timeseries_anomolies
# from msticpy.analysis.timeseries import timeseries_anomalies_stl
#
# ts_data = pd.read_csv("data/TimeSeriesDemo.csv", parse_dates=["TimeGenerated"])
# ts_data = ts_data[["TimeGenerated", "TotalBytesSent"]]
# ts_data = ts_data.set_index("TimeGenerated")
#
# ts_df = timeseries_anomalies_stl(ts_data)
# display_timeseries_anomolies(ts_df)
# ```

# In[100]:

from msticpy.analysis import timeseries

ts_data = pd.read_csv("data/TimeSeriesDemo.csv", parse_dates=["TimeGenerated"])
ts_data.mp_timeseries.analyze(
    time_column="TimeGenerated", data_column="TotalBytesSent"
).mp_timeseries.plot(y="TotalBytesSent")


# ## Threat Intel Lookup
#
# Previously, when using multiple providers, indicators were
# sent to each provider in sequence. A large number of indicators
# caused the notebook to appear to have hung.
#
# V 2.0:
#
# - sends requests to each provider asynchronously.
# - displays progress bar

# In[102]:

iocs = ["162.244.80.235", "185.141.63.120", "82.118.21.1", "85.93.88.165"]
ti_lookup = mp.TILookup()
ti_lookup.lookup_iocs(iocs)


# ## DataFrame to Graph
#
# New in V2.0 - builds on previous Alert and Incident graph modules
# but in generic form

# In[ ]:

proc_df.mp.to_graph


# In[103]:

nxg = proc_df.mp.to_graph(
    source_col="SubjectUserName",
    target_col="NewProcessName",
    source_attrs=["SubjectDomainName", "SubjectLogonId"],
    target_attrs=["CommandLine", "ParentProcessName"],
    edge_attrs=["TimeGenerated"],
)


# In[104]:

import networkx as nx

nx.draw(nxg)


# In[105]:

proc_df.mp_plot.network(
    source_col="SubjectUserName",
    target_col="NewProcessName",
    source_attrs=["SubjectDomainName", "SubjectLogonId"],
    target_attrs=["CommandLine", "ParentProcessName"],
    edge_attrs=["TimeGenerated"],
    font_size=7,
)


# ## Folium Map
#
# Previous code
#
# ```python
# # Create an IP Geolookup class
# iplocation = GeoLiteLookup()
#
# def format_ip_entity(row, ip_col):
#     ip_entity = entities.IpAddress(Address=row[ip_col])
#     iplocation.lookup_ip(ip_entity=ip_entity)
#     ip_entity.AdditionalData["protocol"] = row.L7Protocol
#     if "severity" in row:
#         ip_entity.AdditionalData["threat severity"] = row["severity"]
#     if "Details" in row:
#         ip_entity.AdditionalData["threat details"] = row["Details"]
#     return ip_entity
#
# ips_out = list(selected_out.apply(lambda x: format_ip_entity(x, "dest"), axis=1))
# ips_in = list(selected_in.apply(lambda x: format_ip_entity(x, "source"), axis=1))
# ips_threats = list(ti_ip_results.apply(lambda x: format_ip_entity(x, "Ioc"), axis=1))
#
# icon_props = {"color": "green"}
# for ips in host_entity.public_ips:
#     ips.AdditionalData["host"] = host_entity.HostName
# folium_map.add_ip_cluster(ip_entities=host_entity.public_ips, **icon_props)
# icon_props = {"color": "blue"}
# folium_map.add_ip_cluster(ip_entities=ips_out, **icon_props)
# icon_props = {"color": "purple"}
# folium_map.add_ip_cluster(ip_entities=ips_in, **icon_props)
# icon_props = {"color": "red"}
# folium_map.add_ip_cluster(ip_entities=ips_threats, **icon_props)
#
# display(folium_map)
# ```

# ### New code

# In[106]:

# read in a DataFrame from a csv file
geo_loc_df = (
    pd.read_csv("data/ip_locs.csv", index_col=0)
    # We need to remove any NaN values
    .dropna(subset=["Latitude", "Longitude", "IpAddress"])
)
display(geo_loc_df.head(5))


# In[107]:

geo_loc_df.mp_plot.folium_map(ip_column="IpAddress")


# In[108]:

geo_loc_df.mp_plot.folium_map(
    lat_column="Latitude", long_column="Longitude", zoom_start=10
)


# ### More complex data to display

# In[109]:

# Create some data to display
data_df = pd.DataFrame(
    {
        "Status": ["Home", "Office", "Vacation"] * (len(geo_loc_df) // 3),
        "Friendliness": ["Warm", "Cold", "Medium"] * (len(geo_loc_df) // 3),
        "Flavor": ["Chocolate", "Cinnamon", "Mango"] * (len(geo_loc_df) // 3),
        "SpiceLevel": [1, 2, 3] * (len(geo_loc_df) // 3),
    }
)
geo_loc_data_df = pd.concat([geo_loc_df, data_df], axis=1).dropna(subset=["IpAddress"])
geo_loc_data_df.head(3)


# In[110]:

geo_loc_data_df.mp_plot.folium_map(
    ip_column="IpAddress",
    layer_column="CountryName",
    tooltip_columns=["Status", "Flavor"],
    popup_columns=["Friendliness", "SpiceLevel", "Status", "Flavor"],
    zoom_start=2,
)


# In[111]:

icon_map = {
    "US": {
        "color": "green",
        "icon": "flash",
    },
    "GB": {
        "color": "purple",
        "icon": "flash",
    },
    "default": {
        "color": "blue",
        "icon": "info-sign",
    },
}
geo_loc_df.mp_plot.folium_map(
    # NOTE(review): was ip_column="AllExtIPs" - geo_loc_df is built/filtered on
    # its "IpAddress" column above and every other call uses "IpAddress";
    # "AllExtIPs" looks like a copy-paste from a different dataset. Confirm
    # against data/ip_locs.csv.
    ip_column="IpAddress",
    icon_column="CountryCode",
    icon_map=icon_map,
    zoom_start=2,
)


# ## Sentinel Workspace Configuration

# In[112]:

mp.MpConfigEdit()


# ## Pivot Support for multiple providers
#
# - No need to import or initialize Pivot
# - Cleaned up TI pivots
# - Removed conflicting "shortcut" data query functions
# - Data-provider pivot functions added dynamically on "connect"

# In[113]:

mp.pivot.browse()


# In[ ]:

qry_prov = mp.QueryProvider("MSSentinel")
qry_prov2 = mp.QueryProvider("MSSentinel")
qry_prov.connect(workspace="Default")


# In[ ]:

mp.pivot.browse()
# In[ ]:

qry_prov.connect(workspace="CyberSecuritySOC")


# In[ ]:

mp.pivot.browse()


# In[ ]:


# In[ ]:

# Illustration of how a V1 query name maps to its pivot equivalent.
# (This mapping notation is not executable Python - kept as a comment.)
# qry_prov.WindowsSecurity.list_host_logons() -> Host.MSSentinel.wevt_logons()


# ### Pivot settings
#
# - UseV1QueryNames - use query naming pattern used in V1
# - UseQueryFamily - use the "DataFamily" name defined in the query template
#   rather than the table name (or shortened mnemonic)
# ```python
# qry_prov.WindowsSecurity.list_host_logons() -> Host.MSSentinel.wevt_logons()
#
# with UseQueryFamily: True
# qry_prov.WindowsSecurity.list_host_logons() -> Host.MSSentinel.WindowsSecurity_logons()
# ```
# - UseQueryProviderTimeSpans - by default, all query providers use the pivot timespan
# ```python
# mp.pivot.timespan
# ```
# Setting this to True lets queries from different providers use the timespans
# set for each provider.
#
# ```yaml
# ....
# Pivots:
#   UseV1QueryNames: False
#   UseQueryFamily: False
#   UseQueryProviderTimeSpans: False
# ```

# ## Restructure
#
# | folder    | description                                                                  | previously         |
# |-----------|------------------------------------------------------------------------------|--------------------|
# | analysis  | Data analysis functions - timeseries, anomalies, clustering                  | analysis           |
# | auth      | authentication and secrets management                                        | common             |
# | common    | commonly used utilities and definitions (e.g. exceptions)                    | -                  |
# | config    | configuration and settings                                                   | -                  |
# | context   | enrichment modules; some may need subfolders - e.g. tiproviders, vtlookup    | sectools           |
# | data      | data acquisition/queries (most Azure/Sentinel funcs moved to context)        | -                  |
# | datamodel | entities, soc, pivot core functions                                          | -                  |
# | init      | package loading and initialization - nbinit, pivot creation modules          | nbtools, datamodel |
# | transform | simple data processing - decoding, reformatting, schema change, process tree | sectools           |
# | vis       | visualization modules including browsers                                     | nbtools            |
# | nbwidgets | nb widgets modules                                                           | nbtools/nbwidgets  |
#
# `sectools` and `nbtools` still exist but are mostly redirector modules.
# E.g.
# ```python
# from msticpy.sectools.geoip import GeoLiteLookup
# ```
# still works but has a deprecation warning.