#!/usr/bin/env python
# coding: utf-8

# # msticpy Threat Intel Lookup
# This notebook describes the use of the Threat Intelligence lookup class in msticpy.
# The class allows lookup of individual or multiple IoCs from one or more TI providers.
#
# TILookup is also extensible - you can subclass TIProvider to implement your own custom lookups. You can also subclass the HTTPProvider or KqlProvider classes, which provide support for querying a REST endpoint or Log Analytics table respectively.
#

# Table of Contents

#
# In[1]:


# Imports
import sys
import warnings

from msticpy.common.utility import check_py_version

# Check the interpreter version before importing the rest of msticpy.
MIN_REQ_PYTHON = (3, 6)
check_py_version(MIN_REQ_PYTHON)

from msticpy import init_notebook

# NOTE(review): the cells below use TILookup, display, Markdown and nbwidgets
# without importing them - presumably init_notebook injects them into the
# namespace passed here; confirm against the msticpy version in use.
init_notebook(namespace=globals())


# # TILookup class
# Input can be a single IoC observable or a pandas DataFrame containing
# multiple observables. Processing may require an API key and
# processing performance may be limited to a specific number of
# requests per minute for the account type that you have.

# In[2]:


# TILookup class: show the constructor docstring, then the docstring of
# every public attribute.
display(Markdown("### Constructor\n"))
print(TILookup.__init__.__doc__)

display(Markdown("### Attributes\n"))
for name in (attr for attr in dir(TILookup) if not attr.startswith("_")):
    display(Markdown(f"#### _{name}()_"))
    print(getattr(TILookup, name).__doc__)
    print()


# ## Available Providers
# The **msticpy** TI Provider library can lookup IoCs in multiple providers.
#
# The list below shows the current set of providers.

# In[3]:


TILookup.list_available_providers()


# You can view the list of supported query types for each provider with the
# `show_query_types=True` parameter

# In[4]:


TILookup.list_available_providers(show_query_types=True)


# ## Loading TIProviders
#
# Calling TILookup with no parameters will load all of the available providers
# that have a configuration entry in `msticpyconfig.yaml` (see next section)

# In[5]:


# load all configured providers
ti_lookup = TILookup()
ti_lookup.provider_status

# Restricting which providers get loaded
# ti_lookup = TILookup(providers=["VirusTotal", "XForce"])


# ## Configuration File
# You can configure primary and secondary providers.
# Primary providers are used by default.
#
# You may need to supply an authorization (API) key and in some cases a user ID for each provider.
# # For LogAnalytics/Azure Sentinel providers, you will need the workspace ID and tenant ID and will need to authenticate in order to access the data (although if you have an existing authenticated connection with the same workspace/tenant, this connection will be re-used). # # The configuration file is read from the current directory. # # Alternatively, you can specify a location for this file in an environment variable `MSTICPYCONFIG`. # # If you need to create a config file, uncomment the lines in the following cell.
# ### Warning - this will overwrite a file of the same name in the current directory
#
# Delete any provider entries that you do not want to use and add the missing
# parameters for your providers.

# In[6]:


# %%writefile msticpyconfig.yaml
# QueryDefinitions:
#
# TIProviders:
#   OTX:
#     Args:
#       AuthKey: "your-otx-key"
#     Primary: True
#     Provider: "OTX" # Explicitly name provider to override
#   VirusTotal:
#     Args:
#       AuthKey: "your-vt-key"
#     Primary: True
#     Provider: "VirusTotal"
#   XForce:
#     Args:
#       ApiID: "your-xforce-id"
#       AuthKey: "your-xforce-key"
#     Primary: True
#     Provider: "XForce"
#   AzureSentinel:
#     # Note if you do not specify any settings in the Args key for the AzureSentinel
#     # provider, it will default to using your default Azure Sentinel workspace.
#     Args:
#       WorkspaceID: "your-azure-sentinel-workspace-id"
#       TenantID: "your-azure-sentinel-tenant-id"
#     Primary: True
#     Provider: "AzSTI"


# Reload providers to pick up new settings

# In[7]:


ti_lookup.reload_providers()
ti_lookup.provider_status


# ## Looking up IoCs
# ### lookup_ioc
# To lookup a single IoC.
# ```
# ti_lookup.lookup_ioc(
#     observable: str = None,
#     ioc_type: str = None,
#     ioc_query_type: str = None,
#     providers: List[str] = None,
#     prov_scope: str = 'primary',
#     **kwargs,
# ) -> Tuple[bool, List[Tuple[str, msticpy.sectools.tiproviders.ti_provider_base.LookupResult]]]
#
# Lookup single IoC in active providers.
#
# Parameters
# ----------
# observable : str
#     IoC observable
#     (`ioc` is also an alias for observable)
# ioc_type : str, optional
#     One of IoCExtract.IoCType, by default None
#     If none, the IoC type will be inferred
# ioc_query_type: str, optional
#     The ioc query type (e.g. rep, info, malware)
# providers: List[str]
#     Explicit list of providers to use
# prov_scope : str, optional
#     Use primary, secondary or all providers, by default "primary"
# kwargs :
#     Additional arguments passed to the underlying provider(s)
#
# Returns
# -------
# Tuple[bool, List[Tuple[str, LookupResult]]]
#     The result returned as a tuple(bool, list):
#     bool indicates whether a TI record was found in any provider
#     list has an entry for each provider result
# ```

# In[8]:


# Uncomment this and run to see the document string
# ti_lookup.lookup_ioc?


# ### Lookup an IoC from specific providers
# And show the output

# In[9]:


result = ti_lookup.lookup_ioc(
    observable="52.183.120.194",
    providers=["AzSTI", "XForce"],
)
ti_lookup.result_to_df(result)


# In[10]:


result = ti_lookup.lookup_ioc(observable="52.183.120.194")
ti_lookup.result_to_df(result).T


# In[11]:


import pprint

pp = pprint.PrettyPrinter(indent=2)

result, details = ti_lookup.lookup_ioc(
    observable="38.75.137.9",
    providers=["OTX"],
)

# the details is a list (since there could be multiple responses for an IoC)
for provider, detail in details:
    print(provider)
    detail.summary
    # Un-comment to view raw response
    # print("\nRaw Results")
    # pp.pprint(detail.raw_result)


# #### Or convert result to a DataFrame and let pandas do the display work...

# In[12]:


result = ti_lookup.lookup_ioc(
    observable="38.75.137.9",
    providers=["OTX"],
)
ti_lookup.result_to_df(result).T


# In[13]:


# Extract a single field (RawResult) from the dataframe (.iloc[0] is to select the row)
ti_lookup.result_to_df(result)["RawResult"].iloc[0]


# ## Lookup using all primary providers

# In[14]:


result = ti_lookup.lookup_ioc(observable="188.127.231.124")
ti_lookup.result_to_df(result)


# ## Provider Usage
# This shows the supported IoC Types.
#
# In some cases an IoC type will also support special types of sub-query
# such as geo-ip and passive-dns

# In[15]:


display(ti_lookup.provider_status)
ti_lookup.loaded_providers["AzSTI"].usage()


# In[16]:


ti_lookup.provider_usage()


# ### Use to do a passive DNS lookup

# In[17]:


# Pass the IoC type explicitly via `ioc_type` so it is not inferred.
result = ti_lookup.lookup_ioc(
    observable="38.75.137.9",
    ioc_type="ipv4",
    ioc_query_type="passivedns",
    providers=["XForce"],
)
print(result)
print("\nProvider result:")
# result is (found, [(provider, LookupResult), ...]); take the LookupResult
# of the first (only) provider.
result[1][0][1].raw_result


# ### Use to do a GeoIP lookup

# In[18]:


result = ti_lookup.lookup_ioc(
    observable="38.75.137.9",
    ioc_type="ipv4",
    ioc_query_type="geo",
    providers=["OTX"],
)
print(result)
print("\nProvider result:")
result[1][0][1].raw_result


# ## Inferring IoC Type vs. Specifying explicitly
# If you do a lookup without specifying a type, TILookup will try to infer the
# type by matching regexes. There are patterns for all supported types but
# there are some caveats:
#
# - The match is not 100% foolproof - e.g. some URLs and hash types may be misidentified.
# - The inference adds an overhead to each lookup.
#
# If you know the type that you want to look up, it is always better to explicitly include it.
# - For single IoC lookup, use the `ioc_type` parameter.
# - For multiple IoC lookups (see below), supply either:
#   - a DataFrame with a column that specifies the type for each entry
#   - a dictionary of the form `{ioc_observable: ioc_type}`

# ## Looking up Multiple IoCs
# ### lookup_iocs
# ```
# Signature:
# ti_lookup.lookup_iocs(
#     data: Union[pandas.core.frame.DataFrame, Mapping[str, str], Iterable[str]],
#     obs_col: str = None,
#     ioc_type_col: str = None,
#     ioc_query_type: str = None,
#     providers: List[str] = None,
#     prov_scope: str = 'primary',
#     **kwargs,
# ) -> pandas.core.frame.DataFrame
#
# Lookup a collection of IoCs.
#
# Parameters
# ----------
# data : Union[pd.DataFrame, Mapping[str, str], Iterable[str]]
#     Data input in one of three formats:
#     1. Pandas dataframe (you must supply the column name in
#     `obs_col` parameter)
#     2. Mapping (e.g. a dict) of [observable, IoCType]
#     3. Iterable of observables - IoCTypes will be inferred
# obs_col : str, optional
#     DataFrame column to use for observables, by default None
# ioc_type_col : str, optional
#     DataFrame column to use for IoCTypes, by default None
# ioc_query_type: str, optional
#     The ioc query type (e.g. rep, info, malware)
# providers: List[str]
#     Explicit list of providers to use
# prov_scope : str, optional
#     Use primary, secondary or all providers, by default "primary"
# kwargs :
#     Additional arguments passed to the underlying provider(s)
#
# Returns
# -------
# pd.DataFrame
#     DataFrame of results
# ```

# In[19]:


# Uncomment this and run to see the document string
# ti_lookup.lookup_iocs?


# ### Multiple IP Lookup from single provider

# In[20]:


# One observable per entry; every entry needs its own trailing comma, since
# adjacent string literals are silently concatenated by Python.
ioc_ips = [
    "51.75.29.61",
    "33.44.55.66",
    "52.183.120.194",
    "13.91.229.209",
    "1.2.3.4",
    "52.167.223.49",
    "1.2.3.5",
]

# `providers` is documented as List[str], so pass a list even for one provider.
ti_lookup.lookup_iocs(data=ioc_ips, providers=["AzSTI"])


# ### Multiple IoCs using all providers
# Output sorted by IoC
#
# Note that these URLs were picked randomly from the TI databases of the three
# providers used. In most cases the IoC is found only by the provider whose
# database it was picked from.

# In[21]:


ioc_urls = [
    "http://cheapshirts.us/zVnMrG.php",
    "http://chinasymbolic.com/i9jnrc",
    "https://hotel-bristol.lu/dlry/MAnJIPnY/",
    "http://businesstobuy.net",
    "http://append.pl/srh9xsz",
    "http://104.248.196.145/apache2",
    "http://ajaraheritage.ge/g7cberv",
    "http://cic-integration.com/hjy93JNBasdas",
    "https://google.com",  # benign
    "https://microsoft.com",  # benign
    "https://python.org",  # benign
]

results = ti_lookup.lookup_iocs(data=ioc_urls)
results.sort_values("Ioc")


# ### Multiple Mixed IoC Types

# In[22]:


ioc_mixed = [
    "http://104.248.196.145/apache2",
    "http://ajaraheritage.ge/g7cberv",
    "http://cic-integration.com/hjy93JNBasdas",
    "51.75.29.61",
    "33.44.55.66",
    "52.183.120.194",
    "f8a7135496fd6168df5f0ea21c745db89ecea9accc29c5cf281cdf3145865092",
    "cc2db822f652ca67038ba7cca8a8bde3",
    "ajaraheritage.ge",
]

results = ti_lookup.lookup_iocs(data=ioc_mixed)
results


# ## Browsing TI Results
# To make it easier to walk through the returned results there is a browser.
# This shows you results aggregated by the IoC value (e.g. an individual IP
# Address or URL) for all providers.
#
# For each provider that returns a result for an IoC, the summarized details
# will be shown in a table below the browse list.
# Click on `Raw results from provider...` to see all returned data.
#
# > **Note**: the reference URL may not work if you have not authenticated
# > to the service
#
# The value of the selected IoC entry is available as `ti_selector.value`
#
# You can match this back to the original results DataFrame as follows:
# ```
# results[results["Ioc"] == ti_selector.value[0]]
# ```

# In[31]:


from msticpy.nbtools.ti_browser import browse_results

# Browses the `results` DataFrame produced by the mixed-IoC lookup above.
ti_selector = browse_results(data=results, height="200px")
ti_selector


# ## Specifying Time Ranges
# Some providers (currently only AzSTI) support time ranges so that you can
# specify specific periods to search for.
#
# If a provider does not support time ranges, the parameters will be ignored

# In[ ]:


from datetime import datetime

# Query window of up to 24 hours either side of the origin time.
search_origin = datetime(2019, 8, 5)
q_times = nbwidgets.QueryTime(
    units="hour",
    auto_display=True,
    origin_time=search_origin,
    max_after=24,
    max_before=24,
)


# In[ ]:


# Using this time range returned no results
# (`providers` is documented as List[str], so a list is passed even for a
# single provider.)
ti_lookup.lookup_iocs(
    data=ioc_ips,
    providers=["AzSTI"],
    start=q_times.start,
    end=q_times.end,
).head()


# In[ ]:


# Re-imported so this cell can be run on its own.
from datetime import datetime

# Same origin, but the window is now measured in days rather than hours.
search_origin = datetime(2019, 8, 5)
q_times = nbwidgets.QueryTime(
    units="day",
    auto_display=True,
    origin_time=search_origin,
    max_after=24,
    max_before=24,
)


# In[ ]:


# Using a wider range produces results
ti_lookup.lookup_iocs(
    data=ioc_ips,
    providers=["AzSTI"],
    start=q_times.start,
    end=q_times.end,
)