#!/usr/bin/env python
# coding: utf-8

# # Title: msticpy - GeoIP Lookup
#
# ## Introduction
# This module contains two classes that allow you to look up the Geolocation of IP Addresses.
#
# You must have msticpy installed to run this notebook:
# ```
# %pip install --upgrade msticpy
# ```
#
#
# ### MaxMind GeoIPLite
# This product includes GeoLite2 data created by MaxMind, available from
# https://www.maxmind.com.
#
# This uses a local database which is downloaded the first time the class is instantiated. It gives very fast lookups but you need to download updates regularly. Maxmind offers a free tier of this database, updated monthly. For greater accuracy and more detailed information they have varying levels of paid service. Please check out their site for more details.
#
# The geoip module uses the official MaxMind PyPI package, geoip2, and also has options to customize the behavior of the local MaxMind database.
# * ```db_folder``` : Specify a custom path containing the local MaxMind city database. If not specified, the database is downloaded to the .msticpy dir under the user's home dir.
# * ```force_update``` : can be set to True/False to force an update regardless of the age check.
# * The age of the MaxMind city database is checked using the database info, and a new copy is downloaded if it has not been updated in the last 30 days.
# * ```auto_update``` : can be set to True/False. Lets you override the automatic update if you do not want a database older than 30 days to be updated.
#
# ### IPStack
# This library uses services provided by ipstack.
# https://ipstack.com
#
# IPStack is an online service and also offers a free tier of their service. Again, the paid tiers offer greater accuracy, more detailed information and higher throughput. Please check out their site for more details.
#
#
#
# ## Table of Contents
# - [Maxmind GeoIP Lookup](#geoip_lookups)
# - [IPStack GeoIP Lookup](#ipstack_lookups)
# - [Dataframe input](#dataframe_input)
# - [Creating your own GeoIP Class](#custom_lookup)
# - [Calculating Geographical Distances](#calc_distance)

# In[1]:


# Imports
import sys

# Stop early with a readable message if the kernel's Python is too old.
MIN_REQ_PYTHON = (3, 6)
if sys.version_info < MIN_REQ_PYTHON:
    print('Check the Kernel->Change Kernel menu and ensure that Python 3.6')
    print('or later is selected as the active kernel.')
    sys.exit("Python %s.%s or later is required.\n" % MIN_REQ_PYTHON)

from IPython.display import display
import pandas as pd

import msticpy
from msticpy.context.geoip import GeoLiteLookup, IPStackLookup


# [Contents](#contents)
# ## Maxmind GeoIP Lite Lookup Class
# Signature:
# ```
# iplocation.lookup_ip(ip_address: str = None,
#                      ip_addr_list: collections.abc.Iterable = None,
#                      ip_entity: msticpy.nbtools.entityschema.IpAddress = None)
# Docstring:
# Lookup IP location from GeoLite2 data created by MaxMind.
#
# Keyword Arguments:
#     ip_address {str} -- a single address to look up (default: {None})
#     ip_addr_list {Iterable} -- a collection of addresses to lookup (default: {None})
#     ip_entity {IpAddress} -- an IpAddress entity
#
# Returns:
#     tuple(list{dict}, list{entity}) -- returns raw geolocation results and
#         same results as IP/Geolocation entities
# ```

# In[2]:


# Lookup with default settings.
iplocation = GeoLiteLookup()
loc_result, ip_entity = iplocation.lookup_ip(ip_address='90.156.201.97')
print('Raw result')
display(loc_result)
print('IP Address Entity')
display(ip_entity[0])


# In[3]:


import tempfile
from pathlib import Path

# Same lookup, with the local database kept in a custom folder (db_folder).
tmp_folder = tempfile.gettempdir()
iplocation = GeoLiteLookup(db_folder=str(Path(tmp_folder).joinpath('geolite')))
loc_result, ip_entity = iplocation.lookup_ip(ip_address='90.156.201.97')
print('Raw result')
display(loc_result)
print('IP Address Entity')
display(ip_entity[0])


# In[4]:


# Same lookup, with force_update=True.
iplocation = GeoLiteLookup(force_update=True)
loc_result, ip_entity = iplocation.lookup_ip(ip_address='90.156.201.97')
print('Raw result')
display(loc_result)
print('IP Address Entity')
display(ip_entity[0])


# In[5]:


# Same lookup, with auto_update=False.
iplocation = GeoLiteLookup(auto_update=False)
loc_result, ip_entity = iplocation.lookup_ip(ip_address='90.156.201.97')
print('Raw result')
display(loc_result)
print('IP Address Entity')
display(ip_entity[0])


# In[6]:


import socket

socket_info = socket.getaddrinfo("pypi.org", 0, 0, 0, 0)
# getaddrinfo returns one entry per address family / socket type / protocol
# combination, so the same address usually appears several times.
# dict.fromkeys drops the repeats while keeping first-seen order, so each
# address is only looked up once.
ips = list(dict.fromkeys(res[4][0] for res in socket_info))
print(ips)

_, ip_entities = iplocation.lookup_ip(ip_addr_list=ips)
display(ip_entities)


# [Contents](#contents)
# ## IPStack Geo-lookup Class
#
# #### Class Initialization
#
# Note - requires an IPStack API Key. The optional parameter bulk_lookup allows multiple IPs in a single request. This is only available with the paid Professional tier and above.
# ```
# Init signature: IPStackLookup(api_key: str, bulk_lookup: bool = False)
# Docstring:
# GeoIP Lookup using IPStack web service.
#
# Raises:
#     ConnectionError -- Invalid status returned from http request
#     PermissionError -- Service refused request (e.g. requesting batch of addresses
#         on free tier API key)
# Init docstring:
# Create a new instance of IPStackLookup.
#
# Arguments:
#     api_key {str} -- API Key from IPStack - see https://ipstack.com
#     bulk_lookup {bool} -- For Professional and above tiers allowing you to
#         submit multiple IPs in a single request.
#
# ```
#
# #### lookup_ip method
# ```
# Signature:
# iplocation.lookup_ip(
#     ['ip_address: str = None', 'ip_addr_list: collections.abc.Iterable = None', 'ip_entity: msticpy.nbtools.entityschema.IpAddress = None'],
# ) -> tuple
# Docstring:
# Lookup IP location from IPStack web service.
#
# Keyword Arguments:
#     ip_address {str} -- a single address to look up (default: {None})
#     ip_addr_list {Iterable} -- a collection of addresses to lookup (default: {None})
#     ip_entity {IpAddress} -- an IpAddress entity
#
# Raises:
#     ConnectionError -- Invalid status returned from http request
#     PermissionError -- Service refused request (e.g. requesting batch of addresses
#         on free tier API key)
#
# Returns:
#     tuple(list{dict}, list{entity}) -- returns raw geolocation results and
#         same results as IP/Geolocation entities
# ```

# [Contents](#contents)
# ### You will need an IPStack API key
# You will get more detailed results and a higher throughput allowance if you have a paid tier. See IPStack website for more details

# In[7]:


from msticpy.common.provider_settings import get_provider_settings

iplocation = IPStackLookup()
# Enter your IPStack Key here (if not set in msticpyconfig.yaml)
ips_key = msticpy.nbwidgets.GetEnvironmentKey(
    env_var='IPSTACK_AUTH',
    help_str='To obtain an API key sign up here https://www.ipstack.com/',
    prompt='IPStack API key:',
)
# Only show the key-entry widget when no IPStack entry is found under
# the "OtherProviders" config section.
ipstack_settings = get_provider_settings(config_section="OtherProviders").get("IPStack")
if not ipstack_settings:
    ips_key.display()


# In[8]:


import os

# A key must come from either the configuration or the widget above.
if not ipstack_settings and not ips_key.value:
    raise ValueError("No Authentication key in config/environment or supplied by user.")
# A key typed into the widget takes precedence: rebuild the lookup with it.
if ips_key.value:
    iplocation = IPStackLookup(api_key=ips_key.value)
# Setting MSTICPY_SKIP_IPSTACK_TEST in the environment skips the online lookup.
if "MSTICPY_SKIP_IPSTACK_TEST" not in os.environ:
    loc_result, ip_entity = iplocation.lookup_ip(ip_address='90.156.201.97')
    print('Raw result')
    display(loc_result)

    if ip_entity:
        print('IP Address Entity')
        display(ip_entity[0])
    else:
        print("No result returned")


# In[9]:


# Multi-address lookup using the `ips` list built in the earlier
# pypi.org address-resolution cell.
if "MSTICPY_SKIP_IPSTACK_TEST" not in os.environ:
    loc_result, ip_entities = iplocation.lookup_ip(ip_addr_list=ips)
    print('Raw results')
    display(loc_result)
    print('IP Address Entities')
    display(ip_entities)


# [Contents](#contents)
# ## Taking input from a pandas DataFrame
#
# The base class for both
# implementations has a method that sources the ip addresses from a dataframe column and returns a new dataframe with the location information merged with the input frame
# ```
# Signature: iplocation.df_lookup_ip(data: pandas.core.frame.DataFrame, column: str)
# Docstring:
# Lookup Geolocation data from a pandas Dataframe.
#
# Keyword Arguments:
#     data {pd.DataFrame} -- pandas dataframe containing IpAddress column
#     column {str} -- the name of the dataframe column to use as a source
# ```

# In[10]:


import pandas as pd
from IPython.display import display

netflow_df = pd.read_csv("data/az_net_flows.csv")
# Keep only the address column and drop repeats so each IP is looked up once.
netflow_df = netflow_df[["AllExtIPs"]].drop_duplicates()
iplocation = GeoLiteLookup()
# Display the result explicitly: a bare expression is only rendered by a
# notebook, and is silently discarded when this file runs as a script.
display(iplocation.df_lookup_ip(netflow_df, column="AllExtIPs"))


# [Contents](#contents)
# ## Creating a Custom GeoIP Lookup Class
#
# You can derive a class that implements the same operations to use with a different GeoIP service.
#
# The class signature is as follows:
# ```
# class GeoIpLookup(ABC):
#     """Abstract base class for GeoIP Lookup classes."""
#
#     @abstractmethod
#     def lookup_ip(self, ip_address: str = None, ip_addr_list: Iterable = None,
#                   ip_entity: IpAddress = None):
#         """
#         Lookup IP location.
#
#         Keyword Arguments:
#             ip_address {str} -- a single address to look up (default: {None})
#             ip_addr_list {Iterable} -- a collection of addresses to lookup (default: {None})
#             ip_entity {IpAddress} -- an IpAddress entity
#
#         Returns:
#             tuple(list{dict}, list{entity}) -- returns raw geolocation results and
#                 same results as IP/Geolocation entities
#
#         """
# ```
# You should override the lookup_ip method implementing your own method of geoip lookup.

# [Contents](#contents)
# ## Calculating Geographical Distances
#
# Use the geo_distance function from msticpy.context.geoip to calculate distances between two locations.
# I am indebted to Martin Thoma who posted this solution (which I've modified slightly) on Stackoverflow.
#
#
# ```
# Signature: geo_distance(origin: Tuple[float, float], destination: Tuple[float, float]) -> float
# Docstring:
# Calculate the Haversine distance.
#
# Author: Martin Thoma - stackoverflow
#
# Parameters
# ----------
# origin : tuple of float
#     (lat, long)
# destination : tuple of float
#     (lat, long)
#
# Returns
# -------
# distance_in_km : float
# ```
#
#
# Or where you have source and destination IpAddress entities, you can use the wrapper entity_distance.
# ```
# Signature:
# entity_distance(
#     ['ip_src: msticpy.nbtools.entityschema.IpAddress', 'ip_dest: msticpy.nbtools.entityschema.IpAddress'],
# ) -> float
# Docstring:
# Return distance between two IP Entities.
#
# Arguments:
#     ip_src {IpAddress} -- Source IpAddress Entity
#     ip_dest {IpAddress} -- Destination IpAddress Entity
#
# Raises:
#     AttributeError -- if either entity has no location information
#
# Returns:
#     float -- Distance in kilometers.
# ```

# In[11]:


# Import from msticpy.context.geoip, the same module the lookup classes
# are imported from at the top of this file.
from msticpy.context.geoip import geo_distance

_, ip_entity1 = iplocation.lookup_ip(ip_address='90.156.201.97')
_, ip_entity2 = iplocation.lookup_ip(ip_address='151.101.64.223')

print(ip_entity1[0])
print(ip_entity2[0])
# geo_distance takes (latitude, longitude) tuples and returns kilometres.
dist = geo_distance(
    origin=(ip_entity1[0].Location.Latitude, ip_entity1[0].Location.Longitude),
    destination=(ip_entity2[0].Location.Latitude, ip_entity2[0].Location.Longitude),
)
print(f'\nDistance between IP Locations = {round(dist, 1)}km')