#!/usr/bin/env python
# coding: utf-8
# # Title: msticpy - GeoIP Lookup
#
# ## Introduction
# This module contains two classes that allow you to look up the Geolocation of IP Addresses.
#
# You must have msticpy installed to run this notebook:
# ```
# %pip install --upgrade msticpy
# ```
#
#
# ### MaxMind GeoIPLite
# This product includes GeoLite2 data created by MaxMind, available from
# https://www.maxmind.com.
#
# This uses a local database which is downloaded the first time the class object is instantiated. It gives very fast lookups but you need to download updates regularly. MaxMind offers a free tier of this database, updated monthly. For greater accuracy and more detailed information they have varying levels of paid service. Please check out their site for more details.
#
# The geoip module uses the official MaxMind PyPI package - geoip2 - and also has options to customize the behavior of the local MaxMind database.
# * ```db_folder``` : Specify a custom path containing the local MaxMind city database. If not specified, the database is downloaded to the .msticpy dir under the user's home dir.
# * ```force_update``` : can be set to True/False to force an update regardless of the age check.
# * The age of the MaxMind city database is checked based on the database info, and a new copy is downloaded if it has not been updated in the last 30 days.
# * ```auto_update``` : can be set to True/False. Set it to False to override the automatic update if you do not want a database older than 30 days to be updated.
#
# ### IPStack
# This library uses services provided by ipstack.
# https://ipstack.com
#
# IPStack is an online service and also offers a free tier of their service. Again, the paid tiers offer greater accuracy, more detailed information and higher throughput. Please check out their site for more details.
#
#
#
# ## Table of Contents
# - [Maxmind GeoIP Lookup](#geoip_lookups)
# - [IPStack GeoIP Lookup](#ipstack_lookups)
# - [Dataframe input](#dataframe_input)
# - [Creating your own GeoIP Class](#custom_lookup)
# - [Calculating Geographical Distances](#calc_distance)
# In[12]:
# Imports
import sys
# Minimum interpreter version required by this notebook, as (major, minor).
MIN_REQ_PYTHON = (3, 6)
if sys.version_info < MIN_REQ_PYTHON:
    # Tell the user where to switch kernels before stopping execution.
    print('Check the Kernel->Change Kernel menu and ensure that Python 3.6')
    print('or later is selected as the active kernel.')
    # %-formatting (not an f-string) is used on purpose: f-strings need
    # Python 3.6+, so this cell must stay parseable on older interpreters.
    sys.exit("Python %s.%s or later is required.\n" % MIN_REQ_PYTHON)
from IPython.display import display
import pandas as pd
import msticpy.sectools as sectools
from msticpy.nbtools import *
from msticpy.nbtools.entityschema import IpAddress, GeoLocation
from msticpy.sectools.geoip import GeoLiteLookup, IPStackLookup
# [Contents](#contents)
# ## Maxmind GeoIP Lite Lookup Class
# Signature:
# ```
# iplocation.lookup_ip(ip_address: str = None,
# ip_addr_list: collections.abc.Iterable = None,
# ip_entity: msticpy.nbtools.entityschema.IpAddress = None)
# Docstring:
# Lookup IP location from GeoLite2 data created by MaxMind.
#
# Keyword Arguments:
# ip_address {str} -- a single address to look up (default: {None})
# ip_addr_list {Iterable} -- a collection of addresses to lookup (default: {None})
# ip_entity {IpAddress} -- an IpAddress entity
#
# Returns:
# tuple(list{dict}, list{entity}) -- returns raw geolocation results and
# same results as IP/Geolocation entities
# ```
# In[13]:
# Single-address lookup using a GeoLiteLookup created with no arguments.
iplocation = GeoLiteLookup()
loc_result, ip_entity = iplocation.lookup_ip(
    ip_address='90.156.201.97',
)

# lookup_ip returns a pair: the raw results and the matching entities.
print('Raw result')
display(loc_result)

print('IP Address Entity')
# Only one address was requested, so show just the first entity.
display(ip_entity[0])
# In[14]:
import tempfile
from pathlib import Path

# Keep the GeoLite database in a 'geolite' sub-folder of the system temp
# directory, passed to the lookup class through db_folder.
tmp_folder = tempfile.gettempdir()
iplocation = GeoLiteLookup(db_folder=str(Path(tmp_folder) / 'geolite'))

loc_result, ip_entity = iplocation.lookup_ip(
    ip_address='90.156.201.97',
)

print('Raw result')
display(loc_result)

print('IP Address Entity')
# Only one address was requested, so show just the first entity.
display(ip_entity[0])
# In[15]:
# Same single-address lookup, this time constructing the lookup class with
# force_update=True.
iplocation = GeoLiteLookup(
    force_update=True,
)
loc_result, ip_entity = iplocation.lookup_ip(
    ip_address='90.156.201.97',
)

print('Raw result')
display(loc_result)

print('IP Address Entity')
# Only one address was requested, so show just the first entity.
display(ip_entity[0])
# In[5]:
# Same single-address lookup, this time constructing the lookup class with
# auto_update=False.
iplocation = GeoLiteLookup(
    auto_update=False,
)
loc_result, ip_entity = iplocation.lookup_ip(
    ip_address='90.156.201.97',
)

print('Raw result')
display(loc_result)

print('IP Address Entity')
# Only one address was requested, so show just the first entity.
display(ip_entity[0])
# In[6]:
import socket

# Resolve a host name to get a list of real addresses for the bulk lookup.
socket_info = socket.getaddrinfo("pypi.org", 0, 0, 0, 0)

# Each getaddrinfo entry is (family, type, proto, canonname, sockaddr) and
# sockaddr[0] is the address string. With the socket type left at 0 the same
# address can be returned once per socket type, so drop repeats (keeping the
# first-seen order) rather than pass duplicate addresses to the lookups.
ips = list(dict.fromkeys(res[4][0] for res in socket_info))
print(ips)

# Only the entity half of the (raw results, entities) pair is needed here.
_, ip_entities = iplocation.lookup_ip(ip_addr_list=ips)
display(ip_entities)
# [Contents](#contents)
# ## IPStack Geo-lookup Class
#
# #### Class Initialization
#
# Note - requires an IPStack API Key. The optional parameter bulk_lookup allows multiple IPs in a single request. This is only available with the paid Professional tier and above.
# ```
# Init signature: IPStackLookup(api_key: str, bulk_lookup: bool = False)
# Docstring:
# GeoIP Lookup using IPStack web service.
#
# Raises:
# ConnectionError -- Invalid status returned from http request
# PermissionError -- Service refused request (e.g. requesting batch of addresses
# on free tier API key)
# Init docstring:
# Create a new instance of IPStackLookup.
#
# Arguments:
# api_key {str} -- API Key from IPStack - see https://ipstack.com
# bulk_lookup {bool} -- For Professional and above tiers allowing you to
# submit multiple IPs in a single request.
#
# ```
#
# #### lookup_ip method
# ```
# Signature:
# iplocation.lookup_ip(
#     ip_address: str = None,
#     ip_addr_list: collections.abc.Iterable = None,
#     ip_entity: msticpy.nbtools.entityschema.IpAddress = None,
# ) -> tuple
# Docstring:
# Lookup IP location from IPStack web service.
#
# Keyword Arguments:
# ip_address {str} -- a single address to look up (default: {None})
# ip_addr_list {Iterable} -- a collection of addresses to lookup (default: {None})
# ip_entity {IpAddress} -- an IpAddress entity
#
# Raises:
# ConnectionError -- Invalid status returned from http request
# PermissionError -- Service refused request (e.g. requesting batch of addresses
# on free tier API key)
#
# Returns:
# tuple(list{dict}, list{entity}) -- returns raw geolocation results and
# same results as IP/Geolocation entities
# ```
# [Contents](#contents)
# ### You will need an IPStack API key
# You will get more detailed results and a higher throughput allowance if you have a paid tier. See the IPStack website for more details.
# In[7]:
# Create the IPStack lookup without an explicit key.
iplocation = IPStackLookup()

# Enter your IPStack Key here (if not set in msticpyconfig.yaml)
ips_key = nbwidgets.GetEnvironmentKey(
    env_var='IPSTACK_AUTH',
    help_str='To obtain an API key sign up here https://www.ipstack.com/',
    prompt='IPStack API key:',
)

# Only show the key-entry widget when the lookup's settings hold no AuthKey.
if not iplocation.settings.args.get("AuthKey"):
    ips_key.display()
# In[8]:
import os

# Stop early if there is no key in the lookup's settings and none was entered
# in the widget.
if not iplocation.settings.args.get("AuthKey") and not ips_key.value:
    raise ValueError("No Authentication key in config/environment or supplied by user.")

# A key typed into the widget takes effect by re-creating the lookup with it.
if ips_key.value:
    iplocation = IPStackLookup(api_key=ips_key.value)

# Setting MSTICPY_SKIP_IPSTACK_TEST in the environment skips the lookup and
# its output below.
if "MSTICPY_SKIP_IPSTACK_TEST" not in os.environ:
    loc_result, ip_entity = iplocation.lookup_ip(ip_address='90.156.201.97')
    print('Raw result')
    display(loc_result)
    print('IP Address Entity')
    # Only one address was requested, so show just the first entity.
    display(ip_entity[0])
# In[9]:
# Bulk IPStack lookup over the `ips` list built in an earlier cell; skipped
# when MSTICPY_SKIP_IPSTACK_TEST is set in the environment.
if "MSTICPY_SKIP_IPSTACK_TEST" not in os.environ:
    loc_result, ip_entities = iplocation.lookup_ip(ip_addr_list=ips)
    print('Raw results')
    display(loc_result)
    print('IP Address Entities')
    display(ip_entities)
# [Contents](#contents)
# ## Taking input from a pandas DataFrame
#
# The base class for both implementations has a method that sources the IP addresses from a dataframe column and returns a new dataframe with the location information merged with the input frame.
# ```
# Signature: iplocation.df_lookup_ip(data: pandas.core.frame.DataFrame, column: str)
# Docstring:
# Lookup Geolocation data from a pandas Dataframe.
#
# Keyword Arguments:
# data {pd.DataFrame} -- pandas dataframe containing IpAddress column
# column {str} -- the name of the dataframe column to use as a source
# ```
# In[10]:
import pandas as pd

# Load the sample net-flow data and keep one row per distinct value of the
# AllExtIPs column.
netflow_df = pd.read_csv("data/az_net_flows.csv")
netflow_df = netflow_df[["AllExtIPs"]].drop_duplicates()

iplocation = GeoLiteLookup()

# Display the result explicitly: a bare expression is only rendered as the
# last statement of a notebook cell and is discarded when run as a script.
display(iplocation.df_lookup_ip(netflow_df, column="AllExtIPs"))
# [Contents](#contents)
# ## Creating a Custom GeoIP Lookup Class
#
# You can derive a class that implements the same operations to use with a different GeoIP service.
#
# The class signature is as follows:
# ```
# class GeoIpLookup(ABC):
# """Abstract base class for GeoIP Lookup classes."""
#
# @abstractmethod
# def lookup_ip(self, ip_address: str = None, ip_addr_list: Iterable = None,
# ip_entity: IpAddress = None):
# """
# Lookup IP location.
#
# Keyword Arguments:
# ip_address {str} -- a single address to look up (default: {None})
# ip_addr_list {Iterable} -- a collection of addresses to lookup (default: {None})
# ip_entity {IpAddress} -- an IpAddress entity
#
# Returns:
# tuple(list{dict}, list{entity}) -- returns raw geolocation results and
# same results as IP/Geolocation entities
#
# """
# ```
# You should override the lookup_ip method implementing your own method of geoip lookup.
# [Contents](#contents)
# ## Calculating Geographical Distances
#
# Use the geo_distance function from msticpy.sectools.geoip to calculate distances between two locations.
# I am indebted to Martin Thoma who posted this solution (which I've modified slightly) on Stackoverflow.
#
#
# ```
# Signature: geo_distance(origin: Tuple[float, float], destination: Tuple[float, float]) -> float
# Docstring:
# Calculate the Haversine distance.
#
# Author: Martin Thoma - stackoverflow
#
# Parameters
# ----------
# origin : tuple of float
# (lat, long)
# destination : tuple of float
# (lat, long)
#
# Returns
# -------
# distance_in_km : float
# ```
#
#
# Or where you have source and destination IpAddress entities, you can use the wrapper entity_distance.
# ```
# Signature:
# entity_distance(
#     ip_src: msticpy.nbtools.entityschema.IpAddress,
#     ip_dest: msticpy.nbtools.entityschema.IpAddress,
# ) -> float
# Docstring:
# Return distance between two IP Entities.
#
# Arguments:
# ip_src {IpAddress} -- Source IpAddress Entity
# ip_dest {IpAddress} -- Destination IpAddress Entity
#
# Raises:
# AttributeError -- if either entity has no location information
#
# Returns:
# float -- Distance in kilometers.
# ```
# In[11]:
from msticpy.sectools.geoip import geo_distance

# Look up two addresses, keeping only the entity half of each result pair.
_, ip_entity1 = iplocation.lookup_ip(ip_address='90.156.201.97')
_, ip_entity2 = iplocation.lookup_ip(ip_address='151.101.64.223')
print(ip_entity1[0])
print(ip_entity2[0])

# Build (latitude, longitude) tuples from each entity's Location and pass
# them to geo_distance as origin and destination.
origin_location = ip_entity1[0].Location
origin_coords = (origin_location.Latitude, origin_location.Longitude)
destination_location = ip_entity2[0].Location
destination_coords = (destination_location.Latitude, destination_location.Longitude)
dist = geo_distance(origin=origin_coords, destination=destination_coords)

print(f'\nDistance between IP Locations = {round(dist, 1)}km')