#!/usr/bin/env python
# coding: utf-8
#
#
# Measurement Research within the Python3 Ecosystem
# ============================================
#
# ### [Steffie Jacob Eravuchira](mailto:s.eravuchira@jacobs-university.de), [Vaibhav Bajpai](mailto:v.bajpai@jacobs-university.de), [Jürgen Schönwälder](mailto:j.schoenwaelder@jacobs-university.de)
#
# School of Engineering and Sciences,
# Campus Ring 1, Jacobs University Bremen,
# Bremen 28759
# Bremen, Germany
#
#
#
# ***
#
# This notebook is available online at: https://github.com/vbajpai/ripe69-python3-toolset
#
#
# Table of Contents
# ----------------
#
# * [Fetching Multi-Dimensional Data](#fetching-multidim-data)
# * [Frictionless SQL Storage](#frictionless-sql-storage)
# * [Frictionless Data Retrieval](#frictionless-data-retrieval)
# * [Data Analysis](#data-analysis)
# * [Data Visualization](#data-visualization)
#
#
# ## Fetching Multi-Dimensional Data
#
# * [`requests`](#requests)
# * [`requests + atlas.ripe.net`](#requests-ripeatlas)
# * [`requests + stat.ripe.net`](#requests-ripestat)
#
#
#
# ### [`requests` →](http://docs.python-requests.org)
# In[1]:
import sys

import requests
# #### `get_json_resource_from_absolute_uri(...):`
# - Takes an absolute URI to a JSON resource and a dictionary of query parameters
# - Performs HTTP GET request on the absolute URI to fetch the JSON resource.
# - Returns the JSON resource as a dictionary
# In[2]:
def get_json_resource_from_absolute_uri(url, query_params, timeout=30):
    """Fetch a JSON resource with an HTTP GET request and return it decoded.

    Args:
        url: Absolute URI of the JSON resource.
        query_params: Dictionary of query parameters (or None); passed to
            `requests.get` as `params`.
        timeout: Seconds to wait on the server before giving up; passed
            straight to `requests.get`.

    Returns:
        The decoded JSON document, or None when the request fails or the
        response body cannot be decoded as JSON. In both failure cases the
        error is printed to stderr.
    """
    # Without an explicit timeout `requests` may wait forever on a server
    # that accepts the connection but never answers.
    try:
        res = requests.get(url, params=query_params, timeout=timeout)
    except Exception as e:
        print(e, file=sys.stderr)
        return None

    try:
        return res.json()
    except Exception as e:
        print(e, file=sys.stderr)
        return None
# #### Example Usage:
# In[3]:
import json
# Ask the Wikipedia API for page information and pretty-print the reply.
url = 'https://en.wikipedia.org/w/api.php'
query_params = {
    'action': 'query',
    'titles': 'Albert_Einstein',
    'prop': 'info',
    'format': 'json',
    'rawcontinue': 'true',
}
res = get_json_resource_from_absolute_uri(url, query_params)
# A failed fetch yields None, in which case nothing is printed.
if res:
    print(json.dumps(res, indent=4))
# - - -
#
#
# ### `requests` + [`atlas.ripe.net` →](http://atlas.ripe.net)
# In[4]:
import requests
# #### `get_count_registered_atlas_probes(...):`
# - Returns the count of registered probes using the [RIPE Atlas probe API →](https://atlas.ripe.net/docs/rest/#probe)
# In[5]:
def get_count_registered_atlas_probes():
    """Return the number of probes registered with RIPE Atlas.

    Reads `meta.total_count` from the probe API response.

    Returns:
        The reported total, or None when the response could not be fetched
        or lacks the expected fields (the error is printed to stderr).
    """
    url = '%s/api/v1/probe' % 'https://atlas.ripe.net'

    try:
        payload = get_json_resource_from_absolute_uri(url, None)
    except Exception as err:
        print(err, file=sys.stderr)
        return None

    # A failed fetch hands back None; subscripting it raises and is reported
    # here just like a response with missing keys.
    try:
        return payload['meta']['total_count']
    except Exception as err:
        print(err, file=sys.stderr)
        return None
# #### Example Usage:
# In[6]:
# Nothing is printed when the lookup failed (None) or reported zero probes.
count = get_count_registered_atlas_probes()
if count:
    print("# RIPE Atlas Registered Probes: %d" % count)
# - - -
#
# #### `get_count_connected_atlas_probes(...):`
# - Returns the count of connected probes using the [RIPE Atlas probe API →](https://atlas.ripe.net/docs/rest/#probe)
# In[7]:
def get_count_connected_atlas_probes():
    """Count the RIPE Atlas probes whose `status` field equals 1 (connected).

    Pages through the probe API `limit` entries at a time, printing one dot
    per fetched page as a progress indicator.

    Returns:
        The number of matching probes, or None when the registered-probe
        count or any page could not be retrieved or parsed (the error is
        printed to stderr).
    """
    base_uri = 'https://atlas.ripe.net'
    url = '%s/api/v1/probe' % base_uri
    offset = 0
    limit = 100
    connected_objects = 0

    registered_count = get_count_registered_atlas_probes()
    if not registered_count:
        return None

    # Strictly less-than: valid offsets are 0 .. registered_count - 1, so a
    # request at offset == registered_count could only return an empty page.
    while offset < registered_count:
        query_params = {'offset': '%d' % offset, 'limit': '%d' % limit}
        try:
            res = get_json_resource_from_absolute_uri(url, query_params)
            # A failed fetch returns None; subscripting it raises and is
            # handled below together with malformed entries.
            connected_objects += sum(
                1 for probe in res['objects'] if probe['status'] == 1
            )
        except Exception as e:
            print(e, file=sys.stderr)
            return None

        # Flush so that the progress dots appear while paging is under way.
        print('.', end='', flush=True)
        offset += limit

    print('')
    return connected_objects
# #### Example Usage:
# In[8]:
# Nothing is printed when the count is None (lookup failed) or zero.
count = get_count_connected_atlas_probes()
if count:
    print("# RIPE Atlas Connected probes: %d" % count)
# - - -
#
#
# ### `requests` + [`stat.ripe.net` →](https://stat.ripe.net)
# In[9]:
import requests
# #### `get_holder_from_asn(...)`
# - Takes ASN as input
# - Fetches the holder organization name associated with the ASN using the [RIPEstat Data API →](https://stat.ripe.net/docs/data_api)
# - Returns the fetched organization name associated with the ASN.
# In[10]:
def get_holder_from_asn(asn):
    """Return the holder organization name registered for an ASN.

    Queries the RIPEstat `as-overview` data call with the ASN as the
    `resource` parameter and reads `data.holder` from the response.

    Args:
        asn: The AS number to look up, e.g. '15169'.

    Returns:
        The holder name, or None when the lookup fails or the response
        lacks the expected fields (the error is printed to stderr).
    """
    base_uri = 'https://stat.ripe.net'
    url = '%s/data/as-overview/data.json' % base_uri
    params = {'resource': asn}

    # Fetch and extraction share one handler so that every failure path
    # returns None instead of falling through to an unbound local variable.
    try:
        res = get_json_resource_from_absolute_uri(url, params)
        return res['data']['holder']
    except Exception as e:
        print(e, file=sys.stderr)
        return None
# #### Example:
# In[11]:
# Resolve an example ASN to its holder name and show the mapping.
asn = '15169'
holder = get_holder_from_asn(asn)
print('%s => %s' % (asn, holder))
# - - -
#
# #### `get_asn_from_endpoint(...):`
#
# - Takes an IP endpoint as input
# - Using the [RIPEstat Data API →](https://stat.ripe.net/docs/data_api)
# - Calculates the 1st-level less-specific prefix encompassing the IP endpoint.
# - Fetches the ASN (`ASN`) announcing the prefix.
# - Fetches the holder organization name (`holder`) associated with the ASN.
# - Returns a list of `(ASN, holder)` tuples.
# In[12]:
def get_asn_from_endpoint(endpoint):
    """Look up the ASNs, and their holders, reported for an IP endpoint.

    Queries the RIPEstat `prefix-overview` data call with the endpoint as
    the `resource` parameter and reads the entries under `data.asns`.

    Args:
        endpoint: IP endpoint to look up, e.g. '8.8.8.8'.

    Returns:
        A list of `(asn, holder)` tuples. The list is empty when the lookup
        fails, and holds only the entries read so far when the response is
        malformed part-way through; the error is printed to stderr.
    """
    base_uri = 'https://stat.ripe.net'
    url = '%s/data/prefix-overview/data.json' % base_uri
    params = {'resource': endpoint}

    # Always hand back a list so callers can iterate the result directly.
    asns_holders = []
    try:
        res = get_json_resource_from_absolute_uri(url, params)
        for item in res['data']['asns']:
            asns_holders.append((item['asn'], item['holder']))
    except Exception as e:
        print(e, file=sys.stderr)
    return asns_holders
# #### Example:
# In[13]:
# Example IPv4 and IPv6 endpoints.
ep4 = '8.8.8.8'
ep6 = '2001:4860:4860::8888'
asns_holders_v4 = get_asn_from_endpoint(ep4)
asns_holders_v6 = get_asn_from_endpoint(ep6)

# NOTE: the loop variables `asn` and `holder` stay bound at module level
# after the loops finish and are read again by a later cell.
print('%s' % ep4)
for asn, holder in asns_holders_v4:
    print('=> %d, %s' % (asn, holder))

print('\n%s' % ep6)
for asn, holder in asns_holders_v6:
    print('=> %d, %s' % (asn, holder))
# - - -
#
# #### `create_pretty_node_names(...)`
# - Takes ASN and associated holder organization name as input
# - Returns the pretty-printed form of input
# In[14]:
def create_pretty_node_names(asn, holder):
    """Build a short display label of the form `<name> (AS<number>)`.

    Args:
        asn: AS number, with or without an 'AS' marker in it.
        holder: Holder organization name; only the text before the first
            '-' and, within that, before the first space is kept.

    Returns:
        The label string.
    """
    before_dash = holder.split('-')[0]
    short_name = before_dash.split(' ')[0]
    # Only add the 'AS' prefix when the given ASN does not already carry it.
    template = '%s (%s)' if 'AS' in str(asn) else '%s (AS%s)'
    return template % (short_name, asn)
# #### Example:
# In[15]:
# `asn` and `holder` are the module-level names left bound by earlier cells.
node = create_pretty_node_names(asn, holder)
print(node)
# - - -
# [Back to Top](#top)
#
# ## Frictionless SQL storage
#
# * [`pandas + DataFrame`](#pandas-dataframe)
# * [`pandas + to_sql`](#pandas-to-sql)
#
#
#
# ### [`pandas` →](http://pandas.pydata.org/) + `DataFrame`
# In[16]:
import pandas as pd
# #### `pandas.DataFrame(...)`
# - We use the [RIPE Atlas probe archive API →](https://atlas.ripe.net/docs/rest/#probe-archive)
# - We only show a few probe API fields for brevity.
# - The probe ID is set as the index of the `DataFrame`.
# In[17]:
url = 'https://atlas.ripe.net/api/v1/probe-archive/'
# The query value must be the string 'json'; passing the `json` module
# object would send its repr as the `format` parameter.
res = get_json_resource_from_absolute_uri(url, {'format': 'json'})
# One row per entry of the response's `objects` list.
df = pd.DataFrame(res['objects'])
# Bare expression: shows the first rows when run as a notebook cell.
df[['id', 'asn_v4', 'asn_v6', 'country_code', 'status_name', 'tags']].head()
# - - -
#
# ### `pandas` + `to_sql(...)`
# #### `sqlite3.connect(...)` function:
#
# Connects to a `sqlite3` database:
# - Creates the database if it does not exist, or
# - Connects to the database if it exists
# In[18]:
import sqlite3
# File name of the SQLite database used by the following cells.
DBNAME = 'ripe69-toolset.db'
# Connection object shared by the write and read cells below.
con = sqlite3.connect(DBNAME)
# We need to unbox DataFrame columns of an object datatype into a string representation to allow insertion into a SQL table
# In[19]:
# Convert every object-typed column to its string form, column by column.
for c in df.columns:
    if df[c].dtype == object:
        df[c] = df[c].astype(str)
# In[20]:
# Use the probe ID as the DataFrame index.
df = df.set_index('id')
# #### `DataFrame.to_sql(...)`:
#
# Write DataFrame records to a SQL database table:
# - Creates the table (with the supplied table name) if it does not exist.
# - Overwrites if the table (with the supplied table name) exists.
# - Inserts the records within a DataFrame into the table.
# In[21]:
# Defined before the try block so that later cells can always refer to the
# table name, even when enabling the pragma fails.
TABLENAME = 'ra_probe_api_2014'
try:
    cur = con.execute('pragma foreign_keys=ON')
except Exception as e:
    print(e, file=sys.stderr)
else:
    try:
        # The `flavor` keyword was deprecated and later removed from
        # `to_sql`; passing it makes current pandas raise a TypeError, so it
        # is omitted here.
        df.to_sql(
            TABLENAME,
            con,
            if_exists='replace',
            index_label='id',
        )
    except Exception as e:
        print(e, file=sys.stderr)
    con.commit()
# - - -
# [Back to Top](#top)
#
# ## Frictionless Data Retrieval
#
# ### `pandas` + `read_sql(...)`
# #### `pd.read_sql(...)` function:
# - Takes a SQL connection object and a SQL query as input.
# - Applies the SQL query on the connected database and returns a `pandas DataFrame`
# In[22]:
# Read a subset of the stored columns back from the table named by TABLENAME.
query = '''select id, asn_v4, asn_v6, country_code, status_name, tags from %s'''%TABLENAME
df = pd.read_sql(query, con)
# Bare expression: shows the first rows when run as a notebook cell.
df.head()
# - - -
# [Back to Top](#top)
#
# ## Data Analysis
#
# ### `python3 ipaddress`
# In[23]:
import ipaddress
# #### `ipaddress.ip_address(...)`:
# Creates an IPv4/IPv6 address object from the input string
# In[24]:
# Parse both example endpoints; the results are printed only if both parse.
try:
    myipv4 = ipaddress.ip_address(ep4)
    myipv6 = ipaddress.ip_address(ep6)
except Exception as e:
    print(e, file=sys.stderr)
else:
    print('%s => %s' % (myipv4, type(myipv4)))
    print('%s => %s' % (myipv6, type(myipv6)))
# #### `ipaddress.is_private`:
#
# - Returns `True` if IP endpoint is Private [RFC1918 →](https://tools.ietf.org/html/rfc1918)
# - Returns `False` if IP endpoint is not Private [RFC1918 →](https://tools.ietf.org/html/rfc1918)
# In[25]:
# Report the `is_private` attribute of the parsed IPv4 address.
print('Is %s Private [RFC1918]? => %s'%(myipv4, myipv4.is_private))
# #### `ipaddress.version`:
#
# - Returns 4 if IP endpoint is IPv4.
# - Returns 6 if IP endpoint is IPv6.
#
# In[26]:
# Report the `version` attribute of each parsed address.
print("%s => IPv%s"%(myipv4, myipv4.version))
print("%s => IPv%s"%(myipv6,myipv6.version))
# - - -
# [Back to Top](#top)
#
# ## Data Visualization
#
#
# ### `pandas + matplotlib`
#
# - Uses `pandas.read_sql(...)` to read the probe ID and ASN v4 from the sqlite3 table (dump of the probe API data).
# - Uses `DataFrame.groupby(...)` to group the probe IDs by ASN v4.
# - Uses `DataFrameGroupby.agg(...)` to aggregate by counting the number of probes behind each ASN v4.
# - Uses `DataFrame.sort_values(...)` to sort the ASN v4 aggregates by number of probes.
# In[27]:
query = '''select id, asn_v4 from %s''' % TABLENAME
df = pd.read_sql(query, con)
dfgroupby = df.groupby('asn_v4')
# `len` per group leaves the probe count of each ASN in the `id` column.
dfagg = dfgroupby.agg(len)
# `DataFrame.sort(...)` was deprecated and later removed from pandas;
# `sort_values(...)` is its replacement.
dfsort = dfagg.sort_values('id', ascending=False)
# The second reset_index() adds the 0-based position as an `index` column.
df = dfsort.reset_index().reset_index()
# Shift the position to start at 1.
df['index'] = df['index'] + 1
# - Uses `DataFrame.plot(...)` to plot the distribution of number of probes by ASN index (designated by the number of deployed probes).
# In[29]:
# `get_ipython()` is supplied by the IPython/Jupyter runtime.
get_ipython().run_line_magic('pylab', 'inline')
# Area plot of the `id` column against `index`, with a logarithmic x-axis.
ax = df.plot(
    'index',
    'id',
    logx=True,
    kind='area',
    legend=False,
    figsize=(12, 8),
)
ax.set_ylabel("# of Registered RIPE Atlas probes", fontsize=18)
ax.set_xlabel("ASN index (log scale)", fontsize=18)
# [Back to Top](#top)
#
#
# ### Work Supported by Leone Project: [leone-project.eu →](http://leone-project.eu/) ###