ECN Spider data input

First, load prerequisites and configure things

In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import bz2
from ipaddress import ip_address

# Change this to point at the raw data files (they're not in the paper directory)
#DATAPATH="/Users/brian/work/ecn" # for thaleia

DATAPATH="/zt/ecn" # for maloja
TABLESDIR="/home/briant/work/pam2015/tables"
PLOTSDIR="/home/briant/work/pam2015"

Load the cc_df (ECN Spider) table, which has the following columns:

  • ip: index, IP address as string
  • rank: Alexa website rank
  • site: Website hostname
  • ip6: True if address is IPv6 (for convenience in splitting v4 and v6 analyses without string munging)
  • vantage-trial-e0: True if connection succeeded for trial at vantage without ECN negotiation attempt
  • vantage-trial-e1: True if connection succeeded for trial at vantage with ECN negotiation attempt
In [2]:
cc_df = pd.read_csv("../tables/cc_df.csv")
cc_df.index = cc_df["ip"]
del cc_df["ip"]

Derive some convenience columns from the raw data:

  • vantage-trial-eq: True if connection state is equal for ECN and no-ECN
  • vantage-trial-dep: True if connection without ECN, not with.
  • vantage-trial-odd: True if connection with ECN, not without.
  • all-eq: Connection state equal in all trials
  • e0-sum: Number of trials with successful connection without ECN
  • e1-sum: Number of trials with successful connection with ECN
  • eq-sum: Number of trials with equal connection state
  • dep-sum: Number of trials with ECN-dependent connection
  • odd-sum: Number of trials with connection with ECN, not without
  • all-conn: True if all connections (regardless of ECN) succeeded
  • no-conn: True if no connections succeeded
In [3]:
e0cols = []
e1cols = []
eqcols = []
depcols = []
oddcols = []

# Per-trial additional data
for vp in ["lon","nyc","sin"]:
    for trial in range(1,4):
        trial = str(trial)
        e0col = "-".join([vp,trial,"e0"])
        e1col = "-".join([vp,trial,"e1"])
        eqcol = "-".join([vp,trial,"eq"])
        depcol = "-".join([vp,trial,"dep"])
        oddcol = "-".join([vp,trial,"odd"])
        e0cols += [e0col]
        e1cols += [e1col]
        eqcols += [eqcol]
        depcols += [depcol]       
        oddcols += [oddcol]        
        
        cc_df[eqcol] = (cc_df[e0col] & cc_df[e1col]) | (~cc_df[e0col] & ~cc_df[e1col])
        cc_df[depcol] = cc_df[e0col] & ~cc_df[e1col]
        cc_df[oddcol] = ~cc_df[e0col] & cc_df[e1col]
        
# Sum columns
cc_df["all-eq"] = cc_df.loc[:,eqcols].all(axis=1)
cc_df["eq-sum"] = cc_df.loc[:,eqcols].sum(axis=1)
cc_df["e0-sum"] = cc_df.loc[:,e0cols].sum(axis=1)
cc_df["e1-sum"] = cc_df.loc[:,e1cols].sum(axis=1)
cc_df["dep-sum"] = cc_df.loc[:,depcols].sum(axis=1)
cc_df["odd-sum"] = cc_df.loc[:,oddcols].sum(axis=1)
cc_df["all-conn"] = cc_df.loc[:,e0cols+e1cols].all(axis=1)
cc_df["no-conn"] = ~cc_df.loc[:,e0cols+e1cols].any(axis=1)

Now sieve out cases in order to make a table for the paper...

In [4]:
# Independent cases:
# Everyone always connects
cc_df_allconn  = cc_df[ cc_df["all-conn"]]
# Nobody ever connects
cc_df_permfail = cc_df[ cc_df["no-conn"]]
# Connection failures of ECN and !ECN always equal
cc_df_indep    = cc_df[ cc_df["all-eq"] &
                       ~cc_df["all-conn"] &
                       ~cc_df["no-conn"]]

# Dependent cases:
cc_df_dep = cc_df[~cc_df['all-eq']]

# Remove all rows which are clearly transient: only one failure of ECN or only one failure non-ECN
cc_df_transdep = cc_df_dep[((cc_df_dep['e0-sum'] == 9) & (cc_df_dep['e1-sum'] == 8)) | 
                           ((cc_df_dep['e0-sum'] == 8) & (cc_df_dep['e1-sum'] == 9))]
cc_df_dep      = cc_df_dep.loc[cc_df_dep.index - cc_df_transdep.index]

# Remove all rows which are clearly strange: sometimes non-ECN fails, but ECN never fails
cc_df_transodd = cc_df_dep[(cc_df_dep['dep-sum'] == 0) & (cc_df_dep['odd-sum'] > 0)]
cc_df_dep      = cc_df_dep.loc[cc_df_dep.index - cc_df_transodd.index]

# Now remove all rows which are host-dependent: 
# Definitely host dependent - we always connect without and never with
# Maybe host dependent - we sometimes connect without and never with
cc_df_hostdep = cc_df_dep[(cc_df_dep['e1-sum'] == 0)] 
cc_df_dep     = cc_df_dep.loc[cc_df_dep.index - cc_df_hostdep.index]

# one row contains all 1s
def one_row_definitely_ecndep(df):
    return (( df.loc[:,["lon-1-e0","nyc-1-e0","sin-1-e0"]].all(axis=1) & 
            ~(df.loc[:,["lon-1-e1","nyc-1-e1","sin-1-e1"]].any(axis=1))) |
            ( df.loc[:,["lon-2-e0","nyc-2-e0","sin-2-e0"]].all(axis=1) & 
            ~(df.loc[:,["lon-2-e1","nyc-2-e1","sin-2-e1"]].any(axis=1))) |
            ( df.loc[:,["lon-3-e0","nyc-3-e0","sin-3-e0"]].all(axis=1) & 
            ~(df.loc[:,["lon-3-e1","nyc-3-e1","sin-3-e1"]].any(axis=1))))                  

# one row contains only 1s and 0s and at least one 1
def one_row_maybe_ecndep(df):
    return (( df.loc[:,["lon-1-e0","nyc-1-e0","sin-1-e0"]].any(axis=1) & 
            ~(df.loc[:,["lon-1-e1","nyc-1-e1","sin-1-e1"]].any(axis=1))) |
            ( df.loc[:,["lon-2-e0","nyc-2-e0","sin-2-e0"]].any(axis=1) & 
            ~(df.loc[:,["lon-2-e1","nyc-2-e1","sin-2-e1"]].any(axis=1))) |
            ( df.loc[:,["lon-3-e0","nyc-3-e0","sin-3-e0"]].any(axis=1) & 
            ~(df.loc[:,["lon-3-e1","nyc-3-e1","sin-3-e1"]].any(axis=1))))                  

# one row contains all 3s
def one_row_allconn(df):
    return (( df.loc[:,["lon-1-e0","nyc-1-e0","sin-1-e0"]].all(axis=1) & 
            ~(df.loc[:,["lon-1-e1","nyc-1-e1","sin-1-e1"]].all(axis=1))) |
            ( df.loc[:,["lon-2-e0","nyc-2-e0","sin-2-e0"]].all(axis=1) & 
            ~(df.loc[:,["lon-2-e1","nyc-2-e1","sin-2-e1"]].all(axis=1))) |
            ( df.loc[:,["lon-3-e0","nyc-3-e0","sin-3-e0"]].all(axis=1) & 
            ~(df.loc[:,["lon-3-e1","nyc-3-e1","sin-3-e1"]].all(axis=1))))                  

# one column contains all 1s
def one_col_definitely_ecndep(df):
    return (( df.loc[:,["lon-1-e0","lon-2-e0","lon-3-e0"]].all(axis=1) & 
            ~(df.loc[:,["lon-1-e1","lon-2-e1","lon-3-e1"]].any(axis=1))) |
            ( df.loc[:,["nyc-1-e0","nyc-2-e0","nyc-3-e0"]].all(axis=1) & 
            ~(df.loc[:,["nyc-1-e1","nyc-2-e1","nyc-3-e1"]].any(axis=1))) |
            ( df.loc[:,["sin-1-e0","sin-2-e0","sin-3-e0"]].all(axis=1) & 
            ~(df.loc[:,["sin-1-e1","sin-2-e1","sin-3-e1"]].any(axis=1))))                  

# one row contains only 1s and 0s and at least one 1
def one_col_maybe_ecndep(df):
    return (( df.loc[:,["lon-1-e0","lon-2-e0","lon-3-e0"]].any(axis=1) & 
            ~(df.loc[:,["lon-1-e1","lon-2-e1","lon-3-e1"]].any(axis=1))) |
            ( df.loc[:,["nyc-1-e0","nyc-2-e0","nyc-3-e0"]].any(axis=1) & 
            ~(df.loc[:,["nyc-1-e1","nyc-2-e1","nyc-3-e1"]].any(axis=1))) |
            ( df.loc[:,["sin-1-e0","sin-2-e0","sin-3-e0"]].any(axis=1) & 
            ~(df.loc[:,["sin-1-e1","sin-2-e1","sin-3-e1"]].any(axis=1))))                  

def one_col_allconn(df):
    return (( df.loc[:,["lon-1-e0","lon-2-e0","lon-3-e0"]].all(axis=1) & 
             (df.loc[:,["lon-1-e1","lon-2-e1","lon-3-e1"]].all(axis=1))) |
            ( df.loc[:,["nyc-1-e0","nyc-2-e0","nyc-3-e0"]].all(axis=1) & 
             (df.loc[:,["nyc-1-e1","nyc-2-e1","nyc-3-e1"]].all(axis=1))) |
            ( df.loc[:,["sin-1-e0","sin-2-e0","sin-3-e0"]].all(axis=1) & 
             (df.loc[:,["sin-1-e1","sin-2-e1","sin-3-e1"]].all(axis=1))))   

cc_df_pathdep = cc_df_dep[one_col_definitely_ecndep(cc_df_dep) & 
                          one_col_allconn(cc_df_dep)]
cc_df_dep     = cc_df_dep.loc[cc_df_dep.index - cc_df_pathdep.index]

cc_df_timedep = cc_df_dep[one_row_definitely_ecndep(cc_df_dep) & 
                          one_row_allconn(cc_df_dep)]
cc_df_dep     = cc_df_dep.loc[cc_df_dep.index - cc_df_timedep.index]

cc_df_maypathdep = cc_df_dep[(cc_df_dep['e1-sum'] > 0) &
                             one_col_maybe_ecndep(cc_df_dep)]
cc_df_dep        = cc_df_dep.loc[cc_df_dep.index - cc_df_maypathdep.index]

And build the table:

In [5]:
ip4all_ct   =    len(cc_df[~cc_df['ip6']])
ip4allconn_ct =  len(cc_df_allconn[~cc_df_allconn['ip6']])
ip4permfail_ct = len(cc_df_permfail[~cc_df_permfail['ip6']])
ip4itrans_ct =   len(cc_df_indep[~cc_df_indep['ip6']])
ip4transdep_ct = len(cc_df_transdep[~cc_df_transdep['ip6']])
ip4transodd_ct = len(cc_df_transodd[~cc_df_transodd['ip6']])
ip4hostdep_ct =  len(cc_df_hostdep[~cc_df_hostdep['ip6']])
ip4pathdep_ct =  len(cc_df_pathdep[~cc_df_pathdep['ip6']])
ip4timedep_ct =  len(cc_df_timedep[~cc_df_timedep['ip6']])
ip4maypathdep_ct =  len(cc_df_maypathdep[~cc_df_maypathdep['ip6']])
ip4otherdep_ct =    len(cc_df_dep[~cc_df_dep['ip6']])
ip4transfail_ct =   ip4itrans_ct + ip4transodd_ct 
ip4indep_ct =       ip4allconn_ct + ip4permfail_ct + ip4transdep_ct + ip4transfail_ct
ip4alldep_ct =      ip4hostdep_ct + ip4pathdep_ct + ip4timedep_ct + ip4maypathdep_ct

ip6all_ct   =    len(cc_df[cc_df['ip6']])
ip6allconn_ct =  len(cc_df_allconn[cc_df_allconn['ip6']])
ip6permfail_ct = len(cc_df_permfail[cc_df_permfail['ip6']])
ip6itrans_ct =   len(cc_df_indep[cc_df_indep['ip6']])
ip6transdep_ct = len(cc_df_transdep[cc_df_transdep['ip6']])
ip6transodd_ct = len(cc_df_transodd[cc_df_transodd['ip6']])
ip6hostdep_ct =  len(cc_df_hostdep[cc_df_hostdep['ip6']])
ip6pathdep_ct =  len(cc_df_pathdep[cc_df_pathdep['ip6']])
ip6timedep_ct =  len(cc_df_timedep[cc_df_timedep['ip6']])
ip6maypathdep_ct =  len(cc_df_maypathdep[cc_df_maypathdep['ip6']])
ip6otherdep_ct = len(cc_df_dep[cc_df_dep['ip6']])
ip6transfail_ct = ip6itrans_ct + ip6transodd_ct 
ip6indep_ct =    ip6allconn_ct + ip6permfail_ct + ip6transdep_ct + ip6transfail_ct
ip6alldep_ct =   ip6hostdep_ct + ip6pathdep_ct + ip6timedep_ct + ip6maypathdep_ct


print("%6u & %5.2f\%% & %5u & %5.2f\%% & Always connected from all vantage points\\\\" %
      (ip4allconn_ct, ip4allconn_ct*100/ip4all_ct, ip6allconn_ct, ip6allconn_ct*100/ip6all_ct))
print("%6u & %5.2f\%% & %5u & %5.2f\%% & Never connected from any vantage point\\\\" %
      (ip4permfail_ct, ip4permfail_ct*100/ip4all_ct, ip6permfail_ct, ip6permfail_ct*100/ip6all_ct))
print("%6u & %5.2f\%% & %5u & %5.2f\%% & Single transient connectivity failure \\\\" %
      (ip4transdep_ct, ip4transdep_ct*100/ip4all_ct, ip6transdep_ct, ip6transdep_ct*100/ip6all_ct))
print("%6u & %5.2f\%% & %5u & %5.2f\%% & Non-ECN-related transient connectivity\\\\" %
      (ip4transfail_ct, ip4transfail_ct*100/ip4all_ct, ip6transfail_ct, ip6transfail_ct*100/ip6all_ct))
print("\\hline")
print("{\\bf %6u} & {\\bf %5.2f\%%} & {\\bf %5u} & {\\bf %5.2f\%%} & {\\bf Total ECN-independent connectivity} \\\\" %
       (ip4indep_ct, ip4indep_ct*100/ip4all_ct, ip6indep_ct, ip6indep_ct*100/ip6all_ct))
# print("\\hline")
# print("%6u & %5.2f\%% & %5u & %5.2f\%% & Single transient connection failure\\\\" %
#        (ip4transdep_ct, ip4transdep_ct*100/ip4all_ct, ip6transdep_ct, ip6transdep_ct*100/ip6all_ct))
print("\\hline")
# print("%6u & %5.2f\%% & %5u & %5.2f\%% & Potential evidence of ECN-dependent connectivity, of which...\\\\" %
#       (ip4alldep_ct, ip4alldep_ct*100/ip4all_ct, ip6alldep_ct, ip6alldep_ct*100/ip6all_ct))
print("%6u & %5.2f\%% & %5u & %5.2f\%% & Stable ECN dependency near host\\\\" %
      (ip4hostdep_ct, ip4hostdep_ct*100/ip4all_ct, ip6hostdep_ct, ip6hostdep_ct*100/ip6all_ct))
print("%6u & %5.2f\%% & %5u & %5.2f\%% & Stable ECN dependency on path\\\\" %
      (ip4pathdep_ct, ip4pathdep_ct*100/ip4all_ct, ip6pathdep_ct, ip6pathdep_ct*100/ip6all_ct))
print("%6u & %5.2f\%% & %5u & %5.2f\%% & Potential ECN dependency on path\\\\" %
      (ip4maypathdep_ct, ip4maypathdep_ct*100/ip4all_ct, 
       ip6maypathdep_ct, ip6maypathdep_ct*100/ip6all_ct))
print("%6u & %5.2f\%% & %5u & %5.2f\%% & Temporal ECN dependency\\\\" %
      (ip4timedep_ct, ip4timedep_ct*100/ip4all_ct, ip6timedep_ct, ip6timedep_ct*100/ip6all_ct))
print("\\hline")
print("{\\bf %6u} & {\\bf %5.2f\%%} & {\\bf %5u} & {\\bf %5.2f\%%} & {\\bf Total probable ECN-dependent connectivity}\\\\" %
      (ip4alldep_ct, ip4alldep_ct*100/ip4all_ct, ip6alldep_ct, ip6alldep_ct*100/ip6all_ct))
print("\\hline")
print("%6u & %5.2f\%% & %5u & %5.2f\%% & Inconclusive transient connectivity\\\\" %
       (ip4otherdep_ct, ip4otherdep_ct*100/ip4all_ct, ip6otherdep_ct, ip6otherdep_ct*100/ip6all_ct))
553805 & 95.20\% & 14889 & 87.43\% & Always connected from all vantage points\\
  3998 &  0.69\% &  1594 &  9.36\% & Never connected from any vantage point\\
  8631 &  1.48\% &   138 &  0.81\% & Single transient connectivity failure \\
 11999 &  2.06\% &   324 &  1.90\% & Non-ECN-related transient connectivity\\
\hline
{\bf 578433} & {\bf 99.43\%} & {\bf 16945} & {\bf 99.50\%} & {\bf Total ECN-independent connectivity} \\
\hline
  2193 &  0.38\% &    13 &  0.08\% & Stable ECN dependency near host\\
    15 &  0.00\% &     0 &  0.00\% & Stable ECN dependency on path\\
    34 &  0.01\% &     3 &  0.02\% & Potential ECN dependency on path\\
   201 &  0.03\% &     0 &  0.00\% & Temporal ECN dependency\\
\hline
{\bf   2443} & {\bf  0.42\%} & {\bf    16} & {\bf  0.09\%} & {\bf Total probable ECN-dependent connectivity}\\
\hline
   862 &  0.15\% &    69 &  0.41\% & Inconclusive transient connectivity\\

Look at cases where path dependency occurs (this goes on the website, not in the paper)

In [6]:
def works_in(df, vp):
    return df.loc[:,[vp+"-1-e0", vp+"-1-e1", vp+"-2-e0", vp+"-2-e1",vp+"-3-e0", vp+"-3-e1"]].all(axis=1)

def breaks_in(df, vp):
    return df.loc[:,[vp+"-1-e0", vp+"-2-e0", vp+"-3-e0"]].all(axis=1) & \
                                           ~df.loc[:,[vp+"-1-e1", vp+"-2-e1", vp+"-3-e1"]].any(axis=1)

fail_vp_df = pd.DataFrame(data = {"fail-lon" : breaks_in(cc_df_pathdep, "lon")})
fail_vp_df["fail-nyc"] = breaks_in(cc_df_pathdep, "nyc")
fail_vp_df["fail-sin"] = breaks_in(cc_df_pathdep, "sin")
fail_vp_df["fails-in"] = "None"

fail_vp_df.update(pd.DataFrame(data = {"fails-in" : "Singapore"},
                               index = fail_vp_df[fail_vp_df["fail-sin"]].index))
fail_vp_df.update(pd.DataFrame(data = {"fails-in" : "New York"},
                               index = fail_vp_df[fail_vp_df["fail-nyc"]].index))
fail_vp_df.update(pd.DataFrame(data = {"fails-in" : "London"},
                               index = fail_vp_df[fail_vp_df["fail-lon"]].index))
print(fail_vp_df.loc[:,["fail-lon","fail-nyc","fail-sin"]].to_csv())
ip,fail-lon,fail-nyc,fail-sin
114.80.207.205,True,False,False
12.3.212.2,True,False,False
124.243.25.22,True,False,True
124.36.21.138,False,False,True
124.36.21.140,False,False,True
143.248.5.153,False,True,False
184.168.221.36,False,True,True
184.168.221.63,False,True,True
207.248.166.249,True,False,False
221.148.53.166,True,False,False
50.63.202.33,True,True,False
50.63.202.48,True,True,False
50.63.202.54,True,False,False
50.63.202.60,True,True,False
89.44.109.54,True,False,False

Now we can plot dependency by rank

In [7]:
dep_df = pd.DataFrame(data = {"conndep": np.nan},
                      index = cc_df.index)
dep_df["rank"] = cc_df["rank"]

dep_df.update(pd.DataFrame(data = {"conndep" : True},
                           index = cc_df_hostdep.index   + cc_df_pathdep.index + 
                                   cc_df_timedep.index + cc_df_maypathdep.index),
             overwrite=True)

dep_df.update(pd.DataFrame(data = {"conndep" : False},
                          index = cc_df.index),
             overwrite=False)
In [8]:
dep_grp = dep_df.groupby(np.digitize(dep_df["rank"], np.linspace(0,1000000,201)))
dep_ser = (dep_grp["conndep"].sum() / dep_grp["rank"].count())
dep_ser.index = (dep_ser.index + (-1)) * 5000

plt.figure(figsize=(8,2))
plt.scatter(dep_ser.index, dep_ser, marker=".")
plt.plot(dep_ser.index, dep_ser, c="k", lw=5, alpha=0.20)
plt.xlim(-10000,1010000)
plt.ylim(0,0.022)
plt.ylabel("p(conn. ECN dep.)")
plt.xlabel("site rank (bins of 5000)")
plt.savefig(PLOTSDIR+"/connrank.pdf", bbox_inches="tight")

QoF Data Input

Load the qq_df table, which has the following columns:

  • ip: index, IP address as string
  • rank: Alexa website rank
  • site: Website hostname
  • ip6: True if address is IPv6 (for convenience in splitting v4 and v6 analyses without string munging)
  • vantage-trial-e0: True if connection established for trial at vantage without ECN negotiation attempt
  • vantage-trial-e0ect0: True if non-ECN flow set ECT0 flag on downstream for trial at vantage
  • vantage-trial-e0ect1: True if non-ECN flow set ECT1 flag on downstream for trial at vantage
  • vantage-trial-e0ce: True if non-ECN flow set CE flag on downstream for trial at vantage
  • vantage-trial-e0f: Flags on last SYN of upstream non-ECN flow for trial at vantage
  • vantage-trial-e0rf: Flags on last SYN of downstream non-ECN flow for trial at vantage
  • vantage-trial-e0ruf: Union flags on downstream non-ECN flow for trial at vantage
  • vantage-trial-ttl: Max TTL received on downstream non-ECN flow for trial at vantage
  • vantage-trial-z0: True if no payload received for non-ECN flow for trial at vantage
  • vantage-trial-z1: True if no payload received for ECN flow for trial at vantage
  • vantage-trial-e1: True if connection established for trial at vantage with ECN negotiation attempt
  • vantage-trial-neg: True if ECN negotiated for trial at vantage
  • vantage-trial-ect0: True if ECN flow set ECT0 flag on downstream for trial at vantage
  • vantage-trial-ect1: True if ECN flow set ECT1 flag on downstream for trial at vantage
  • vantage-trial-ce: True if ECN flow set CE flag on downstream for trial at vantage
  • vantage-trial-synect0: True if ECN flow set ECT0 flag on downstream SYN ACK for trial at vantage
  • vantage-trial-synect1: True if ECN flow set ECT1 flag on downstream SYN ACK for trial at vantage
  • vantage-trial-synce: True if ECN flow set CE flag on downstream SYN ACK for trial at vantage
  • vantage-trial-refl: True if ECN TCP flags were reflected (SEW -> SAEW) for trial at vantage
  • vantage-trial-ect: True if ECT0 or ECT1 set on downstream ECN flow for trial at vantage
  • vantage-trial-negok: True if negotiation succeeded (resulted in ECT marking) for trial at vantage

The -sum columns for each of the boolean columns above simply count True values across all trials and vantage points.

In [9]:
qq_df = pd.read_csv("../tables/qq_df.csv")
qq_df.index = qq_df["ip"]
del qq_df["ip"]

Sieve the resulting rows

In [10]:
# First throw away things that never connect with ECN
qq_df_nc =        qq_df[qq_df['e1-sum'] == 0]
qq_df_c =         qq_df[qq_df['e1-sum'] > 0]

# Anything we've ever seen a negotiation from counts as ECN-capable
qq_df_ecn =       qq_df_c[qq_df_c['neg-sum'] > 0]

# Then sieve out those things which always negotiate when they connect
qq_df_ecnall =    qq_df_ecn[qq_df_ecn['e1-sum'] - qq_df_ecn['neg-sum'] == 0]
qq_df_ecnsome =   qq_df_ecn[qq_df_ecn['e1-sum'] - qq_df_ecn['neg-sum'] != 0]

# Then sieve out things where we don't see data, but we always do with ECN
# This case, as it turns out, is not really interesting
# qq_df_ecnzl =     qq_df_ecnall[(qq_df_ecnall['zl-sum'] - qq_df_ecnall['e1-sum'] == 0) &
#                                (qq_df_ecnall['0zl-sum'] - qq_df_ecnall['e1-sum'] >= 0)]

# Then sieve out those things which also always or never mark when they negotiate
qq_df_ecnmark =   qq_df_ecnall[qq_df_ecnall['neg-sum'] - qq_df_ecnall['negok-sum'] == 0]
# For never mark, don't include things that don't mark because we see no data
qq_df_ecnnomark = qq_df_ecnall[(qq_df_ecnall['negok-sum'] == 0) & (qq_df_ecnall['z1-sum'] == 0)]

# Now look at those things which never negotiate
qq_df_noecn =     qq_df_c[qq_df_c['neg-sum'] == 0]
qq_df_noecnrefl = qq_df_noecn[qq_df_noecn['refl-sum'] - qq_df_noecn['e1-sum'] == 0]
qq_df_noecnect0 = qq_df_noecn[qq_df_noecn['ect0-sum'] - qq_df_noecn['e1-sum'] == 0]
qq_df_noecnect1 = qq_df_noecn[qq_df_noecn['ect1-sum'] - qq_df_noecn['e1-sum'] == 0]
qq_df_noecnce  =  qq_df_noecn[qq_df_noecn['ce-sum'] - qq_df_noecn['e1-sum'] == 0]

# And those which sometimes negotiate (these are interesting)
def one_col_allneg(df):
    return ( df.loc[:,["lon-1-neg","lon-2-neg","lon-3-neg"]].all(axis=1) |
             df.loc[:,["nyc-1-neg","nyc-2-neg","nyc-3-neg"]].all(axis=1) |
             df.loc[:,["sin-1-neg","sin-2-neg","sin-3-neg"]].all(axis=1))  

def one_col_noneg(df):
    return ( ~df.loc[:,["lon-1-neg","lon-2-neg","lon-3-neg"]].any(axis=1) |
             ~df.loc[:,["nyc-1-neg","nyc-2-neg","nyc-3-neg"]].any(axis=1) |
             ~df.loc[:,["sin-1-neg","sin-2-neg","sin-3-neg"]].any(axis=1))

qq_df_pathnego = qq_df_ecnsome[one_col_allneg(qq_df_ecnsome) & one_col_noneg(qq_df_ecnsome)]

And build a table for the paper (Table 2) (NOTE: the table in the paper includes reflection as well)

In [11]:
def df4(df):
    return df[~df['ip6']]

def df6(df):
    return df[ df['ip6']]

# Now build a table
qip4all_ct =        len(df4(qq_df))
qip4nc_ct =         len(df4(qq_df_nc))
qip4ecn_ct =        len(df4(qq_df_ecn))
qip4ecnall_ct =     len(df4(qq_df_ecnall))
#qip4ecnzl_ct =      len(df4(qq_df_ecnzl))
qip4ecnmark_ct =    len(df4(qq_df_ecnmark))
qip4ecnnomark_ct =  len(df4(qq_df_ecnnomark))
qip4someecn_ct =    len(df4(qq_df_ecnsome))
qip4pathnego_ct =   len(df4(qq_df_pathnego))
qip4noecn_ct =      len(df4(qq_df_noecn))
qip4noecnect0_ct =  len(df4(qq_df_noecnect0))
qip4noecnect1_ct =  len(df4(qq_df_noecnect1))
qip4noecnce_ct =    len(df4(qq_df_noecnce))
qip4noecnrefl_ct =    len(df4(qq_df_noecnrefl))

qip6all_ct =        len(df6(qq_df))
qip6nc_ct =         len(df6(qq_df_nc))
qip6ecn_ct =        len(df6(qq_df_ecn))
qip6ecnall_ct =     len(df6(qq_df_ecnall))
#qip6ecnzl_ct =      len(df6(qq_df_ecnzl))
qip6ecnmark_ct =    len(df6(qq_df_ecnmark))
qip6ecnnomark_ct =  len(df6(qq_df_ecnnomark))
qip6someecn_ct =    len(df6(qq_df_ecnsome))
qip6pathnego_ct =   len(df6(qq_df_pathnego))
qip6noecn_ct =      len(df6(qq_df_noecn))
qip6noecnect0_ct =  len(df6(qq_df_noecnect0))
qip6noecnect1_ct =  len(df6(qq_df_noecnect1))
qip6noecnce_ct =    len(df6(qq_df_noecnce))
qip6noecnrefl_ct =    len(df6(qq_df_noecnrefl))

def print_tablerow(ct4, ct6, ct4a, ct6a, label):
    print("%6u & %5.2f\%% & %5u & %5.2f\%% & %s\\\\" %
        (ct4, ct4*100/ct4a, ct6, ct6*100/ct6a, label))

#print_tablerow(qip4all_ct,     qip6all_ct,     qip4all_ct, qip6all_ct, "Capable of connecting with ECN")
print_tablerow(qip4ecn_ct,     qip6ecn_ct,     qip4all_ct, qip6all_ct, "Capable of negotiating ECN, of which...")
print_tablerow(qip4ecnall_ct,  qip6ecnall_ct,  qip4all_ct, qip6all_ct,   "\\hspace{1em} Always negotiate, of which...")
#print_tablerow(qip4ecnmark_ct, qip6ecnmark_ct, qip4all_ct, qip6all_ct,     "\\hspace{2em} Always mark ECT(0)")
print_tablerow(qip4ecnnomark_ct, qip6ecnnomark_ct, qip4all_ct, qip6all_ct, "\\hspace{2em} Never mark ECT")
#print_tablerow(qip4ecnzl_ct,     qip6ecnzl_ct, qip4all_ct, qip6all_ct,     "\\hspace{2em} Never send data")
print_tablerow(qip4someecn_ct,  qip6someecn_ct,  qip4all_ct, qip6all_ct,   "\\hspace{1em} Sometimes negotiate, of which...")
print_tablerow(qip4pathnego_ct,  qip6pathnego_ct,  qip4all_ct, qip6all_ct,   "\\hspace{2em} Negotiation depends on path")
print_tablerow(qip4noecn_ct,  qip6noecn_ct,  qip4all_ct, qip6all_ct, "Not capable of negotiating ECN, of which...")
print_tablerow(qip4noecnect0_ct,  qip6noecnect0_ct,  qip4all_ct, qip6all_ct,   "\\hspace{1em} Always mark ECT(0)")
#print_tablerow(qip4noecnect1_ct,  qip6noecnect1_ct,  qip4all_ct, qip6all_ct,   "\\hspace{1em} Always mark ECT(1)")
#print_tablerow(qip4noecnce_ct,    qip6noecnce_ct,  qip4all_ct, qip6all_ct,     "\\hspace{1em} Always mark CE")
#print_tablerow(qip4noecnrefl_ct,  qip6noecnrefl_ct,  qip4all_ct, qip6all_ct,   "\\hspace{1em} Always reflect ECN flags")
print_tablerow(qip4nc_ct,  qip6nc_ct,  qip4all_ct, qip6all_ct, "Never connect with ECN")
326743 & 56.17\% & 11138 & 65.41\% & Capable of negotiating ECN, of which...\\
324607 & 55.80\% & 11121 & 65.31\% & \hspace{1em} Always negotiate, of which...\\
  6698 &  1.15\% &  1861 & 10.93\% & \hspace{2em} Never mark ECT\\
  2136 &  0.37\% &    17 &  0.10\% & \hspace{1em} Sometimes negotiate, of which...\\
   107 &  0.02\% &     1 &  0.01\% & \hspace{2em} Negotiation depends on path\\
248791 & 42.77\% &  3961 & 23.26\% & Not capable of negotiating ECN, of which...\\
   498 &  0.09\% &     0 &  0.00\% & \hspace{1em} Always mark ECT(0)\\
  6177 &  1.06\% &  1929 & 11.33\% & Never connect with ECN\\

Build the negotiation/marking matrix (Table 3)

In [12]:
# Bigger negotiation/marking matrix

def has_data(df):
    return df[(df["e1-sum"] - df["z1-sum"]) > 0]

qq_df_ecnall_nzl =   has_data(qq_df_ecnall)
qq_df_anyrefl =      has_data(qq_df_c[qq_df_c["refl-sum"] > 0])
qq_df_noecn_norefl = has_data(qq_df_noecn[qq_df_noecn["refl-sum"] == 0])

def matrix_col(df, df46):
    e0 =  len(df46(df[(df["ect0-sum"]  > 0) & (df["ect1-sum"] == 0) & (df["ce-sum"] == 0)]))
    e1 =  len(df46(df[(df["ect0-sum"] == 0) & (df["ect1-sum"]  > 0) & (df["ce-sum"] == 0)]))
    ce =  len(df46(df[(df["ect0-sum"] == 0) & (df["ect1-sum"] == 0) & (df["ce-sum"]  > 0)]))
    e01 = len(df46(df[(df["ect0-sum"]  > 0) & (df["ect1-sum"]  > 0) & (df["ce-sum"] == 0)]))
    c01 = len(df46(df[(df["ect0-sum"]       +  df["ect1-sum"]  > 0) & (df["ce-sum"]  > 0)]))
    nm =  len(df46(df[(df["ect0-sum"] == 0) & (df["ect1-sum"] == 0) & (df["ce-sum"] == 0)]))
    return (e0, e1, ce, e01, c01, nm)

def matrix_col_syn(df, df46):
    e0 =  len(df46(df[(df["synect0-sum"] > 0)]))
    e1 =  len(df46(df[(df["synect1-sum"] > 0)]))
    ce =  len(df46(df[(df["synce-sum"] > 0)]))
    return (e0, e1, ce)

def matrix_col_allmark(df, df46):
    e0 =  len(df46(df[(df["ect0-sum"] - df['e1-sum'] == 0) & 
                      (df["ect1-sum"] == 0) & 
                      (df["ce-sum"] == 0)]))
    e1 =  len(df46(df[(df["ect0-sum"] == 0) & 
                      (df["ect1-sum"] - df['e1-sum'] == 0) & 
                      (df["ce-sum"] == 0)]))
    ce =  len(df46(df[(df["ect0-sum"] == 0) & 
                      (df["ect1-sum"] == 0) &
                      (df["ce-sum"] - df['e1-sum'] == 0)]))
    e01 = len(df46(df[(df["ect0-sum"] - df['e1-sum'] == 0) & 
                      (df["ect1-sum"] - df['e1-sum'] == 0) & 
                      (df["ce-sum"] == 0)]))
    c01 = len(df46(df[(df["ect0-sum"] + df["ect1-sum"] - df['e1-sum'] == 0) 
                      & (df["ce-sum"]  > 0)]))
    return (e0, e1, ce, e01, c01)


(mat4_ne0, mat4_ne1, mat4_nce, mat4_ne01, mat4_nc01, mat4_nnm) = matrix_col(qq_df_ecnall_nzl,   df4)
(mat4_re0, mat4_re1, mat4_rce, mat4_re01, mat4_rc01, mat4_rnm) = matrix_col(qq_df_anyrefl,      df4)
(mat4_xe0, mat4_xe1, mat4_xce, mat4_xe01, mat4_xc01, mat4_xnm) = matrix_col(qq_df_noecn_norefl, df4)

(mat6_ne0, mat6_ne1, mat6_nce, mat6_ne01, mat6_nc01, mat6_nnm) = matrix_col(qq_df_ecnall_nzl,   df6)
(mat6_re0, mat6_re1, mat6_rce, mat6_re01, mat6_rc01, mat6_rnm) = matrix_col(qq_df_anyrefl,      df6)
(mat6_xe0, mat6_xe1, mat6_xce, mat6_xe01, mat6_xc01, mat6_xnm) = matrix_col(qq_df_noecn_norefl, df6)

(mat4_ane0, mat4_ane1, mat4_ance, mat4_ane01, mat4_anc01) = matrix_col_allmark(qq_df_ecnall_nzl,   df4)
(mat4_are0, mat4_are1, mat4_arce, mat4_are01, mat4_arc01) = matrix_col_allmark(qq_df_anyrefl,      df4)
(mat4_axe0, mat4_axe1, mat4_axce, mat4_axe01, mat4_axc01) = matrix_col_allmark(qq_df_noecn_norefl, df4)

(mat6_ane0, mat6_ane1, mat6_ance, mat6_ane01, mat6_anc01) = matrix_col_allmark(qq_df_ecnall_nzl,   df6)
(mat6_are0, mat6_are1, mat6_arce, mat6_are01, mat6_arc01) = matrix_col_allmark(qq_df_anyrefl,      df6)
(mat6_axe0, mat6_axe1, mat6_axce, mat6_axe01, mat6_axc01) = matrix_col_allmark(qq_df_noecn_norefl, df6)

(mat4_sne0, mat4_sne1, mat4_snce) = matrix_col_syn(qq_df_ecnall_nzl,   df4)
(mat4_sre0, mat4_sre1, mat4_srce) = matrix_col_syn(qq_df_anyrefl,      df4)
(mat4_sxe0, mat4_sxe1, mat4_sxce) = matrix_col_syn(qq_df_noecn_norefl, df4)

(mat6_sne0, mat6_sne1, mat6_snce) = matrix_col_syn(qq_df_ecnall_nzl,   df6)
(mat6_sre0, mat6_sre1, mat6_srce) = matrix_col_syn(qq_df_anyrefl,      df6)
(mat6_sxe0, mat6_sxe1, mat6_sxce) = matrix_col_syn(qq_df_noecn_norefl, df6)


print("\\begin{tabular}{ r | r  r  r | r r r }")
print("        & \\multicolumn{3}{c|}{IPv4 (N=%u)} & \\multicolumn{3}{c}{IPv6 (N=%u)} \\\\" %
      (len(df4(qq_df)), len(df6(qq_df))))
print("Marking                  & ECN   & Reflect & No ECN & ECN   & Reflect & No ECN\\\\")
print("\hline")
print("only \ectzero            & %6u & %6u & %6u & %6u & %6u & %6u \\\\" %
      (mat4_ne0, mat4_re0, mat4_xe0, mat6_ne0, mat6_re0, mat6_xe0))
print("\ectzero  + \ectone & %6u & %6u & %6u & %6u & %6u & %6u \\\\" %
      (mat4_ne01, mat4_re01, mat4_xe01, mat6_ne01, mat6_re01, mat6_xe01))
print("\hline")
print("\ectzero on \syncack & %6u & %6u & %6u & %6u & %6u & %6u \\\\" %
      (mat4_sne0, mat4_sre0, mat4_sxe0, mat6_sne0, mat6_sre0, mat6_sxe0))
print("\hline \hline")
print("only \ectone         & %6u & %6u & %6u & %6u & %6u & %6u \\\\"%
      (mat4_ne1, mat4_re1, mat4_xe1, mat6_ne1, mat6_re1, mat6_xe1))
print("\hline")
print("\ectone on \synack      & %6u & %6u & %6u & %6u & %6u & %6u \\\\" %
      (mat4_sne1, mat4_sre1, mat4_sxe1, mat6_sne1, mat6_sre1, mat6_sxe1))
print("\hline \hline")
print("only \ce               & %6u & %6u & %6u & %6u & %6u & %6u \\\\"%
      (mat4_nce, mat4_rce, mat4_xce, mat6_nce, mat6_rce, mat6_xce))
print("\ce + \ect   & %6u & %6u & %6u & %6u & %6u & %6u \\\\"%
      (mat4_nc01, mat4_rc01, mat4_xc01, mat6_nc01, mat6_rc01, mat6_xc01))
print("\hline")
print("\ce on \synack & %6u & %6u & %6u & %6u & %6u & %6u \\\\" %
      (mat4_snce, mat4_srce, mat4_sxce, mat6_snce, mat6_srce, mat6_sxce))
print("\hline \hline")
print("none                     & %6u & %6u & %6u & %6u & %6u & %6u \\\\"%
      (mat4_nnm, mat4_rnm, mat4_xnm, mat6_nnm, mat6_rnm, mat6_xnm))

print("\end{tabular}")
print()
print ("%5.2f%% IPv4 negotiation anomalies" % 
       (100*(1 - ((mat4_ne0 + mat4_xnm) / sum ((mat4_ne0, mat4_ne1, mat4_nce, mat4_ne01, mat4_nc01, mat4_nnm,
                                           mat4_re0, mat4_re1, mat4_rce, mat4_re01, mat4_rc01, mat4_rnm,
                                           mat4_xe0, mat4_xe1, mat4_xce, mat4_xe01, mat4_xc01, mat4_xnm))))))
print ("%5.2f%% IPv6 negotiation anomalies" % 
       (100*(1 - ((mat6_ne0 + mat6_xnm) / sum ((mat6_ne0, mat6_ne1, mat6_nce, mat6_ne01, mat6_nc01, mat6_nnm,
                                           mat6_re0, mat6_re1, mat6_rce, mat6_re01, mat6_rc01, mat6_rnm,
                                           mat6_xe0, mat6_xe1, mat6_xce, mat6_xe01, mat6_xc01, mat6_xnm))))))
\begin{tabular}{ r | r  r  r | r r r }
        & \multicolumn{3}{c|}{IPv4 (N=581711)} & \multicolumn{3}{c}{IPv6 (N=17028)} \\
Marking                  & ECN   & Reflect & No ECN & ECN   & Reflect & No ECN\\
\hline
only \ectzero            & 315605 &    693 &   1995 &   8998 &      1 &     46 \\
\ectzero  + \ectone &      0 &      0 &      0 &      4 &      1 &      7 \\
\hline
\ectzero on \syncack &   7780 &      0 &     46 &     89 &      0 &     82 \\
\hline \hline
only \ectone         &      3 &      1 &     17 &      0 &     10 &     12 \\
\hline
\ectone on \synack      &      4 &      0 &     16 &      7 &      0 &     31 \\
\hline \hline
only \ce               &     11 &      1 &      7 &      0 &      0 &     48 \\
\ce + \ect   &      5 &      2 &      0 &     23 &     66 &     39 \\
\hline
\ce on \synack &     11 &      0 &      5 &     22 &      0 &     87 \\
\hline \hline
none                     &   6939 &   1343 & 243150 &   2013 &      5 &   3694 \\
\end{tabular}

 1.93% IPv4 negotiation anomalies
15.20% IPv6 negotiation anomalies

Investigate our one potential real CE marking (!!)

In [13]:
maybe_real_ce = df4(qq_df_ecnall_nzl[(qq_df_ecnall_nzl["ect0-sum"] + qq_df_ecnall_nzl["ect1-sum"]  > 0) & 
                                     (qq_df_ecnall_nzl["ce-sum"]  > 0)])
In [14]:
maybe_real_ce
Out[14]:
ip6 lon-1-e0 lon-1-e0ect0 lon-1-e0ect1 lon-1-e0ce lon-1-e0f lon-1-e0rf lon-1-e0ruf lon-1-ttl lon-1-z0 ... e0ce-sum e0synect0-sum e0synect1-sum e0synce-sum e1-sum e0-sum z1-sum z0-sum rank site
ip
109.190.94.17 False True False False False 2 18 25 55 False ... 2 0 0 0 9 9 0 0 101262 www.ftramp.com
162.242.172.39 False True False False False 2 18 25 54 False ... 0 0 0 0 6 6 3 3 488081 www.mattrude.com
213.162.51.7 False True False False False 2 18 23 54 True ... 0 0 0 0 9 9 0 2 197708 www.grandlyon.com
47.21.17.41 False True False True False 2 18 27 53 False ... 0 0 9 0 9 9 0 0 950512 www.abtworld.com
85.25.248.86 False True False False False 2 18 27 51 False ... 0 0 0 0 8 9 1 0 487561 www.vinternete.com

5 rows × 239 columns

Now look at ECN capability resieved only for TTL64 (Linux) hosts

In [15]:
# Resieve again for Linux
qq_df_linttl = qq_df_c[(qq_df_c['lon-1-ttl'] > 32 ) & (qq_df_c['lon-1-ttl'] < 64)]
qq_df_linecn = qq_df_linttl[qq_df_linttl['neg-sum'] > 0]

print ("considering only initial-TTL 64 hosts, %u of %u or %5.2f\%% are ECN capable" % (
        len(qq_df_linecn), len(qq_df_linttl), 100*len(qq_df_linecn)/len(qq_df_linttl)))
considering only initial-TTL 64 hosts, 326720 of 468555 or 69.73\% are ECN capable

Print rank spectrum for negotiation

In [16]:
qq_df["nego"] = qq_df["neg-sum"] > 0

# Negotiation rank spectrum
grp = qq_df.groupby(np.digitize(qq_df["rank"], np.linspace(0,1000000,201)))
ser = (grp["nego"].sum() / grp["rank"].count())
ser.index = (ser.index + (-1)) * 5000

plt.figure(figsize=(8,2))
plt.scatter(ser.index, ser, marker=".")
plt.plot(ser.index, ser, c="k", lw=5, alpha=0.20)
plt.xlim(-10000,1010000)
plt.ylim(0.45, 0.65)
plt.ylabel("p(negotiates ECN)")
plt.xlabel("site rank (bins of 5000)")
plt.savefig(PLOTSDIR+"/negorank.pdf", bbox_inches="tight")

Marking anomalies: look at markings on non-ECN-negotiated (Table 4)

In [17]:
def e0mark_counts(df, df46, col):
    many = len(df46(df[(df["e0"+col+"-sum"] > 0) & (df["e0-sum"] > 0)]))
    mall = len(df46(df[(df["e0"+col+"-sum"] == df["e0-sum"]) & (df["e0-sum"] > 0)]))
    msyn = len(df46(df[(df["e0syn"+col+"-sum"] > 0) & (df["e0-sum"] > 0)]))
    return (many, mall, msyn)

(m4e0e0, a4e0e0, s4e0e0) = e0mark_counts(qq_df, df4, "ect0")
(m6e0e0, a6e0e0, s6e0e0) = e0mark_counts(qq_df, df6, "ect0")
(m4e0e1, a4e0e1, s4e0e1) = e0mark_counts(qq_df, df4, "ect1")
(m6e0e1, a6e0e1, s6e0e1) = e0mark_counts(qq_df, df6, "ect1")
(m4e0ce, a4e0ce, s4e0ce) = e0mark_counts(qq_df, df4, "ce")
(m6e0ce, a6e0ce, s6e0ce) = e0mark_counts(qq_df, df6, "ce")

print("\\begin{tabular}{ r | r  r  r | r r r }")
print("        & \\multicolumn{3}{c|}{IPv4 (N=%u)} & \\multicolumn{3}{c}{IPv6 (N=%u)} \\\\" %
      (len(df4(qq_df)), len(df6(qq_df))))
print("Codepoint                & Once & Always & \synack & Once & Always & \synack \\\\")
print("\hline")
print("\ectzero                & %6u & %6u & %6u & %6u & %6u & %6u \\\\" %
      (m4e0e0, a4e0e0, s4e0e0, m6e0e0, a6e0e0, s6e0e0))
print("\ectone               & %6u & %6u & %6u & %6u & %6u & %6u \\\\" %
      (m4e0e1, a4e0e1, s4e0e1, m6e0e1, a6e0e1, s6e0e1))
print("\ce                    & %6u & %6u & %6u & %6u & %6u & %6u \\\\" %
      (m4e0ce, a4e0ce, s4e0ce, m6e0ce, a6e0ce, s6e0ce))
\begin{tabular}{ r | r  r  r | r r r }
        & \multicolumn{3}{c|}{IPv4 (N=581711)} & \multicolumn{3}{c}{IPv6 (N=17028)} \\
Codepoint                & Once & Always & \synack & Once & Always & \synack \\
\hline
\ectzero                &   4592 &    104 &     68 &    179 &      2 &    101 \\
\ectone               &     21 &     18 &     18 &    116 &     76 &     39 \\
\ce                    &     21 &     17 &     17 &    162 &     12 &     94 \\

A few more anecdotes:

In [18]:
print("%u IPv4 and %u IPv6 mark CE when not negotiating ECN; %u IPv4 and %u IPv6 on every flow" % (
    len(df4(qq_df[(qq_df["e0ce-sum"] > 0) & qq_df["e0-sum"] > 0])),
    len(df6(qq_df[(qq_df["e0ce-sum"] > 0) & qq_df["e0-sum"] > 0])),
    len(df4(qq_df[(qq_df["e0ce-sum"] == qq_df["e0-sum"]) & qq_df["e0-sum"] > 0])),
    len(df6(qq_df[(qq_df["e0ce-sum"] == qq_df["e0-sum"]) & qq_df["e0-sum"] > 0])),
    ))
print("%u IPv4 and %u IPv6 mark ECT0 when not negotiating ECN; %u IPv4 and %u IPv6 on every flow" % (
    len(df4(qq_df[(qq_df["e0ect0-sum"] > 0) & qq_df["e0-sum"] > 0])),
    len(df6(qq_df[(qq_df["e0ect0-sum"] > 0) & qq_df["e0-sum"] > 0])),
    len(df4(qq_df[(qq_df["e0ect0-sum"] == qq_df["e0-sum"]) & qq_df["e0-sum"] > 0])),
    len(df6(qq_df[(qq_df["e0ect0-sum"] == qq_df["e0-sum"]) & qq_df["e0-sum"] > 0])),
    ))
print("%u IPv4 and %u IPv6 mark ECT1 when not negotiating ECN; %u IPv4 and %u IPv6 on every flow" % (
    len(df4(qq_df[(qq_df["e0ect1-sum"] > 0) & qq_df["e0-sum"] > 0])),
    len(df6(qq_df[(qq_df["e0ect1-sum"] > 0) & qq_df["e0-sum"] > 0])),
    len(df4(qq_df[(qq_df["e0ect1-sum"] == qq_df["e0-sum"]) & qq_df["e0-sum"] > 0])),
    len(df6(qq_df[(qq_df["e0ect1-sum"] == qq_df["e0-sum"]) & qq_df["e0-sum"] > 0])),
    ))
21 IPv4 and 162 IPv6 mark CE when not negotiating ECN; 17 IPv4 and 12 IPv6 on every flow
4592 IPv4 and 179 IPv6 mark ECT0 when not negotiating ECN; 104 IPv4 and 2 IPv6 on every flow
21 IPv4 and 116 IPv6 mark ECT1 when not negotiating ECN; 18 IPv4 and 76 IPv6 on every flow
In [19]:
print("%u IPv4 and %u IPv6 mark CE on SYN when not negotiating ECN; %u IPv4 and %u IPv6 on every flow" % (
    len(df4(qq_df[(qq_df["e0synce-sum"] > 0) & qq_df["e0-sum"] > 0])),
    len(df6(qq_df[(qq_df["e0synce-sum"] > 0) & qq_df["e0-sum"] > 0])),
    len(df4(qq_df[(qq_df["e0synce-sum"] == qq_df["e0-sum"]) & qq_df["e0-sum"] > 0])),
    len(df6(qq_df[(qq_df["e0synce-sum"] == qq_df["e0-sum"]) & qq_df["e0-sum"] > 0])),
    ))
print("%u IPv4 and %u IPv6 mark ECT0 on SYN when not negotiating ECN; %u IPv4 and %u IPv6 on every flow" % (
    len(df4(qq_df[(qq_df["e0synect0-sum"] > 0) & qq_df["e0-sum"] > 0])),
    len(df6(qq_df[(qq_df["e0synect0-sum"] > 0) & qq_df["e0-sum"] > 0])),
    len(df4(qq_df[(qq_df["e0synect0-sum"] == qq_df["e0-sum"]) & qq_df["e0-sum"] > 0])),
    len(df6(qq_df[(qq_df["e0synect0-sum"] == qq_df["e0-sum"]) & qq_df["e0-sum"] > 0])),
    ))
print("%u IPv4 and %u IPv6 mark ECT1 on SYN when not negotiating ECN; %u IPv4 and %u IPv6 on every flow" % (
    len(df4(qq_df[(qq_df["e0synect1-sum"] > 0) & qq_df["e0-sum"] > 0])),
    len(df6(qq_df[(qq_df["e0synect1-sum"] > 0) & qq_df["e0-sum"] > 0])),
    len(df4(qq_df[(qq_df["e0synect1-sum"] == qq_df["e0-sum"]) & qq_df["e0-sum"] > 0])),
    len(df6(qq_df[(qq_df["e0synect1-sum"] == qq_df["e0-sum"]) & qq_df["e0-sum"] > 0])),
    ))
17 IPv4 and 94 IPv6 mark CE on SYN when not negotiating ECN; 17 IPv4 and 0 IPv6 on every flow
68 IPv4 and 101 IPv6 mark ECT0 on SYN when not negotiating ECN; 27 IPv4 and 0 IPv6 on every flow
18 IPv4 and 39 IPv6 mark ECT1 on SYN when not negotiating ECN; 18 IPv4 and 0 IPv6 on every flow
In [20]:
e0badmark_index = pd.Index((qq_df[(qq_df["e0ect0-sum"] > 0) & (qq_df["e0-sum"] > 0)].index |
                            qq_df[(qq_df["e0ect1-sum"] > 0) & (qq_df["e0-sum"] > 0)].index |
                            qq_df[(qq_df["e0ce-sum"] > 0) & (qq_df["e0-sum"] > 0)].index).unique(), name="ip")
print ("%u IPv4 and %u IPv6 hosts mark without ECN" % (
    len(df4(qq_df.loc[e0badmark_index])),
    len(df6(qq_df.loc[e0badmark_index]))))

e1badmark_index = qq_df_noecn_norefl[(qq_df_noecn_norefl["ect0-sum"] + 
                                      qq_df_noecn_norefl["ect1-sum"] + 
                                      qq_df_noecn_norefl["ce-sum"])  > 0].index
print ("%u IPv4 and %u IPv6 hosts mark when ECN negotiation fails" % (
    len(df4(qq_df.loc[e1badmark_index])),
    len(df6(qq_df.loc[e1badmark_index]))))

print ("%u IPv4 and %u IPv6 hosts are in the intersection" % (
    len(df4(qq_df.loc[e0badmark_index & e1badmark_index])),
    len(df6(qq_df.loc[e0badmark_index & e1badmark_index]))))                                                              
4632 IPv4 and 338 IPv6 hosts mark without ECN
2019 IPv4 and 152 IPv6 hosts mark when ECN negotiation fails
1304 IPv4 and 95 IPv6 hosts are in the intersection

TTL spectra

Now we can have a look at the TTLs of flows to characterize OS dependency in

In [21]:
# Negotiation TTL spectrum
def ttl_spectrum(df, col):
    grp = df.groupby(np.digitize(df[col], np.linspace(0,255,256)))
    ser = grp[col].count() / len(df)
    ser.index = ser.index + (-1)
    return ser

ettl = ttl_spectrum(qq_df_ecn,"lon-1-ttl")
nttl = ttl_spectrum(qq_df_noecn,"lon-1-ttl")

plt.figure(figsize=(8,1))
plt.plot(ettl.index, ettl, c="b", lw=2, alpha=0.50, label="ECN")
plt.plot(nttl.index, nttl, c="r", lw=2, alpha=0.50, label="no ECN")
plt.annotate("Linux", xy=(65,0.1))
plt.annotate("Windows", xy=(129,0.05))
plt.legend()
plt.xlim(32,255)
plt.yticks([0,0.05,0.1])
plt.xticks([64,128,255])
plt.ylabel("density")
plt.xlabel("TTL")
plt.savefig(PLOTSDIR+"/negottl.pdf", bbox_inches="tight")
In [22]:
# Connectivity TTL spectrum
cc_conndep_ttl_df = cc_df.loc[qq_df.index & (cc_df_hostdep.index + 
                                             cc_df_pathdep.index + 
                                             cc_df_timedep.index + 
                                             cc_df_maypathdep.index)]
cc_conndep_ttl_df["ttl"] = qq_df.loc[cc_conndep_ttl_df.index,['lon-1-ttl']]

cc_nodep_ttl_df = cc_df.loc[qq_df.index & cc_df_allconn.index]
cc_nodep_ttl_df["ttl"] = qq_df.loc[cc_nodep_ttl_df.index,['lon-1-ttl']]

dttl = ttl_spectrum(cc_conndep_ttl_df,"ttl")
cttl = ttl_spectrum(cc_nodep_ttl_df,"ttl")


plt.figure(figsize=(8,1))
plt.plot(cttl.index, cttl, c="b", lw=2, alpha=0.50, label="no dep")
plt.plot(dttl.index, dttl, c="r", lw=2, alpha=0.50, label="ECN dep")
plt.annotate("Linux", xy=(65,0.08))
plt.annotate("Windows", xy=(129,0.04))
plt.legend()
plt.xlim(32,255)
plt.xticks([64,128,255])
plt.yticks([0,0.05,0.1])
plt.ylabel("density")
plt.xlabel("TTL")
plt.savefig(PLOTSDIR+"/connttl.pdf", bbox_inches="tight")
In [ ]: