| Author | Last modified |
|---|---|
| Romain Jacob | 01-08-2019 |
This notebook describes and performs the parsing of the raw data for the link quality tests run on FlockLab. Most of the code would be reusable for parsing results collected on other networks, but one would have to adapt some of the parsing to the specific serial log format.
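For reference, the parsing below expects FlockLab serial logs in CSV form, where the third column holds the node ID and the fifth column holds the serial output of that node. The snippet below illustrates the assumed line format on a made-up example (all field values are hypothetical, inferred from the parsing code in this notebook):

# Hypothetical serial.csv line (made-up values); field 3 is the node ID,
# field 5 the node's serial output: <tag>:<src_id>:<rcv_total>:<rcv_stream>
line = "1562917380.123,7,7,r,Log:3:100:255.255.255.255.255.255.255.255.255.255.255.255.240."
fields = line.strip().split(',')
node_id = int(fields[2])
(tag, src_id, rcv_total, rcv_stream) = fields[4].split(':')
strobe_bytes = rcv_stream.split('.')[:-1]   # 13 bytes encode 100 strobes, 1 bit each
print(node_id, src_id, rcv_total, len(strobe_bytes))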
The rationale behind the collection of this dataset is described in the following abstract.
Dataset: Wireless Link Quality Estimation on FlockLab – and Beyond
Romain Jacob, Reto Da Forno, Roman Trüb, Andreas Biri, Lothar Thiele
(Under submission) DATA '19 Proceedings of the 2nd Workshop on Data Acquisition To Analysis, 2019
Direct link
The entire dataset is openly available on Zenodo. The repository contains the collected raw measurement data and other related files, including this notebook and the abstract.
import os
import re
import csv
import numpy as np
import pandas as pd
from pathlib import Path
## Plotting
import seaborn as sns
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go
# Enable offline mode for plotly
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
# -----------------------------------------------------------------------------------------------------------------
# Parameter settings
# -----------------------------------------------------------------------------------------------------------------
# Targets to parse
targets = {'sky','dpp-cc430'}
# Path to results to parse
data_root_folder = Path("results")
# Expected list of node ids
node_list = [1, 2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 32, 33]
# Number of strobes sent
number_of_strobes = 100
# Custom file name prefix (optional)
result_file_prefix = '2019-07'
# Enable/Disable verbose print information
verbose_print = 0
# -----------------------------------------------------------------------------------------------------------------
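The parsing routine below expects the results to be organized as follows (layout reconstructed from the paths used in the code; names in angle brackets are placeholders):

results/
└── <target>/                     # 'sky' or 'dpp-cc430'
    └── <test_date>/              # one folder per day of tests
        ├── sched_log.txt         # test scheduling log
        └── <test_number>/        # one folder per FlockLab test
            ├── serial.csv        # serial log, parsed below
            └── testconfiguration.xml   # test metadata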
# -----------------------------------------------------------------------------------------------------------------
# Parsing routine
# -----------------------------------------------------------------------------------------------------------------
for target in targets:
# Set the result file name
result_file = result_file_prefix+'_FlockLab_'+target+'.csv'
# Create the result file and write the header line
with open(result_file, 'w',newline='') as csvfile:
result_writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
result_writer.writerow([
'date_time',
'test_number',
'rf_channel',
'tx_power',
'payload',
'host_id',
'random_seed',
'snd_id',
'rcv_id',
'rcv_total',
'rcv_stream',
])
# Set the data folder and list of test dates
data_folder = data_root_folder / target
date_list = np.array(os.listdir(str(data_folder)))
mask = np.array([os.path.isdir(os.path.join(str(data_folder), date)) for date in date_list])
date_list = date_list[mask]
# Loop through the tests and parse the serial log file
for test_date in sorted(date_list)[:-2]: # the last two days are not yet available
# Open and read again the test scheduling log
sum_file_name = "sched_log.txt"
sum_file = data_folder / test_date / sum_file_name
g = open( str(sum_file), "r")
line = g.readline()
while line != '':
if "Test ID" in line:
# Get test number
tmp = line.split(':')
(test_number,tmp) = tmp[1].split(',')
test_number = int(test_number)
# Get test datetime
tmp = line.split(' ')
test_datetime = tmp[7]
# Create a clean tmp output file
h = open('tmp_output.txt', 'w')
h.close()
# Open the test serial log
try:
f = open( str(data_folder / test_date)+'/' + str(test_number) + "/serial.csv" , "r")
result_file_exist = 1
except FileNotFoundError:
result_file_exist = 0
if verbose_print:
print("%s / %s / %g : %s"
% (target,
test_date,
test_number,
'Results not found (failed or not yet fetched)'
))
# Get test metadata from the FlockLab xml config file
if result_file_exist:
# Reset the storing variables
(rf_channel,
tx_power,
host_id,
test_payload,
random_seed) = '','','','',''
# Parse the XML file
file_name = str(data_folder / test_date)+'/'+ str(test_number)+'/testconfiguration.xml'
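                    # The parameters are embedded as KEY=VALUE lines in the
                    # <description> field of the XML. Hypothetical excerpt
                    # (keys inferred from the substring checks below):
                    #   RF_CH=26
                    #   TX_PW=0
                    #   HOST=1
                    #   PAYLOAD=8
                    #   SEED=123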
with open(file_name, 'r') as xml_file:
for line in xml_file:
if 'RF_CH' in line:
tmp = line[0:-1].split('=')
rf_channel = int(tmp[1])
if 'TX_PW' in line:
tmp = line[0:-1].split('=')
tx_power = int(tmp[1])
if 'HOST' in line:
tmp = line[0:-1].split('=')
host_id = int(tmp[1])
if 'PAYLOAD' in line:
tmp = line[0:-1].split('=')
test_payload = int(tmp[1])
if 'SEED' in line:
tmp = line[0:-1].split('=')
random_seed = int(tmp[1])
if '</description>' in line:
# end of the description field
break
# Manually add missing data from the metadata
# -> Data is now included in new test results
# -> Line kept for re-parsing initial tests (Jul-Aug 2019)
if tx_power == '':
tx_power = 0 # 0dBm is default TX power
                # Initialize the list of nodes that failed during this test
failed_node_list = []
# Loop through the list of nodes
for node_id in node_list:
if result_file_exist:
# Re-start reading the serial file from the top
f.seek(0, 0)
# Reset tracking counters
snd_counter = len(node_list)
# -> To stop parsing the file once all data from `node_id` has been read
scheduled_to_strobe = 0
                        # -> Marks that `node_id` has sent its strobes
slot_failed_counter = 0
# -> Track the number of failed slots
# Read serial log first line
line = f.readline()
while line != '' and snd_counter !=0:
# Discard the header line
if line[0]=='#':
line = f.readline()
continue
tmp = line[0:-1].split(',')
if int(tmp[2]) == node_id:
# Reset the storing variables
(src_id,
rcv_total,
rcv_stream) = '','',''
# Parse the data from the serial log
if "Log:" in tmp[4]:
if "Strobing" in tmp[4]:
# Flag when `node_id` has been strobing
scheduled_to_strobe = 1
snd_counter -= 1
line = f.readline()
continue
if "Err!" in tmp[4]:
# Slot failed! Get `src_id` and set number of strobes received to null
slot_failed_counter += 1
(tag, src_id, rcv_total) = tmp[4].split(':')
rcv_total = ''
rcv_stream_string = ''
else:
# Get the `src_id` and strobes reception data
(tag, src_id, rcv_total, rcv_stream) = tmp[4].split(':')
rcv_stream = rcv_stream.split('.')
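                                    # Each '.'-separated value is one byte of the strobe
                                    # reception bitmap (one bit per strobe sent)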
rcv_stream_string = ''
for byte in rcv_stream[:-1]:
rcv_stream_string+='{0:08b}'.format(int(byte))
# Save test data in tmp file
with open('tmp_output.txt', 'a', newline='') as csvfile:
result_writer = csv.writer(csvfile,
delimiter=',',
quotechar='|',
quoting=csv.QUOTE_MINIMAL)
result_writer.writerow([
test_datetime,
test_number,
rf_channel,
tx_power,
test_payload,
host_id,
random_seed,
src_id,
node_id,
rcv_total,
                                            rcv_stream_string[:number_of_strobes],  # keep only the first `number_of_strobes` bits; the rest is byte-padding
])
# Decrement the counter
snd_counter -= 1
                            # Read the next line
line = f.readline()
# Mark list of failed nodes
if scheduled_to_strobe != 1:
# `node_id` did not strobe
failed_node_list.append(node_id)
if verbose_print:
print('%s / %s / %g : %s'
% (target,
test_date,
test_number,
'Node '+str(node_id)+' failed'
))
if slot_failed_counter != 0:
# `node_id` had some failed slots, something went wrong...
if verbose_print:
print('%s / %s / %g : %s'
% (target,
test_date,
test_number,
'Node '+str(node_id)+' failed, '+str(slot_failed_counter)+' slots failed'
))
# if all slots failed, mark the node as failed
if slot_failed_counter == (len(node_list)-1):
failed_node_list.append(node_id)
# Close serial file
if result_file_exist:
f.close()
# All nodes but the host failed, discard the test
if len(failed_node_list) == (len(node_list)-1):
if verbose_print:
print(target+" / "+test_date+" / "+str(test_number)+" : All nodes failed, test discarded.")
continue
# Write test data to the global result file
with open(result_file, 'a') as outfile:
            # Read tmp output file and blank out the data of failed nodes
with open('tmp_output.txt', 'r') as tmp_file:
# Read tmp output file
tmp_data = tmp_file.read()
# Loop through failed nodes and replace corresponding data
for src_id in failed_node_list:
for dest_id in node_list:
old_line = ',%g,%g,.*' % (float(src_id),float(dest_id))
new_line = ',%g,%g,,' % (src_id,dest_id)
tmp_data = re.sub(old_line, new_line, tmp_data)
# Write cleaned test data to the global result file
outfile.write(tmp_data)
# Move on to the next test
line = g.readline()
# Clean-up
g.close()
os.remove('tmp_output.txt')
print("Done (%s)" % target)
Done (sky)
Done (dpp-cc430)
The raw data is now parsed and easily available from the .csv files. We wrote simple routines to extract and plot two pieces of information from these files: a connectivity map for the FlockLab network, and a time series of average link quality.
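For instance, a result file can be loaded and inspected with pandas, using one of the file names produced by the parsing routine above:

# Load one of the parsed result files
df = pd.read_csv('2019-07_FlockLab_sky.csv')
print(df.shape)    # number of (snd_id, rcv_id) records x 11 columns
df.head()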
From the pre-processed .csv files, it is easy to compute a connectivity matrix for the FlockLab network. The numerical value in each cell is the median number of strobes received across all tests. Grey squares indicate that no data is available for the corresponding source <-> destination pair.
def plot_connectivity_matrix(data_file, plot_title='', slice_size=0):
# Input check
if not isinstance(data_file, str):
raise ValueError("Wrong input type. Expect a string, got "+repr(data_file)+".")
# Import csv into dataframe
try:
df = pd.read_csv(data_file, delimiter=',', error_bad_lines=False)
    except FileNotFoundError:
        print(repr(data_file) + " not found")
        return
# Keep only the relevant columns
df = df[['snd_id','rcv_id','rcv_total']]
# Create an empty DataFrame to hold the connectivity map data
connect_matrix = pd.DataFrame(np.nan, index=node_list, columns=node_list)
connect_matrix.index.name = 'Sender node ID'
# Fill the DataFrame
for snd_id in node_list:
for rcv_id in node_list:
            data = df.loc[(df['snd_id'] == snd_id) & (df['rcv_id'] == rcv_id)]['rcv_total'].median()
connect_matrix.at[snd_id, rcv_id] = data
    # Select a subset of the matrix for plotting
if slice_size:
connect_matrix = connect_matrix.loc[:slice_size,:slice_size]
# Output the map
# sns.set(rc={'figure.figsize':(9,8)})
mask = connect_matrix.isnull()
ax = plt.axes()
if plot_title == '':
ax.set_title('FlockLab connectivity matrix', fontsize=22)
else:
ax.set_title(plot_title, fontsize=20)
ax = sns.heatmap(connect_matrix, mask=mask, annot=True, fmt='g', ax=ax)
ax.set_xlabel('Receiver node ID', fontsize=16)
ax.set_ylabel('Sender node ID', fontsize=16)
data_file = '2019-07_FlockLab_dpp-cc430.csv'
# Uncomment to generate the plot used in the abstract
# sns.set(rc={'figure.figsize':(9,8)})
# plot_connectivity_matrix(data_file, plot_title=' ', slice_size=20)
sns.set(rc={'figure.figsize':(14,12)})
plot_connectivity_matrix(data_file, plot_title='FlockLab connectivity matrix - DPP-cc430 - CH5')
data_file = '2019-07_FlockLab_sky.csv'
sns.set(rc={'figure.figsize':(14,12)})
plot_connectivity_matrix(data_file, plot_title='FlockLab connectivity matrix - TelosB - CH26')
From the pre-processed .csv files, it is also easy to plot a time series of a given metric. As an example, we wrote a routine that plots a time series of the average link quality. We define this average by considering only the links where the number of strobes received is above a given threshold (e.g., 50 out of 100), then taking the mean of the number of strobes received across all source <-> destination pairs.
Note. This definition of average link quality is arbitrary; it only serves as an illustration.
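In symbols, writing $r_{s,d}$ for the number of strobes received by node $d$ from node $s$ in a given test and $\theta$ for the threshold (here, $\theta = 50$), the routine computes for each test

$$\bar{q} \;=\; \frac{1}{|\mathcal{L}|} \sum_{(s,d) \in \mathcal{L}} r_{s,d}\,, \qquad \mathcal{L} \;=\; \{\, (s,d) \;:\; r_{s,d} \geq \theta \,\},$$

where $\mathcal{L}$ is the set of active links. (The notation is ours; it simply restates the definition above.)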
def plot_link_quality_series(data_file, active_threshold, plot_title='', plot_size=[]):
# Input check
if not isinstance(data_file, str):
raise ValueError("Wrong input type. Expect a string, got "+repr(data_file)+".")
# Import csv into dataframe
try:
df = pd.read_csv(data_file, delimiter=',', error_bad_lines=False)
    except FileNotFoundError:
        print(repr(data_file) + " not found")
        return
# Parse dates
df['date_time'] = pd.to_datetime(df['date_time'], utc=True)
# Set date as index
df.set_index('date_time', inplace=True)
    # Compute the average link quality
    # (links below the threshold are blanked out, hence excluded from the mean)
    df.loc[df['rcv_total'] < active_threshold] = np.nan
df = df.groupby('date_time')['rcv_total'].mean()
# Create the traces to plot
trace = go.Scatter(
name='Data',
x=df.index,
y=df,
mode='markers'
)
data = [trace]
    # Layout
    # Grey rectangles shade three two-day spans (the weekends of July 2019)
    shaded_spans = [
        ('2019-07-13 00:00:00+02:00', '2019-07-15 00:00:00+02:00'),
        ('2019-07-20 00:00:00+02:00', '2019-07-22 00:00:00+02:00'),
        ('2019-07-27 00:00:00+02:00', '2019-07-29 00:00:00+02:00'),
    ]
    opacity = 0.2
    layout = go.Layout(
        title='FlockLab average link quality',
        yaxis={'title': 'Average link quality [# strobes received]'},
        shapes=[
            {
                'type': 'rect',
                # x-reference is assigned to the x-values
                'xref': 'x',
                # y-reference is assigned to the plot paper [0,1]
                'yref': 'paper',
                'x0': x0,
                'y0': 0,
                'x1': x1,
                'y1': 1,
                'fillcolor': '#d3d3d3',
                'opacity': opacity,
                'line': {'width': 0},
            }
            for (x0, x1) in shaded_spans
        ]
    )
# Generate the figure
figure=go.Figure(data=data,layout=layout)
if not plot_title == '':
figure.layout.update(title=plot_title)
if len(plot_size):
figure.layout.update(
width=plot_size[0],
height=plot_size[1])
iplot(figure)
return df
active_threshold = 50
data_file = '2019-07_FlockLab_dpp-cc430.csv'
# Uncomment to generate the plot used in the abstract
# plot_title=' '
# df = plot_link_quality_series(data_file, active_threshold, plot_title=plot_title, plot_size=[500,400])
plot_title = 'DPP-cc430 mote - FlockLab - CH5 - 0dBm'
df = plot_link_quality_series(data_file, active_threshold, plot_title=plot_title)
active_threshold = 50
data_file = '2019-07_FlockLab_sky.csv'
# Uncomment to generate the plot used in the abstract
# plot_title=' '
# df = plot_link_quality_series(data_file, active_threshold, plot_title=plot_title, plot_size=[500,400])
plot_title = 'Sky mote - FlockLab - CH26 - 0dBm'
df = plot_link_quality_series(data_file, active_threshold, plot_title=plot_title)