| Author | Last modified |
|---|---|
| Romain Jacob | 01-08-2019 |
This notebook describes and performs the parsing of the raw data for the link quality tests run on FlockLab. Most of the code would be reusable for parsing results collected on other networks, but one would have to adapt some of the parsing to the specific serial log format.
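For reference, the parsing below expects FlockLab serial logs in CSV form, where the third column holds the node ID and the fifth column holds the serial output of that node. The snippet below illustrates the assumed line format on a made-up example (all field values are hypothetical, inferred from the parsing code in this notebook):

# Hypothetical serial.csv line (made-up values); field 3 is the node ID,
# field 5 the node's serial output: <tag>:<src_id>:<rcv_total>:<rcv_stream>
line = "1562917380.123,7,7,r,Log:3:100:255.255.255.255.255.255.255.255.255.255.255.255.240."
fields = line.strip().split(',')
node_id = int(fields[2])
(tag, src_id, rcv_total, rcv_stream) = fields[4].split(':')
strobe_bytes = rcv_stream.split('.')[:-1]   # 13 bytes encode 100 strobes, 1 bit each
print(node_id, src_id, rcv_total, len(strobe_bytes))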
The rationale behind the collection of this dataset is described in the following abstract.
Dataset: Wireless Link Quality Estimation on FlockLab – and Beyond
Romain Jacob, Reto Da Forno, Roman Trüb, Andreas Biri, Lothar Thiele
(Under submission) DATA '19 Proceedings of the 2nd Workshop on Data Acquisition To Analysis, 2019
Direct link
The entire dataset is openly available on Zenodo. The repository contains the collected raw measurement data and other related files, including this notebook and the abstract.
import os
import re
import csv
import numpy as np
import pandas as pd
from pathlib import Path
## Plotting
import seaborn as sns
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go
# Enable offline mode for plotly
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
# -----------------------------------------------------------------------------------------------------------------
# Parameter settings
# -----------------------------------------------------------------------------------------------------------------
# Targets to parse
targets = {'sky','dpp-cc430'}
# Path to results to parse
data_root_folder = Path("results")
# Expected list of node ids
node_list = [1, 2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 32, 33]
# Number of strobes sent
number_of_strobes = 100
# Custom file name prefix (optional)
result_file_prefix = '2019-07'
# Enable/Disable verbose print information
verbose_print = 0
# -----------------------------------------------------------------------------------------------------------------
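The parsing routine below expects the results to be organized as follows (layout reconstructed from the paths used in the code; names in angle brackets are placeholders):

results/
└── <target>/                     # 'sky' or 'dpp-cc430'
    └── <test_date>/              # one folder per day of tests
        ├── sched_log.txt         # test scheduling log
        └── <test_number>/        # one folder per FlockLab test
            ├── serial.csv        # serial log, parsed below
            └── testconfiguration.xml   # test metadata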
# -----------------------------------------------------------------------------------------------------------------
# Parsing routine
# -----------------------------------------------------------------------------------------------------------------
for target in targets:
# Set the result file name
result_file = result_file_prefix+'_FlockLab_'+target+'.csv'
# Create the result file and write the header line
with open(result_file, 'w',newline='') as csvfile:
result_writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
result_writer.writerow([
'date_time',
'test_number',
'rf_channel',
'tx_power',
'payload',
'host_id',
'random_seed',
'snd_id',
'rcv_id',
'rcv_total',
'rcv_stream',
])
# Set the data folder and list of test dates
data_folder = data_root_folder / target
date_list = np.array(os.listdir(str(data_folder)))
mask = np.array([os.path.isdir(os.path.join(str(data_folder), date)) for date in date_list])
date_list = date_list[mask]
# Loop through the tests and parse the serial log file
for test_date in sorted(date_list)[:-2]: # the last two days are not yet available
# Open and read again the test scheduling log
sum_file_name = "sched_log.txt"
sum_file = data_folder / test_date / sum_file_name
g = open( str(sum_file), "r")
line = g.readline()
while line != '':
if "Test ID" in line:
# Get test number
tmp = line.split(':')
(test_number,tmp) = tmp[1].split(',')
test_number = int(test_number)
# Get test datetime
tmp = line.split(' ')
test_datetime = tmp[7]
# Create a clean tmp output file
h = open('tmp_output.txt', 'w')
h.close()
# Open the test serial log
try:
f = open( str(data_folder / test_date)+'/' + str(test_number) + "/serial.csv" , "r")
result_file_exist = 1
except FileNotFoundError:
result_file_exist = 0
if verbose_print:
print("%s / %s / %g : %s"
% (target,
test_date,
test_number,
'Results not found (failed or not yet fetched)'
))
# Get test metadata from the FlockLab xml config file
if result_file_exist:
# Reset the storing variables
(rf_channel,
tx_power,
host_id,
test_payload,
random_seed) = '','','','',''
# Parse the XML file
file_name = str(data_folder / test_date)+'/'+ str(test_number)+'/testconfiguration.xml'
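                    # The parameters are embedded as KEY=VALUE lines in the
                    # <description> field of the XML. Hypothetical excerpt
                    # (keys inferred from the substring checks below):
                    #   RF_CH=26
                    #   TX_PW=0
                    #   HOST=1
                    #   PAYLOAD=8
                    #   SEED=123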
with open(file_name, 'r') as xml_file:
for line in xml_file:
if 'RF_CH' in line:
tmp = line[0:-1].split('=')
rf_channel = int(tmp[1])
if 'TX_PW' in line:
tmp = line[0:-1].split('=')
tx_power = int(tmp[1])
if 'HOST' in line:
tmp = line[0:-1].split('=')
host_id = int(tmp[1])
if 'PAYLOAD' in line:
tmp = line[0:-1].split('=')
test_payload = int(tmp[1])
if 'SEED' in line:
tmp = line[0:-1].split('=')
random_seed = int(tmp[1])
if '</description>' in line:
# end of the description field
break
# Manually add missing data from the metadata
# -> Data is now included in new test results
# -> Line kept for re-parsing initial tests (Jul-Aug 2019)
if tx_power == '':
tx_power = 0 # 0dBm is default TX power
                # Initialize the list of nodes that failed during this test
failed_node_list = []
# Loop through the list of nodes
for node_id in node_list:
if result_file_exist:
# Re-start reading the serial file from the top
f.seek(0, 0)
# Reset tracking counters
snd_counter = len(node_list)
# -> To stop parsing the file once all data from `node_id` has been read
scheduled_to_strobe = 0
                        # -> Marks that `node_id` has sent its strobes
slot_failed_counter = 0
# -> Track the number of failed slots
# Read serial log first line
line = f.readline()
while line != '' and snd_counter !=0:
# Discard the header line
if line[0]=='#':
line = f.readline()
continue
tmp = line[0:-1].split(',')
if int(tmp[2]) == node_id:
# Reset the storing variables
(src_id,
rcv_total,
rcv_stream) = '','',''
# Parse the data from the serial log
if "Log:" in tmp[4]:
if "Strobing" in tmp[4]:
# Flag when `node_id` has been strobing
scheduled_to_strobe = 1
snd_counter -= 1
line = f.readline()
continue
if "Err!" in tmp[4]:
# Slot failed! Get `src_id` and set number of strobes received to null
slot_failed_counter += 1
(tag, src_id, rcv_total) = tmp[4].split(':')
rcv_total = ''
rcv_stream_string = ''
else:
# Get the `src_id` and strobes reception data
(tag, src_id, rcv_total, rcv_stream) = tmp[4].split(':')
rcv_stream = rcv_stream.split('.')
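                                    # Each '.'-separated value is one byte of the strobe
                                    # reception bitmap (one bit per strobe sent)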
rcv_stream_string = ''
for byte in rcv_stream[:-1]:
rcv_stream_string+='{0:08b}'.format(int(byte))
# Save test data in tmp file
with open('tmp_output.txt', 'a', newline='') as csvfile:
result_writer = csv.writer(csvfile,
delimiter=',',
quotechar='|',
quoting=csv.QUOTE_MINIMAL)
result_writer.writerow([
test_datetime,
test_number,
rf_channel,
tx_power,
test_payload,
host_id,
random_seed,
src_id,
node_id,
rcv_total,
                                            rcv_stream_string[:number_of_strobes],  # keep only the first `number_of_strobes` bits; the rest is byte-padding
])
# Decrement the counter
snd_counter -= 1
                            # Read the next line
line = f.readline()
# Mark list of failed nodes
if scheduled_to_strobe != 1:
# `node_id` did not strobe
failed_node_list.append(node_id)
if verbose_print:
print('%s / %s / %g : %s'
% (target,
test_date,
test_number,
'Node '+str(node_id)+' failed'
))
if slot_failed_counter != 0:
# `node_id` had some failed slots, something went wrong...
if verbose_print:
print('%s / %s / %g : %s'
% (target,
test_date,
test_number,
'Node '+str(node_id)+' failed, '+str(slot_failed_counter)+' slots failed'
))
# if all slots failed, mark the node as failed
if slot_failed_counter == (len(node_list)-1):
failed_node_list.append(node_id)
# Close serial file
if result_file_exist:
f.close()
# All nodes but the host failed, discard the test
if len(failed_node_list) == (len(node_list)-1):
if verbose_print:
print(target+" / "+test_date+" / "+str(test_number)+" : All nodes failed, test discarded.")
continue
# Write test data to the global result file
with open(result_file, 'a') as outfile:
            # Read tmp output file and blank out the data of failed nodes
with open('tmp_output.txt', 'r') as tmp_file:
# Read tmp output file
tmp_data = tmp_file.read()
# Loop through failed nodes and replace corresponding data
for src_id in failed_node_list:
for dest_id in node_list:
old_line = ',%g,%g,.*' % (float(src_id),float(dest_id))
new_line = ',%g,%g,,' % (src_id,dest_id)
tmp_data = re.sub(old_line, new_line, tmp_data)
# Write cleaned test data to the global result file
outfile.write(tmp_data)
# Move on to the next test
line = g.readline()
# Clean-up
g.close()
os.remove('tmp_output.txt')
print("Done (%s)" % target)
Done (sky)
Done (dpp-cc430)
The raw data is now parsed and easily available from the .csv files. We wrote simple routines to extract and plot two pieces of information from these files: a connectivity map for the FlockLab network, and a time series of average link quality.
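For instance, a result file can be loaded and inspected with pandas, using one of the file names produced by the parsing routine above:

# Load one of the parsed result files
df = pd.read_csv('2019-07_FlockLab_sky.csv')
print(df.shape)    # number of (snd_id, rcv_id) records x 11 columns
df.head()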
From the pre-processed .csv files, it is easy to compute a connectivity matrix for the FlockLab network. The numerical value in each cell is the median number of strobes received across all tests. Grey squares indicate that no data is available for the corresponding source <-> destination pair.
def plot_connectivity_matrix(data_file, plot_title='', slice_size=0):
# Input check
if not isinstance(data_file, str):
raise ValueError("Wrong input type. Expect a string, got "+repr(data_file)+".")
# Import csv into dataframe
try:
df = pd.read_csv(data_file, delimiter=',', error_bad_lines=False)
    except FileNotFoundError:
        print(repr(data_file) + " not found")
        return
# Keep only the relevant columns
df = df[['snd_id','rcv_id','rcv_total']]
# Create an empty DataFrame to hold the connectivity map data
connect_matrix = pd.DataFrame(np.nan, index=node_list, columns=node_list)
connect_matrix.index.name = 'Sender node ID'
# Fill the DataFrame
for snd_id in node_list:
for rcv_id in node_list:
            data = df.loc[(df['snd_id'] == snd_id) & (df['rcv_id'] == rcv_id)]['rcv_total'].median()
connect_matrix.at[snd_id, rcv_id] = data
    # Select a subset of the matrix for plotting
if slice_size:
connect_matrix = connect_matrix.loc[:slice_size,:slice_size]
# Output the map
# sns.set(rc={'figure.figsize':(9,8)})
mask = connect_matrix.isnull()
ax = plt.axes()
if plot_title == '':
ax.set_title('FlockLab connectivity matrix', fontsize=22)
else:
ax.set_title(plot_title, fontsize=20)
ax = sns.heatmap(connect_matrix, mask=mask, annot=True, fmt='g', ax=ax)
ax.set_xlabel('Receiver node ID', fontsize=16)
ax.set_ylabel('Sender node ID', fontsize=16)
data_file = '2019-07_FlockLab_dpp-cc430.csv'
# Uncomment to generate the plot used in the abstract
# sns.set(rc={'figure.figsize':(9,8)})
# plot_connectivity_matrix(data_file, plot_title=' ', slice_size=20)
sns.set(rc={'figure.figsize':(14,12)})
plot_connectivity_matrix(data_file, plot_title='FlockLab connectivity matrix - DPP-cc430 - CH5')
data_file = '2019-07_FlockLab_sky.csv'
sns.set(rc={'figure.figsize':(14,12)})
plot_connectivity_matrix(data_file, plot_title='FlockLab connectivity matrix - TelosB - CH26')
From the pre-processed .csv files, it is also easy to plot a time series of a given metric. As an example, we wrote a routine that plots a time series of the average link quality. We define this average by considering only the links where the number of strobes received is above a given threshold (e.g., 50 out of 100), then taking the mean of the number of strobes received across all source <-> destination pairs.
Note. This definition of average link quality is arbitrary; it only serves as an illustration.
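In symbols, writing $r_{s,d}$ for the number of strobes received by node $d$ from node $s$ in a given test and $\theta$ for the threshold (here, $\theta = 50$), the routine computes for each test

$$\bar{q} \;=\; \frac{1}{|\mathcal{L}|} \sum_{(s,d) \in \mathcal{L}} r_{s,d}\,, \qquad \mathcal{L} \;=\; \{\, (s,d) \;:\; r_{s,d} \geq \theta \,\},$$

where $\mathcal{L}$ is the set of active links. (The notation is ours; it simply restates the definition above.)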
def plot_link_quality_series(data_file, active_threshold, plot_title='', plot_size=[]):
# Input check
if not isinstance(data_file, str):
raise ValueError("Wrong input type. Expect a string, got "+repr(data_file)+".")
# Import csv into dataframe
try:
df = pd.read_csv(data_file, delimiter=',', error_bad_lines=False)
    except FileNotFoundError:
        print(repr(data_file) + " not found")
        return
# Parse dates
df['date_time'] = pd.to_datetime(df['date_time'], utc=True)
# Set date as index
df.set_index('date_time', inplace=True)
    # Compute the average link quality
    # (links below the threshold are blanked out, hence excluded from the mean)
    df.loc[df['rcv_total'] < active_threshold] = np.nan
df = df.groupby('date_time')['rcv_total'].mean()
# Create the traces to plot
trace = go.Scatter(
name='Data',
x=df.index,
y=df,
mode='markers'
)
data = [trace]
    # Layout
    # Grey rectangles shade three two-day spans (the weekends of July 2019)
    shaded_spans = [
        ('2019-07-13 00:00:00+02:00', '2019-07-15 00:00:00+02:00'),
        ('2019-07-20 00:00:00+02:00', '2019-07-22 00:00:00+02:00'),
        ('2019-07-27 00:00:00+02:00', '2019-07-29 00:00:00+02:00'),
    ]
    opacity = 0.2
    layout = go.Layout(
        title='FlockLab average link quality',
        yaxis={'title': 'Average link quality [# strobes received]'},
        shapes=[
            {
                'type': 'rect',
                # x-reference is assigned to the x-values
                'xref': 'x',
                # y-reference is assigned to the plot paper [0,1]
                'yref': 'paper',
                'x0': x0,
                'y0': 0,
                'x1': x1,
                'y1': 1,
                'fillcolor': '#d3d3d3',
                'opacity': opacity,
                'line': {'width': 0},
            }
            for (x0, x1) in shaded_spans
        ]
    )
# Generate the figure
figure=go.Figure(data=data,layout=layout)
if not plot_title == '':
figure.layout.update(title=plot_title)
if len(plot_size):
figure.layout.update(
width=plot_size[0],
height=plot_size[1])
iplot(figure)
return df
active_threshold = 50
data_file = '2019-07_FlockLab_dpp-cc430.csv'
# Uncomment to generate the plot used in the abstract
# plot_title=' '
# df = plot_link_quality_series(data_file, active_threshold, plot_title=plot_title, plot_size=[500,400])
plot_title = 'DPP-cc430 mote - FlockLab - CH5 - 0dBm'
df = plot_link_quality_series(data_file, active_threshold, plot_title=plot_title)
active_threshold = 50
data_file = '2019-07_FlockLab_sky.csv'
# Uncomment to generate the plot used in the abstract
# plot_title=' '
# df = plot_link_quality_series(data_file, active_threshold, plot_title=plot_title, plot_size=[500,400])
plot_title = 'Sky mote - FlockLab - CH26 - 0dBm'
df = plot_link_quality_series(data_file, active_threshold, plot_title=plot_title)