Notebook

The function 'read_dataframe' takes all the heave displacement data for a month and uses Welch's method to compute the Power Spectral Density.

A comparison between the W@ves21 spectra output and Welch's method is shown at the bottom of the Notebook using the read_raw function to read a raw directly as opposed to using the month displacement files created by Hebtools

Details on the function for Welch's method in the Scipy Signals package. Functions defaults to segment size of 256 and overlap of 128.

Using the signal_error column from the dataframe we can filter the data supplied to Welch's method, this is useful in scenarios where low battery will have a significant effect on the spectra produced.

In [91]:

%matplotlib inline

import pandas as pd
import scipy.signal as sig
import scipy.stats as stats
import matplotlib.pyplot as plt
import os
import numpy as np
from datetime import datetime

def iterate_months(path, title=''):
    """ Changes to the specificed path assumed to be a buoy year directory and loads the displacement DataFrame for each month
        plotting the spectra for each method using Welch's method
    """
    print "path", path
    os.chdir(path)
    months_data = os.listdir('.')
    max_psd = {}
    for month_dir in months:
        month_path = path + month_dir
        try:
            welch_df_path = month_path + '/month_welch_df'
            if os.path.isfile(welch_df_path):
                welch_df = pd.read_pickle(welch_df_path)
            else:
                dataframe = read_dataframe(month_path)
                os.chdir(month_path)
                welch_df = create_welch_df(dataframe)
                welch_df.to_pickle(welch_df_path)
            plt, data, max_psd = calc_welch(welch_df, psd_dic = max_psd, save = True, line_name = month_dir )
        except:
            print "No Data", month_path
        os.chdir(path)
    plt.title(title) 
    return max_psd

def create_displacements_df(path):
    displacements_df = read_dataframe(path)
    try:
        os.chdir(path)
    except:
        print "No directory", path
    return displacements_df

def create_welch_df(dataframe):
    """Takes a single column displacement DataFrame in metres"""
    x,y = sig.welch(dataframe, 1.28, noverlap=None)
    dic = {'x':x,'y':y}
    welch_df = pd.DataFrame(dic)
    return welch_df

def calc_welch(welch_df, psd_dic = {}, save = False, line_name = ''):
    """ Given a dataframe will run welch's methods on the values, printing out the maxmimum power spectral density and the 
        frequency it occurs at, plots the output and returns the data
    """
    psd_dic[line_name] = [welch_df.x.max(), welch_df.y.ix[welch_df.y==welch_df.y.max()]]
    plt.plot(welch_df.x,welch_df.y, label=line_name)
    plt.ylabel('S(f) m^2/Hz')
    plt.xlabel('Frequency (Hz)')
    plt.legend()
    return plt, welch_df, psd_dic
        
        
def read_dataframe(path, start = None, end = None, filter_signal_error=True):
    """ Given a path to a folder containing a DataFrame file called raw_plus_std created by hebtools returns the a subset of the 
        heave column converted from centimetres to metres """
    try:
        displacement_df = pd.read_pickle(path + '/' + 'raw_plus_std')
        if filter_signal_error:
            displacement_df = displacement_df[displacement_df.signal_error == 0]            
        return displacement_df.heave[start:end]/100
    except:
        return pd.Series()

def read_raw(raw_file_path, seg_overlap = None, filter = False):
    """ Given a path to a raw file reads it using Pandas and returns a DataFrame object optionally filtered. """
    raw_df = pd.read_csv(raw_file_path, names=['signal_quality', 'heave', 'north', 'west'])
    if filter:
        raw_df = raw_df[raw_df.signal_quality<1]
    return raw_df

def plot_raw_file(raw_file_path, seg_overlap = None, filter = False, column = 'heave'):
    """ Takes path to a raw file as input with option to plot filter/unfiltered or any of the directions returns output of
        calc_welch the plt object and data in chart.
    """
    raw_df = read_raw(raw_file_path, seg_overlap, filter)
    print raw_df.describe()
    print "Skewness", stats.skew(raw_df[column])
    print "Kurtosis", stats.kurtosis(raw_df[column])
    welch_df = create_welch_df(raw_df[column]/100)
    return calc_welch(welch_df, line_name = raw_file_path + ' Filter ' + str(filter) )

if os.name == 'posix':
    path_prefix = '/media/Data/Datawell/'
    c_prefix = '/media/System/'
else:
    path_prefix = 'D:/Datawell/'
    c_prefix = 'C:/'

months = ['January','February','March','April','May','June','July','August','September','October','November','December']
buoys = ['Siadar_HebMarine1', 'Bragar_HebMarine2', 'Roag_Wavegen']
years = [2011, 2012, 2013]

Monthly spectra ( filtered )¶

In [84]:

def iterate_buoys_months(path_prefix, buoys, year):
    title_suffix = ' Buoy monthly Power Spectral Density '
    for buoy in buoys:
        fig = plt.figure(figsize=(20,10), dpi=100)
        year_path = path_prefix + buoy + '/' + year + '/'
        title = buoy + title_suffix + year
        max_psd = iterate_months(year_path, title)

iterate_buoys_months(path_prefix, buoys, '2012')        

path /media/Data/Datawell/Siadar_HebMarine1/2012/
path /media/Data/Datawell/Bragar_HebMarine2/2012/
path /media/Data/Datawell/Roag_Wavegen/2012/
No Data /media/Data/Datawell/Roag_Wavegen/2012/October
No Data /media/Data/Datawell/Roag_Wavegen/2012/November
No Data /media/Data/Datawell/Roag_Wavegen/2012/December

In [87]:

def iterate_buoys_one_month(buoys, year, month):
    for buoy in buoys:
        try:
            path = path_prefix + buoy + '/' + year + '/' + month
            os.chdir(path)
            #dataframe = read_dataframe(path)
            welch_df = pd.read_pickle('month_welch_df')
            #welch_df = create_welch_df(dataframe)
            plt, january_data, max_psd = calc_welch(welch_df, line_name = path)
        except:
            print "No data"
            
for month in months:
    fig = plt.figure(figsize=(20,10), dpi=100)
    iterate_buoys_one_month(buoys, '2012', month)

No data
No data
No data

Half hourly spectra computed from raw files¶

In [93]:

fig = plt.figure(figsize=(20,10), dpi=100)
plt, data, psd_dic = plot_raw_file(path_prefix + 'Bragar_HebMarine2/2012/February/Bragar_HebMarine2}2012-02-01T01h00Z.raw')
plt.title('First half hour of February 2012 Bragar buoy')

       signal_quality        heave       north         west
count            2304  2304.000000  2304.00000  2304.000000
mean                0    -0.035590     0.02691     0.013455
std                 0    46.150376    35.08469    40.543007
min                 0  -174.000000  -117.00000  -161.000000
25%                 0   -31.000000   -23.00000   -25.000000
50%                 0    -1.000000     0.00000     1.000000
75%                 0    30.000000    22.00000    27.000000
max                 0   190.000000   144.00000   145.000000
Skewness 0.0848958501944
Kurtosis 0.238120488036

Out[93]:

<matplotlib.text.Text at 0x7f0c292809d0>

Comparing with W@ves21 output¶

In [4]:

from IPython.display import Image
Image(filename='D:/image.png')

Out[4]:

4th February Storm¶

In [96]:

fig = plt.figure(figsize=(20,10), dpi=100)
plt, heave_spectra_unfiltered_2013_02_04_13_00, psd_dic = plot_raw_file(path_prefix + 'Bragar_HebMarine2/2013/February/Bragar_HebMarine2}2013-02-04T13h00Z.raw', filter = False)
plt, data_filtered_02_04_13_00, psd_dic = plot_raw_file(path_prefix + 'Bragar_HebMarine2/2013/February/Bragar_HebMarine2}2013-02-04T13h00Z.raw', filter = True)
plt.title('Comparing filtered and unfiltered spectra at the peak of the 4th Feb 2013 Storm')
plt.legend()

       signal_quality        heave        north         west
count     2381.000000  2381.000000  2381.000000  2381.000000
mean         0.000420    -0.297774    -1.735405    -1.172617
std          0.020494   351.459085   287.602407   491.008101
min          0.000000 -1114.000000  -957.000000 -1766.000000
25%          0.000000  -246.000000  -189.000000  -340.000000
50%          0.000000   -13.000000     5.000000     7.000000
75%          0.000000   230.000000   191.000000   360.000000
max          1.000000  1307.000000   959.000000  1684.000000
Skewness 0.256391209262
Kurtosis 0.0473473541811
       signal_quality        heave        north         west
count            2380  2380.000000  2380.000000  2380.000000
mean                0    -0.282773    -1.669328    -1.009664
std                 0   351.532181   287.644768   491.046887
min                 0 -1114.000000  -957.000000 -1766.000000
25%                 0  -246.250000  -189.000000  -339.250000
50%                 0   -12.500000     5.000000     7.500000
75%                 0   230.000000   191.250000   360.000000
max                 0  1307.000000   959.000000  1684.000000
Skewness 0.256211424877
Kurtosis 0.0460501210149

Out[96]:

<matplotlib.legend.Legend at 0x7f0c293a70d0>

In [74]:

# Find the raw file with the worst Signal Quality in February 2013 to see the effect of filtering
os.chdir(path_prefix + 'Bragar_HebMarine2/2013/February/')
raw_displacements = pd.read_pickle('raw_plus_std')
grouped_raw_displacements = raw_displacements.groupby('file_name')
sig_qual_files_sum = grouped_raw_displacements.sig_qual.sum()
sig_qual_files_sum[sig_qual_files_sum.values == sig_qual_files_sum.max()]

Out[74]:

file_name
Bragar_HebMarine2}2013-02-04T00h30Z.raw    743
Name: sig_qual, dtype: int32

In [97]:

fig = plt.figure(figsize=(20,10), dpi=100)
plt, filtered_2013_02_04_00_30, psd_filtered = plot_raw_file(path_prefix + 'Bragar_HebMarine2/2013/February/Bragar_HebMarine2}2013-02-04T00h30Z.raw', filter = True)
plt, unfiltered_2013_02_04_00_30, psd_unfiltered = plot_raw_file(path_prefix + 'Bragar_HebMarine2/2013/February/Bragar_HebMarine2}2013-02-04T00h30Z.raw', filter = False)
plt.title('File with worst Signal Quality February 2013 Bragar buoy')

       signal_quality        heave        north         west
count            2120  2120.000000  2120.000000  2120.000000
mean                0     0.426415    -0.175000    -0.640094
std                 0   174.139625   118.291754   185.031924
min                 0  -500.000000  -381.000000  -950.000000
25%                 0  -120.250000   -74.250000  -115.000000
50%                 0     0.000000     8.000000    -1.000000
75%                 0   111.000000    77.000000   120.000000
max                 0   779.000000   385.000000   835.000000
Skewness 0.13114386235
Kurtosis 0.298591077683
       signal_quality        heave        north         west
count     2303.000000  2303.000000  2303.000000  2303.000000
mean         0.322623    -0.060790    -0.102041    -1.276162
std          1.202967   179.074745   132.482091   203.005964
min          0.000000  -859.000000 -1079.000000 -1580.000000
25%          0.000000  -121.000000   -74.000000  -114.000000
50%          0.000000     0.000000     8.000000     0.000000
75%          0.000000   111.000000    79.000000   121.500000
max          6.000000  1401.000000  1330.000000  1632.000000
Skewness 0.253086326178
Kurtosis 1.97276217782

Out[97]:

<matplotlib.text.Text at 0x7f0c26f95b50>

In [100]:

# There is a buoy computed spectra dated 2013/02/04 17:33 this computes a similar spectra for the same 
# period from the DataFrame produced by hebtools
path = 'Bragar_HebMarine2/2013/February'
os.chdir(path_prefix + path)
feb_raw = pd.read_pickle('raw_plus_std')
subset = feb_raw[datetime(2013,02,03,23,47,33):datetime(2013,02,04,00,17,33)]
fig = plt.figure(figsize=(20,10), dpi=100)
welch_df = create_welch_df(subset.heave/100)
plt, data, psd = calc_welch(welch_df, line_name = path)
plt.title("Dataframe computed spectra 3rd/4th Feb 23:57-00:17 to match Buoy spectra")

Out[100]:

<matplotlib.text.Text at 0x7f0c292396d0>

Comparison with W@ves21 Spectra output¶

In [29]:

# Example W@ves21 Spectra plot
from IPython.display import Image
Image(filename='D:\Profiles\le12jm\My Documents\My Pictures\plot_02_04_00_30.png')

Out[29]:

In [101]:

def dat_to_x_y_list(data_list_strings):
    """ Function which takes the strings from an open dat file and outputs a nested list of x and y values for plotting
        dat files can be output by right clicking on spectra plot in W@ves21 and selecting 'Save Chart Data' and changing
        the default file from OC to dat """
    data = ' '.join(data_list_strings).split(' ')
    data_values = []
    values = []
    for i in data:
        if i == 'Y':
            data_values.append(values)
            values = []
        if '.' in i:
            values.append(i)
    data_values.append(values)
    return data_values

In [104]:

# Comparing a Datawell buoy and pc spectra taken over a similar period ( PC 00:00-00:30, Buoy 23:47-00:17 )
# Also comparing with a spectra produced from a hebtools dataframe for the period of the buoy spectra
os.chdir(path_prefix + "Bragar_HebMarine2/2013/February/")
pc_spectra = dat_to_x_y_list(open('20140904110659.dat'))
buoy_spectra = dat_to_x_y_list(open('2013_02_04_00_47_33_buoy_spectra.dat'))
fig = plt.figure(figsize=(20,10), dpi=100)
plt.plot(pc_spectra[0],pc_spectra[1], label='PC Spectra 00:30:00')
plt.ylabel('S(f) m^2/Hz')
plt.xlabel('Frequency (Hz)')
plt.title('Plot of Spectra 4th February 2013 - 00:30')
plt.plot(buoy_spectra[0],buoy_spectra[1], label='Buoy Spectra 00:17:33')
welch_df = create_welch_df(subset.heave/100)
plt, data, psd = calc_welch(welch_df, line_name = 'Dataframe Spectra 23:47-00:17')
plt.legend()

Out[104]:

<matplotlib.legend.Legend at 0x7f0c290dab10>

In [106]:

fig = plt.figure(figsize=(20,10), dpi=100)
plt, west_filtered_2013_02_04_00_30, psd = plot_raw_file(path_prefix + 'Bragar_HebMarine2/2013/February/Bragar_HebMarine2}2013-02-04T00h30Z.raw', filter = True, column='west')
plt.title('West Displacement - Peak of Storm February 2013 Bragar buoy')

       signal_quality        heave        north         west
count            2120  2120.000000  2120.000000  2120.000000
mean                0     0.426415    -0.175000    -0.640094
std                 0   174.139625   118.291754   185.031924
min                 0  -500.000000  -381.000000  -950.000000
25%                 0  -120.250000   -74.250000  -115.000000
50%                 0     0.000000     8.000000    -1.000000
75%                 0   111.000000    77.000000   120.000000
max                 0   779.000000   385.000000   835.000000
Skewness -0.135171532284
Kurtosis 1.02151202418

Out[106]:

<matplotlib.text.Text at 0x7f0c292c0210>

In [110]:

fig = plt.figure(figsize=(20,10), dpi=100)
plt.plot(filtered_2013_02_04_00_30.x, filtered_2013_02_04_00_30.y, label = "Dataframe Filtered")
plt.plot(unfiltered_2013_02_04_00_30.x, unfiltered_2013_02_04_00_30.y, label = "Dataframe Unfiltered")
plt.plot(pc_spectra[0], pc_spectra[1], label = "Datawell PC Spectra")
plt.plot(west_filtered_2013_02_04_00_30.x, west_filtered_2013_02_04_00_30.y, label = "Dataframe West Displacement Filtered")
plt.title('Spectra Comparison Bragar February 4th 2014 00:00-00:30')
plt.legend()

Out[110]:

<matplotlib.legend.Legend at 0x7f0c27188650>

Why is there a significant difference between the Max PSD for Datawell and the Spectra produced from raw displacements with Welch's method. Perhaps because there are twice as many points ( divisions ) for the Welch's method produced data. It doesn't seem possible to lower the number of frequencies that Welch's method is output to.

Statistics ( Skewness and Kurtosis )¶

In [38]:

from IPython.display import Image
Image(filename='D:\Profiles\le12jm\My Documents\My Pictures\wave_stats_02_04_00_30.png')

Out[38]:

The W@ves21 spectra has a far more pronounced peak. Than either the filtered or nonfiltered spectra from Welch's method. This could be to do with the filtering value used on signal_quality but more likely to do with the output bins. A upper filter bound of 1 on signal quality gives a good match for the statistics values in the above table.

Skew and Kurtosis values in wave statistics are heave, north and west are easily calculable in Numpy. The skew and kurtosis in the spt files are per frequency. It is not clear how to get the directional spectra from the displacements. Although the spectra produced from putting the heave into Welch's method gives a pretty good match to the one produced by the SPT. I can't see how you could decompose from the spectral plot to direction because only vertical displacement goes into it. How does the buoy itself/RFBuoy/W@ves21 take care of North and West into spectra, do those directions account for the difference in Peaks seen in the Bragar-02-04-00-30 file plotted above?

Plotting North direction displacement Spectra¶

In [112]:

fig = plt.figure(figsize=(20,10), dpi=100)
plt, data, psd = plot_raw_file(path_prefix + 'Bragar_HebMarine2/2013/February/Bragar_HebMarine2}2013-02-04T00h30Z.raw', filter = True, column='north')
plt.title('North Displacement - Peak of Storm February 2013 Bragar buoy')

       signal_quality        heave        north         west
count            2120  2120.000000  2120.000000  2120.000000
mean                0     0.426415    -0.175000    -0.640094
std                 0   174.139625   118.291754   185.031924
min                 0  -500.000000  -381.000000  -950.000000
25%                 0  -120.250000   -74.250000  -115.000000
50%                 0     0.000000     8.000000    -1.000000
75%                 0   111.000000    77.000000   120.000000
max                 0   779.000000   385.000000   835.000000
Skewness -0.172617192558
Kurtosis 0.117161959229

Out[112]:

<matplotlib.text.Text at 0x7f0c293d2990>

In [44]:

# Percentage of records returned with an upper filter bound of 1.

2120/2303.0

Out[44]:

0.9205384281372123

In [76]:

from IPython.display import Image
Image(filename='D:\Profiles\le12jm\My Documents\My Pictures\Spectra_Summary_00_30_W@ves21.png')

Out[76]:

Could the information about Power Spectral Density be obtained straight from the displacements? No, a mean period could be obtained or some form of weighted period relative to wave height, which is essentially what Welch's method produces averaged.

In [112]:

If we want to use pre calculated welch dataframes the function calc_welch needs to be split up, the pre calculated ones are only for months. There needs to be a distinction in the call whether it is a full month or subset.

In [ ]: