The function 'read_dataframe' takes all the heave displacement data for a month and uses Welch's method to compute the Power Spectral Density.

A comparison between the [email protected] spectra output and Welch's method is shown at the bottom of the Notebook using the read_raw function to read a raw directly as opposed to using the month displacement files created by Hebtools

Details on the function for Welch's method in the Scipy Signals package. Functions defaults to segment size of 256 and overlap of 128.

Using the signal_error column from the dataframe we can filter the data supplied to Welch's method, this is useful in scenarios where low battery will have a significant effect on the spectra produced.

In [91]:
%matplotlib inline

import pandas as pd
import scipy.signal as sig
import scipy.stats as stats
import matplotlib.pyplot as plt
import os
import numpy as np
from datetime import datetime

def iterate_months(path, title=''):
    """ Changes to the specificed path assumed to be a buoy year directory and loads the displacement DataFrame for each month
        plotting the spectra for each method using Welch's method
    """
    print "path", path
    os.chdir(path)
    months_data = os.listdir('.')
    max_psd = {}
    for month_dir in months:
        month_path = path + month_dir
        try:
            welch_df_path = month_path + '/month_welch_df'
            if os.path.isfile(welch_df_path):
                welch_df = pd.read_pickle(welch_df_path)
            else:
                dataframe = read_dataframe(month_path)
                os.chdir(month_path)
                welch_df = create_welch_df(dataframe)
                welch_df.to_pickle(welch_df_path)
            plt, data, max_psd = calc_welch(welch_df, psd_dic = max_psd, save = True, line_name = month_dir )
        except:
            print "No Data", month_path
        os.chdir(path)
    plt.title(title) 
    return max_psd

def create_displacements_df(path):
    displacements_df = read_dataframe(path)
    try:
        os.chdir(path)
    except:
        print "No directory", path
    return displacements_df

def create_welch_df(dataframe):
    """Takes a single column displacement DataFrame in metres"""
    x,y = sig.welch(dataframe, 1.28, noverlap=None)
    dic = {'x':x,'y':y}
    welch_df = pd.DataFrame(dic)
    return welch_df

def calc_welch(welch_df, psd_dic = {}, save = False, line_name = ''):
    """ Given a dataframe will run welch's methods on the values, printing out the maxmimum power spectral density and the 
        frequency it occurs at, plots the output and returns the data
    """
    psd_dic[line_name] = [welch_df.x.max(), welch_df.y.ix[welch_df.y==welch_df.y.max()]]
    plt.plot(welch_df.x,welch_df.y, label=line_name)
    plt.ylabel('S(f) m^2/Hz')
    plt.xlabel('Frequency (Hz)')
    plt.legend()
    return plt, welch_df, psd_dic
        
        
def read_dataframe(path, start = None, end = None, filter_signal_error=True):
    """ Given a path to a folder containing a DataFrame file called raw_plus_std created by hebtools returns the a subset of the 
        heave column converted from centimetres to metres """
    try:
        displacement_df = pd.read_pickle(path + '/' + 'raw_plus_std')
        if filter_signal_error:
            displacement_df = displacement_df[displacement_df.signal_error == 0]            
        return displacement_df.heave[start:end]/100
    except:
        return pd.Series()

def read_raw(raw_file_path, seg_overlap = None, filter = False):
    """ Given a path to a raw file reads it using Pandas and returns a DataFrame object optionally filtered. """
    raw_df = pd.read_csv(raw_file_path, names=['signal_quality', 'heave', 'north', 'west'])
    if filter:
        raw_df = raw_df[raw_df.signal_quality<1]
    return raw_df

def plot_raw_file(raw_file_path, seg_overlap = None, filter = False, column = 'heave'):
    """ Takes path to a raw file as input with option to plot filter/unfiltered or any of the directions returns output of
        calc_welch the plt object and data in chart.
    """
    raw_df = read_raw(raw_file_path, seg_overlap, filter)
    print raw_df.describe()
    print "Skewness", stats.skew(raw_df[column])
    print "Kurtosis", stats.kurtosis(raw_df[column])
    welch_df = create_welch_df(raw_df[column]/100)
    return calc_welch(welch_df, line_name = raw_file_path + ' Filter ' + str(filter) )

if os.name == 'posix':
    path_prefix = '/media/Data/Datawell/'
    c_prefix = '/media/System/'
else:
    path_prefix = 'D:/Datawell/'
    c_prefix = 'C:/'

months = ['January','February','March','April','May','June','July','August','September','October','November','December']
buoys = ['Siadar_HebMarine1', 'Bragar_HebMarine2', 'Roag_Wavegen']
years = [2011, 2012, 2013]

Monthly spectra ( filtered )

In [84]:
def iterate_buoys_months(path_prefix, buoys, year):
    title_suffix = ' Buoy monthly Power Spectral Density '
    for buoy in buoys:
        fig = plt.figure(figsize=(20,10), dpi=100)
        year_path = path_prefix + buoy + '/' + year + '/'
        title = buoy + title_suffix + year
        max_psd = iterate_months(year_path, title)

iterate_buoys_months(path_prefix, buoys, '2012')        
path /media/Data/Datawell/Siadar_HebMarine1/2012/
path /media/Data/Datawell/Bragar_HebMarine2/2012/
path /media/Data/Datawell/Roag_Wavegen/2012/
No Data /media/Data/Datawell/Roag_Wavegen/2012/October
No Data /media/Data/Datawell/Roag_Wavegen/2012/November
No Data /media/Data/Datawell/Roag_Wavegen/2012/December
In [87]:
def iterate_buoys_one_month(buoys, year, month):
    for buoy in buoys:
        try:
            path = path_prefix + buoy + '/' + year + '/' + month
            os.chdir(path)
            #dataframe = read_dataframe(path)
            welch_df = pd.read_pickle('month_welch_df')
            #welch_df = create_welch_df(dataframe)
            plt, january_data, max_psd = calc_welch(welch_df, line_name = path)
        except:
            print "No data"
            
for month in months:
    fig = plt.figure(figsize=(20,10), dpi=100)
    iterate_buoys_one_month(buoys, '2012', month)
No data
No data
No data