The function 'read_dataframe' takes all the heave displacement data for a month and uses Welch's method to compute the Power Spectral Density.
A comparison between the W@ves21 spectra output and Welch's method is shown at the bottom of the Notebook using the read_raw function to read a raw directly as opposed to using the month displacement files created by Hebtools
Details on the function for Welch's method in the Scipy Signals package. Functions defaults to segment size of 256 and overlap of 128.
Using the signal_error column from the dataframe we can filter the data supplied to Welch's method, this is useful in scenarios where low battery will have a significant effect on the spectra produced.
%matplotlib inline
import pandas as pd
import scipy.signal as sig
import scipy.stats as stats
import matplotlib.pyplot as plt
import os
import numpy as np
from datetime import datetime
def iterate_months(path, title=''):
""" Changes to the specificed path assumed to be a buoy year directory and loads the displacement DataFrame for each month
plotting the spectra for each method using Welch's method
"""
print "path", path
os.chdir(path)
months_data = os.listdir('.')
max_psd = {}
for month_dir in months:
month_path = path + month_dir
try:
welch_df_path = month_path + '/month_welch_df'
if os.path.isfile(welch_df_path):
welch_df = pd.read_pickle(welch_df_path)
else:
dataframe = read_dataframe(month_path)
os.chdir(month_path)
welch_df = create_welch_df(dataframe)
welch_df.to_pickle(welch_df_path)
plt, data, max_psd = calc_welch(welch_df, psd_dic = max_psd, save = True, line_name = month_dir )
except:
print "No Data", month_path
os.chdir(path)
plt.title(title)
return max_psd
def create_displacements_df(path):
displacements_df = read_dataframe(path)
try:
os.chdir(path)
except:
print "No directory", path
return displacements_df
def create_welch_df(dataframe):
"""Takes a single column displacement DataFrame in metres"""
x,y = sig.welch(dataframe, 1.28, noverlap=None)
dic = {'x':x,'y':y}
welch_df = pd.DataFrame(dic)
return welch_df
def calc_welch(welch_df, psd_dic = {}, save = False, line_name = ''):
""" Given a dataframe will run welch's methods on the values, printing out the maxmimum power spectral density and the
frequency it occurs at, plots the output and returns the data
"""
psd_dic[line_name] = [welch_df.x.max(), welch_df.y.ix[welch_df.y==welch_df.y.max()]]
plt.plot(welch_df.x,welch_df.y, label=line_name)
plt.ylabel('S(f) m^2/Hz')
plt.xlabel('Frequency (Hz)')
plt.legend()
return plt, welch_df, psd_dic
def read_dataframe(path, start = None, end = None, filter_signal_error=True):
""" Given a path to a folder containing a DataFrame file called raw_plus_std created by hebtools returns the a subset of the
heave column converted from centimetres to metres """
try:
displacement_df = pd.read_pickle(path + '/' + 'raw_plus_std')
if filter_signal_error:
displacement_df = displacement_df[displacement_df.signal_error == 0]
return displacement_df.heave[start:end]/100
except:
return pd.Series()
def read_raw(raw_file_path, seg_overlap = None, filter = False):
""" Given a path to a raw file reads it using Pandas and returns a DataFrame object optionally filtered. """
raw_df = pd.read_csv(raw_file_path, names=['signal_quality', 'heave', 'north', 'west'])
if filter:
raw_df = raw_df[raw_df.signal_quality<1]
return raw_df
def plot_raw_file(raw_file_path, seg_overlap = None, filter = False, column = 'heave'):
""" Takes path to a raw file as input with option to plot filter/unfiltered or any of the directions returns output of
calc_welch the plt object and data in chart.
"""
raw_df = read_raw(raw_file_path, seg_overlap, filter)
print raw_df.describe()
print "Skewness", stats.skew(raw_df[column])
print "Kurtosis", stats.kurtosis(raw_df[column])
welch_df = create_welch_df(raw_df[column]/100)
return calc_welch(welch_df, line_name = raw_file_path + ' Filter ' + str(filter) )
if os.name == 'posix':
path_prefix = '/media/Data/Datawell/'
c_prefix = '/media/System/'
else:
path_prefix = 'D:/Datawell/'
c_prefix = 'C:/'
months = ['January','February','March','April','May','June','July','August','September','October','November','December']
buoys = ['Siadar_HebMarine1', 'Bragar_HebMarine2', 'Roag_Wavegen']
years = [2011, 2012, 2013]
def iterate_buoys_months(path_prefix, buoys, year):
title_suffix = ' Buoy monthly Power Spectral Density '
for buoy in buoys:
fig = plt.figure(figsize=(20,10), dpi=100)
year_path = path_prefix + buoy + '/' + year + '/'
title = buoy + title_suffix + year
max_psd = iterate_months(year_path, title)
iterate_buoys_months(path_prefix, buoys, '2012')
path /media/Data/Datawell/Siadar_HebMarine1/2012/ path /media/Data/Datawell/Bragar_HebMarine2/2012/ path /media/Data/Datawell/Roag_Wavegen/2012/ No Data /media/Data/Datawell/Roag_Wavegen/2012/October No Data /media/Data/Datawell/Roag_Wavegen/2012/November No Data /media/Data/Datawell/Roag_Wavegen/2012/December
def iterate_buoys_one_month(buoys, year, month):
for buoy in buoys:
try:
path = path_prefix + buoy + '/' + year + '/' + month
os.chdir(path)
#dataframe = read_dataframe(path)
welch_df = pd.read_pickle('month_welch_df')
#welch_df = create_welch_df(dataframe)
plt, january_data, max_psd = calc_welch(welch_df, line_name = path)
except:
print "No data"
for month in months:
fig = plt.figure(figsize=(20,10), dpi=100)
iterate_buoys_one_month(buoys, '2012', month)
No data No data No data
fig = plt.figure(figsize=(20,10), dpi=100)
plt, data, psd_dic = plot_raw_file(path_prefix + 'Bragar_HebMarine2/2012/February/Bragar_HebMarine2}2012-02-01T01h00Z.raw')
plt.title('First half hour of February 2012 Bragar buoy')
signal_quality heave north west count 2304 2304.000000 2304.00000 2304.000000 mean 0 -0.035590 0.02691 0.013455 std 0 46.150376 35.08469 40.543007 min 0 -174.000000 -117.00000 -161.000000 25% 0 -31.000000 -23.00000 -25.000000 50% 0 -1.000000 0.00000 1.000000 75% 0 30.000000 22.00000 27.000000 max 0 190.000000 144.00000 145.000000 Skewness 0.0848958501944 Kurtosis 0.238120488036
<matplotlib.text.Text at 0x7f0c292809d0>
from IPython.display import Image
Image(filename='D:/image.png')
fig = plt.figure(figsize=(20,10), dpi=100)
plt, heave_spectra_unfiltered_2013_02_04_13_00, psd_dic = plot_raw_file(path_prefix + 'Bragar_HebMarine2/2013/February/Bragar_HebMarine2}2013-02-04T13h00Z.raw', filter = False)
plt, data_filtered_02_04_13_00, psd_dic = plot_raw_file(path_prefix + 'Bragar_HebMarine2/2013/February/Bragar_HebMarine2}2013-02-04T13h00Z.raw', filter = True)
plt.title('Comparing filtered and unfiltered spectra at the peak of the 4th Feb 2013 Storm')
plt.legend()
signal_quality heave north west count 2381.000000 2381.000000 2381.000000 2381.000000 mean 0.000420 -0.297774 -1.735405 -1.172617 std 0.020494 351.459085 287.602407 491.008101 min 0.000000 -1114.000000 -957.000000 -1766.000000 25% 0.000000 -246.000000 -189.000000 -340.000000 50% 0.000000 -13.000000 5.000000 7.000000 75% 0.000000 230.000000 191.000000 360.000000 max 1.000000 1307.000000 959.000000 1684.000000 Skewness 0.256391209262 Kurtosis 0.0473473541811 signal_quality heave north west count 2380 2380.000000 2380.000000 2380.000000 mean 0 -0.282773 -1.669328 -1.009664 std 0 351.532181 287.644768 491.046887 min 0 -1114.000000 -957.000000 -1766.000000 25% 0 -246.250000 -189.000000 -339.250000 50% 0 -12.500000 5.000000 7.500000 75% 0 230.000000 191.250000 360.000000 max 0 1307.000000 959.000000 1684.000000 Skewness 0.256211424877 Kurtosis 0.0460501210149
<matplotlib.legend.Legend at 0x7f0c293a70d0>
# Find the raw file with the worst Signal Quality in February 2013 to see the effect of filtering
os.chdir(path_prefix + 'Bragar_HebMarine2/2013/February/')
raw_displacements = pd.read_pickle('raw_plus_std')
grouped_raw_displacements = raw_displacements.groupby('file_name')
sig_qual_files_sum = grouped_raw_displacements.sig_qual.sum()
sig_qual_files_sum[sig_qual_files_sum.values == sig_qual_files_sum.max()]
file_name Bragar_HebMarine2}2013-02-04T00h30Z.raw 743 Name: sig_qual, dtype: int32
fig = plt.figure(figsize=(20,10), dpi=100)
plt, filtered_2013_02_04_00_30, psd_filtered = plot_raw_file(path_prefix + 'Bragar_HebMarine2/2013/February/Bragar_HebMarine2}2013-02-04T00h30Z.raw', filter = True)
plt, unfiltered_2013_02_04_00_30, psd_unfiltered = plot_raw_file(path_prefix + 'Bragar_HebMarine2/2013/February/Bragar_HebMarine2}2013-02-04T00h30Z.raw', filter = False)
plt.title('File with worst Signal Quality February 2013 Bragar buoy')
signal_quality heave north west count 2120 2120.000000 2120.000000 2120.000000 mean 0 0.426415 -0.175000 -0.640094 std 0 174.139625 118.291754 185.031924 min 0 -500.000000 -381.000000 -950.000000 25% 0 -120.250000 -74.250000 -115.000000 50% 0 0.000000 8.000000 -1.000000 75% 0 111.000000 77.000000 120.000000 max 0 779.000000 385.000000 835.000000 Skewness 0.13114386235 Kurtosis 0.298591077683 signal_quality heave north west count 2303.000000 2303.000000 2303.000000 2303.000000 mean 0.322623 -0.060790 -0.102041 -1.276162 std 1.202967 179.074745 132.482091 203.005964 min 0.000000 -859.000000 -1079.000000 -1580.000000 25% 0.000000 -121.000000 -74.000000 -114.000000 50% 0.000000 0.000000 8.000000 0.000000 75% 0.000000 111.000000 79.000000 121.500000 max 6.000000 1401.000000 1330.000000 1632.000000 Skewness 0.253086326178 Kurtosis 1.97276217782
<matplotlib.text.Text at 0x7f0c26f95b50>
# There is a buoy computed spectra dated 2013/02/04 17:33 this computes a similar spectra for the same
# period from the DataFrame produced by hebtools
path = 'Bragar_HebMarine2/2013/February'
os.chdir(path_prefix + path)
feb_raw = pd.read_pickle('raw_plus_std')
subset = feb_raw[datetime(2013,02,03,23,47,33):datetime(2013,02,04,00,17,33)]
fig = plt.figure(figsize=(20,10), dpi=100)
welch_df = create_welch_df(subset.heave/100)
plt, data, psd = calc_welch(welch_df, line_name = path)
plt.title("Dataframe computed spectra 3rd/4th Feb 23:57-00:17 to match Buoy spectra")
<matplotlib.text.Text at 0x7f0c292396d0>
# Example W@ves21 Spectra plot
from IPython.display import Image
Image(filename='D:\Profiles\le12jm\My Documents\My Pictures\plot_02_04_00_30.png')
def dat_to_x_y_list(data_list_strings):
""" Function which takes the strings from an open dat file and outputs a nested list of x and y values for plotting
dat files can be output by right clicking on spectra plot in W@ves21 and selecting 'Save Chart Data' and changing
the default file from OC to dat """
data = ' '.join(data_list_strings).split(' ')
data_values = []
values = []
for i in data:
if i == 'Y':
data_values.append(values)
values = []
if '.' in i:
values.append(i)
data_values.append(values)
return data_values
# Comparing a Datawell buoy and pc spectra taken over a similar period ( PC 00:00-00:30, Buoy 23:47-00:17 )
# Also comparing with a spectra produced from a hebtools dataframe for the period of the buoy spectra
os.chdir(path_prefix + "Bragar_HebMarine2/2013/February/")
pc_spectra = dat_to_x_y_list(open('20140904110659.dat'))
buoy_spectra = dat_to_x_y_list(open('2013_02_04_00_47_33_buoy_spectra.dat'))
fig = plt.figure(figsize=(20,10), dpi=100)
plt.plot(pc_spectra[0],pc_spectra[1], label='PC Spectra 00:30:00')
plt.ylabel('S(f) m^2/Hz')
plt.xlabel('Frequency (Hz)')
plt.title('Plot of Spectra 4th February 2013 - 00:30')
plt.plot(buoy_spectra[0],buoy_spectra[1], label='Buoy Spectra 00:17:33')
welch_df = create_welch_df(subset.heave/100)
plt, data, psd = calc_welch(welch_df, line_name = 'Dataframe Spectra 23:47-00:17')
plt.legend()
<matplotlib.legend.Legend at 0x7f0c290dab10>
fig = plt.figure(figsize=(20,10), dpi=100)
plt, west_filtered_2013_02_04_00_30, psd = plot_raw_file(path_prefix + 'Bragar_HebMarine2/2013/February/Bragar_HebMarine2}2013-02-04T00h30Z.raw', filter = True, column='west')
plt.title('West Displacement - Peak of Storm February 2013 Bragar buoy')
signal_quality heave north west count 2120 2120.000000 2120.000000 2120.000000 mean 0 0.426415 -0.175000 -0.640094 std 0 174.139625 118.291754 185.031924 min 0 -500.000000 -381.000000 -950.000000 25% 0 -120.250000 -74.250000 -115.000000 50% 0 0.000000 8.000000 -1.000000 75% 0 111.000000 77.000000 120.000000 max 0 779.000000 385.000000 835.000000 Skewness -0.135171532284 Kurtosis 1.02151202418
<matplotlib.text.Text at 0x7f0c292c0210>
fig = plt.figure(figsize=(20,10), dpi=100)
plt.plot(filtered_2013_02_04_00_30.x, filtered_2013_02_04_00_30.y, label = "Dataframe Filtered")
plt.plot(unfiltered_2013_02_04_00_30.x, unfiltered_2013_02_04_00_30.y, label = "Dataframe Unfiltered")
plt.plot(pc_spectra[0], pc_spectra[1], label = "Datawell PC Spectra")
plt.plot(west_filtered_2013_02_04_00_30.x, west_filtered_2013_02_04_00_30.y, label = "Dataframe West Displacement Filtered")
plt.title('Spectra Comparison Bragar February 4th 2014 00:00-00:30')
plt.legend()
<matplotlib.legend.Legend at 0x7f0c27188650>
Why is there a significant difference between the Max PSD for Datawell and the Spectra produced from raw displacements with Welch's method. Perhaps because there are twice as many points ( divisions ) for the Welch's method produced data. It doesn't seem possible to lower the number of frequencies that Welch's method is output to.
from IPython.display import Image
Image(filename='D:\Profiles\le12jm\My Documents\My Pictures\wave_stats_02_04_00_30.png')
The W@ves21 spectra has a far more pronounced peak. Than either the filtered or nonfiltered spectra from Welch's method. This could be to do with the filtering value used on signal_quality but more likely to do with the output bins. A upper filter bound of 1 on signal quality gives a good match for the statistics values in the above table.
Skew and Kurtosis values in wave statistics are heave, north and west are easily calculable in Numpy. The skew and kurtosis in the spt files are per frequency. It is not clear how to get the directional spectra from the displacements. Although the spectra produced from putting the heave into Welch's method gives a pretty good match to the one produced by the SPT. I can't see how you could decompose from the spectral plot to direction because only vertical displacement goes into it. How does the buoy itself/RFBuoy/W@ves21 take care of North and West into spectra, do those directions account for the difference in Peaks seen in the Bragar-02-04-00-30 file plotted above?
fig = plt.figure(figsize=(20,10), dpi=100)
plt, data, psd = plot_raw_file(path_prefix + 'Bragar_HebMarine2/2013/February/Bragar_HebMarine2}2013-02-04T00h30Z.raw', filter = True, column='north')
plt.title('North Displacement - Peak of Storm February 2013 Bragar buoy')
signal_quality heave north west count 2120 2120.000000 2120.000000 2120.000000 mean 0 0.426415 -0.175000 -0.640094 std 0 174.139625 118.291754 185.031924 min 0 -500.000000 -381.000000 -950.000000 25% 0 -120.250000 -74.250000 -115.000000 50% 0 0.000000 8.000000 -1.000000 75% 0 111.000000 77.000000 120.000000 max 0 779.000000 385.000000 835.000000 Skewness -0.172617192558 Kurtosis 0.117161959229
<matplotlib.text.Text at 0x7f0c293d2990>
# Percentage of records returned with an upper filter bound of 1.
2120/2303.0
0.9205384281372123
from IPython.display import Image
Image(filename='D:\Profiles\le12jm\My Documents\My Pictures\Spectra_Summary_00_30_W@ves21.png')
Could the information about Power Spectral Density be obtained straight from the displacements? No, a mean period could be obtained or some form of weighted period relative to wave height, which is essentially what Welch's method produces averaged.
If we want to use pre calculated welch dataframes the function calc_welch needs to be split up, the pre calculated ones are only for months. There needs to be a distinction in the call whether it is a full month or subset.