#!/usr/bin/env python # coding: utf-8 # In[1]: import pandas as pd import numpy as np from scipy import stats import matplotlib.pyplot as plt get_ipython().run_line_magic('matplotlib', 'inline') # In[2]: plt.rcParams['figure.dpi'] = 120 # ## Generate Some Data # # Create an artificial climate signal at mothly resolution, containing different periodic signals. # In[3]: x = np.arange(1820, 1980, 1/12) # cyclic variability annual = np.sin((x + 3/12) * 2 * np.pi) # annual c1 = np.sin((x - 7/12) * 2 * np.pi / 6) # 6-year c2 = np.sin((x + 2/12) * 2 * np.pi / 8) # 8-year # appearance and disappearance of multi-year cycles c1_onoff = stats.norm.pdf(x, 1860, 22) c2_onoff = stats.norm.pdf(x, 1930, 8) # normalise the on/off signals c1_onoff /= c1_onoff.max() c2_onoff /= c2_onoff.max() # add noise err = np.random.normal(0, 0.12, x.size) # background bkg = 0.8 + -3e-3 * (x - 1820) y = ( annual * 0.2 + # annual signal c1 * 0.18 * c1_onoff + # 6-year cycle c2 * 0.23 * c2_onoff + # 7-year cycle err + # error bkg # linear background ) fig, axs = plt.subplots(4, 1, figsize=[8, 8], sharex=True) anx, ccx, errx, cmx = axs anx.plot(x, annual * 0.2) ccx.plot(x, c1 * c1_onoff * 0.18) ccx.plot(x, c2 * c2_onoff * 0.23) errx.plot(x, err + bkg) cmx.plot(x, y) anx.set_ylabel('Annual') ccx.set_ylabel('Multi-Annual') errx.set_ylabel('Backround\n& Error') cmx.set_ylabel('Synthetic Signal') # # Torrence and Compo Wavelet Method # # Wavelet analysis allows the identification of periodic signals across the duration of a signal. It's key advantage over 'normal' fourier spectral analysis is that it changes the size of the data window depending on the frequency it's looking for, so that all frequencies are resolved with similar 'power' in the resulting spectrogram. # We've included [some slides](https://github.com/rses-datascience/DataSurgeries/raw/master/07_Wavelets/TSA%202007-7.pdf), which explain this better. # # - [Description of method](http://paos.colorado.edu/research/wavelets/). # - [Python Implementation of Method (PyCWT)](http://pycwt.readthedocs.io/en/latest/) # - Code below modified from [this tutorial](http://pycwt.readthedocs.io/en/latest/tutorial.html) # # In[4]: import pycwt as wavelet from pycwt.helpers import find # In[5]: # get data t = x # time dat = y # data # replace the above with references to your data to use this code with your data # some data characteristics dt = t[1] - t[0] # delta time (time step - must be the same for all data) t0 = t[0] # start time N = t.size # length of data # de-trend and normalise data p = np.polyfit(t - t0, dat, 1) # fit polynomial dat_notrend = dat - np.polyval(p, t - t0) # subtract polynomial std = dat_notrend.std() # calculate Standard deviation var = std ** 2 # calculate Variance dat_norm = dat_notrend / std # divide by standard deviation to normalized dataset # In[6]: # define wavelet parameters # try using different wavelets, and see what effect it has on the analysis. mother = wavelet.Morlet(6) s0 = 2 * dt # Starting scale, in this case 2 months dj = 1 / 12 # Twelve sub-octaves per octaves J = 7 / dj # Seven powers of two with dj sub-octaves alpha, _, _ = wavelet.ar1(dat) # Lag-1 autocorrelation for red noise # In[7]: # apply wavelet transform wave, scales, freqs, coi, fft, fftfreqs = wavelet.cwt(dat_norm, dt, dj, s0, J, mother) iwave = wavelet.icwt(wave, scales, dt, dj, mother) * std # In[8]: # calculate the normalised wavelet and Fourier spectra and associated scales power = (np.abs(wave)) ** 2 fft_power = np.abs(fft) ** 2 period = 1 / freqs # rectify power spectrum power /= scales[:, None] # In[9]: # calculate wavelet significance signif, fft_theor = wavelet.significance(1.0, dt, scales, 0, alpha, significance_level=0.95, wavelet=mother) sig95 = np.ones([1, N]) * signif[:, None] sig95 = power / sig95 # In[10]: # calculate global wavelet spectrum, and determine significance glbl_power = power.mean(axis=1) dof = N - scales # Correction for padding at edges glbl_signif, tmp = wavelet.significance(var, dt, scales, 1, alpha, significance_level=0.95, dof=dof, wavelet=mother) # In[11]: # calculate 2-8 year power spectrum and significance sel = find((period >= 2) & (period < 8)) Cdelta = mother.cdelta scale_avg = (scales * np.ones((N, 1))).transpose() scale_avg = power / scale_avg # As in Torrence and Compo (1998) equation 24 scale_avg = var * dj * dt / Cdelta * scale_avg[sel, :].sum(axis=0) scale_avg_signif, tmp = wavelet.significance(var, dt, scales, 2, alpha, significance_level=0.95, dof=[scales[sel[0]], scales[sel[-1]]], wavelet=mother) # ## Plot Results # In[12]: title = 'Wavelet Analysis' units = 'zscore' label = 'Sythetic Record' # Prepare the figure figprops = dict(figsize=(11, 8), dpi=72) fig = plt.figure(**figprops) # First sub-plot, the original time series anomaly and inverse wavelet # transform. ax = fig.add_axes([0.1, 0.75, 0.65, 0.2]) ax.plot(t, iwave, '-', linewidth=1, color=[0.5, 0.5, 0.5]) ax.plot(t, dat, 'k', linewidth=1) ax.set_title('a) {}'.format(title)) ax.set_ylabel(r'{} [{}]'.format(label, units)) # Second sub-plot, the normalized wavelet power spectrum and significance # level contour lines and cone of influece hatched area. Note that period # scale is logarithmic. bx = fig.add_axes([0.1, 0.37, 0.65, 0.28], sharex=ax) levels = [0.0625, 0.125, 0.25, 0.5, 1, 2, 4, 8, 16] bx.contourf(t, np.log2(period), np.log2(power), np.log2(levels), extend='both', cmap=plt.cm.Blues_r) extent = [t.min(), t.max(), 0, max(period)] bx.contour(t, np.log2(period), sig95, [-99, 1], colors='k', linewidths=2, extent=extent) bx.fill(np.concatenate([t, t[-1:] + dt, t[-1:] + dt, t[:1] - dt, t[:1] - dt]), np.concatenate([np.log2(coi), [1e-9], np.log2(period[-1:]), np.log2(period[-1:]), [1e-9]]), 'w', alpha=0.4, hatch='x') bx.set_title('b) {} Wavelet Power Spectrum ({})'.format(label, mother.name)) bx.set_ylabel('Period (years)') # Yticks = 2 ** np.arange(np.ceil(np.log2(period.min())), np.ceil(np.log2(period.max()))) bx.set_yticks(np.log2(Yticks)) bx.set_yticklabels(Yticks) # Third sub-plot, the global wavelet and Fourier power spectra and theoretical # noise spectra. Note that period scale is logarithmic. cx = fig.add_axes([0.77, 0.37, 0.2, 0.28], sharey=bx) cx.plot(glbl_signif, np.log2(period), 'k--', zorder=5) cx.plot(var * fft_theor, np.log2(period), '--', color='#cccccc') cx.plot(var * fft_power, np.log2(1./fftfreqs), '-', color='#cccccc', linewidth=1.) cx.plot(var * glbl_power, np.log2(period), 'k-', linewidth=1.5) cx.set_title('c) Global Wavelet Spectrum') cx.set_xlabel(r'Power [({})^2]'.format(units)) cx.set_xlim([0, np.percentile(glbl_power, 90) + var]) cx.set_ylim(np.log2([period.min(), period.max()])) cx.set_yticks(np.log2(Yticks)) cx.set_yticklabels(Yticks) plt.setp(cx.get_yticklabels(), visible=False) # Fourth sub-plot, the scale averaged wavelet spectrum. dx = fig.add_axes([0.1, 0.07, 0.65, 0.2], sharex=ax) dx.axhline(scale_avg_signif, color='k', linestyle='--', linewidth=1.) dx.plot(t, scale_avg, 'k-', linewidth=1.5) dx.set_title('d) {}--{} year scale-averaged power'.format(2, 8)) dx.set_xlabel('Time (year)') dx.set_ylabel(r'Average variance [{}]'.format(units)) ax.set_xlim([t.min(), t.max()]) fig.savefig('wavelets.pdf') # The 6- and 8-year components are clearly visible in the power spectrum, but they are not 'significant'. This is likely because the intensity of the 1-year cycle 'saturates' the analysis. # # We *know* these signals are real, because we generated them, which highlights potential limitations of the analysis. If using this, you'll need to think carefully about what 'significant' means.