#!/usr/bin/env python
# coding: utf-8
# # Palmer LTER Inorganic Analysis
# Antarctica's Western Peninsula is a rapidly changing ecosystem. To quantify these changes and prepare for the Global biogeochemical and ecological costs of these changes, we must look at long terms trends. Here is analysis of 15 years of Inorganic Nutrient data taken from Palmer LTER January cruises. The Cruise follows a regional grid along the Western Antarctica Peninsula (wAP) as follows:
# In[90]:
Image(filename='palmerGrid.jpg')
# The software in this notebook is intended to help standardize data to expedite analysis and high-quality research. Specifically, this iPython notebook transfers the Palmer Inorganic Nutrient dataset into a Pandas dataframe, cleans and standardizes measurements according to a simplified grid, provides annual depth profiles by station, and interpolates station data between and throughout years to get a glimpse of regional inorganic nutrient patterns. CTD data is also used to estimate Mixed Layer Depth (MLD) using a gradient and threshold method. The combined NO2 NO3 is integrated from the surface to the MLD for each station annually to quantify the nutrients removed from the water column due to Phytoplankton and Diatoms. From there, we can employ the stoichiometric ratio of marine biota-- known as the Redfield Ratio, to calculate how much CO2 is taken from the water column from this process called, Net Community Production (NCP).
#
# All files necessary to run this notebook can be found in the github repo, all data was downloaded from UCSD's Datazoo, additional information is available from Palmer LTER.
#
# This Notebook is intended to be downloaded and run on your personal machine OR viewed from Jupyter's nbviewer; because of this, the indexing links from the table of contents will not work on github!
#
# Last edited 01.06.16 by Leon Yin.
#
# Acknowledgements
#
# I want to thank Hugh Ducklow and Ryan Abernathey for their expert guidance.
# # Table of Contents
# I. Data Processing: How can we compare ship-board measurements?
# Standardizing the Database
# Saving the Clean Database
# II. Biogeochemistry: How do inorganic nutrients vary with depth and breadth?
# Interannual Nutrient Profiles
# Interannual Surface Interpolation Algorithm
# Average Surface Interpolation Algorithm
# Interannual Surface Interpolation NO3 + NO2
# Average Surface Interpolation NO3 + NO2
# III. Physics: Using CTD data to estimate the Mixed Layer Depth.
# Estimating Mixed Layer Depth
# Mixed Layer Depth coverage relative to Nutrient samples
# Interannual Mixed Layer Depth
# IV. Biogeochemistry: Using MLD and Interannual profiles to calculate Net Community Productivity.
# Calculating Net Community Production
# Comparing Redfield Ratios
# Discussion
# In[1]:
import os
import json
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.basemap import Basemap
import seaborn
import colorsys
from scipy.interpolate import griddata
from scipy.integrate import simps
from numpy import trapz
from IPython.display import Image
get_ipython().run_line_magic('matplotlib', 'inline')
# In[2]:
# Global variables
seaborn.set(font_scale=1.5)                # larger default fonts for all figures
plt.rcParams['figure.figsize'] = (15,9)    # default figure size in inches
skip = -999 # sentinel "missing value" code used in the dataset
mark = ['o','p','d','v','s'] # markers for scatterplots (cycled per sample date)
surfDepth = 10 #(m) depth of surface
maxDepth = 250 # (m) deepest depth shown in vertical profiles
cMap = 'RdBu_r' # diverging colormap used for interpolated contour plots
outOfGrid = ['Trap','Rothera','B','E','notFound','outGrid'] # station labels outside the standard grid
dsPath = 'data_sets/' # data set directory
# Let's read the grid into Python through a Pandas dataframe, so we can work with it... Moreover, let's create some functions so we can continue to do so.
# In[11]:
def read_grd(fileIn,col):
    """Load the survey-grid CSV into a DataFrame.

    Parameters
    ----------
    fileIn : str  -- path to the grid CSV (its own header row is skipped).
    col    : list -- column names to assign.

    Returns the grid with known out-of-grid stations removed; exits the
    program when the file cannot be found.
    """
    if not os.path.isfile(fileIn):
        print (fileIn,"is either unreadable or DNE.")
        raise SystemExit(0)
    grid = pd.read_csv(fileIn,sep=',',skiprows=1,names=col,na_filter=True)
    # keep only rows whose stationID is not one of the out-of-grid labels
    return grid[~grid.stationID.isin(outOfGrid)]
def draw_wAP(zoom=False,lat=-66.90,lon=-67.05):
    """Draw a stereographic Basemap of the western Antarctic Peninsula.

    zoom falsy gives the full regional view centred on (lat, lon);
    zoom == 1 (or True) gives a close-up of the sampling grid.
    Returns the configured Basemap instance.
    """
    if zoom == 1:
        m = Basemap(width=320000,height=180000,
                    resolution='l',projection='stere',
                    lat_ts=50,lat_0=-64.90,lon_0=-66.05)
    else:
        m = Basemap(width=1800000,height=1000000,
                    resolution='l',projection='stere',
                    lat_ts=50,lat_0=lat,lon_0=lon)
    # white, effectively solid graticule lines drawn behind the data
    m.drawparallels(np.arange(-90.,90.,1.),labels=[False,False,False,True],
                    color='w',dashes=[100,.0001],zorder=0)
    m.drawmeridians(np.arange(-180.,181.,1.),labels=[True,False,False,False],
                    color='w',dashes=[100,.0001],zorder=0)
    m.drawcoastlines()
    m.fillcontinents()
    m.drawmapboundary()
    return m
def plt_grd(grid_df):
    """Plot every station in *grid_df* on the regional map, labelled by stationID."""
    m = draw_wAP(zoom=False,lat=-65.40,lon=-67.05)
    # project lon/lat into map coordinates
    xS, yS = m(grid_df['lon'].values, grid_df['lat'].values)
    m.scatter(xS, yS, marker='o',s=26,color='k')
    # annotate each station, offset so the text sits beside the marker
    for label, xpt, ypt in zip(grid_df['stationID'], xS, yS):
        plt.text(xpt-70000, ypt-5000, label,size=8)
    plt.title('Palmer LTER Regional Grid')
    plt.show()
# In[13]:
# files
# Idealized survey-grid definition and the column names assigned on read.
gridInfile = os.path.join(dsPath+'Basic Western Antarctic Peninsula Survey Grid.csv')
gridCol = ["studyName","gridLine","gridStation","stationID","lat","lon","gridCode"]
# In[5]:
# first read in the grid standard
grid = read_grd(gridInfile,gridCol)
plt_grd(grid)  # visual sanity check of the idealized station layout
# Something is not right with the station names, so let's fix it!
# In[14]:
# Re-read the grid and rebuild stationID as "LLL.SSS" with zero-padded
# 3-digit line and station numbers so IDs compare consistently everywhere.
newGrid = read_grd(gridInfile,gridCol)
newGrid.gridStation = newGrid.gridStation.astype(int).apply(lambda x: '{0:0>3}'.format(x))
newGrid.gridLine = newGrid.gridLine.astype(int).apply(lambda x: '{0:0>3}'.format(x))
newGrid['stationID'] = newGrid.gridLine.map(str) +"."+ newGrid.gridStation.map(str)
plt_grd(newGrid)
# Now let's take a look at the cruise measurements from 2000-onwards.
# In[3]:
# CSV file from Datazoo
inOrgInfile = os.path.join(dsPath+'Dissolved Inorganic Nutrients.csv')
# Column formats for inorganic nutrients.
# NOTE: the 'Nitrite' column holds the combined NO3+NO2 measurement
# (see how it is titled/labelled in the plotting functions below).
inOrgCol = ['studyName','eventNum','cast','bottle','datetime','stationID','gridLine',\
'gridStation','lat','lon','percentIrradiance','depth','PO4','SiO4','NO2','NO3','NH4','Nitrite',\
'notes']
# In[4]:
def read_inOrg(infile,cols):
    """Read an inorganic-nutrient CSV (Datazoo export) into a tidy DataFrame.

    Parameters
    ----------
    infile : str  -- path to the CSV (its first row is skipped, *cols* assigned).
    cols   : list -- column names for the file.

    Returns the records from 2000 onward that carry valid coordinates, depth
    and timestamp, with integer 'year'/'month' columns added and all
    coordinates forced negative (south / west). Exits if the file is missing.
    Uses the module-level sentinel ``skip``.
    """
    # Import the files
    if(os.path.isfile(infile)):
        inOrg = pd.read_csv(infile,skiprows=1,names=cols)
        # Munging: drop rows lacking coordinates, depth, or a timestamp
        inOrg = inOrg[(inOrg['lon'].notnull()) & (inOrg['lat'].notnull())&
                      (inOrg['depth']!=skip) & (inOrg['depth'].notnull())&
                      (inOrg['datetime'].notnull())]
        # Time keeping: restrict to cruises from 2000 onward
        inOrg =inOrg[inOrg.datetime>'1999-12-31 00:00:00'] # start date
        inOrg.datetime=pd.to_datetime(inOrg.datetime,format='%Y-%m-%d %H:%M:%S',exact=True)
        inOrg['year']=pd.DatetimeIndex(inOrg['datetime']).year
        inOrg['month']=pd.DatetimeIndex(inOrg['datetime']).month
        inOrg.lat=abs(inOrg.lat.map('{:,.5f}'.format).astype(float))*-1 # convert all coords to south
        inOrg.lon=abs(inOrg.lon.map('{:,.5f}'.format).astype(float))*-1 # convert all coords to west
        inOrg = inOrg.reset_index()
        print("The imported database contains",len(inOrg.index),"records.")
        return inOrg
    else:
        # BUG FIX: the original printed the undefined name 'fileIn1' here,
        # which raised NameError instead of the intended message.
        print (infile,"is either unreadable or DNE.")
        raise SystemExit(0)
inOrg = read_inOrg(inOrgInfile,inOrgCol)  # load and clean the raw nutrient records
inOrg.head()  # quick visual check of the first rows
# Let's take a peek at the spatial-temporal distribution of measurements in the grid...
# In[9]:
def plt_interannual(df,zoom=False):
    """Scatter every sample location on the wAP map, coloured by cruise year.

    df   : DataFrame with 'year', 'lat' and 'lon' columns.
    zoom : passed through to draw_wAP (True for the close-up view).
    """
    m = draw_wAP(zoom)
    years = np.array(df.year.unique())          # every LTER sampling year
    palette = cm.YlGnBu(np.linspace(0, 1, len(years)))  # one colour per year
    for colour, yr in zip(palette, years):
        annual = df[df.year==yr]                # this year's samples
        xm, ym = m(annual.lon.values, annual.lat.values)  # to map coordinates
        m.scatter(xm, ym, color=colour, marker='o',s=24,label=np.asarray(annual.year)[0])
    plt.legend(scatterpoints=1,
               loc='lower right',
               ncol=1,
               fontsize=10)
    plt.title('Interannual Distribution of Palmer LTER Inorganic Nutrients')
    plt.show()
plt_interannual(inOrg)  # full-region view of all sampling years
# It looks like all stations are lined up with the exception of some outside of the station lines... But let's not make any assumptions just yet, and take a closer look.
# In[10]:
plt_interannual(inOrg,zoom=True)  # zoomed view of the core sampling grid
# We see that although these points are taken in the same proximity, their coordinates aren't flush to the correct precision...
#
# In[11]:
print("We also see that there are ",len(inOrg.lat.unique()),"and",len(inOrg.lon.unique()),"unique latitude and longitude coordiantes,")
print("and that the stationID's are not standardized:\n",inOrg.stationID.unique()[:14])
print("We want to see",len(grid.stationID),"coordinate pairs and stationIDs...")
# # Standardizing the Dataset
# Before we can do any analysis, we need to wrangle the data into a comparable format...
#
# This task can be accomplished by standardizing stationIDs and coordinates that are within reasonable bounds of idealized station coordinates. This function compares each gritty coordinate pair with idealized pairs and outputs a Pandas DF containing corrected coordinates (lat,lon) and stationID.
#
cord2stationID
#
Inputs:
#
pandas dataframe (DF), latitude numPy array, longitue numPy array
#
Outputs:
#
Pandas Dataframe containing standardized lat/lon coordinates and stationID.
#
ex . updatedDF = coor2stationID(df,df.lat,df.lon)
#
Top
# In[12]:
latThresh = 0.0645 # (deg latitude) matching tolerance to an idealized station
lonThresh = 0.0750 # (deg longitude) matching tolerance to an idealized station
def cord2stationID(df):
    """Snap each observed (lat, lon) pair to the nearest idealized grid station.

    For every row of *df*, idealized stations in the module-level ``newGrid``
    within latThresh / lonThresh of the observation are candidates; the one
    with the smallest |dlat| + |dlon| wins. Observations with no candidate
    are labelled 'notFound'.

    Returns a DataFrame with columns lat, lon, stationID, gridLine, stationLine.

    NOTE(review): this function was truncated in the source export; the
    closest-candidate selection and the final return were reconstructed from
    the surrounding logic -- confirm against the original notebook.
    """
    # Standardize hemisphere: force all coordinates south / west (negative)
    latIn = -1*abs(df.lat)
    lonIn = -1*abs(df.lon)
    x = len(latIn)      # number of observed points
    newStations = []    # one result dict per observation
    for i in range(x):
        # Candidate idealized stations within the latitude tolerance
        querry = newGrid[newGrid.lat<=latIn[i]+latThresh]
        querry = querry[querry.lat>=latIn[i]-latThresh]
        if querry.empty:
            newStations.append({'lat':latIn[i],'lon':lonIn[i],
                                'stationID':'notFound',
                                'gridLine':'notFound',
                                'stationLine':'notFound'})
            continue
        # Narrow further by the longitude tolerance
        querry = querry[querry.lon<=lonIn[i]+lonThresh]
        querry = querry[querry.lon>=lonIn[i]-lonThresh]
        qLen = len(querry.index)
        if querry.empty:
            newStations.append({'lat':latIn[i],'lon':lonIn[i],
                                'stationID':'notFound',
                                'gridLine':'notFound',
                                'stationLine':'notFound'})
        elif (qLen==1):
            # Exactly one candidate: take it. (The gridLine/stationLine
            # mapping below mirrors the original exact-match branch, which
            # stored gridStation under 'gridLine' and vice versa -- confirm.)
            newStations.append({'lat':np.asarray(querry.lat, dtype=float)[0],
                                'lon':np.asarray(querry.lon, dtype=float)[0],
                                'stationID':np.asarray(querry.stationID, dtype=object)[0],
                                'gridLine':np.asarray(querry.gridStation, dtype=object)[0],
                                'stationLine':np.asarray(querry.gridLine, dtype=object)[0]})
        else:
            # Several candidates: pick the smallest Manhattan distance
            qLon = querry.lon.values
            qLat = querry.lat.values
            qStation = querry.stationID.values
            qGridLine = querry.gridStation.values
            qStationLine = querry.gridLine.values
            minDist = 1e6
            best = 0
            for j in range(qLen):
                sumDist = abs(lonIn[i]-qLon[j]) + abs(latIn[i]-qLat[j])
                if(sumDist<minDist):
                    minDist = sumDist
                    best = j
            newStations.append({'lat':float(qLat[best]),
                                'lon':float(qLon[best]),
                                'stationID':qStation[best],
                                'gridLine':qGridLine[best],
                                'stationLine':qStationLine[best]})
    return pd.DataFrame(newStations)
# Now each point along the station lines has standardized coordinates and is separated from non-conventional station points!
# Since this clean-up process is memory-intensive, we want to run it once. To save our progress, we can save the cleaned-up dataframe as a CSV...
#
#
Parameters:
#
df: Pandas dataframe
#
csvout:String (filename)
#
returns:
#
CSV of the Pandas dataframe in the parent directory.
#
ex . convert2CSV(df,'output.csv')
#
Top
# In[15]:
def convert2CSV(df,csvout):
    """Write *df* to *csvout* as comma-separated text with LF line endings.

    Parameters
    ----------
    df     : pandas DataFrame to save (index included, as before).
    csvout : str -- destination filename.
    """
    # pandas renamed line_terminator -> lineterminator in 1.5 and removed the
    # old spelling in 2.0; try the modern name first, fall back for old installs.
    try:
        toCSV = df.to_csv(sep=',',lineterminator='\n')
    except TypeError:
        toCSV = df.to_csv(sep=',',line_terminator='\n')
    # 'with' guarantees the handle is closed even if the write fails
    # (the original left the file open on an exception).
    with open(csvout, 'w') as fh:
        fh.write(toCSV) # writes df to csv...
    print("Database transferred to csv...")
# In[16]:
# Drop the year and month columns.
# NOTE(review): inOrgClean is not defined anywhere in this export -- it is
# presumably the result of applying cord2stationID to inOrg in a notebook
# cell lost during conversion. Confirm before running this cell.
cleanDF = inOrgClean[['studyName','eventNum','cast','bottle','datetime','stationID','gridLine',
'gridStation','lat','lon','percentIrradiance','depth','PO4','SiO4','NO2',
'NO3','NH4','Nitrite','notes']]
# filenames
inOrgOutfile = os.path.join(dsPath+'inOrganicNutrientsClean.csv')
gridOutfile = os.path.join(dsPath+'Revised Basic Western Antarctic Peninsula Survey Grid.csv')
# Run it!
convert2CSV(cleanDF, inOrgOutfile)   # standardized nutrient records
convert2CSV(newGrid,gridOutfile)     # standardized station grid
# # Interannual Nutrient Profiles
# Now that we've preprocessed the data, we can finally start doing some analysis!
#
Top
# In[5]:
# Import the clean files (written above) so later cells start from the
# standardized records rather than re-running the expensive clean-up.
inOrgCleanFile = os.path.join(dsPath+'inOrganicNutrientsClean.csv')
inOrg = read_inOrg(inOrgCleanFile,inOrgCol)
# vertical_profile(inOrg,maxDepth,drawMLD,method,byStation,station)
#
Parameters:
#
inOrg: Pandas DataFrame
#
Inorganic nutrients pandas dataframe (DF),
#
maxDepth: Int or Float
#
depth of the profile in meters, can be neg or pos.
#
drawMLD: Boolean (default, False)
#
set to True (after compiling mld_df) to display the mixed layer depth of each the profile.
#
method: String (default,'t_sig_down')
#
Method of MLD estimation. Read more about MLD estimation methods here
#
byStation: Boolean (default, False)
#
set to True to display one station's interannual profiles.
#
station: String (default,'200.220')
#
When byStation=True, searches for requested station.
#
Returns:
#
Matplotlib figure containing vertical nutrient profiles for NO3+NO2,PO4 and SiO4.
#
ex - vertical_profiles(inOrg,200) # for all stations, and no MLD drawn.
#
vertical_profiles(inOrg,200,drawMLD=True,method='g_sig_down',byStation=True,station='300.140') # for station 300.140 with interannual MLD drawn.
# In[83]:
def vertical_profiles(inOrg,maxDepth,drawMLD=False,method='t_sig_down',byStation=False,station='200.220',byYear=True,year=2010):
    """Plot interannual vertical profiles of PO4, NO3+NO2 and SiO4.

    Parameters
    ----------
    inOrg     : DataFrame of inorganic nutrient samples.
    maxDepth  : profile depth in meters (sign is ignored).
    drawMLD   : if True, draw each year's mixed layer depth (needs mld_df).
    method    : MLD-estimation column of mld_df to draw.
    byStation : if True, plot only *station*; otherwise every in-grid station.
    station   : stationID used when byStation is True.
    byYear, year : kept for interface compatibility; not used here.

    Produces one 3-panel matplotlib figure per station.

    Fixes vs. the original: the all-stations branch referenced the undefined
    name ``stationID[i]`` in its MLD lookup (NameError when drawMLD=True);
    ``plt.tight_layout`` was never actually called; the removed NumPy aliases
    np.float / np.int are replaced. The two near-identical branches are now
    one helper.
    """
    yearList = np.array(inOrg.year.unique()) # List of years from the LTER
    colorSwatch = cm.YlGnBu(np.linspace(0, 1, len(yearList))) # one colour per year
    color = pd.DataFrame(colorSwatch,columns=['hue','lightness','saturation','other'])
    color['year'] = yearList
    scatMark = 25 # scatter marker size
    if(byStation!=1):
        # Every station on the standard grid, in sorted order. A sampling
        # date with <=3 points stops that station's plotting, as before.
        stations = np.sort(pd.unique(
            inOrg['stationID'].loc[inOrg['stationID'].isin(outOfGrid)==False].values.ravel()))
        for st in stations:
            _profile_one_station(inOrg,st,maxDepth,color,scatMark,drawMLD,method,minPoints=3)
    else:
        # Single requested station; the original applied no minimum-point cut here.
        _profile_one_station(inOrg,station,maxDepth,color,scatMark,drawMLD,method,minPoints=None)

def _profile_one_station(inOrg,station,maxDepth,color,scatMark,drawMLD,method,minPoints):
    """Draw the 3-panel (PO4 / NO3+NO2 / SiO4) profile figure for one station."""
    stationData = inOrg[inOrg.stationID==station] # all samples at this station
    stationTime = stationData.datetime.unique()   # every sampling date
    if(len(stationTime)==0):
        return # nothing sampled here; avoid an empty figure and a NameError below
    fig,ax = plt.subplots(ncols=3,figsize=(14,6),sharey=True)
    # label and limit the 3 axes
    xlabel = [r'PO$_{4}$ [$\mu$m/L]',r'NO$_{3}$+NO$_{2}$ [$\mu$m/L]',r'SiO$_{4}$ [$\mu$m/L]']
    for j in range(3):
        ax[j].locator_params(axis = 'x',nbins=5)
        ax[j].set_xlabel(xlabel[j])
    ax[0].set_ylim([-abs(maxDepth),0])
    ax[0].set_ylabel('Water column height [m]')
    count = 0
    # Plot each sampling date (one per year)
    for jj in range(len(stationTime)):
        target = stationData.loc[stationData.datetime==stationTime[jj]]
        if(minPoints is not None and len(target.index)<=minPoints):
            break # too few points on this date; original code stopped here
        colorYear = (color[['hue','lightness','saturation']][color.year==np.asarray(target.year.unique(), dtype=float)[0]]).values
        # For each chemical tracer ('Nitrite' holds combined NO3+NO2)
        chem = ['PO4','Nitrite','SiO4']
        for k in range(len(chem)):
            df = target.loc[(target[chem[k]]!=skip) & (target[chem[k]].notnull())]
            x = df[chem[k]].values
            y = df['depth'].values*-1 # plot depth as negative height
            if(x.any()):
                labels = np.asarray(target.year, dtype=int)[0]
                ax[k].scatter(x,y ,marker=mark[jj%5],s=scatMark,color=colorYear,zorder=1,linewidth=.2,label=labels)
                ax[k].plot(x,y,color=colorYear[0],zorder=10)
                ax[k].legend(scatterpoints=1,loc='lower left',ncol=1,fontsize=10)
                if(drawMLD==1):
                    # BUG FIX: was mld_df.stationID==stationID[i] (undefined name)
                    MLD = mld_df.loc[(mld_df.stationID==station) & (mld_df.year==labels)]
                    if (len(MLD)>1): # NOTE(review): likely meant >=1 -- confirm
                        ax[k].axhline(y=MLD[method].iloc[0]*-1,linewidth=2,color=colorYear[0],alpha=0.8,zorder = 3)
        count +=1
    if(count==0):
        plt.close() # nothing plotted; discard the empty figure
    fig.suptitle(r'Interannual Variability of Inorganic Nutrients at Station '+np.asarray(target.stationID)[0]+\
    ' ('+str(np.asarray(target.lat)[0])+','+str(np.asarray(target.lon)[0])+')',fontsize=22)
    plt.show()
    plt.tight_layout() # original lacked the call parentheses (a no-op)
# With vertical_profiles(), we can inspect how each station varies interannually with depth, or by individual station when byStation=True, and station='stationID' are set as arguments.
# In[89]:
vertical_profiles(inOrg,maxDepth,byStation=True,station='200.100')  # example: a single station
# To save space, I only showed one, but we can print them all using vertical_profiles(inOrg,maxDepth). Now we know how each station changes with depth over time, but these depth profiles are not informative as standalone figures. To get more insight let's compare each cruise across the years and averaged among years. We can do so by interpolating between stations.
# # Interannual Surface Interpolation Algorithm
# For now, we're only going to interpolate surface nutrients but this function can also interpoalte MLD and will help us calculate NCP!
#
Top
# In[20]:
def interpolate(df,tracer,MLD=False):
    """Contour-plot an interpolated surface field of *tracer* for each year.

    Parameters
    ----------
    df     : DataFrame with year/lat/lon/depth and the tracer column.
    tracer : 'Nitrite' (combined NO3+NO2), 'PO4', 'SiO4', 'Nuit_Deficiency',
             or (fallback branch) an MLD column name.
    MLD    : if True, plot the clustered MLD from the module-level mld_df
             instead of the tracer values.

    Uses module globals: skip, outOfGrid, surfDepth, cMap, mld_df.
    Shows one cubic-interpolated contour figure per year with >5 samples.
    """
    # Query the database: drop sentinel/missing values and off-grid stations
    tracer_df = df.loc[(df[tracer]!=skip) &
                       (df[tracer].notnull()) &
                       (df['stationID'].isin(outOfGrid)==False)]
    # tracer specific values like title, depth-dependence, and colorbar labels.
    if(tracer=='Nitrite'):
        title = 'Surface NO$_{3}$+NO$_{2}$'
        tracer_df = tracer_df[tracer_df['depth']<=surfDepth]
        cBarLabel = r'$\mu$mol/L'
    elif(tracer=='PO4'):
        title = 'Surface PO$_{4}$'
        tracer_df = tracer_df[tracer_df['depth']<=surfDepth]
        cBarLabel = r'$\mu$mol/L'
    elif(tracer=='SiO4'):
        title = 'Surface SiO$_{4}$'
        tracer_df = tracer_df[tracer_df['depth']<=surfDepth]
        cBarLabel = r'$\mu$mol/L'
    elif(tracer=='Nuit_Deficiency'):
        title = 'Net Community Production'
        cBarLabel = r'mol C / m$^{2}$'
    else:
        # fall-through: assume *tracer* is an MLD-estimate column
        title = 'Mixed Layer Depth'
        tracer_df = tracer_df.loc[(tracer_df[tracer]<200) & (tracer_df[tracer]>0)]
        cBarLabel = 'm'
    # Interpolate and plot for each year
    for year in pd.unique(tracer_df.year.values.ravel()):
        querry = tracer_df[tracer_df['year']==year]
        # Define the plotable elements
        x = querry.lon.values
        y = querry.lat.values
        z = querry[tracer].values
        # Only interpolate years with more than 5 samples
        if(len(x)>5):
            # plot data points.
            #plt.scatter(x,y,facecolors='none', edgecolors='k',s=26, zorder=3)
            if(MLD==True):
                # replace the field with this year's clustered MLD estimates
                querr2 = mld_df[mld_df['year']==year]
                x = querr2.lon.values
                y = querr2.lat.values
                z = querr2['clustered_MLD'] # NOTE(review): a Series, unlike the ndarrays above -- griddata accepts it
                #plt.scatter(x,y, color='orange',s=26,zorder = 2)
                title = 'Mixed Layer Depth'
                cBarLabel = 'm'
            # define grid.
            xi = np.linspace(x.min(),x.max(),300)
            yi = np.linspace(y.min(),y.max(),300)
            # grid the data.
            zi = abs(griddata((x, y), z, (xi[None,:], yi[:,None]), method='cubic'))
            if(tracer=='Nuit_Deficiency'): # use Redfield ratios to convert mmol deficiency to carbon in mols
                zi = (zi*(106./16.))/1000
            # contour the gridded data.
            plt.contourf(xi,yi,zi,12,cmap=cMap,zorder=1)
            plt.colorbar(orientation='vertical').ax.set_xlabel(cBarLabel) # draw colorbar
            plt.xlim(min(x),max(x))
            plt.ylim(min(y),max(y))
            plt.title('Interpolated '+title+' '+str(np.asarray(querry.year)[0])+' (%d points)' % len(x))
            plt.show()
# # NO2+NO3 Interannual Surface Interpolation
# Combined surface NO2+NO3 represents the stock of nitrogen-based nutrients available to support phytoplankton growth. NO2+NO3 samples (hollow points) are used to generate an interpolation of nutrient distributions along the wAP each January. Notice any trends?
#
Top
# In[23]:
interpolate(inOrg,'Nitrite')  # one surface NO3+NO2 interpolation map per year
# # Average Surface Interpolation Algorithm
# To compare interannual trends in one plot, we can average all nuitrient stocks by station and interpolate again.
#
Top
# In[22]:
def interpolate_average(df,tracer):
    """Contour-plot the multi-year mean surface field of *tracer*.

    Stations sampled in at least 5 distinct years contribute the absolute
    value of their mean tracer concentration, which is then cubic-interpolated
    onto a 360x360 grid. Uses module globals: skip, outOfGrid, surfDepth, cMap.
    """
    # Query the database and clean up
    tracer_df = df.loc[(df[tracer]!=skip) & (df[tracer].notnull())& (df['stationID'].isin(outOfGrid)==False)]
    # Title for plot
    if(tracer=='Nitrite'):
        title = 'Surface NO$_{2}$+NO$_{3}$'
        tracer_df = tracer_df.loc[tracer_df['depth']<=surfDepth]
        cBarLabel = r'$\mu$mol/L'
    elif(tracer=='PO4'):
        title = 'SurfacePO$_{4}$' # NOTE(review): displayed title lacks a space before PO4
        tracer_df = tracer_df.loc[tracer_df['depth']<=surfDepth]
        cBarLabel = r'$\mu$mol/L'
    elif(tracer=='SiO4'):
        title = 'Surface SiO$_{4}$'
        tracer_df = tracer_df.loc[tracer_df['depth']<=surfDepth]
        cBarLabel = r'$\mu$mol/L'
    elif(tracer=='Nuit_Deficiency'):
        title = 'Net Community Production'
        cBarLabel = r'mol C / m$^{2}$'
    else:
        # fall-through: assume *tracer* is an MLD-estimate column
        title = 'MLD'
        tracer_df = tracer_df.loc[(tracer_df[tracer]<200) & (tracer_df[tracer]>0)]
        cBarLabel = 'm'
    # Average surface values across years
    stationList = tracer_df.stationID.unique()
    lons = []
    lats = []
    nuits = []
    for stationID in stationList:
        querry= tracer_df[tracer_df['stationID']==stationID]
        # only stations sampled in 5+ distinct years contribute
        if(len(querry.year.unique())>=5):
            lons.append(np.asarray(querry.lon)[0])
            lats.append(np.asarray(querry.lat)[0])
            nuits.append(abs(np.nanmean(querry[tracer])))
    # define grid.
    xi = np.linspace(min(lons),max(lons),360)
    yi = np.linspace(min(lats),max(lats),360)
    # grid the data.
    zi = abs(griddata((lons, lats), nuits, (xi[None,:], yi[:,None]), method='cubic'))
    if(tracer=='Nuit_Deficiency'): # use Redfield ratios to convert mmol deficiency to carbon in mols
        zi = (zi*(106./16.))/1000
    # contour the gridded data.
    plt.contourf(xi,yi,zi,15,cmap=cMap)
    plt.colorbar(orientation='vertical').ax.set_xlabel(cBarLabel) # draw colorbar
    # plot data points.
    #plt.scatter(lons,lats,facecolors='none', edgecolors='k',s=26)
    plt.xlim(min(lons),max(lons))
    plt.ylim(min(lats),max(lats))
    plt.title(r'Summertime Averages of '+title+' (%d points)' % len(nuits))
    plt.show()
# # NO2+NO3 Interannual Average
#
Top
# In[23]:
interpolate_average(inOrg,'Nitrite')  # multi-year mean surface NO3+NO2 map
# We can leverage these combined NO2 NO3 measurements to calculate how much inorganic carbon has been taken out of the water column by Phytoplankton. This Biogeochemical measurement is called Net Community Production (NCP), and can be attained with a help from Physical parameters... namely, Mixed Layer Depth (MLD). The integrated area down to the MLD of combined NO2 NO3 represents the total nutrients taken out of the homogenous water column by Phytoplankton. With this number, we can reverse engineer how much CO2 has been sequestered using a stoichiometric ratio of elements found in Marine biota called the Redfield Ratio.
# # Estimating MLD
# Along with inorganic nutrients, CTD profiles are taken on the annual cruise along the wAP. The profile is recorded on the way down (faster) and on the way back up (slower) to the ship. For this reason, MLD estimations are taken from both directions with two methods highlighted in Holte and Talley (2009)-- the Threshold Method by
# de Boyer Montegut et al. (2004) and the
# Gradient Method by
# Dong et al. (2008) using temperature and potential density.
#
Top
# In[8]:
# files
# Recursive glob pattern for the CTD .std profiles, and the JSON file the
# MLD estimates are written to.
mldPath = 'package_650/**/*.std'
mldOutfile = os.path.join(dsPath+'Palmer_MLD.json')
# In[85]:
# values for threshold (t_) and gradient (g_) MLD estimations
t_threshT = 0.2 # celsius
t_threshS = 0.03 # kg/m^3
g_threshT = 0.005 # celsius
g_threshS = 0.0005 # kg/m^3
def find_mld(path):
    """Estimate the mixed layer depth from every CTD profile matching *path*.

    Each .std file is parsed into a DataFrame, then the MLD is estimated with
    the threshold method (de Boyer Montegut et al. 2004) and the gradient
    method (Dong et al. 2008) on both potential density ('sigma') and
    temperature, in the down-cast direction and -- when an up-cast is present
    -- in the up-cast direction too. All estimates are written as JSON to the
    module-level ``mldOutfile``.

    Fixes vs. the original: the reference-pressure slices were computed but
    discarded (``df[...]`` instead of ``df = df[...]``); ``bottomUp`` could be
    read before assignment when no reference row matched; the unused
    ``t2Slope`` computation is removed.
    """
    mld_list = [] # one dict of MLD estimates per CTD file
    records = 0
    for filename in glob.iglob(path, recursive=True):
        records += 1
        # Read the CTD profile: header lines (fewer than 12 tokens) become
        # column names, the remaining lines become data rows.
        lines = [line.split() for line in open(filename,'r',errors="ignore")]
        cols = [" ".join(line).lower() for line in lines[1:25] if len(line)<12]
        rows = lines[len(cols)+1:]
        df = pd.DataFrame(data=np.reshape(rows,(len(rows),len(cols))),columns=cols)
        mld = {} # all MLD estimates for this profile
        # add proper leading zeros to lines (stationID is "station.line")
        station,line = lines[0][5].split(".")
        if("-" not in line):
            mld['stationID']= station+"."+str("%03d" % (int(line),))
        else:
            mld['stationID']= station+"."+str("%04d" % (int(line),))
        # NOTE(review): assumes '/'-separated paths and a 2-digit-year directory
        mld['year'] = '20'+filename.split('/')[1][:2]
        # Boolean switches marking which physical columns have been found
        sFound = 0
        t1Found = 0
        t2Found = 0
        pFound = 0
        dFound = 0
        # check each column for potential density, temperatures, pressure and
        # depth; first match wins, the second temperature column is the up-cast
        for col in cols:
            if(sFound==0 and 'sigma' in col):
                sigmaCol = col
                sFound = 1
            elif(t1Found==0 and 'temperature' in col and 'potential' not in col):
                tempCol = col
                t1Found = 1
            elif(t2Found==0 and 'temperature' in col and 'potential' not in col):
                temp2Col = col
                t2Found =1
            elif('pressure' in col and pFound==0):
                presCol = col
                pFound = 1
            elif('depth' in col and dFound==0):
                depthCol = col
                dFound = 1
        # rename the columns (raises NameError if a column is absent, as the
        # original did -- presumably every .std file carries these fields; confirm)
        df = df.rename(columns={sigmaCol: 'sigma',
                                tempCol: 'temp1',
                                temp2Col: 'temp2',
                                presCol : 'pres',
                                depthCol : 'depth'})
        for col in ['pres','sigma','temp1','temp2','depth']:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        # The measurement at the reference pressure anchors the profile (enter
        # Holte and Talley 2009). NOTE(review): the original comment said
        # "closest to 10 dbar" but the code matches pres == 5 -- confirm.
        ref = df.loc[df['pres']==5].index
        bottomUp = False # BUG FIX: previously unset when no reference row matched
        if(len(ref)==2):
            # down- and up-cast both present: keep the span between them.
            # BUG FIX: these slices were computed but never assigned back.
            df = df[ref[0]:ref[1]].reset_index(drop=True)
            bottomUp = True
        elif(len(ref)==1):
            if(ref[0]<(len(df)/2)):
                df = df[ref[0]:].reset_index(drop=True) # down-cast only
                bottomUp = False
            else:
                df = df[:ref[0]].reset_index(drop=True) # up-cast only
                bottomUp = True
        # Sigma Threshold: first depth where sigma departs from the surface
        # value by more than t_threshS
        for isopyc in df['sigma']:
            if (abs(isopyc-np.asarray(df['sigma'].head(1))[0]) > t_threshS):
                mld['t_sig_down']= np.asarray(df['depth'].loc[df['sigma']==isopyc].values)[0]
                break
        if(bottomUp==True):
            for isopyc in reversed(df['sigma']):
                if (abs(isopyc-np.asarray(df['sigma'].tail(1))[0]) > t_threshS):
                    mld['t_sig_up']= np.asarray(df['depth'].loc[df['sigma']==isopyc].values)[0]
                    break
        # Temp Threshold 1
        for isotherm in df['temp1']:
            if (abs(isotherm-np.asarray(df['temp1'].head(1))[0]) > t_threshT):
                mld['t_t1_down']= np.asarray(df['depth'].loc[df['temp1']==isotherm].values)[0]
                break
        if(bottomUp==True):
            for isotherm in reversed(df['temp1']):
                if (abs(isotherm-np.asarray(df['temp1'].tail(1))[0]) > t_threshT):
                    mld['t_t1_up']= np.asarray(df['depth'].loc[df['temp1']==isotherm].values)[0]
                    break
        # Gradient method: first depth where the finite-difference slope
        # exceeds the gradient threshold
        sigSlope = np.diff(df['sigma'])/np.diff(df['pres'])
        t1Slope = np.diff(df['temp1'])/np.diff(df['pres'])
        ms = len(t1Slope)-1
        for i in range(ms):
            if(abs(t1Slope[i])>g_threshT):
                mld['g_t1_down']=df['depth'].iloc[i+1]
                break
        if(bottomUp==True):
            for i in reversed(range(ms)):
                if(abs(t1Slope[i])>g_threshT):
                    mld['g_t1_up']=df['depth'].iloc[i+1]
                    break
        for i in range(ms):
            if(abs(sigSlope[i])>g_threshS):
                mld['g_sig_down']=df['depth'].iloc[i+1]
                break
        if(bottomUp==True):
            for i in reversed(range(ms)):
                if(abs(sigSlope[i])>g_threshS):
                    mld['g_sig_up']=df['depth'].iloc[i+1]
                    break
        mld_list.append(mld)
    # Save the MLD estimates
    with open(mldOutfile, 'w') as file:
        json.dump(mld_list, file, indent=4)
    print(records,'CTD records scraped and dumped')
# run it!
find_mld(mldPath)  # parses every CTD profile and writes Palmer_MLD.json
# ## Choosing MLD
# A method used by Holte and Talley (2009) is cluster analysis. For this I build off a function from Raymond Hettinger, to search through each station's MLD estimates and cluster values based on a scaling gap. The largest cluster is then averaged and returned.
#
Top
# In[16]:
# Merge the MLD estimates with the wAP grid so each estimate gains its
# station's idealized lat/lon (inner join on stationID).
mld_df = pd.merge(left=pd.read_json(mldOutfile),right=newGrid,on='stationID',how='inner')
# In[17]:
# Cluster based MLD estimation.
def cluster(data, maxgap):
    """Return the mean of the largest cluster of MLD estimates in *data*.

    NaNs are dropped first. The remaining values are sorted and split into
    clusters wherever consecutive values differ by more than
    maxgap + value/10 (a gap that scales with depth). Returns None when no
    usable estimates remain.
    """
    estimates = data[~np.isnan(data)]
    if not estimates.any():
        return None
    ordered = np.sort(estimates)
    groups = [[ordered[0]]]
    for value in ordered[1:]:
        # scaling gap: deeper estimates tolerate a wider spread
        if abs(value - groups[-1][-1]) <= maxgap + (value/10):
            groups[-1].append(value)
        else:
            groups.append([value])
    # max() keeps the first of equally-sized clusters, as the original did
    return np.mean(max(groups, key=len))
# In[18]:
# Cluster the eight MLD estimates of each station-year and keep the mean of
# the largest cluster as the consensus ('clustered') MLD.
mld_df['clustered_MLD']=[cluster(row, maxgap=4) for row in mld_df[['g_sig_up',
'g_t1_up',
't_sig_up',
't_t1_up',
'g_sig_down',
'g_t1_down',
't_sig_down',
't_t1_down']].values]
# # MLD Coverage Relative to Nutrient Samples.
# How do we know if our MLD estimates (orange) have enough coverage to calculate NCP for each set of annual inorganic nutrient measurements (hollow)? For hollow points-- where we lack CTD data, we will use interpolated MLD (filled contour) to calculate the NCP. For this reason, how MLD is estimated and interpolated can make or break our results.
#
Top
# In[21]:
interpolate(df=inOrg,tracer='Nitrite',MLD=True)  # MLD field vs. nutrient-sample coverage
# ## Interannual MLD
# The interpolate() function can be used for other MLD estimation methods, as well as for interannual averages.
#
Top
# In[30]:
interpolate_average(mld_df,'clustered_MLD')  # multi-year mean MLD field
# ## Calculating Net Community Production (NCP)
# With a MLD, we can calculate how much nutrient is removed from the water column by integrating the combined NO2 NO3 along the MLD. To account for the resolution of the MLD, we can interpolate the NO2 NO3 concentration to the desired MLD.
#
Top
# In[31]:
# Files
# Output paths for the NCP estimates computed from each tracer.
ncpOutfileNOX = os.path.join(dsPath+'NCP.json')
ncpOutfileSiO4 = os.path.join(dsPath+'NCP_SiO4.json')
ncpOutfilePO4 = os.path.join(dsPath+'NCP_PO4.json')
# In[32]:
# Search the interpolated MLD grid (parameterized in compile_NCP) for the
# value nearest to a station that lacks a direct CTD estimate.
def station_MLD(stationID,zi,df,xi_coords,yi_coords):
    """Return the interpolated MLD at *stationID*'s (rounded) coordinates.

    zi is the 2-D interpolated MLD grid; xi_coords / yi_coords map
    1-decimal-rounded lon / lat values to grid indices; df supplies the
    station's coordinates.
    """
    here = df.loc[df.stationID==stationID]
    lon_key = round(float(here.lon.values[0]),1)
    lat_key = round(float(here.lat.values[0]),1)
    mld = zi[xi_coords[lon_key], yi_coords[lat_key]]
    return(mld.mean())
# In[33]:
# Integrate a nutrient profile from the surface down to the MLD.
def get_NCP(df,station,tracer,mld):
    """Return the nutrient deficiency (removal) above the mixed layer.

    The tracer concentration is linearly interpolated to the MLD depth, the
    profile is truncated at the MLD, and the deficiency is the uniform column
    mld * C(mld) minus the integrated observed profile (Simpson's rule).
    Returns None when the station has no samples in *df*.

    Parameters
    ----------
    df      : DataFrame with stationID / depth / tracer columns.
    station : stationID to integrate.
    tracer  : tracer column name (e.g. 'Nitrite' for combined NO3+NO2).
    mld     : mixed layer depth in meters (positive down).
    """
    # scipy renamed simps -> simpson and removed simps in SciPy 1.14
    try:
        from scipy.integrate import simpson as _simpson
    except ImportError:  # older SciPy
        from scipy.integrate import simps as _simpson
    depSlice = df.loc[df['stationID']==station]
    if depSlice.empty:
        return None
    # concentration at the MLD, interpolated from the full profile
    mld_tracer = np.interp(mld,depSlice['depth'], depSlice[tracer])
    # keep only samples above the MLD, then append the interpolated endpoint
    depSlice = depSlice.loc[depSlice['depth']<=mld]
    nuit_profile = np.append(depSlice[tracer].values,mld_tracer)
    mld_depth = np.append(depSlice['depth'].values,mld)
    # deficiency = (uniform column at C(mld)) - (observed column inventory)
    return((mld*mld_tracer) - _simpson(nuit_profile,x=mld_depth))
# In[34]:
ncpOutfile = os.path.join(dsPath+'NCP.json')  # default output for compile_NCP below
def compile_NCP(method='t_sig_down',tracer='Nitrite',outfile=ncpOutfile,warning=False):
    """Integrate nutrient drawdown over the mixed layer for every station/year.

    For each cruise year, stations with a direct MLD estimate use it as the
    integration bound; stations without one fall back to a cubic
    interpolation of that year's MLD field (via station_MLD). The records
    are written to `outfile` as a JSON list of dicts with keys
    year / stationID / MLD / interp / Nuit_Deficiency.

    Parameters
    ----------
    method : str
        Column of mld_df holding the MLD estimate (e.g. 'clustered_MLD').
    tracer : str
        Column of inOrg to integrate (e.g. 'Nitrite' = combined NO2+NO3).
    outfile : str
        Path of the JSON file written.
    warning : bool
        If True, print progress and stations whose interpolated-MLD lookup fails.
    """
    years = [2003,2004,2005,2006,2007,2008,2009,2011,2012,2013,2014]
    NCP_list = []
    for year in years:
        if warning:
            print("***",year,"***")
        # Query the databases: drop sentinel values, nulls, off-grid stations.
        queryMLD = mld_df.loc[(mld_df[method]!=skip) &
                              (mld_df[method].notnull()) &
                              (mld_df['stationID'].isin(outOfGrid)==False) &
                              (mld_df['year']==year)]
        queryInOrg = inOrg.loc[(inOrg[tracer]!=skip) &
                               (inOrg[tracer].notnull()) &
                               (inOrg['stationID'].isin(outOfGrid)==False) &
                               (inOrg['year']==year)]
        # Grid and interpolate MLD.
        x = np.round(queryMLD.lon.values,4)
        y = np.round(queryMLD.lat.values,4)
        z = queryMLD[method].values
        # BUG FIX: the original tested len(x>4) -- the length of a boolean
        # array, truthy for ANY nonempty x -- so cubic griddata could run on
        # too few points. Test the point count itself, and remember whether
        # a grid exists so the station loop below can skip interpolation.
        have_grid = len(x) > 4
        if have_grid:
            # Define the interpolation grid over the observed extent.
            xi = np.linspace(x.min(),x.max(),360)
            yi = np.linspace(y.min(),y.max(),360)
            # Grid the data (abs() clips spurious negative overshoot).
            zi = abs(griddata((x, y), z, (xi[None,:], yi[:,None]), method='cubic'))
            # Map rounded coordinates -> grid indices for station_MLD lookups.
            xi_coords = {round(float(value),1): index for index, value in enumerate(xi)}
            yi_coords = {round(float(value),1): index for index, value in enumerate(yi)}
        # Go through each station measurement of inorganic nutrients.
        for station in pd.unique(queryInOrg['stationID'].values.ravel()):
            # Check whether the station has a direct MLD estimate this year.
            mld_S = queryMLD.loc[queryMLD['stationID']==station]
            if mld_S.empty:
                # No direct estimate -- fall back to the interpolated field.
                if not have_grid:
                    continue  # too few MLD points this year to interpolate
                try:
                    # Hoisted: the original recomputed this lookup three times.
                    mld_i = station_MLD(station,zi,queryInOrg,xi_coords,yi_coords)
                    if not pd.isnull(mld_i):
                        NCP_list.append({'year': year,
                                         'stationID': station,
                                         'MLD': mld_i,
                                         'interp': 'True',
                                         'Nuit_Deficiency': get_NCP(queryInOrg,station,tracer,mld_i)})
                except (KeyError,IndexError):
                    if warning:
                        print(station,"index not found")
                    # TODO: reference annual value for index error; nearest neighbor for key error?
            else:
                NCP_list.append({'year': year,
                                 'stationID': station,
                                 'MLD': mld_S[method].mean(),
                                 'interp': 'False',
                                 'Nuit_Deficiency': get_NCP(queryInOrg,station,tracer,np.nanmean(mld_S[method]))})
    # Persist all years in one write.
    with open(outfile, 'w') as fp:
        json.dump(NCP_list, fp, indent=4)
compile_NCP(method='clustered_MLD',tracer='Nitrite',outfile=ncpOutfile)
# read the json file into a pandas DF, read as text to avoid nans. Chained solution to this Stackoverflow answer.
# In[35]:
# Load the NCP records written by compile_NCP() and attach station coordinates.
# FIX: use a context manager -- the original open(...).read() leaked the handle.
with open(ncpOutfile, 'r') as fp:
    NOX_df = pd.DataFrame(json.loads(fp.read()))
NOX_df = pd.merge(left=NOX_df,right=newGrid[['stationID','lat','lon']],on='stationID',how='inner')
NOX_df.head()
# # Comparing with other nutrients
# Are these numbers consistent with the Redfield Ratio?
C:N:P = 106:16:1 and for Diatoms C:Si:N:P = 106:15:16:1
#
Top
# In[36]:
# Repeat the NCP calculation for SiO4 and PO4 to test the Redfield ratios.
compile_NCP(method='clustered_MLD',tracer='SiO4',outfile=ncpOutfileSiO4)
compile_NCP(method='clustered_MLD',tracer='PO4',outfile=ncpOutfilePO4)
# FIX: context managers -- the original open(...).read() calls leaked handles.
with open(ncpOutfileSiO4, 'r') as fp:
    SiO4_df = pd.DataFrame(json.loads(fp.read()))
with open(ncpOutfilePO4, 'r') as fp:
    PO4_df = pd.DataFrame(json.loads(fp.read()))
# In[37]:
# Station-by-station molar ratio of nitrogen to phosphate drawdown.
# Only stations with a meaningful PO4 drawdown (> 0.1) enter the denominator;
# pandas aligns on index, so excluded stations become NaN and are dropped.
po4_drawdown = PO4_df['Nuit_Deficiency']
N_P = NOX_df['Nuit_Deficiency'] / po4_drawdown.loc[po4_drawdown > .1]
# Scrub division artifacts, then mask ratios outside a plausible range.
N_P = N_P.replace([np.inf, -np.inf, 0], np.nan).dropna()
N_P = N_P.where((N_P > -30) & (N_P < 100))
N_P.plot(kind='kde',color='k', alpha=0.5, xlim=(-30,100),title='Density Distribution of N:P')
print("N:P mean",round(N_P.mean(),4),"Vs",16.00,"Redfield Ratio")
# In[38]:
# Station-by-station molar ratio of nitrogen to silicate drawdown.
ratio = NOX_df['Nuit_Deficiency'] / SiO4_df['Nuit_Deficiency']
# Scrub division artifacts (infinities and exact zeros) before plotting.
N_Si = ratio.replace([np.inf, -np.inf, 0], np.nan).dropna()
N_Si.plot(kind='kde',color='k', alpha=0.5, xlim=(-10,20),title='Density Distribution of N:Si')
# NOTE(review): with diatom C:Si:N = 106:15:16 the expected N:Si is 16/15
# (~1.07); the printout compares against 15/16 -- confirm which is intended.
print("N:Si mean",round(N_Si.mean(),4),"Vs",15/16,"Redfield Ratio")
# Because the Redfield Ratios are consistent, we can apply the C:N ratio (106:16) of Marine Biomass to calculate how much carbon was removed from the water column.
# In[39]:
# Annual interpolated fields of the MLD-integrated nutrient deficit (NCP proxy).
interpolate(df=NOX_df,tracer='Nuit_Deficiency')
# In[40]:
# Interannual average of the nutrient deficit across all cruise years.
interpolate_average(NOX_df,'Nuit_Deficiency')
# # Discussion
# Pockets of extreme productivity are spaced sporadically (1-3x) between otherwise non-productive waters along the wAP. An inverse pattern is faint in surface interpolations of NO2 + NO3. However, the regions of high net community production do not mesh with those of deplete surface NO2 + NO3, suggesting NCP in the wAP is driven by other (physical, biological or geographic) factors, rather than nutrient stocks alone. For this reason we turn to the physical method(s) used during Mixed Layer Depth (MLD) estimation and interpolation.
#
# Estimation algorithms can improve by accounting for Sea ice thermodynamics and water column structure. Starting from physics, we get closer to parsing out how the wAP's ecosystem and carbon sink will change as global temperatures rise. Unfortunately missing CTD values leave holes in the physics where interpolation algorithms fall short at boundary stations (lines 000 and 600). To compensate for this, interannually averaged MLD could be used as a baseline, and other high resolution datasets such as Argo and the World Ocean Atlas could be used to cross reference, validate, and interpolate these gaps.
#
# Physics aside, Redfield Ratios hold up remarkably well-- with SiO4 suggesting January community structures are dominated by Diatoms.
#
# Datasets outside of CTD and Inorganic nutrients have yet to be incorporated into calculating NCP. Triple Oxygen Isotopes, and inert Argon would be interesting avenues to explore. Freshwater discharge is also another feature that may help explain mixed layer dynamics as well as potential Iron fertilization.
#
Top
# In[41]:
# This is an extra function that I decided to leave in for curious GEOSPATIALLY-inclined folks!
# Map-based interpolate function, not as pretty as interpolate... but better context
def interpolate_map(df=inOrg,tracer = 'Nitrite',MLD = False):
    """Plot annual tracer fields on a stereographic Basemap of the wAP grid.

    For each cruise year with enough stations (>10), scatter the station
    positions and either overlay the raw per-year MLD estimates (MLD=True)
    or draw a cubic interpolation of the tracer as filled contours
    (MLD=False).

    Parameters
    ----------
    df : DataFrame
        Source data (inOrg for nutrients, mld_df for mixed layer depth).
    tracer : str
        Column to plot: 'Nitrite' (combined NO2+NO3), 'PO4', 'SiO4'; any
        other column name is treated as an MLD estimate.
    MLD : bool
        If True, overlay station MLD points instead of contouring the tracer.
    """
    # Query the database: drop sentinel values, nulls, and off-grid stations.
    tracer_df = df.loc[(df[tracer]!=skip) &
                       (df[tracer].notnull()) &
                       (df['stationID'].isin(outOfGrid)==False)]
    # Choose a title and, for nutrients, restrict to the surface layer.
    if(tracer=='Nitrite'):
        title = 'Surface NO$_{3}$+NO$_{2}$'
        tracer_df = tracer_df[tracer_df['depth']<=surfDepth]
    elif(tracer=='PO4'):
        title = 'Surface PO$_{4}$'
        tracer_df = tracer_df[tracer_df['depth']<=surfDepth]
    elif(tracer=='SiO4'):
        title = 'Surface SiO$_{4}$'
        tracer_df = tracer_df[tracer_df['depth']<=surfDepth]
    else:
        # Any other column is an MLD estimate; keep physically plausible values.
        title = 'Mixed Layer Depth'
        tracer_df = tracer_df.loc[(tracer_df[tracer]<200) & (tracer_df[tracer]>0)]
    # Interpolate and plot for each year.
    for year in pd.unique(tracer_df.year.values.ravel()):
        # Stereographic projection centered on the Palmer LTER grid.
        # NOTE: `map` shadows the builtin of the same name inside this loop.
        map = Basemap(width=1000000,height=640000,
                      resolution='l',projection='stere',\
                      lat_ts=50,lat_0=-66.40,lon_0=-69.05)
        map.drawparallels(np.arange(-90.,90.,1.),labels=[False,False,False,True],
                          color='w',dashes=[100,.0001],zorder=0)
        map.drawmeridians(np.arange(-180.,181.,1.),labels=[True,False,False,False],
                          color='w',dashes=[100,.0001],zorder=0)
        map.drawcoastlines()
        map.fillcontinents()
        map.drawmapboundary()
        querry = tracer_df[tracer_df['year']==year]
        # Define the plottable elements.
        x = querry.lon.values
        y = querry.lat.values
        z = querry[tracer].values
        labels = querry.stationID  # NOTE(review): overwritten below before use
        xM,yM = map(x, y)
        # Only plot years with enough stations to be meaningful.
        if(len(x)>10):
            # Plot the station positions as hollow markers.
            map.scatter(xM,yM,facecolors='none', edgecolors='k',s=26, zorder=2)
            # Label the sampling lines (000-600) at their seaward end stations.
            gridBounds = ['000.220','100.220','200.220','300.220','400.220','500.220','600.220']
            gridLabels = newGrid[newGrid['stationID'].isin(gridBounds)]
            x3 = gridLabels.lon.values
            y3 = gridLabels.lat.values
            xS, yS = map(x3, y3)
            labels = gridLabels.gridLine.values.astype(int)
            for label, xpt, ypt in zip(labels, xS, yS):
                plt.text(xpt-60000, ypt+12000, label,size=14)
            if(MLD==True):
                # Overlay this year's raw MLD estimates (orange, under the stations).
                querr2 = mld_df[mld_df['year']==year]
                x1 = querr2.lon.values
                y1 = querr2.lat.values
                xM1,yM1 = map(x1,y1)
                map.scatter(xM1,yM1, color='orange',s=26,zorder = 1)
            else:
                # Define the interpolation grid in projected coordinates.
                xi = np.linspace(xM.min(),xM.max(),300)
                yi = np.linspace(yM.min(),yM.max(),300)
                # Grid the data (abs() clips spurious negative overshoot from cubic).
                zi = abs(griddata((xM, yM), z, (xi[None,:], yi[:,None]), method='cubic'))
                # Contour the gridded data.
                plt.contourf(xi,yi,zi,12,cmap=cMap)
                plt.colorbar() # draw colorbar
        plt.title('Interpolated '+title+' '+str(np.asarray(querry.year)[0])+' (%d points)' % len(x))
        plt.show()