#!/usr/bin/env python # coding: utf-8 # # Palmer LTER Inorganic Analysis # Antarctica's Western Peninsula is a rapidly changing ecosystem. To quantify these changes and prepare for their global biogeochemical and ecological costs, we must look at long-term trends. Here is an analysis of 15 years of inorganic nutrient data taken from Palmer LTER January cruises. The cruise follows a regional grid along the Western Antarctic Peninsula (wAP) as follows: # In[90]: Image(filename='palmerGrid.jpg') # The software in this notebook is intended to help standardize data to expedite analysis and high-quality research. Specifically, this iPython notebook transfers the Palmer Inorganic Nutrient dataset into a Pandas dataframe, cleans and standardizes measurements according to a simplified grid, provides annual depth profiles by station, and interpolates station data between and across years to give a glimpse of regional inorganic nutrient patterns. CTD data is also used to estimate Mixed Layer Depth (MLD) using gradient and threshold methods. The combined NO2+NO3 is integrated from the surface to the MLD for each station annually to quantify the nutrients removed from the water column by phytoplankton and diatoms. From there, we can employ the stoichiometric ratio of marine biota, known as the Redfield Ratio, to calculate how much CO2 is taken from the water column through this process, called Net Community Production (NCP). #
# All files necessary to run this notebook can be found in the GitHub repo; all data was downloaded from UCSD's Datazoo, and additional information is available from Palmer LTER. #

This notebook is intended to be downloaded and run on your personal machine OR viewed from Jupyter's nbviewer; because of this, the indexing links from the table of contents will not work on GitHub! #

Last edited 01.06.16 by Leon Yin. #

Acknowledgements #
I want to thank Hugh Ducklow and Ryan Abernathey for their expert guidance. # # Table of Contents #
I. Data Processing: How can we compare ship-board measurements?
#
Standardizing the Dataset
# Saving the Clean Database
#
II. Biogeochemistry: How do inorganic nutrients vary with depth and breadth?
# Interannual Nutrient Profiles
# Interannual Surface Interpolation Algorithm
# Average Surface Interpolation Algorithm
# Interannual Surface Interpolation NO3 + NO2
# Average Surface Interpolation NO3 + NO2
#
III. Physics: Using CTD data to estimate the Mixed Layer Depth.
# Estimating Mixed Layer Depth
# Mixed Layer Depth coverage relative to Nutrient samples
# Interannual Mixed Layer Depth
#
IV. Physics and Biogeochemistry: Using MLD and interannual profiles to calculate Net Community Production.
# Calculating Net Community Production
# Comparing Redfield Ratios
# Discussion
# In[1]: import os import json import glob import pandas as pd import numpy as np import matplotlib.pyplot as plt import matplotlib.cm as cm from mpl_toolkits.basemap import Basemap import seaborn import colorsys from scipy.interpolate import griddata from scipy.integrate import simps from numpy import trapz from IPython.display import Image get_ipython().run_line_magic('matplotlib', 'inline') # In[2]: # Global variables seaborn.set(font_scale=1.5) plt.rcParams['figure.figsize'] = (15,9) skip = -999 # skip value in thr dataset mark = ['o','p','d','v','s'] # markers for scatterplots surfDepth = 10 #(m) depth of surface maxDepth = 250 cMap = 'RdBu_r' outOfGrid = ['Trap','Rothera','B','E','notFound','outGrid'] dsPath = 'data_sets/' # data set directory # Let's read the grid into Python through a Pandas dataframe, so we can work with it... Moreover, let's create some functions so we can continue to do so. # In[11]: def read_grd(fileIn,col): if(os.path.isfile(fileIn)): grid = pd.read_csv(fileIn,sep=',',skiprows=1,names=col,na_filter=True) grid = grid[grid.stationID.isin(outOfGrid)==False] return grid else: print (fileIn,"is either unreadable or DNE.") raise SystemExit(0) def draw_wAP(zoom=False,lat=-66.90,lon=-67.05): if(zoom!=1): map = Basemap(width=1800000,height=1000000, resolution='l',projection='stere',\ lat_ts=50,lat_0=lat,lon_0=lon) elif(zoom==1): map = Basemap(width=320000,height=180000, resolution='l',projection='stere',\ lat_ts=50,lat_0=-64.90,lon_0=-66.05) map.drawparallels(np.arange(-90.,90.,1.),labels=[False,False,False,True], color='w',dashes=[100,.0001],zorder=0) map.drawmeridians(np.arange(-180.,181.,1.),labels=[True,False,False,False], color='w',dashes=[100,.0001],zorder=0) map.drawcoastlines() map.fillcontinents() map.drawmapboundary() return map def plt_grd(grid_df): map = draw_wAP(zoom=False,lat=-65.40,lon=-67.05) x1 = grid_df['lon'].values y1 = grid_df['lat'].values labels = grid_df['stationID'] xS, yS = map(x1, y1) map.scatter(xS, yS, marker='o',s=26,color='k') for label, xpt, ypt in zip(labels, xS, yS): plt.text(xpt-70000, ypt-5000, label,size=8) plt.title('Palmer LTER Regional Grid') plt.show() # In[13]: # files gridInfile = os.path.join(dsPath+'Basic Western Antarctic Peninsula Survey Grid.csv') gridCol = ["studyName","gridLine","gridStation","stationID","lat","lon","gridCode"] # In[5]: # first read in the grid standard grid = read_grd(gridInfile,gridCol) plt_grd(grid) # Sometime is not right with the station names, so let's fix it! # In[14]: newGrid = read_grd(gridInfile,gridCol) newGrid.gridStation = newGrid.gridStation.astype(int).apply(lambda x: '{0:0>3}'.format(x)) newGrid.gridLine = newGrid.gridLine.astype(int).apply(lambda x: '{0:0>3}'.format(x)) newGrid['stationID'] = newGrid.gridLine.map(str) +"."+ newGrid.gridStation.map(str) plt_grd(newGrid) # Now let's take a look at the cruise measurements from 2000-onwards. # In[3]: # CSV file from Datazoo inOrgInfile = os.path.join(dsPath+'Dissolved Inorganic Nutrients.csv') # Column formats for inorganic nutrients inOrgCol = ['studyName','eventNum','cast','bottle','datetime','stationID','gridLine',\ 'gridStation','lat','lon','percentIrradiance','depth','PO4','SiO4','NO2','NO3','NH4','Nitrite',\ 'notes'] # In[4]: def read_inOrg(infile,cols): # Import the files if(os.path.isfile(infile)): inOrg = pd.read_csv(infile,skiprows=1,names=cols) # Munging... inOrg = inOrg[(inOrg['lon'].notnull()) & (inOrg['lat'].notnull())& (inOrg['depth']!=skip) & (inOrg['depth'].notnull())& (inOrg['datetime'].notnull())] # Time keeping... 
inOrg =inOrg[inOrg.datetime>'1999-12-31 00:00:00'] # start date inOrg.datetime=pd.to_datetime(inOrg.datetime,format='%Y-%m-%d %H:%M:%S',exact=True) inOrg['year']=pd.DatetimeIndex(inOrg['datetime']).year inOrg['month']=pd.DatetimeIndex(inOrg['datetime']).month inOrg.lat=abs(inOrg.lat.map('{:,.5f}'.format).astype(float))*-1 # convert all coords to south inOrg.lon=abs(inOrg.lon.map('{:,.5f}'.format).astype(float))*-1 # convert all coords to west inOrg = inOrg.reset_index() print("The imported database contains",len(inOrg.index),"records.") return inOrg else: print (fileIn1,"is either unreadable or DNE.") raise SystemExit(0) inOrg = read_inOrg(inOrgInfile,inOrgCol) inOrg.head() # Let's take a peep at the spatial-temporal distribution of measurments in the grid... # In[9]: def plt_interannual(df,zoom=False): # Initialize the plot map = draw_wAP(zoom) yearList = np.array(df.year.unique()) # List of years from the LTRE yearVar = len(yearList) # How long has this been going on? colorSwatch = cm.YlGnBu(np.linspace(0, 1, yearVar)) # Colors for each year for i in range(yearVar): querry = df[df.year==yearList[i]] # Querry the database per year x1 = querry.lon.values y1 = querry.lat.values x, y = map(x1, y1) # Convert the lat and lon to a Basemap-readable format. map.scatter(x, y, color=colorSwatch[i], marker='o',s=24,label=np.asarray(querry.year)[0]) plt.legend(scatterpoints=1, loc='lower right', ncol=1, fontsize=10) plt.title('Interannual Distribution of Palmer LTER Inorganic Nutrients') plt.show() plt_interannual(inOrg) # It looks like all stations are lined up with the excpetion of some outside of the station lines... But let's not make any assumptions just yet, and take a closer look. # In[10]: plt_interannual(inOrg,zoom=True) # We see that although these points are taken in the same proximity, their coordinates aren't flush to the correct percision... # # In[11]: print("We also see that there are ",len(inOrg.lat.unique()),"and",len(inOrg.lon.unique()),"unique latitude and longitude coordiantes,") print("and that the stationID's are not standardized:\n",inOrg.stationID.unique()[:14]) print("We want to see",len(grid.stationID),"coordinate pairs and stationIDs...") # # Standardizing the Dataset # Before we can do any anaylsis, we need to wrangle the data into a comparable format... #
This task can be accomplished by standardizing stationIDs and coordinates that fall within reasonable bounds of the idealized station coordinates. This function compares each gritty coordinate pair with the idealized pairs and outputs a Pandas DF containing corrected coordinates (lat,lon) and stationID. #

cord2stationID #
Inputs: #
pandas dataframe (DF) containing latitude and longitude columns #
Outputs: #
Pandas Dataframe containing standardized lat/lon coordinates and stationID. #
ex. updatedDF = cord2stationID(df) #
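# Before the full clean-up function in the next cell, here is a minimal standalone sketch of the matching step (a sketch only, not the notebook's function; it reuses the threshold values defined in the next cell, and the example coordinates are made up): #

def nearest_station(lat, lon, grid=None, lat_thresh=0.0645, lon_thresh=0.0750):
    # Keep only idealized grid stations within the lat/lon thresholds of the observation.
    grid = newGrid if grid is None else grid
    near = grid[(abs(grid.lat - lat) <= lat_thresh) &
                (abs(grid.lon - lon) <= lon_thresh)]
    if near.empty:
        return 'notFound'
    # Among the candidates, pick the one with the smallest |dlat| + |dlon|.
    dist = abs(near.lat - lat) + abs(near.lon - lon)
    return near.loc[dist.idxmin(), 'stationID']

nearest_station(-64.93, -64.20)  # hypothetical southern/western (negative) coordinates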
Top # In[12]: latThresh = 0.0645 lonThresh = 0.0750 def cord2stationID(df): # Lets standardize the coordinates for SW latIn = -1*abs(df.lat) lonIn = -1*abs(df.lon) # how many points are there? x=len(latIn) # List to hold corrected stations. newStations = [] # Iterate through each lat lon pair in the list for i in range(x): # Find standard latitude that is closest to observed. querry = newGrid[newGrid.lat<=latIn[i]+latThresh] querry = querry[querry.lat>=latIn[i]-latThresh] # If DNE... if querry.empty: newStations.append({'lat':latIn[i],'lon':lonIn[i], 'stationID':'notFound', 'gridLine':'notFound', 'stationLine':'notFound'}) # IF DE, look for standard longitude closest to observed. else: querry = querry[querry.lon<=lonIn[i]+lonThresh] querry = querry[querry.lon>=lonIn[i]-lonThresh] qLen = len(querry.index) # DNE... if querry.empty: newStations.append({'lat':latIn[i],'lon':lonIn[i], 'stationID':'notFound', 'gridLine':'notFound', 'stationLine':'notFound'}) # If there is perfect a match elif (qLen==1): newStations.append({'lat':np.asarray(querry.lat, dtype=np.float)[0], 'lon':np.asarray(querry.lon, dtype=np.float)[0], 'stationID':np.asarray(querry.stationID, dtype=object)[0], 'gridLine':np.asarray(querry.gridStation, dtype=object)[0], 'stationLine':np.asarray(querry.gridLine, dtype=object)[0]}) # the list has multiple values else: qLon = querry.lon.values qLat = querry.lat.values qStation = querry.stationID.values qGridLine = querry.gridLine.values qStationLine = querry.gridStation.values minDist = 1e6 # calculate which lat lon combo is closest to station values for j in range(qLen): lonDiff = abs(lonIn[i]-qLon[j]) latDiff = abs(latIn[i]-qLat[j]) sumDist = lonDiff+latDiff if(sumDist # Now each point along the station lines has standardized coordinates and is separated from non-conventional station points!
Since this clean-up process is memory-intensive, we only want to run it once. To save our progress, we can write the cleaned-up dataframe to a CSV... #

convert2CSV(df,csvout)
#
Parameters: #
df: Pandas dataframe #
csvout:String (filename) #
returns: #
CSV of the Pandas dataframe in the parent directory. #
ex . convert2CSV(df,'output.csv') #
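# As a point of comparison, pandas can also write the file directly; below is a minimal sketch (the name convert2CSV_simple is hypothetical) equivalent to the helper defined in the next cell, with the index dropped as a matter of choice: #

def convert2CSV_simple(df, csvout):
    # Let pandas handle the file I/O directly.
    df.to_csv(csvout, sep=',', index=False)
    print("Database transferred to csv...")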
Top # In[15]: def convert2CSV(df,csvout): toCSV=pd.DataFrame.to_csv(df,sep=',',line_terminator='\n') csvout = open(csvout, 'w') # opens the file to write into csvout.write(toCSV) # writes df to csv... print("Database transferred to csv...") csvout.close() # In[16]: # Drop the year and month columns. cleanDF = inOrgClean[['studyName','eventNum','cast','bottle','datetime','stationID','gridLine', 'gridStation','lat','lon','percentIrradiance','depth','PO4','SiO4','NO2', 'NO3','NH4','Nitrite','notes']] # filenames inOrgOutfile = os.path.join(dsPath+'inOrganicNutrientsClean.csv') gridOutfile = os.path.join(dsPath+'Revised Basic Western Antarctic Peninsula Survey Grid.csv') # Run it! convert2CSV(cleanDF, inOrgOutfile) convert2CSV(newGrid,gridOutfile) # # Interannual Nutrient Profiles # Now that we've preprocessed the data, we can finally start doing some analysis! #
Top # In[5]: # Import the clean files inOrgCleanFile = os.path.join(dsPath+'inOrganicNutrientsClean.csv') inOrg = read_inOrg(inOrgCleanFile,inOrgCol) #
vertical_profiles(inOrg,maxDepth,drawMLD,method,byStation,station)
#
Parameters: #
inOrg: Pandas DataFrame #
 Inorganic nutrients pandas dataframe (DF), #
maxDepth: Int or Float #
 depth of the profile in meters; can be negative or positive. #
drawMLD: Boolean (default, False) #
 set to True (after compiling mld_df) to display the mixed layer depth on each profile. #
method: String (default,'t_sig_down') #
 Method of MLD estimation. Read more about MLD estimation methods here #
byStation: Boolean (default, False) #
 set to True to display one station's interannual profiles. #
station: String (default,'200.220') #
When byStation=True, searches for requested station. #
Returns: #
Matplotlib figure containing vertical nutrient profiles for NO3+NO2,PO4 and SiO4. #

ex - vertical_profiles(inOrg,200) # for all stations, and no MLD drawn. #
vertical_profiles(inOrg,200,drawMLD=True,method='g_sig_down',byStation=True,station='300.140') # for station 300.140 with interannual MLD drawn. # In[83]: def vertical_profiles(inOrg,maxDepth,drawMLD=False,method='t_sig_down',byStation=False,station='200.220',byYear=True,year=2010): yearList = np.array(inOrg.year.unique()) # List of years from the LTRE colorSwatch = cm.YlGnBu(np.linspace(0, 1, len(yearList) )) # Colors for plotting color = pd.DataFrame(colorSwatch,columns=['hue','lightness','saturation','other']) color['year'] = yearList scatMark = 25 # Search each station if(byStation!=1): for station in np.sort(pd.unique(inOrg['stationID'].loc[inOrg['stationID'].isin(outOfGrid)==False].values.ravel())): stationData = inOrg[inOrg.stationID==station] # get all values in specific station for all stationVar stationTime = stationData.datetime.unique() # all sample dates from the station # Initiate the plot fig,ax = plt.subplots(ncols=3,figsize=(14,6),sharey=True) # label and limit the 3 axis xlabel = [r'PO$_{4}$ [$\mu$m/L]',r'NO$_{3}$+NO$_{2}$ [$\mu$m/L]',r'SiO$_{4}$ [$\mu$m/L]'] for j in range(3): ax[j].locator_params(axis = 'x',nbins=5) ax[j].set_xlabel(xlabel[j]) ax[0].set_ylim([-abs(maxDepth),0]) ax[0].set_ylabel('Water column height [m]') count = 0 # Plot each year for jj in range(len(stationTime)): target = stationData.loc[stationData.datetime==stationTime[jj]] # get only values from selected stationInterannualVar if(len(target.index)<=3): # make sure there are more than 3 points per time break else: colorYear = (color[['hue','lightness','saturation']][color.year==np.asarray(target.year.unique(), dtype=np.float)[0]]).values # For each chemical tracer chem = ['PO4','Nitrite','SiO4'] for k in range(len(chem)): df = target.loc[(target[chem[k]]!=skip) & (target[chem[k]].notnull())] x = df[chem[k]].values y = df['depth'].values*-1 if(x.any()): labels = np.asarray(target.year, dtype=np.int)[0] ax[k].scatter(x,y ,marker=mark[jj%5],s=scatMark,color=colorYear,zorder=1,linewidth=.2,label=labels) ax[k].plot(x,y,color=colorYear[0],zorder=10) ax[k].legend(scatterpoints=1,loc='lower left',ncol=1,fontsize=10) if(drawMLD==1): MLD = mld_df.loc[(mld_df.stationID==stationID[i]) & (mld_df.year==labels)] if (len(MLD)>1): ax[k].axhline(y=MLD[method].iloc[0]*-1,linewidth=2,color=colorYear[0],alpha=0.8,zorder = 3) count +=1 if(count==0): plt.close() fig.suptitle(r'Interannual Variability of Inorganic Nutrients at Station '+np.asarray(target.stationID)[0]+\ ' ('+str(np.asarray(target.lat)[0])+','+str(np.asarray(target.lon)[0])+')',fontsize=22) plt.show() plt.tight_layout # Or get profile of specified station... 
else: stationData = inOrg[inOrg.stationID==station] # get all values in specific station for all stationVar stationTime = stationData.datetime.unique() # all sample dates from the station # Initiate the plot fig,ax = plt.subplots(ncols=3,figsize=(14,6),sharey=True) # label and limit the 3 axis xlabel = [r'PO$_{4}$ [$\mu$m/L]',r'NO$_{3}$+NO$_{2}$ [$\mu$m/L]',r'SiO$_{4}$ [$\mu$m/L]'] for j in range(3): ax[j].locator_params(axis = 'x',nbins=5) ax[j].set_xlabel(xlabel[j]) ax[0].set_ylim([-abs(maxDepth),0]) ax[0].set_ylabel('Water column height [m]') count = 0 # Plot each year for jj in range(len(stationTime)): target = stationData.loc[stationData.datetime==stationTime[jj]] # get only values from selected stationInterannualVar colorYear = (color[['hue','lightness','saturation']][color.year==np.asarray(target.year.unique(), dtype=np.float)[0]]).values # For each chemical tracer chem = ['PO4','Nitrite','SiO4'] for k in range(len(chem)): df = target.loc[(target[chem[k]]!=skip) & (target[chem[k]].notnull())] x = df[chem[k]].values y = df['depth'].values*-1 if(x.any()): labels = np.asarray(target.year, dtype=np.int)[0] ax[k].scatter(x,y ,marker=mark[jj%5],s=scatMark,color=colorYear,zorder=1,linewidth=.2,label=labels) ax[k].plot(x,y,color=colorYear[0],zorder=10) ax[k].legend(scatterpoints=1,loc='lower left',ncol=1,fontsize=10) if(drawMLD==1): MLD = mld_df.loc[(mld_df.stationID==station) & (mld_df.year==labels)] if (len(MLD)>1): ax[k].axhline(y=MLD[method].iloc[0]*-1,linewidth=2,color=colorYear[0],alpha=0.8,zorder = 3) count +=1 if(count==0): plt.close() fig.suptitle(r'Interannual Variability of Inorganic Nutrients at Station '+np.asarray(target.stationID)[0]+\ ' ('+str(np.asarray(target.lat)[0])+','+str(np.asarray(target.lon)[0])+')',fontsize=22) plt.show() plt.tight_layout # With vertical_profiles(), we can inspect how each station varies interanually with depth, or by individual station when byStation=True, and station='stationID' are set as arguments. # In[89]: vertical_profiles(inOrg,maxDepth,byStation=True,station='200.100') # To save space, I only showed one, but we can print them all using verticalprofiles(inOrg,maxDepth) Now we know how each station changes with depth over time, but these depth profiles are not informative as standalone figures. To get more insight let's compare each cruise across the years and averaged among years. We can do so by interpolating between stations. # # Interannual Surface Interpolation Algorithm # For now, we're only going to interpolate surface nutrients but this function can also interpoalte MLD and will help us calculate NCP! #
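# The core of the surface interpolation is a single scipy griddata call. Here it is isolated with synthetic points so it can be run on its own (the values are made up); interpolate() in the next cell applies the same step to each cruise year: #

rng = np.random.default_rng(0)
lon_s = rng.uniform(-70, -64, 40)                     # synthetic station longitudes
lat_s = rng.uniform(-68, -64, 40)                     # synthetic station latitudes
nox_s = 30 + 2*np.sin(lon_s) + rng.normal(0, 1, 40)   # synthetic surface NO2+NO3

# Regular lon/lat axes, then cubic interpolation of the scattered values onto them.
xi = np.linspace(lon_s.min(), lon_s.max(), 300)
yi = np.linspace(lat_s.min(), lat_s.max(), 300)
zi = griddata((lon_s, lat_s), nox_s, (xi[None,:], yi[:,None]), method='cubic')

plt.contourf(xi, yi, zi, 12, cmap=cMap)
plt.colorbar(orientation='vertical').ax.set_xlabel(r'$\mu$mol/L')
plt.show()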
Top # In[20]: def interpolate(df,tracer,MLD=False): # Querry the database and clean up tracer_df = df.loc[(df[tracer]!=skip) & (df[tracer].notnull()) & (df['stationID'].isin(outOfGrid)==False)] # tracer specific values like title, depth-dependence, and colorbar labels. if(tracer=='Nitrite'): title = 'Surface NO$_{3}$+NO$_{2}$' tracer_df = tracer_df[tracer_df['depth']<=surfDepth] cBarLabel = r'$\mu$mol/L' elif(tracer=='PO4'): title = 'Surface PO$_{4}$' tracer_df = tracer_df[tracer_df['depth']<=surfDepth] cBarLabel = r'$\mu$mol/L' elif(tracer=='SiO4'): title = 'Surface SiO$_{4}$' tracer_df = tracer_df[tracer_df['depth']<=surfDepth] cBarLabel = r'$\mu$mol/L' elif(tracer=='Nuit_Deficiency'): title = 'Net Community Production' cBarLabel = r'mol C / m$^{2}$' else: title = 'Mixed Layer Depth' tracer_df = tracer_df.loc[(tracer_df[tracer]<200) & (tracer_df[tracer]>0)] cBarLabel = 'm' # Interpolate and plot for each year for year in pd.unique(tracer_df.year.values.ravel()): querry = tracer_df[tracer_df['year']==year] # Define the plotable elements x = querry.lon.values y = querry.lat.values z = querry[tracer].values # If it's not empty let's interpolate! if(len(x)>5): # plot data points. #plt.scatter(x,y,facecolors='none', edgecolors='k',s=26, zorder=3) if(MLD==True): querr2 = mld_df[mld_df['year']==year] x = querr2.lon.values y = querr2.lat.values z = querr2['clustered_MLD'] #plt.scatter(x,y, color='orange',s=26,zorder = 2) title = 'Mixed Layer Depth' cBarLabel = 'm' # define grid. xi = np.linspace(x.min(),x.max(),300) yi = np.linspace(y.min(),y.max(),300) # grid the data. zi = abs(griddata((x, y), z, (xi[None,:], yi[:,None]), method='cubic')) if(tracer=='Nuit_Deficiency'): # use redfield rations to convert mmol deficiency to carbon in mols zi = (zi*(106./16.))/1000 # contour the gridded data. plt.contourf(xi,yi,zi,12,cmap=cMap,zorder=1) plt.colorbar(orientation='vertical').ax.set_xlabel(cBarLabel) # draw colorbar plt.xlim(min(x),max(x)) plt.ylim(min(y),max(y)) plt.title('Interpolated '+title+' '+str(np.asarray(querry.year)[0])+' (%d points)' % len(x)) plt.show() # # NO2+NO3 Interannual Surface Interpolation # Combined surface NO2+NO3 represents the stock of Nitrogen-based nuitrients available to support Phytoplankton growth. NO2+NO3 samples (hollow points) are used to genreate an interpolation of nuitrient distributions along the wAP each January. Notice any trends? #
Top # In[23]: interpolate(inOrg,'Nitrite') # # Average Surface Interpolation Algorithm # To compare interannual trends in one plot, we can average all nutrient stocks by station and interpolate again. #
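# A minimal pandas sketch of the per-station averaging step, written as a groupby alternative to the explicit loop inside interpolate_average() in the next cell (the name surf is an assumption for the surface Nitrite slice): #

surf = inOrg.loc[(inOrg['Nitrite'] != skip) & (inOrg['Nitrite'].notnull()) &
                 (inOrg['depth'] <= surfDepth) &
                 (inOrg['stationID'].isin(outOfGrid) == False)]
grouped = surf.groupby('stationID')
station_means = grouped[['lat', 'lon', 'Nitrite']].mean()      # coords are standardized, so mean == value
station_means['n_years'] = grouped['year'].nunique()
station_means = station_means[station_means['n_years'] >= 5]   # keep stations sampled in >= 5 years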
Top # In[22]: def interpolate_average(df,tracer): # Querry the database and clean up tracer_df = df.loc[(df[tracer]!=skip) & (df[tracer].notnull())& (df['stationID'].isin(outOfGrid)==False)] # Title for plot if(tracer=='Nitrite'): title = 'Surface NO$_{2}$+NO$_{3}$' tracer_df = tracer_df.loc[tracer_df['depth']<=surfDepth] cBarLabel = r'$\mu$mol/L' elif(tracer=='PO4'): title = 'SurfacePO$_{4}$' tracer_df = tracer_df.loc[tracer_df['depth']<=surfDepth] cBarLabel = r'$\mu$mol/L' elif(tracer=='SiO4'): title = 'Surface SiO$_{4}$' tracer_df = tracer_df.loc[tracer_df['depth']<=surfDepth] cBarLabel = r'$\mu$mol/L' elif(tracer=='Nuit_Deficiency'): title = 'Net Community Production' cBarLabel = r'mol C / m$^{2}$' else: title = 'MLD' tracer_df = tracer_df.loc[(tracer_df[tracer]<200) & (tracer_df[tracer]>0)] cBarLabel = 'm' # Average surface values across years stationList = tracer_df.stationID.unique() lons = [] lats = [] nuits = [] for stationID in stationList: querry= tracer_df[tracer_df['stationID']==stationID] if(len(querry.year.unique())>=5): lons.append(np.asarray(querry.lon)[0]) lats.append(np.asarray(querry.lat)[0]) nuits.append(abs(np.nanmean(querry[tracer]))) # define grid. xi = np.linspace(min(lons),max(lons),360) yi = np.linspace(min(lats),max(lats),360) # grid the data. zi = abs(griddata((lons, lats), nuits, (xi[None,:], yi[:,None]), method='cubic')) if(tracer=='Nuit_Deficiency'): # use redfield rations to convert mmol deficiency to carbon in mols zi = (zi*(106./16.))/1000 # contour the gridded data. plt.contourf(xi,yi,zi,15,cmap=cMap) plt.colorbar(orientation='vertical').ax.set_xlabel(cBarLabel) # draw colorbar # plot data points. #plt.scatter(lons,lats,facecolors='none', edgecolors='k',s=26) plt.xlim(min(lons),max(lons)) plt.ylim(min(lats),max(lats)) plt.title(r'Summertime Averages of '+title+' (%d points)' % len(nuits)) plt.show() # # NO2+NO3 Interannual Average #
Top # In[23]: interpolate_average(inOrg,'Nitrite') # We can leverage these combined NO2+NO3 measurements to calculate how much inorganic carbon has been taken out of the water column by phytoplankton. This biogeochemical measurement is called Net Community Production (NCP), and can be attained with help from physical parameters... namely, Mixed Layer Depth (MLD). The integrated area down to the MLD of combined NO2+NO3 represents the total nutrients taken out of the homogeneous water column by phytoplankton. With this number, we can reverse-engineer how much CO2 has been sequestered using a stoichiometric ratio of elements found in marine biota called the Redfield Ratio. # # Estimating MLD # Along with inorganic nutrients, CTD profiles are taken on the annual cruise along the wAP. The profile is recorded on the way down (faster) and on the way back up (slower) to the ship. For this reason, MLD estimations are taken from both directions with two methods highlighted in Holte and Talley (2009)-- the Threshold Method by # de Boyer Montegut et al. (2004) and the # Gradient Method by # Dong et al. (2008), using temperature and potential density. #
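# Before the full CTD scraper (find_mld) in the next cells, here is a minimal sketch of the two density-based estimates on a single downcast. It is a sketch only: it assumes a dataframe named profile with 'depth' (m) and 'sigma' (kg/m^3) sorted from the surface downward, differentiates with respect to depth rather than pressure, and reuses the t_threshS and g_threshS values defined below. #

def mld_threshold(profile, thresh=0.03):
    # First depth where density differs from the near-surface value by more than `thresh`.
    ref = profile['sigma'].iloc[0]
    exceeds = profile[abs(profile['sigma'] - ref) > thresh]
    return exceeds['depth'].iloc[0] if not exceeds.empty else np.nan

def mld_gradient(profile, thresh=0.0005):
    # First depth where the vertical density gradient exceeds `thresh`.
    slope = np.diff(profile['sigma']) / np.diff(profile['depth'])
    hits = np.where(abs(slope) > thresh)[0]
    return profile['depth'].iloc[hits[0] + 1] if len(hits) else np.nan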
Top # In[8]: # files mldPath = 'package_650/**/*.std' mldOutfile = os.path.join(dsPath+'Palmer_MLD.json') # In[85]: # values for threshold (t_) and gradient(g_) MLD estimations t_threshT = 0.2 # celsius t_threshS = 0.03 # kg/m^3 g_threshT = 0.005 # celsius g_threshS = 0.0005 # kg/m^3 def find_mld(path): mld_list = [] # initialize a list for all dictionaries containing MLD estimates. records = 0 for filename in glob.iglob(path, recursive=True): records += 1 # First let's read each CTD profile into a pandas Dataframe lines = [line.split() for line in open(filename,'r',errors="ignore")] cols = [" ".join(line).lower() for line in lines[1:25] if len(line)<12] rows = lines[len(cols)+1:] df = pd.DataFrame(data=np.reshape(rows,(len(rows),len(cols))),columns=cols) mld = {} # initialize a dictionary to store all the MLD estimates # add propoer leading zeros to lines station,line = lines[0][5].split(".") if("-" not in line): mld['stationID']= station+"."+str("%03d" % (int(line),)) mld['year'] = '20'+filename.split('/')[1][:2] else: mld['stationID']= station+"."+str("%04d" % (int(line),)) mld['year'] = '20'+filename.split('/')[1][:2] # Boolean switches sFound = 0 t1Found = 0 t2Found = 0 pFound = 0 dFound = 0 # check each column in the uploaded file for potential desnity, temps, pressure, and depth for col in cols: if(sFound==0 and 'sigma' in col): sigmaCol = col sFound = 1 elif(t1Found==0 and 'temperature' in col and 'potential' not in col): tempCol = col t1Found = 1 elif(t2Found==0 and 'temperature' in col and 'potential' not in col): temp2Col = col t2Found =1 elif('pressure' in col and pFound==0): presCol = col pFound = 1 elif('depth' in col and dFound==0): depthCol = col dFound = 1 # rename the columns df = df.rename(columns={sigmaCol: 'sigma', tempCol: 'temp1', temp2Col: 'temp2', presCol : 'pres', depthCol : 'depth'}) for col in ['pres','sigma','temp1','temp2','depth']: df[col] = pd.to_numeric(df[col], errors='coerce') #The measurement closest to 10 dbar is used as the reference value, now enter Holte and Talley 2009. 
ref = df.loc[df['pres']==5].index if(len(ref)==2): df[ref[0]:ref[1]] bottomUp = True elif(len(ref)==1): if(ref[0]<(len(df)/2)): df[ref[0]:] bottomUp = False else: df[:ref[0]] bottomUp = True # Sigma Threshold for isopyc in df['sigma']: if (abs(isopyc-np.asarray(df['sigma'].head(1))[0]) > t_threshS): mld['t_sig_down']= np.asarray(df['depth'].loc[df['sigma']==isopyc].values)[0] break if(bottomUp==True): for isopyc in reversed(df['sigma']): if (abs(isopyc-np.asarray(df['sigma'].tail(1))[0]) > t_threshS): mld['t_sig_up']= np.asarray(df['depth'].loc[df['sigma']==isopyc].values)[0] break # Temp Threshold 1 for isotherm in df['temp1']: if (abs(isotherm-np.asarray(df['temp1'].head(1))[0]) > t_threshT): mld['t_t1_down']= np.asarray(df['depth'].loc[df['temp1']==isotherm].values)[0] break if(bottomUp==True): for isotherm in reversed(df['temp1']): if (abs(isotherm-np.asarray(df['temp1'].tail(1))[0]) > t_threshT): mld['t_t1_up']= np.asarray(df['depth'].loc[df['temp1']==isotherm].values)[0] break # Calculate the finite difference slope sigSlope = np.diff(df['sigma'])/np.diff(df['pres']) t1Slope = np.diff(df['temp1'])/np.diff(df['pres']) t2Slope = np.diff(df['temp2'])/np.diff(df['pres']) ms = len(t1Slope)-1 for i in range(ms): if(abs(t1Slope[i])>g_threshT): mld['g_t1_down']=df['depth'].iloc[i+1] break if(bottomUp==True): for i in reversed(range(ms)): if(abs(t1Slope[i])>g_threshT): mld['g_t1_up']=df['depth'].iloc[i+1] break for i in range(ms): if(abs(sigSlope[i])>g_threshS): mld['g_sig_down']=df['depth'].iloc[i+1] break if(bottomUp==True): for i in reversed(range(ms)): if(abs(sigSlope[i])>g_threshS): mld['g_sig_up']=df['depth'].iloc[i+1] break mld_list.append(mld) # Save the MLD estimates with open(mldOutfile, 'w') as file: json.dump(mld_list, file, indent=4) print(records,'CTD records scraped and dumped') # run it! find_mld(mldPath) # ## Choosing MLD # A method used by Holte and Talley (2009) is cluster analysis. For this I build off a runction from Raymond Hettinger, to search through each station's MLD estimates and cluster values based on a scaling gap. The largest cluster is then averaged and returned. #
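# For intuition, a tiny standalone sketch of the gap-based grouping idea (the notebook's cluster() helper in the next cell is the version actually used; the input values here are made up): #

def cluster_sketch(estimates, maxgap=4):
    data = np.sort(np.asarray(estimates, dtype=float))
    data = data[~np.isnan(data)]
    groups = [[data[0]]]
    for x in data[1:]:
        # Start a new group whenever the jump to the next value exceeds the (scaled) gap.
        if abs(x - groups[-1][-1]) <= maxgap + x/10:
            groups[-1].append(x)
        else:
            groups.append([x])
    return np.mean(max(groups, key=len))   # average the largest cluster

cluster_sketch([42.0, 45.0, 44.0, 120.0])  # ~43.7 m; the 120 m outlier is ignored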
Top # In[16]: # Merge the MLD estimates with the wAP grid for plot-ability. mld_df = pd.merge(left=pd.read_json(mldOutfile),right=newGrid,on='stationID',how='inner') # In[17]: # Cluster based MLD estimation. def cluster(data, maxgap): data = data[~np.isnan(data)] if(data.any()): data.sort() groups = [[data[0]]] for x in data[1:]: if abs(x - groups[-1][-1]) <= (maxgap+(x/10)): groups[-1].append(x) else: groups.append([x]) # check for largest cluster maxClusterSize = 0 for group in groups: if(len(group)>maxClusterSize): maxClusterSize = len(group) meanCluster = np.mean(group) return meanCluster # In[18]: mld_df['clustered_MLD']=[cluster(row, maxgap=4) for row in mld_df[['g_sig_up', 'g_t1_up', 't_sig_up', 't_t1_up', 'g_sig_down', 'g_t1_down', 't_sig_down', 't_t1_down']].values] # # MLD Coverage Relative to Nutrient Samples. # How do we know if our MLD estimates (orange) have enough coverage to calculate NCP for for each set of annual inorganic nutrient measurements (hollow)? For hollow points-- where we lack CTD data, we will use interpolated MLD (filled contour) to calculate the NCP. For this reason, how MLD is estimated and interpolated can make or break our restults. #
Top # In[21]: interpolate(df=inOrg,tracer='Nitrite',MLD=True) # ## Interannual MLD # The interpolate() function can be used for other MLD estimation methods, as well as for interannual averages. #
Top # In[30]: interpolate_average(mld_df,'clustered_MLD') # ## Calculating Net Community Production (NCP) # With an MLD, we can calculate how much nutrient is removed from the water column by integrating the combined NO2+NO3 from the surface down to the MLD. To account for the resolution of the MLD, we can interpolate the NO2+NO3 concentration to the desired MLD. #
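# Before get_NCP() and compile_NCP() below, here is a minimal sketch of that integral for a single station. It is a sketch only: it assumes numpy arrays depth (m, positive down) and nox (NO2+NO3 in umol/L, i.e. mmol/m^3) sorted from the surface downward, plus an MLD in meters. #

def nutrient_deficit(depth, nox, mld):
    nox_at_mld = np.interp(mld, depth, nox)     # concentration interpolated to the MLD
    keep = depth <= mld
    d = np.append(depth[keep], mld)             # profile truncated at the MLD
    n = np.append(nox[keep], nox_at_mld)
    # Deficit = (MLD * concentration at the MLD) minus the integrated profile,
    # i.e. what has been removed from the mixed layer, in mmol N / m^2.
    return mld*nox_at_mld - simps(n, d)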
Top # In[31]: # Files ncpOutfileNOX = os.path.join(dsPath+'NCP.json') ncpOutfileSiO4 = os.path.join(dsPath+'NCP_SiO4.json') ncpOutfilePO4 = os.path.join(dsPath+'NCP_PO4.json') # In[32]: # seach interpolated MLD dictionary--parameterized in compile_NCP(), for values closest to missing station. def station_MLD(stationID,zi,df,xi_coords,yi_coords): mld = zi[xi_coords[round(float(df.loc[df.stationID==stationID].lon.values[0]),1)], yi_coords[round(float(df.loc[df.stationID==stationID].lat.values[0]),1)]] return(mld.mean()) # In[33]: # interpolated def get_NCP(df,station,tracer,mld): depSlice = df.loc[df['stationID']==station] if not(depSlice.empty): mld_tracer = np.interp(mld,depSlice['depth'], depSlice[tracer]) # slice the profile after using the profile beyong the MLD to interpolate. depSlice = depSlice.loc[depSlice['depth']<=mld] nuit_profile = np.append(depSlice[tracer].values,mld_tracer) mld_depth = np.append(depSlice['depth'].values,mld) return((mld*mld_tracer) - simps(nuit_profile,mld_depth)) # integrate from depth to surface using profile. else: return None # In[34]: ncpOutfile = os.path.join(dsPath+'NCP.json') def compile_NCP(method='t_sig_down',tracer='Nitrite',outfile=ncpOutfile,warning=False): years = [2003,2004,2005,2006,2007,2008,2009,2011,2012,2013,2014] NCP_list = [] for year in years: if(warning==True): print("***",year,"***") # Querry the databases and clean up querryMLD = mld_df.loc[(mld_df[method]!=skip) & (mld_df[method].notnull()) & (mld_df['stationID'].isin(outOfGrid)==False)& (mld_df['year']==year)] querryInOrg = inOrg.loc[(inOrg[tracer]!=skip) & (inOrg[tracer].notnull()) & (inOrg['stationID'].isin(outOfGrid)==False)& (inOrg['year']==year)] # Grid and interpolate MLD. x = np.round(querryMLD.lon.values,4) y = np.round(querryMLD.lat.values,4) z = querryMLD[method].values # If it's not empty let's interpolate! if(len(x>4)): # define grid. xi = np.linspace(x.min(),x.max(),360) yi = np.linspace(y.min(),y.max(),360) # grid the data. zi = abs(griddata((x, y), z, (xi[None,:], yi[:,None]), method='cubic')) # document the linspace coord pairs xi_coords = {round(float(value),1): index for index, value in enumerate(xi)} yi_coords = {round(float(value),1): index for index, value in enumerate(yi)} # Go through each station measurement of inorganic nuitrients for station in pd.unique(querryInOrg['stationID'].values.ravel()): # check if the station and year exist in the MLD data mld_S = querryMLD.loc[querryMLD['stationID']==station] # if not, check interp... if(mld_S.empty): try: if not(pd.isnull(station_MLD(station,zi,querryInOrg,xi_coords,yi_coords))): NCP_list.append({'year': year, 'stationID': station, 'MLD' : station_MLD(station,zi,querryInOrg,xi_coords,yi_coords), 'interp':'True', 'Nuit_Deficiency': get_NCP(querryInOrg,station,tracer,station_MLD(station,zi,querryInOrg,xi_coords,yi_coords))}) except (KeyError,IndexError) as e: if(warning==True): print(station,"index not found") # TODO: reference annual value for index error?? reference nearest enighbor for key error? else: NCP_list.append({'year': year, 'stationID': station, 'MLD': mld_S[method].mean(), 'interp': 'False', 'Nuit_Deficiency': get_NCP(querryInOrg,station,tracer,np.nanmean(mld_S[method]))}) with open(outfile, 'w') as file: json.dump(NCP_list, file, indent=4) compile_NCP(method='clustered_MLD',tracer='Nitrite',outfile=ncpOutfile) # read the json file into a pandas DF, read as text to avoid nans. Chained solution to this Stackoverflow answer. 
# In[35]: NOX_df = pd.DataFrame(json.loads(open(ncpOutfile,'r').read())) NOX_df = pd.merge(left=NOX_df,right=newGrid[['stationID','lat','lon']],on='stationID',how='inner') NOX_df.head() # # Comparing with other nutrients # Are these numbers consistent with the Redfield Ratio?
C:N:P = 106:16:1 and for Diatoms C:Si:N:P = 106:15:16:1 #
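# For reference, a minimal worked example of the Redfield conversion that interpolate() applies to the deficits (the deficit value here is made up): #

n_deficit = 400.0                           # assumed nitrogen deficit, mmol N / m^2
ncp_carbon = n_deficit*(106./16.)/1000      # C:N = 106:16, mmol -> mol
print(round(ncp_carbon, 2), 'mol C / m^2')  # -> 2.65 mol C / m^2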
Top # In[36]: compile_NCP(method='clustered_MLD',tracer='SiO4',outfile=ncpOutfileSiO4) compile_NCP(method='clustered_MLD',tracer='PO4',outfile=ncpOutfilePO4) SiO4_df = pd.DataFrame(json.loads(open(ncpOutfileSiO4,'r').read())) PO4_df = pd.DataFrame(json.loads(open(ncpOutfilePO4,'r').read())) # In[37]: N_P = NOX_df['Nuit_Deficiency']/PO4_df['Nuit_Deficiency'].loc[PO4_df['Nuit_Deficiency']>.1] N_P = N_P.replace([np.inf, -np.inf,0], np.nan).dropna() N_P = N_P.where((N_P<100)&(N_P>-30)) N_P.plot(kind='kde',color='k', alpha=0.5, xlim=(-30,100),title='Density Distribution of N:P') print("N:P mean",round(N_P.mean(),4),"Vs",16.00,"Redfield Ratio") # In[38]: N_Si = NOX_df['Nuit_Deficiency']/SiO4_df['Nuit_Deficiency'] N_Si = N_Si.replace([np.inf, -np.inf,0], np.nan).dropna() N_Si.plot(kind='kde',color='k', alpha=0.5, xlim=(-10,20),title='Density Distribution of N:Si') print("N:Si mean",round(N_Si.mean(),4),"Vs",15/16,"Redfield Ratio") # Because the Redfield Ratios are consistent, we can apply the C:N ratio (106:16) of Marine Biomass to calculate how much carbon was removed from the water column. # In[39]: interpolate(df=NOX_df,tracer='Nuit_Deficiency') # In[40]: interpolate_average(NOX_df,'Nuit_Deficiency') # # Discussion # Pockets of extreme productivity are spaced sporadically (1-3x) between otherwise non-productive waters along the wAP. An inverse pattern is faint in surface interpolations of NO2 + NO3. However, the regions of high net community production do not mesh with those of deplete surface NO2 + NO3, suggesting NCP in the wAP is driven by other (physical, biological or geographic) factors, rather than nutrient stocks alone. For this reason we turn to the physical method(s) used during Mixed Layer Depth (MLD) estimation and interpolation. # # Estimation algorithms can improve by accounting for for Sea ice thermodynamics and water column structure. Starting from physics, we get closer to parsing out how the wAP's ecosystem and carbon sink will change as global temperatures rise. Unfortunately missing CTD values leave holes in the physics where interpolation algorithims to fall short at boundary stations (lines 000 and 600). To compensate for this, interannualy averaged MLD could be used as a baseline, and other high resolution datasets such as Argo and the World Ocean atlas could be used to cross reference, validate, and interpolate these gaps. # # Physics aside, Redfield Ratios hold up remarkably well-- with SiO4 suggesting January community structures are dominated by Diatoms. # # Datasets ouside of CTD and Inorganic nutrients have yet to be incorporated into calculating NCP. Triple Oxygen Isotopes, and inhert Argon would be interesting avenues to explore. Freshwater discharge is also another feature that may help explain mixed layer dynamics as well as potential Iron fertilization. #
Top # In[41]: # This is an extra function that I decided to leave in for curious GEOSPATIALLY-inclined folks! # Map-based interpolate function, not as pretty as interpolate... but better context def interpolate_map(df=inOrg,tracer = 'Nitrite',MLD = False): # Querry the database and clean up tracer_df = df.loc[(df[tracer]!=skip) & (df[tracer].notnull()) & (df['stationID'].isin(outOfGrid)==False)] # choose a title and see if the tracer analysis depth-dependent. if(tracer=='Nitrite'): title = 'Surface NO$_{3}$+NO$_{2}$' tracer_df = tracer_df[tracer_df['depth']<=surfDepth] elif(tracer=='PO4'): title = 'Surface PO$_{4}$' tracer_df = tracer_df[tracer_df['depth']<=surfDepth] elif(tracer=='SiO4'): title = 'Surface SiO$_{4}$' tracer_df = tracer_df[tracer_df['depth']<=surfDepth] else: title = 'Mixed Layer Depth' tracer_df = tracer_df.loc[(tracer_df[tracer]<200) & (tracer_df[tracer]>0)] # Interpolate and plot for each year for year in pd.unique(tracer_df.year.values.ravel()): map = Basemap(width=1000000,height=640000, resolution='l',projection='stere',\ lat_ts=50,lat_0=-66.40,lon_0=-69.05) map.drawparallels(np.arange(-90.,90.,1.),labels=[False,False,False,True], color='w',dashes=[100,.0001],zorder=0) map.drawmeridians(np.arange(-180.,181.,1.),labels=[True,False,False,False], color='w',dashes=[100,.0001],zorder=0) map.drawcoastlines() map.fillcontinents() map.drawmapboundary() querry = tracer_df[tracer_df['year']==year] # Define the plotable elements x = querry.lon.values y = querry.lat.values z = querry[tracer].values labels = querry.stationID xM,yM = map(x, y) # If it's not empty let's interpolate! if(len(x)>10): # plot data points. map.scatter(xM,yM,facecolors='none', edgecolors='k',s=26, zorder=2) gridBounds = ['000.220','100.220','200.220','300.220','400.220','500.220','600.220'] gridLabels = newGrid[newGrid['stationID'].isin(gridBounds)] x3 = gridLabels.lon.values y3 = gridLabels.lat.values xS, yS = map(x3, y3) labels = gridLabels.gridLine.values.astype(int) for label, xpt, ypt in zip(labels, xS, yS): plt.text(xpt-60000, ypt+12000, label,size=14) if(MLD==True): querr2 = mld_df[mld_df['year']==year] x1 = querr2.lon.values y1 = querr2.lat.values xM1,yM1 = map(x1,y1) map.scatter(xM1,yM1, color='orange',s=26,zorder = 1) else: # define grid. xi = np.linspace(xM.min(),xM.max(),300) yi = np.linspace(yM.min(),yM.max(),300) # grid the data. zi = abs(griddata((xM, yM), z, (xi[None,:], yi[:,None]), method='cubic')) # contour the gridded data. plt.contourf(xi,yi,zi,12,cmap=cMap) plt.colorbar() # draw colorbar plt.title('Interpolated '+title+' '+str(np.asarray(querry.year)[0])+' (%d points)' % len(x)) plt.show()