The original vendor code provided to OOI to calculate the pCO2 concentration from the Sunburst SAMI-II pCO2 instrument contained an error which resulted in the calculation of erroneous values. This error was identified in 2017 by cross-comparing in-situ samples collected by Cabled Array personnel with measurements from a SAMI-II pCO2 sensor during the Oceans '17 Cabled Array cruise season. The samples were collected during a CTD cast located some 250 meters to the east of the Cabled Array Axial Base Shallow Profiler system while it was actively profiling.
The code below highlights the discrepancy between the values derived using the original code, using corrected code and the discrete samples.
# modules required in the processing below
import datetime
import json
import netrc
import re
import requests
import time
import numexpr as ne
import numpy as np
import pandas as pd
import xarray as xr
from bokeh.io import output_notebook, show
from bokeh.models import Range1d
from bokeh.plotting import figure
from bokeh.palettes import Category10 as palette
from bs4 import BeautifulSoup
import warnings
warnings.filterwarnings('ignore')
Both the original and corrected code can be found on Github in the ion-functions repository using the commit history to view the changes.
Calibration data for the sensor we are going to be looking at from the Cabled Array Axial Base Shallow Profiler (Serial Number C0123) can be found in the OOI Asset Management repository on Github.
The data from the discrete sampling can be found in the OOI Document repository on Alfresco. Two files are used to identify the samples of interest from the CTD cast and the processed pCO2 values. We are after the data from Cast 05 (CTD-05).
# Corrected code copied here from the Github repository (see links above)
def pco2_calc_pco2(light, therm, ea434, eb434, ea620, eb620,
calt, cala, calb, calc, a434blank, a620blank):
"""
Description:
OOI Level 1 Partial Pressure of CO2 (pCO2) in seawater core data
product, which is calculated from the Sunburst SAMI-II CO2 instrument
(PCO2W).
Implemented by:
20??-??-??: J. Newton (Sunburst Sensors, LLC). Original Matlab code.
2013-04-20: Christopher Wingard. Initial python code.
2014-02-19: Christopher Wingard. Updated comments.
2014-03-19: Christopher Wingard. Optimized.
2018-03-04: Christopher Wingard. Updated to correctly calculate pCO2 using
newly formulated code provided by the vendor. Original vendor code
incorrectly calculated the blank correction. Applies additional
corrections to calculations to avoid errors thrown when running a
blank measurement.
Usage:
pco2 = pco2_pco2wat(light, therm, ea434, eb434, ea620, eb620,
calt, cala, calb, calc, a434blank, a620blank)
where
pco2 = measured pco2 in seawater (PCO2WAT_L1) [uatm]
light = array of light measurements
therm = PCO2W thermistor temperature (CO2THRM_L0) [counts]
ea434 = Reagent specific calibration coefficient
eb434 = Reagent specific calibration coefficient
ea620 = Reagent specific calibration coefficient
eb620 = Reagent specific calibration coefficient
calt = Instrument specific calibration coefficient for temperature
cala = Instrument specific calibration coefficient for the pCO2 measurements
calb = Instrument specific calibration coefficient for the pCO2 measurements
calc = Instrument specific calibration coefficient for the pCO2 measurements
a434blank = Blank measurements at 434 nm (CO2ABS1_L0) [counts]
a620blank = Blank measurements to 620 nm (CO2ABS2_L0) [counts]
References:
OOI (2012). Data Product Specification for Partial Pressure of CO2 in
Seawater. Document Control Number 1341-00510.
https://alfresco.oceanobservatories.org/ (See: Company Home >>
OOI >> Controlled >> 1000 System Level >>
1341-00490_Data_Product_SPEC_PCO2WAT_OOI.pdf)
"""
# set constants -- original vendor formulation, reset below
# ea434 = ea434 - 29.3 * calt
# eb620 = eb620 - 70.6 * calt
# e1 = ea620 / ea434
# e2 = eb620 / ea434
# e3 = eb434 / ea434
# set the e constants, values provided by Sunburst
e1 = 0.0043
e2 = 2.136
e3 = 0.2105
# Extract variables from light array
Ratio434 = light[:, 6] # 434nm Ratio
Ratio620 = light[:, 7] # 620nm Ratio
# Convert thermistor counts to degrees C
therm = pco2_thermistor(therm)
# correct the absorbance ratios using the blanks
AR434 = (Ratio434 / a434blank)
AR620 = (Ratio620 / a620blank)
# map out blank measurements and spoof the ratios to avoid throwing an error
m = np.where(AR434 == AR620)[0]
AR434[m] = 0.99999
AR620[m] = 0.99999
# Calculate the final absorbance ratio
A434 = -1 * np.log10(AR434) # 434 absorbance
A620 = -1 * np.log10(AR620) # 620 absorbance
Ratio = A620 / A434 # Absorbance ratio
# calculate pCO2
V1 = Ratio - e1
V2 = e2 - e3 * Ratio
RCO21 = -1 * np.log10(V1 / V2)
RCO22 = (therm - calt) * 0.008 + RCO21
Tcoeff = 0.0075778 - 0.0012389 * RCO22 - 0.00048757 * RCO22**2
Tcor_RCO2 = RCO21 + Tcoeff * (therm - calt)
pco2 = 10.**((-1. * calb + (calb**2 - (4. * cala * (calc - Tcor_RCO2)))**0.5) / (2. * cala))
pco2[m] = fill_value # reset the blanks captured earlier to a fill value
return np.real(pco2)
# used by the above function
def pco2_thermistor(traw):
"""
Description:
Convert the thermistor data from counts to degrees Centigrade from the
pCO2 instrument.
Implemented by:
2013-04-20: Christopher Wingard. Initial code.
2014-02-19: Christopher Wingard. Updated comments.
Usage:
therm = pco2_thermistor(traw)
where
therm = converted thermistor temperature [degC]
traw = raw thermistor temperature (CO2THRM_L0) [counts]
References:
OOI (2012). Data Product Specification for Partial Pressure of CO2 in
Seawater. Document Control Number 1341-00510.
https://alfresco.oceanobservatories.org/ (See: Company Home >>
OOI >> Controlled >> 1000 System Level >>
1341-00490_Data_Product_SPEC_PCO2WAT_OOI.pdf)
"""
# convert raw thermistor readings from counts to degrees Centigrade
Rt = ne.evaluate('log((traw / (4096. - traw)) * 17400.)')
InvT = ne.evaluate('0.0010183 + 0.000241 * Rt + 0.00000015 * Rt**3')
therm = ne.evaluate('(1 / InvT) - 273.15')
return therm
# credentials used to access the data from the production and development servers
netrc = netrc.netrc()
ooinet = netrc.authenticators('ooinet.oceanobservatories.org')
devnet = netrc.authenticators('ooinet-dev-01.oceanobservatories.org')
# sources of data and the reference designator information needed to specify the dataset of interest
OOINET_API_BASE_URL = 'https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv/'
DEVNET_API_BASE_URL = 'https://ooinet-dev-01.oceanobservatories.org/api/m2m/12576/sensor/inv/'
DATA_DESIGNATOR = ('RS03AXPS/' + # Site designator
'SF03A/' + # Node designator
'4F-PCO2WA301/' + # Instrument designator
'streamed/' + # Data delivery method
'pco2w_a_sami_data_record' + '?' + # Data stream name
'beginDT=2017-08-08T00:00:00.000Z&' + # Beginning time range
'endDT=2017-08-08T23:59:59.999Z&' + # Ending time range
'format=application/netcdf') # Specifying we want NetCDF data files
# Function to create a list of the data files of interest
def list_files(url, tag=''):
page = requests.get(url).text
soup = BeautifulSoup(page, 'html.parser')
pattern = re.compile(str(tag))
return [node.get('href') for node in soup.find_all('a', text=pattern)]
# Function to download a NetCDF files, convert to a dataset, clean up and create a final dataset.
def process_file(baseurl, file):
# download and convert the data
url = re.sub('catalog.html\?dataset=', baseurl, file)
ds = xr.open_dataset(url).load() # download the data first, before converting
ds = ds.swap_dims({'obs': 'time'}) # switch to using time as the main dimension
ds = ds.drop(['obs', 'id', 'driver_timestamp', 'ingestion_timestamp', 'internal_timestamp', 'port_timestamp',
'preferred_timestamp', 'provenance', 'checksum', 'unique_id',
'record_time', 'record_type', 'record_length', 'absorbance_ratio_434_qc_executed',
'absorbance_ratio_434_qc_results', 'absorbance_ratio_620_qc_executed',
'absorbance_ratio_620_qc_results', 'pco2w_thermistor_temperature_qc_executed',
'pco2w_thermistor_temperature_qc_results', 'pco2_seawater_qc_executed',
'pco2_seawater_qc_results'
])
return ds
# Get the original, incorrectly computed data first from the production server
r = requests.get((OOINET_API_BASE_URL + DATA_DESIGNATOR), auth=(ooinet[0], ooinet[2]))
original = r.json()
check_complete = original['allURLs'][1] + '/status.txt' # When SOA is actually not that efficient...
for i in range(1000):
r = requests.get(check_complete)
if r.status_code == requests.codes.ok:
print('original completed')
break
else:
time.sleep(.5)
# get the correct data (uses the updated algorithm) from the development server
r = requests.get((DEVNET_API_BASE_URL + DATA_DESIGNATOR), auth=(devnet[0], devnet[2]))
correct = r.json()
check_complete = correct['allURLs'][1] + '/status.txt' # When SOA is actually not that efficient...
for i in range(1000):
r = requests.get(check_complete)
if r.status_code == requests.codes.ok:
print('corrected completed')
break
else:
time.sleep(.5)
original completed corrected completed
# Create a list of the original files requested above using a simple regex as tag to discriminate the files
files = list_files(original['allURLs'][0], '.*PCO2W.*data_record_2017.*\.nc$')
baseurl = 'https://opendap.oceanobservatories.org/thredds/dodsC/'
# Process the original data files and concatenate into a single dataframe
frames = [process_file(baseurl, f) for f in files]
original = xr.concat(frames, 'time')
# Create a list of the corrected files requested above using a simple regex as tag to discriminate the files
files = list_files(correct['allURLs'][0], '.*PCO2W.*data_record_2017.*\.nc$')
baseurl = 'https://opendap-test.oceanobservatories.org/thredds/dodsC/'
# Process the corrected data files and concatenate into a single dataframe
frames = [process_file(baseurl, f) for f in files]
correct = xr.concat(frames, 'time')
# Calibration coefficients and constants copied from Asset Management
cala = 0.0422
calb = 0.6761
calc = -1.579
calt = 4.6539
ea434 = 19706
ea620 = 34
eb434 = 3073
eb620 = 44327
fill_value = np.nan
# using the corrected code, compute the pCO2 concentration from the data downloaded from the production server
pco2 = pco2_calc_pco2(original.light_measurements, original.thermistor_raw, ea434, eb434, ea620,
eb620, calt, cala, calb, calc, original.pco2w_a_absorbance_blank_434,
original.pco2w_a_absorbance_blank_620)
# Load the discrete samples, copying from the spreadsheet linked above and stored into a local pandas dataframe. This was run
# once and now we are just using the stored object
#discrete = pd.read_clipboard()
#discrete.to_pickle('./discrete_samples.pkl')
discrete = pd.read_pickle('./discrete_samples.pkl')
discrete
sampleID | SampleT | SampleD | SampleS | AnalysisT | alk (µeq/kg) | TCO2 (µmol/kg) | alk (µeq/kg).1 | TCO2 (µmol/kg).1 | pco2_in situ (µatm) | co2aq (µmol/kg) | bicarb (µmol/kg) | co3 (µmol/kg) | pHt | omega-C | omega-A | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | CO2_001 | 7.941 | 140 | 33.765 | 22.6 | 2275.38 | 2177.688953 | 2275.38 | 2177.69 | 612.25 | 29.00 | 2065.17 | 83.52 | 7.867 | 1.891 | 1.226 |
1 | CO2_002 | 15.949 | 20 | 32.596 | 22.4 | 2204.23 | 2000.802326 | 2204.23 | 2000.80 | 390.75 | 14.42 | 1840.36 | 146.03 | 8.043 | 3.430 | 2.252 |
2 | CO2_007 | 7.716 | 160 | 33.846 | 22.5 | 2292.33 | 2202.568092 | 2292.33 | 2202.57 | 651.56 | 31.08 | 2091.73 | 79.76 | 7.843 | 1.797 | 1.165 |
3 | CO2_009 | 7.154 | 200 | 33.887 | 22.5 | 2291.60 | 2222.535652 | 2291.60 | 2222.54 | 747.06 | 36.33 | 2116.83 | 69.38 | 7.785 | 1.551 | 1.004 |
4 | CO2_010 | 9.088 | 60 | 32.625 | 22.5 | 2212.22 | 2046.430214 | 2212.22 | 2046.43 | 372.73 | 17.09 | 1908.36 | 120.98 | 8.057 | 2.810 | 1.823 |
5 | CO2_011 | 8.254 | 100 | 33.040 | 22.5 | 2246.98 | 2126.175067 | 2246.98 | 2126.18 | 505.74 | 23.80 | 2006.82 | 95.56 | 7.941 | 2.194 | 1.422 |
6 | CO2_013 | 17.164 | 10 | 32.614 | 22.5 | 2216.73 | 2009.669538 | 2216.73 | 2009.67 | 406.31 | 14.46 | 1846.10 | 149.11 | 8.031 | 3.511 | 2.310 |
7 | CO2_020 | 8.112 | 120 | 33.633 | 23.6 | 2277.55 | 2172.084032 | 2277.55 | 2172.08 | 581.33 | 27.39 | 2056.92 | 87.77 | 7.889 | 1.997 | 1.295 |
8 | CO2_023 | 8.307 | 80 | 32.707 | 23.8 | 2213.80 | 2061.702510 | 2213.80 | 2061.70 | 395.61 | 18.62 | 1930.45 | 112.63 | 8.032 | 2.604 | 1.687 |
9 | CO2_024 | 6.909 | 220 | 33.895 | 23.8 | 2309.39 | 2236.532146 | 2309.39 | 2236.53 | 724.96 | 35.55 | 2129.49 | 71.49 | 7.799 | 1.591 | 1.030 |
10 | DIC_075 | 10.553 | 40 | 32.604 | 24.0 | 2215.04 | 2013.442867 | 2215.04 | 2013.44 | 318.56 | 13.92 | 1856.32 | 143.21 | 8.119 | 3.340 | 2.173 |
11 | DIC_073 | 7.440 | 180 | 33.875 | 24.1 | 2286.99 | 2197.319393 | 2286.99 | 2197.32 | 642.71 | 30.95 | 2086.91 | 79.46 | 7.846 | 1.783 | 1.155 |
# Provide a simple plot of a days worth of data
output_notebook()
# make a list of our columns
colors = palette[3]
# make the figure,
p = figure(title="pCO2 Concentrations versus Temperature", width = 800, height = 800)
p.title.text_font_size = '16pt'
p.xaxis.axis_label = 'pCO2 (uatm)'
p.xaxis.axis_label_text_font_size = "12pt"
p.xaxis.major_label_text_font_size = "12pt"
p.x_range = Range1d(start=200, end=800)
p.yaxis.axis_label = 'Temperature [degC]'
p.yaxis.axis_label_text_font_size = "12pt"
p.yaxis.major_label_text_font_size = "12pt"
p.y_range = Range1d(start=6, end=18)
p.circle(original.pco2_seawater.values, original.pco2w_thermistor_temperature.values, size=10, color=colors[0], legend='Production Server')
p.circle(pco2.values, original.pco2w_thermistor_temperature.values, size=10, color=colors[1], legend='Locally Calculated')
p.circle(discrete.iloc[:, 9].values, discrete['SampleT'].values, size=10, color=colors[2], legend='Discrete Samples')
p.toolbar_location = 'right'
show(p)
# Compare the pCO2 concentration values correctly computed locally with those downloaded from the development server which
# utilizes the updated and corrected code.
p = figure(title="Local versus Development Server pCO2 Concentration", width = 800, height = 800)
p.title.text_font_size = '16pt'
p.xaxis.axis_label = 'Local pCO2 (uatm)'
p.xaxis.axis_label_text_font_size = "12pt"
p.xaxis.major_label_text_font_size = "12pt"
p.x_range = Range1d(start=200, end=800)
p.yaxis.axis_label = 'Development pCO2 (uatm)'
p.yaxis.axis_label_text_font_size = "12pt"
p.yaxis.major_label_text_font_size = "12pt"
p.y_range = Range1d(start=200, end=800)
p.circle(pco2.values, correct.pco2_seawater.values, size=10, color=colors[1])
p.line(np.linspace(200, 800), np.linspace(200, 800), color='black')
p.toolbar_location = 'right'
show(p)
# make the figure,
p = figure(title="pCO2 Concentrations versus Temperature", width = 800, height = 800)
p.title.text_font_size = '16pt'
p.xaxis.axis_label = 'pCO2 (uatm)'
p.xaxis.axis_label_text_font_size = "12pt"
p.xaxis.major_label_text_font_size = "12pt"
p.x_range = Range1d(start=200, end=800)
p.yaxis.axis_label = 'Temperature [degC]'
p.yaxis.axis_label_text_font_size = "12pt"
p.yaxis.major_label_text_font_size = "12pt"
p.y_range = Range1d(start=6, end=18)
p.circle(correct.pco2_seawater.values, correct.pco2w_thermistor_temperature.values, size=10, color=colors[1], legend='Development Server')
p.circle(discrete.iloc[:, 9].values, discrete['SampleT'].values, size=10, color=colors[2], legend='Discrete Samples')
p.toolbar_location = 'right'
show(p)