Notebook

In [1]:

import re
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import netCDF4 as nc
import seaborn as sns
import xarray as xr
from salishsea_tools import geo_tools, geo_tools, tidetools
import functools
from IPython.display import clear_output
import datetime


%matplotlib inline
plt.rcParams['image.cmap'] = 'jet'

In [28]:

station_lon_lat = pd.read_pickle("/ocean/jpetrie/MEOPAR/analysis-james/station_location.pkl")

In [3]:

# Grab plankton data from excel file
plank_df_list = []
for i in range(1,4):
    df = pd.read_excel('/ocean/shared/SoG/PHYTO/Profiles_by_station.xls',i)
    top_index = pd.Index(df["Vol"]).get_loc("Species")
    bottom_index = pd.Index(df["Vol"]).get_loc("Carbon (ng/l)")
    df = pd.concat([df.iloc[:top_index], df.iloc[(bottom_index + 1):]])
    df = df.transpose()
    df.columns = df.iloc[0]
    df.drop(df.index[0], inplace=True)
    df.reset_index(inplace = True)
    plank_df_list.append(df)

In [4]:

# convert plankton dataframe data types

all_plank = pd.concat(plank_df_list,  axis=0, ignore_index=True)

all_plank.loc[all_plank["Date"] == "14/15-Apr-04", "Date"] = "2004-04-14 00:00:00"

all_plank["STATION"] = "S" + all_plank["Site"].astype(str).str.strip()
all_plank["DATE"] = pd.to_datetime(all_plank["Date"], format='%Y-%m-%d', errors = "coerce")
all_plank["DATE_STR"] = [d.strftime('%Y-%m-%d') if not pd.isnull(d) else np.nan for d in all_plank['DATE']] 
all_plank["MONTH"] = all_plank["DATE"].apply(lambda x: x.month)
all_plank["DAY_OF_MONTH"] = all_plank["DATE"].apply(lambda x: x.day)
all_plank["DEPTH"] = pd.to_numeric(all_plank["Depth (m)"], errors = "coerce")
all_plank["DIATOMS(ngC/L)"] = pd.to_numeric(all_plank["Total diatoms"], errors = "coerce")

# Convert from nanogram carbon per litre to millimol Nitrogen per metre^3
# 10^-9 for nanogram Carbon -> gram Carbon
# 0.083259 for gram carbon -> mol Carbon
# 16/106 for mol carbon -> mol nitrogen
# 10^3 for mol nitrogen -> mmol nitrogen
# 1/(1/10^3) for 1/L -> 1/m^3
all_plank["DIATOMS(mmolN/m3)"] = all_plank["DIATOMS(ngC/L)"]*(10**-9)*(0.083259)*(16.0/106)*(10**3)*(10**3)

In [12]:

# Choose which values to add to nowcast dataframe

tracers = ["PHY2", "PHY", "MICZ", "MYRI"]

plot_months = ["mar", "apr"]

plot_hours = np.array([0, 12, 18])

max_depth = 20
result_depths = xr.open_dataset('/data/nsoontie/MEOPAR/NEMO-forcing/grid/deptht_428m.nc').deptht.values
depth_indices = np.where(result_depths < max_depth)[0]

model_points = station_lon_lat["MODEL_POINT"]
model_js = [x[0] for x in model_points] 
model_is = [x[1] for x in model_points] 

stations = station_lon_lat["STATION"]

In [13]:

# Iterate through nowcast green results, grabbing certain tracers, locations, and dates/times
# Create pandas dataframe and save result

load_new_dataset = False

if load_new_dataset:
    nowcast_dir = "/results/SalishSea/nowcast-green/" #"/data/jpetrie/MEOPAR/SalishSea/results/nowcast_results/"

    month_num = {"jan": "01","feb": "02", "mar": "03", "apr": "04", "may": "05", "jun": "06", "jul": "07", "aug": "08", "sep": "09", "oct": "10", "nov": "11", "dec": "12" }

    mixed_format_dates = os.listdir(nowcast_dir)
    number_format_dates = ["20" + x[5:7] + month_num[x[2:5]] + x[0:2] for x in mixed_format_dates]
    sorted_dirs = [mixed_format_date for (number_format_date, mixed_format_date) in sorted(zip(number_format_dates,mixed_format_dates))]
    
    
    dataframe_list = []
    num_files = 0
    start_time = datetime.datetime.now()
    for subdir in sorted_dirs:
        if os.path.isdir(nowcast_dir + '/' + subdir) and re.match("[0-9]{2}[a-z]{3}[0-9]{2}", subdir):
            month_str = subdir[2:5]
            date_str = "20" + subdir[5:7] + month_num[month_str] + subdir[0:2]
            tracer_file = "SalishSea_1h_" + date_str + "_" + date_str + "_ptrc_T.nc"
            tracer_path = nowcast_dir + "/" + subdir + "/" + tracer_file
            if os.path.isfile(tracer_path) and month_str in plot_months:
                grid_t = xr.open_dataset(tracer_path)
                result_hours = pd.DatetimeIndex(grid_t.time_centered.values).hour
                time_indices = np.where([(x in plot_hours) for x in result_hours])
                
                J, T, Z = np.meshgrid(model_js,time_indices,depth_indices, indexing = 'ij')
                I, T, Z = np.meshgrid(model_is,time_indices,depth_indices, indexing = 'ij')
                
                tracer_dataframes = []
                for t in tracers:
                    station_slice = grid_t[t].values[T,Z,J,I]
                    slice_xarray = xr.DataArray(station_slice,
                                     [stations,result_hours[time_indices], result_depths[depth_indices]],
                                     ["STATION", "HOUR", "DEPTH"], 
                                     t)
                    slice_dataframe = slice_xarray.to_dataframe()
                    slice_dataframe.reset_index(inplace = True)
                    tracer_dataframes.append(slice_dataframe)
                merged_tracers = functools.reduce(lambda left,right: pd.merge(left,right,on=["STATION", "HOUR", "DEPTH"]), tracer_dataframes)
                merged_tracers["DATE"] =  pd.to_datetime(date_str, infer_datetime_format=True)
                merged_tracers["MONTH"] = int(month_num[month_str])
                dataframe_list.append(merged_tracers)

                num_files = num_files + 1
                run_time = datetime.datetime.now() - start_time
                clear_output()
                print("Files loaded:" + str(num_files))
                print("Date of most recent nowcast load: " + date_str)
                print("Time loading: ")
                print(run_time)
                print("\n\n\n")
                print(merged_tracers)

    nowcast_df = pd.concat(dataframe_list)    
    t = datetime.datetime.now()
    time_string = str(t.year) +"_"+ str(t.month) +"_"+ str(t.day) +"_"+ str(t.hour) +"_"+ str(t.minute)
    file_path = "/ocean/jpetrie/MEOPAR/analysis-james/nowcast_green_subset/"+ time_string + ".pkl"
    nowcast_df.to_pickle(file_path)
    clear_output()
    print("Files loaded:" + str(num_files))
    print("Done, dataframe saved to: " + file_path)
else: 
    past_dataset_path = "/ocean/jpetrie/MEOPAR/analysis-james/nowcast_green_subset/2016_8_3_16_58.pkl"
    nowcast_df = pd.read_pickle(past_dataset_path)

Files loaded:61
Date of most recent nowcast load: 20160430
Time loading: 
0:20:05.602110




     STATION  HOUR      DEPTH      PHY2       PHY      MICZ      MYRI  \
0       S2-3     0   0.500000  0.858978  0.579502  2.175034  1.048522   
1       S2-3     0   1.500003  0.858018  0.578866  2.173194  1.047739   
2       S2-3     0   2.500011  0.857669  0.578933  2.173153  1.047796   
3       S2-3     0   3.500031  0.857058  0.579023  2.173097  1.047871   
4       S2-3     0   4.500071  0.856111  0.579128  2.173011  1.047967   
5       S2-3     0   5.500151  0.854586  0.579253  2.172877  1.048101   
6       S2-3     0   6.500310  0.851331  0.579432  2.172599  1.048349   
7       S2-3     0   7.500623  0.843373  0.579738  2.171753  1.048893   
8       S2-3     0   8.501236  0.837202  0.579989  2.170861  1.049341   
9       S2-3     0   9.502433  0.818826  0.580670  2.169164  1.050585   
10      S2-3     0  10.504766  0.796035  0.583241  2.171999  1.053328   
11      S2-3     0  11.509312  0.793402  0.583451  2.170536  1.053733   
12      S2-3     0  12.518167  0.660621  0.594791  2.198746  1.077647   
13      S2-3     0  13.535412  0.514204  0.637423  2.613321  1.111456   
14      S2-3     0  14.568982  1.563274  0.659492  2.960599  0.963000   
15      S2-3     0  15.634288  2.546103  0.654407  2.994946  0.732997   
16      S2-3     0  16.761173  2.464055  0.606696  2.845185  0.579576   
17      S2-3     0  18.007135  1.779486  0.551034  2.513018  0.466787   
18      S2-3     0  19.481785  1.096923  0.507210  2.228270  0.356358   
19      S2-3    12   0.500000  0.863263  0.508922  2.282489  0.946986   
20      S2-3    12   1.500003  0.861421  0.507752  2.278562  0.945231   
21      S2-3    12   2.500011  0.861118  0.507843  2.278815  0.945381   
22      S2-3    12   3.500031  0.860060  0.507900  2.278852  0.945483   
23      S2-3    12   4.500071  0.857693  0.507916  2.278610  0.945527   
24      S2-3    12   5.500151  0.848922  0.507817  2.277508  0.945437   
25      S2-3    12   6.500310  0.833308  0.507590  2.275604  0.945123   
26      S2-3    12   7.500623  0.827363  0.507510  2.274766  0.944996   
27      S2-3    12   8.501236  0.804738  0.507273  2.272167  0.943982   
28      S2-3    12   9.502433  0.766395  0.506991  2.268008  0.939979   
29      S2-3    12  10.504766  0.776628  0.509224  2.281454  0.922135   
...      ...   ...        ...       ...       ...       ...       ...   
1851    S6-2    12   8.501236  5.797703  0.738893  3.335549  0.741001   
1852    S6-2    12   9.502433  5.725281  0.702544  3.099444  0.615971   
1853    S6-2    12  10.504766  5.041942  0.643520  2.773491  0.499204   
1854    S6-2    12  11.509312  4.201956  0.596387  2.491383  0.424281   
1855    S6-2    12  12.518167  3.339507  0.560245  2.261248  0.376743   
1856    S6-2    12  13.535412  2.565227  0.513280  1.958782  0.292647   
1857    S6-2    12  14.568982  1.876678  0.477837  1.740677  0.247074   
1858    S6-2    12  15.634288  1.411284  0.449378  1.555480  0.206170   
1859    S6-2    12  16.761173  0.976741  0.416372  1.363059  0.166047   
1860    S6-2    12  18.007135  0.793434  0.393321  1.258644  0.146129   
1861    S6-2    12  19.481785  0.662983  0.370913  1.176061  0.129174   
1862    S6-2    18   0.500000  1.733943  0.624194  2.951987  1.002436   
1863    S6-2    18   1.500003  1.748014  0.626859  2.958006  1.004712   
1864    S6-2    18   2.500011  1.778811  0.630730  2.969169  1.006580   
1865    S6-2    18   3.500031  1.966559  0.657020  3.022567  1.019426   
1866    S6-2    18   4.500071  2.154295  0.668221  3.029219  1.016018   
1867    S6-2    18   5.500151  2.476735  0.682425  3.080170  1.009997   
1868    S6-2    18   6.500310  2.976312  0.701726  3.203726  0.985523   
1869    S6-2    18   7.500623  4.012282  0.729258  3.275351  0.903593   
1870    S6-2    18   8.501236  5.182644  0.749426  3.263317  0.808376   
1871    S6-2    18   9.502433  5.614570  0.762792  3.325359  0.796483   
1872    S6-2    18  10.504766  6.028626  0.759305  3.255300  0.730866   
1873    S6-2    18  11.509312  5.367168  0.688416  2.818776  0.565256   
1874    S6-2    18  12.518167  4.127279  0.603716  2.337869  0.410067   
1875    S6-2    18  13.535412  2.963651  0.539863  1.967383  0.304817   
1876    S6-2    18  14.568982  2.121258  0.498878  1.724933  0.244989   
1877    S6-2    18  15.634288  1.806220  0.485268  1.638287  0.225276   
1878    S6-2    18  16.761173  1.279961  0.458819  1.462881  0.189314   
1879    S6-2    18  18.007135  0.868384  0.414655  1.289099  0.152190   
1880    S6-2    18  19.481785  0.579949  0.344307  1.117548  0.115862   

           DATE  MONTH  
0    2016-04-30      4  
1    2016-04-30      4  
2    2016-04-30      4  
3    2016-04-30      4  
4    2016-04-30      4  
5    2016-04-30      4  
6    2016-04-30      4  
7    2016-04-30      4  
8    2016-04-30      4  
9    2016-04-30      4  
10   2016-04-30      4  
11   2016-04-30      4  
12   2016-04-30      4  
13   2016-04-30      4  
14   2016-04-30      4  
15   2016-04-30      4  
16   2016-04-30      4  
17   2016-04-30      4  
18   2016-04-30      4  
19   2016-04-30      4  
20   2016-04-30      4  
21   2016-04-30      4  
22   2016-04-30      4  
23   2016-04-30      4  
24   2016-04-30      4  
25   2016-04-30      4  
26   2016-04-30      4  
27   2016-04-30      4  
28   2016-04-30      4  
29   2016-04-30      4  
...         ...    ...  
1851 2016-04-30      4  
1852 2016-04-30      4  
1853 2016-04-30      4  
1854 2016-04-30      4  
1855 2016-04-30      4  
1856 2016-04-30      4  
1857 2016-04-30      4  
1858 2016-04-30      4  
1859 2016-04-30      4  
1860 2016-04-30      4  
1861 2016-04-30      4  
1862 2016-04-30      4  
1863 2016-04-30      4  
1864 2016-04-30      4  
1865 2016-04-30      4  
1866 2016-04-30      4  
1867 2016-04-30      4  
1868 2016-04-30      4  
1869 2016-04-30      4  
1870 2016-04-30      4  
1871 2016-04-30      4  
1872 2016-04-30      4  
1873 2016-04-30      4  
1874 2016-04-30      4  
1875 2016-04-30      4  
1876 2016-04-30      4  
1877 2016-04-30      4  
1878 2016-04-30      4  
1879 2016-04-30      4  
1880 2016-04-30      4  

[1881 rows x 9 columns]

In [15]:

nowcast_df["DAY_OF_MONTH"] = nowcast_df["DATE"].apply(lambda x: x.day)

In [18]:

diatom_df = all_plank[["DATE", "DEPTH", "STATION", "MONTH", "DAY_OF_MONTH", "DIATOMS(mmolN/m3)"]]
diatom_df["DATA_TYPE"] = "Measured Diatoms"

def dateToString(date):
    try:
        date_string = date.strftime('%Y-%m-%d')
    except:
        date_string = "NULL"
    return(date_string)

nowcast_df["DATA_TYPE"] = "Simulated PHY2"
nowcast_df["DIATOMS(mmolN/m3)"] = nowcast_df["PHY2"]
combined = pd.concat([diatom_df, nowcast_df.query("HOUR == 12")], ignore_index=True)
combined["IDENTIFIER"] = combined["DATA_TYPE"] + ", Date = " + combined["DATE"].apply(dateToString)
#combined = combined.sort_values(["DATA_TYPE", "DAY_OF_MONTH", "IDENTIFIER"])

/home/jpetrie/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app

In [66]:

# S3 is the only station with multiple depth measurements on the same day for total diatoms
# March and April are the only months with significant diatom population measured at S3

combined_subset = combined.query("STATION == 'S3' & ((MONTH == 4) | (MONTH == 3) ) & DEPTH <= 20 & (DATA_TYPE == 'Measured Diatoms' | DAY_OF_MONTH%8 == 1)")

combined_subset = combined_subset.sort_values(["DEPTH","IDENTIFIER"])

sns.set(font_scale = 2)

fg = sns.FacetGrid(data = combined_subset, hue = "IDENTIFIER" , row = "MONTH" ,size =15)
fg.map(plt.plot, "DIATOMS(mmolN/m3)", "DEPTH").add_legend()
plt.gca().invert_yaxis()