import re
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import netCDF4 as nc
import seaborn as sns
import xarray as xr
from salishsea_tools import geo_tools, geo_tools, tidetools
import functools
from IPython.display import clear_output
import datetime
%matplotlib inline
plt.rcParams['image.cmap'] = 'jet'
station_lon_lat = pd.read_pickle("/ocean/jpetrie/MEOPAR/analysis-james/station_location.pkl")
# Grab plankton data from excel file
plank_df_list = []
for i in range(1,4):
df = pd.read_excel('/ocean/shared/SoG/PHYTO/Profiles_by_station.xls',i)
top_index = pd.Index(df["Vol"]).get_loc("Species")
bottom_index = pd.Index(df["Vol"]).get_loc("Carbon (ng/l)")
df = pd.concat([df.iloc[:top_index], df.iloc[(bottom_index + 1):]])
df = df.transpose()
df.columns = df.iloc[0]
df.drop(df.index[0], inplace=True)
df.reset_index(inplace = True)
plank_df_list.append(df)
# convert plankton dataframe data types
all_plank = pd.concat(plank_df_list, axis=0, ignore_index=True)
all_plank.loc[all_plank["Date"] == "14/15-Apr-04", "Date"] = "2004-04-14 00:00:00"
all_plank["STATION"] = "S" + all_plank["Site"].astype(str).str.strip()
all_plank["DATE"] = pd.to_datetime(all_plank["Date"], format='%Y-%m-%d', errors = "coerce")
all_plank["DATE_STR"] = [d.strftime('%Y-%m-%d') if not pd.isnull(d) else np.nan for d in all_plank['DATE']]
all_plank["MONTH"] = all_plank["DATE"].apply(lambda x: x.month)
all_plank["DAY_OF_MONTH"] = all_plank["DATE"].apply(lambda x: x.day)
all_plank["DEPTH"] = pd.to_numeric(all_plank["Depth (m)"], errors = "coerce")
all_plank["DIATOMS(ngC/L)"] = pd.to_numeric(all_plank["Total diatoms"], errors = "coerce")
# Convert from nanogram carbon per litre to millimol Nitrogen per metre^3
# 10^-9 for nanogram Carbon -> gram Carbon
# 0.083259 for gram carbon -> mol Carbon
# 16/106 for mol carbon -> mol nitrogen
# 10^3 for mol nitrogen -> mmol nitrogen
# 1/(1/10^3) for 1/L -> 1/m^3
all_plank["DIATOMS(mmolN/m3)"] = all_plank["DIATOMS(ngC/L)"]*(10**-9)*(0.083259)*(16.0/106)*(10**3)*(10**3)
# Choose which values to add to nowcast dataframe
tracers = ["PHY2", "PHY", "MICZ", "MYRI"]
plot_months = ["mar", "apr"]
plot_hours = np.array([0, 12, 18])
max_depth = 20
result_depths = xr.open_dataset('/data/nsoontie/MEOPAR/NEMO-forcing/grid/deptht_428m.nc').deptht.values
depth_indices = np.where(result_depths < max_depth)[0]
model_points = station_lon_lat["MODEL_POINT"]
model_js = [x[0] for x in model_points]
model_is = [x[1] for x in model_points]
stations = station_lon_lat["STATION"]
# Iterate through nowcast green results, grabbing certain tracers, locations, and dates/times
# Create pandas dataframe and save result
load_new_dataset = False
if load_new_dataset:
nowcast_dir = "/results/SalishSea/nowcast-green/" #"/data/jpetrie/MEOPAR/SalishSea/results/nowcast_results/"
month_num = {"jan": "01","feb": "02", "mar": "03", "apr": "04", "may": "05", "jun": "06", "jul": "07", "aug": "08", "sep": "09", "oct": "10", "nov": "11", "dec": "12" }
mixed_format_dates = os.listdir(nowcast_dir)
number_format_dates = ["20" + x[5:7] + month_num[x[2:5]] + x[0:2] for x in mixed_format_dates]
sorted_dirs = [mixed_format_date for (number_format_date, mixed_format_date) in sorted(zip(number_format_dates,mixed_format_dates))]
dataframe_list = []
num_files = 0
start_time = datetime.datetime.now()
for subdir in sorted_dirs:
if os.path.isdir(nowcast_dir + '/' + subdir) and re.match("[0-9]{2}[a-z]{3}[0-9]{2}", subdir):
month_str = subdir[2:5]
date_str = "20" + subdir[5:7] + month_num[month_str] + subdir[0:2]
tracer_file = "SalishSea_1h_" + date_str + "_" + date_str + "_ptrc_T.nc"
tracer_path = nowcast_dir + "/" + subdir + "/" + tracer_file
if os.path.isfile(tracer_path) and month_str in plot_months:
grid_t = xr.open_dataset(tracer_path)
result_hours = pd.DatetimeIndex(grid_t.time_centered.values).hour
time_indices = np.where([(x in plot_hours) for x in result_hours])
J, T, Z = np.meshgrid(model_js,time_indices,depth_indices, indexing = 'ij')
I, T, Z = np.meshgrid(model_is,time_indices,depth_indices, indexing = 'ij')
tracer_dataframes = []
for t in tracers:
station_slice = grid_t[t].values[T,Z,J,I]
slice_xarray = xr.DataArray(station_slice,
[stations,result_hours[time_indices], result_depths[depth_indices]],
["STATION", "HOUR", "DEPTH"],
t)
slice_dataframe = slice_xarray.to_dataframe()
slice_dataframe.reset_index(inplace = True)
tracer_dataframes.append(slice_dataframe)
merged_tracers = functools.reduce(lambda left,right: pd.merge(left,right,on=["STATION", "HOUR", "DEPTH"]), tracer_dataframes)
merged_tracers["DATE"] = pd.to_datetime(date_str, infer_datetime_format=True)
merged_tracers["MONTH"] = int(month_num[month_str])
dataframe_list.append(merged_tracers)
num_files = num_files + 1
run_time = datetime.datetime.now() - start_time
clear_output()
print("Files loaded:" + str(num_files))
print("Date of most recent nowcast load: " + date_str)
print("Time loading: ")
print(run_time)
print("\n\n\n")
print(merged_tracers)
nowcast_df = pd.concat(dataframe_list)
t = datetime.datetime.now()
time_string = str(t.year) +"_"+ str(t.month) +"_"+ str(t.day) +"_"+ str(t.hour) +"_"+ str(t.minute)
file_path = "/ocean/jpetrie/MEOPAR/analysis-james/nowcast_green_subset/"+ time_string + ".pkl"
nowcast_df.to_pickle(file_path)
clear_output()
print("Files loaded:" + str(num_files))
print("Done, dataframe saved to: " + file_path)
else:
past_dataset_path = "/ocean/jpetrie/MEOPAR/analysis-james/nowcast_green_subset/2016_8_3_16_58.pkl"
nowcast_df = pd.read_pickle(past_dataset_path)
Files loaded:61 Date of most recent nowcast load: 20160430 Time loading: 0:20:05.602110 STATION HOUR DEPTH PHY2 PHY MICZ MYRI \ 0 S2-3 0 0.500000 0.858978 0.579502 2.175034 1.048522 1 S2-3 0 1.500003 0.858018 0.578866 2.173194 1.047739 2 S2-3 0 2.500011 0.857669 0.578933 2.173153 1.047796 3 S2-3 0 3.500031 0.857058 0.579023 2.173097 1.047871 4 S2-3 0 4.500071 0.856111 0.579128 2.173011 1.047967 5 S2-3 0 5.500151 0.854586 0.579253 2.172877 1.048101 6 S2-3 0 6.500310 0.851331 0.579432 2.172599 1.048349 7 S2-3 0 7.500623 0.843373 0.579738 2.171753 1.048893 8 S2-3 0 8.501236 0.837202 0.579989 2.170861 1.049341 9 S2-3 0 9.502433 0.818826 0.580670 2.169164 1.050585 10 S2-3 0 10.504766 0.796035 0.583241 2.171999 1.053328 11 S2-3 0 11.509312 0.793402 0.583451 2.170536 1.053733 12 S2-3 0 12.518167 0.660621 0.594791 2.198746 1.077647 13 S2-3 0 13.535412 0.514204 0.637423 2.613321 1.111456 14 S2-3 0 14.568982 1.563274 0.659492 2.960599 0.963000 15 S2-3 0 15.634288 2.546103 0.654407 2.994946 0.732997 16 S2-3 0 16.761173 2.464055 0.606696 2.845185 0.579576 17 S2-3 0 18.007135 1.779486 0.551034 2.513018 0.466787 18 S2-3 0 19.481785 1.096923 0.507210 2.228270 0.356358 19 S2-3 12 0.500000 0.863263 0.508922 2.282489 0.946986 20 S2-3 12 1.500003 0.861421 0.507752 2.278562 0.945231 21 S2-3 12 2.500011 0.861118 0.507843 2.278815 0.945381 22 S2-3 12 3.500031 0.860060 0.507900 2.278852 0.945483 23 S2-3 12 4.500071 0.857693 0.507916 2.278610 0.945527 24 S2-3 12 5.500151 0.848922 0.507817 2.277508 0.945437 25 S2-3 12 6.500310 0.833308 0.507590 2.275604 0.945123 26 S2-3 12 7.500623 0.827363 0.507510 2.274766 0.944996 27 S2-3 12 8.501236 0.804738 0.507273 2.272167 0.943982 28 S2-3 12 9.502433 0.766395 0.506991 2.268008 0.939979 29 S2-3 12 10.504766 0.776628 0.509224 2.281454 0.922135 ... ... ... ... ... ... ... ... 1851 S6-2 12 8.501236 5.797703 0.738893 3.335549 0.741001 1852 S6-2 12 9.502433 5.725281 0.702544 3.099444 0.615971 1853 S6-2 12 10.504766 5.041942 0.643520 2.773491 0.499204 1854 S6-2 12 11.509312 4.201956 0.596387 2.491383 0.424281 1855 S6-2 12 12.518167 3.339507 0.560245 2.261248 0.376743 1856 S6-2 12 13.535412 2.565227 0.513280 1.958782 0.292647 1857 S6-2 12 14.568982 1.876678 0.477837 1.740677 0.247074 1858 S6-2 12 15.634288 1.411284 0.449378 1.555480 0.206170 1859 S6-2 12 16.761173 0.976741 0.416372 1.363059 0.166047 1860 S6-2 12 18.007135 0.793434 0.393321 1.258644 0.146129 1861 S6-2 12 19.481785 0.662983 0.370913 1.176061 0.129174 1862 S6-2 18 0.500000 1.733943 0.624194 2.951987 1.002436 1863 S6-2 18 1.500003 1.748014 0.626859 2.958006 1.004712 1864 S6-2 18 2.500011 1.778811 0.630730 2.969169 1.006580 1865 S6-2 18 3.500031 1.966559 0.657020 3.022567 1.019426 1866 S6-2 18 4.500071 2.154295 0.668221 3.029219 1.016018 1867 S6-2 18 5.500151 2.476735 0.682425 3.080170 1.009997 1868 S6-2 18 6.500310 2.976312 0.701726 3.203726 0.985523 1869 S6-2 18 7.500623 4.012282 0.729258 3.275351 0.903593 1870 S6-2 18 8.501236 5.182644 0.749426 3.263317 0.808376 1871 S6-2 18 9.502433 5.614570 0.762792 3.325359 0.796483 1872 S6-2 18 10.504766 6.028626 0.759305 3.255300 0.730866 1873 S6-2 18 11.509312 5.367168 0.688416 2.818776 0.565256 1874 S6-2 18 12.518167 4.127279 0.603716 2.337869 0.410067 1875 S6-2 18 13.535412 2.963651 0.539863 1.967383 0.304817 1876 S6-2 18 14.568982 2.121258 0.498878 1.724933 0.244989 1877 S6-2 18 15.634288 1.806220 0.485268 1.638287 0.225276 1878 S6-2 18 16.761173 1.279961 0.458819 1.462881 0.189314 1879 S6-2 18 18.007135 0.868384 0.414655 1.289099 0.152190 1880 S6-2 18 19.481785 0.579949 0.344307 1.117548 0.115862 DATE MONTH 0 2016-04-30 4 1 2016-04-30 4 2 2016-04-30 4 3 2016-04-30 4 4 2016-04-30 4 5 2016-04-30 4 6 2016-04-30 4 7 2016-04-30 4 8 2016-04-30 4 9 2016-04-30 4 10 2016-04-30 4 11 2016-04-30 4 12 2016-04-30 4 13 2016-04-30 4 14 2016-04-30 4 15 2016-04-30 4 16 2016-04-30 4 17 2016-04-30 4 18 2016-04-30 4 19 2016-04-30 4 20 2016-04-30 4 21 2016-04-30 4 22 2016-04-30 4 23 2016-04-30 4 24 2016-04-30 4 25 2016-04-30 4 26 2016-04-30 4 27 2016-04-30 4 28 2016-04-30 4 29 2016-04-30 4 ... ... ... 1851 2016-04-30 4 1852 2016-04-30 4 1853 2016-04-30 4 1854 2016-04-30 4 1855 2016-04-30 4 1856 2016-04-30 4 1857 2016-04-30 4 1858 2016-04-30 4 1859 2016-04-30 4 1860 2016-04-30 4 1861 2016-04-30 4 1862 2016-04-30 4 1863 2016-04-30 4 1864 2016-04-30 4 1865 2016-04-30 4 1866 2016-04-30 4 1867 2016-04-30 4 1868 2016-04-30 4 1869 2016-04-30 4 1870 2016-04-30 4 1871 2016-04-30 4 1872 2016-04-30 4 1873 2016-04-30 4 1874 2016-04-30 4 1875 2016-04-30 4 1876 2016-04-30 4 1877 2016-04-30 4 1878 2016-04-30 4 1879 2016-04-30 4 1880 2016-04-30 4 [1881 rows x 9 columns]
nowcast_df["DAY_OF_MONTH"] = nowcast_df["DATE"].apply(lambda x: x.day)
diatom_df = all_plank[["DATE", "DEPTH", "STATION", "MONTH", "DAY_OF_MONTH", "DIATOMS(mmolN/m3)"]]
diatom_df["DATA_TYPE"] = "Measured Diatoms"
def dateToString(date):
try:
date_string = date.strftime('%Y-%m-%d')
except:
date_string = "NULL"
return(date_string)
nowcast_df["DATA_TYPE"] = "Simulated PHY2"
nowcast_df["DIATOMS(mmolN/m3)"] = nowcast_df["PHY2"]
combined = pd.concat([diatom_df, nowcast_df.query("HOUR == 12")], ignore_index=True)
combined["IDENTIFIER"] = combined["DATA_TYPE"] + ", Date = " + combined["DATE"].apply(dateToString)
#combined = combined.sort_values(["DATA_TYPE", "DAY_OF_MONTH", "IDENTIFIER"])
/home/jpetrie/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy from ipykernel import kernelapp as app
# S3 is the only station with multiple depth measurements on the same day for total diatoms
# March and April are the only months with significant diatom population measured at S3
combined_subset = combined.query("STATION == 'S3' & ((MONTH == 4) | (MONTH == 3) ) & DEPTH <= 20 & (DATA_TYPE == 'Measured Diatoms' | DAY_OF_MONTH%8 == 1)")
combined_subset = combined_subset.sort_values(["DEPTH","IDENTIFIER"])
sns.set(font_scale = 2)
fg = sns.FacetGrid(data = combined_subset, hue = "IDENTIFIER" , row = "MONTH" ,size =15)
fg.map(plt.plot, "DIATOMS(mmolN/m3)", "DEPTH").add_legend()
plt.gca().invert_yaxis()