#!/usr/bin/env python
# coding: utf-8

# ### Create Timeline Plot of Deployments in the Coordinated Canyon Experiment
# *Use the events measured by the BED Platforms to set Event start and end times for the loading of ADCP data into STOQS*
# 
# Executing this Notebook requires a personal STOQS server. Follow the [steps to build your own development or production system](https://github.com/stoqs/stoqs/blob/master/README.md). The Docker instructions below will give you your own copy of the database and are the preferred method.
# 
# #### Vagrant Instructions
# Once your server is up, log into it (after a `cd ~/Vagrants/stoqsvm`) and activate your virtual environment with the usual commands:
# 
#     vagrant ssh -- -X
#     export STOQS_HOME=/vagrant/dev/stoqsgit    # Use STOQS_HOME=/home/vagrant/dev/stoqsgit if not using NFS mount
#     cd $STOQS_HOME && source venv-stoqs/bin/activate
# 
# Connect to your Institution's STOQS database server using read-only credentials. (Note: firewalls typically limit unprivileged access to such resources.)
# 
#     cd stoqs
#     ln -s mbari_campaigns.py campaigns.py
#     export DATABASE_URL=postgis://everyone:guest@kraken.shore.mbari.org:5432/stoqs
# 
# Launch Jupyter Notebook on your system with:
# 
#     cd contrib/notebooks
#     ../../manage.py shell_plus --notebook
# 
# #### Docker Instructions
# With your stoqs server running in Docker as [detailed in the README](https://github.com/stoqs/stoqs#production-deployment-with-docker), load a copy of the cce_2015 database (from your `$STOQS_HOME/docker` directory):
# 
#     docker-compose exec stoqs createdb -U postgres stoqs_cce2015
#     curl -k https://stoqs.shore.mbari.org/media/pg_dumps/stoqs_cce2015.pg_dump | \
#         docker exec -i stoqs pg_restore -Fc -U postgres -d stoqs_cce2015
# 
# This may take 30 minutes or more to complete - wait for the command prompt. Then launch the Jupyter Notebook server:
# 
#     docker-compose exec stoqs stoqs/manage.py shell_plus --notebook
# 
# A message is displayed giving a token for you to use in a browser on your host, e.g.:
# 
#     http://localhost:8888/?token=
# 
# In the browser window navigate to this file (`stoqs/contrib/notebooks/timeline_plot_cce2015.ipynb`) and open it. You will then be able to execute the cells and experiment with this notebook.
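# Optional sanity check before starting the analysis (a minimal sketch, assuming the
# `shell_plus --notebook` environment has auto-imported the STOQS models and that the
# `stoqs_cce2015` database loaded above is reachable):

print(Campaign.objects.using('stoqs_cce2015').count(), 'campaign(s)')
print(Platform.objects.using('stoqs_cce2015').count(), 'platform(s)')
print(Activity.objects.using('stoqs_cce2015').count(), 'activities')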
# ---
# For the CCE campaign add up the hours and collect in a dictionary all the platform start and end times using Matplotlib mdate format and plot them:

# In[1]:

from collections import defaultdict
from datetime import timedelta
import matplotlib.dates as mdates

db = 'stoqs_cce2015'
plat_start_ends = defaultdict(list)
plat_depl_dur = {}
print('Platform Name        # Deployments   Hours')
print('-------------        -------------   -----')
for plat in Platform.objects.using(db).all().order_by('name'):
    time_diff_sum = timedelta(0)
    for act in Activity.objects.using(db).filter(platform=plat):
        time_diff = act.enddate - act.startdate
        time_diff_sum += time_diff
        plat_start_ends[plat].append((mdates.date2num(act.startdate),
                                      mdates.date2num(act.enddate)))
    plat_depl_dur[plat] = (len(plat_start_ends[plat]), time_diff_sum.total_seconds() / 3600)
    print(f'{plat.name:20s} {plat_depl_dur[plat][0]:10d} {plat_depl_dur[plat][1]:7.1f}')


# In[2]:

get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
from matplotlib import pylab
from numpy import arange
import operator

def plot_platforms(ax):
    plat_labels = []
    # Plot in order by platformtype name and platform name
    for ypos, plat in enumerate(
            sorted(plat_start_ends.keys(),
                   key=operator.attrgetter('platformtype.name', 'name'))):
        plat_labels.append(f'{plat.name} ({plat.platformtype.name})')
        for bdate, edate in plat_start_ends[plat]:
            dd = edate - bdate
            if dd < 1:
                dd = 1
            ax.barh(ypos + 0.5, dd, left=bdate, height=0.8, align='center',
                    color='#' + plat.color, alpha=1.0)

    ax.set_title(Campaign.objects.using(db).get(id=1).description)
    ax.set_ylim(-0.5, len(plat_labels) + 0.5)
    ax.set_yticks(arange(len(plat_labels)) + 0.5)
    ax.set_yticklabels(plat_labels)
    ax.grid(True)
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%B %Y'))
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator())
    plt.gca().xaxis.set_minor_locator(mdates.DayLocator())
    plt.gcf().autofmt_xdate()

pylab.rcParams['figure.figsize'] = (15, 9)
fig, ax = plt.subplots()
plot_platforms(ax)
plt.show()


# There appear to be 10 events measured by the Benthic Event Detectors. Let's find the start times for these events and use _k_-means clustering to group the BEDs event data start times into 10 clusters.

# In[3]:

import numpy as np
from sklearn.cluster import KMeans

bed_starts = np.array(Activity.objects.using(db)
                              .filter(platform__name__contains='BED')
                              .values_list('startdate', flat=True)
                              .order_by('startdate'), dtype=np.datetime64)
km = KMeans(n_clusters=10).fit(bed_starts.reshape(-1, 1))


# Pick the earliest event start time and construct start and end times that we'll use to instruct the STOQS loader that these are the times when we want to load ADCP data from all the moorings into the database.
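# Before doing that, an optional cross-check on the cluster count (a sketch, not part of
# the original workflow): sweep `n_clusters` over a small range and score each fit with
# the silhouette coefficient. The datetime64 start times are cast to float seconds first,
# since some scikit-learn versions do not accept datetime64 input directly.

from sklearn.metrics import silhouette_score

starts_numeric = bed_starts.astype('datetime64[s]').astype(float).reshape(-1, 1)
for k in range(5, min(13, len(bed_starts))):
    labels = KMeans(n_clusters=k).fit_predict(starts_numeric)
    score = silhouette_score(starts_numeric, labels)
    print(f'n_clusters={k:2d}  silhouette={score:.3f}')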
# In[4]:

events = {}
for bed_start in bed_starts:
    label = km.predict(bed_start.reshape(-1, 1))[0]
    if label not in events.keys():
        events[label] = bed_start
    # Print the clusters of start times and tune n_clusters above to get the optimal set
    ##print(bed_start, label)


# Print `Event()` instances of beginning and end times for use in [loadCCE_2015.py](https://github.com/stoqs/stoqs/blob/3a596e6791104054c676a0ba84e81ec02b7ca06b/stoqs/loaders/CCE/loadCCE_2015.py#L23-L32)

# In[5]:

from datetime import datetime, timedelta

event_start_ends = defaultdict(list)

def print_Events(events, before, after, type):
    for start in events.values():
        beg_dt = repr(start.astype(datetime) - before).replace('datetime.', '')
        end_dt = repr(start.astype(datetime) + after).replace('datetime.', '')
        event_start_ends[type].append((mdates.date2num(start.astype(datetime) - before),
                                       mdates.date2num(start.astype(datetime) + after)))
        print(f"    Event({beg_dt}, {end_dt}),")

# Low-resolution region: 1 day before to 2 days after the start of each event
before = timedelta(days=1)
after = timedelta(days=2)
print("lores_event_times = [")
print_Events(events, before, after, 'lores')
print("    ]")

# High-resolution region: 4 hours before to 14 hours after the start of each event
before = timedelta(hours=4)
after = timedelta(hours=14)
print("hires_event_times = [")
print_Events(events, before, after, 'hires')
print("    ]")


# Plot timeline again, but this time with events as shaded regions across all the Platforms.

# In[6]:

def plot_events(ax):
    for type in ('lores', 'hires'):
        for bdate, edate in event_start_ends[type]:
            dd = edate - bdate
            if dd < 1:
                dd = 1
            # Plot discovered events as gray lines across all platforms
            ax.barh(0, dd, left=bdate, height=32, align='center',
                    color='#000000', alpha=0.1)

pylab.rcParams['figure.figsize'] = (15, 9)
fig, ax2 = plt.subplots()
plot_platforms(ax2)
plot_events(ax2)
plt.show()


# In[ ]:
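# As a closing summary (a sketch, not part of the original loader workflow), the window
# bounds collected in `event_start_ends` are Matplotlib date numbers in units of days,
# so the amount of ADCP data each resolution region asks the loader to ingest is just
# the sum of the window widths times 24 hours:

for type in ('lores', 'hires'):
    total_hours = sum((edate - bdate) * 24 for bdate, edate in event_start_ends[type])
    print(f'{type}: {len(event_start_ends[type])} windows, {total_hours:.1f} hours of ADCP data')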