#!/usr/bin/env python
# coding: utf-8

# In[1]:


import datetime
import json
import os
import pandas as pd
import holoviews as hv
import colorcet as cc
from bokeh import colors, palettes


# In[2]:


# Pairwise interaction table, extracted from https://github.com/TripSit/combogen.git.
# Later cells index it as combos[drug_a][drug_b] to obtain the name of an
# interaction category.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's default text encoding.
with open('combos.json', encoding='utf-8') as f:
    combos = json.load(f)


# In[3]:


# Chord plot settings: load the Bokeh backend first, then apply the
# HoloViews %output line magic (this call needs an IPython session).
hv.extension('bokeh')
get_ipython().run_line_magic('output', 'size=300')
# Let pandas print up to 100 rows when a whole frame is displayed.
pd.set_option('display.max_rows', 100)


# In[4]:


# Directory that is scanned for the CSV logs to analyse.
DATA_DIR = 'data'
# Highly arbitrary: for how long (in milliseconds) should a combination have
# been pressed to call it "real" and not just a transition to another desired
# state, an accidental press, etc.? There doesn't seem to be a smooth cutoff
# anywhere in the data...
PRESS_CUTOFF_MS = 500
# First and last drug columns in the CSV header. They are used together as a
# label slice (start:end) to select every drug column at once.
HEADER_DRUG_START = 'LSD'
HEADER_DRUG_END = 'SSRIs'


# In[5]:


# Drug class -> member drugs. The chord-plot nodes are generated by iterating
# this dict, so both the class order and the order inside each class
# determine where a drug ends up in the plot.
DRUG_CLASSES = {
    'Psychedelics': [
        'LSD',
        'Mushrooms',
        'DMT',
        'Mescaline',
        'DOx',
        'NBOMes',
        '2C-x',
        '2C-T-x',
        '5-MeO-xxT',
        'Cannabis',
    ],
    # Ordering note for the classes below: keep Dissociatives, Depressants
    # and Anti-Depressants adjacent to each other. They have the most
    # dangerous interactions, which are easier to read when the classes
    # sit close together.
    'Stimulants': [
        'Amphetamines',
        'MDMA',
        'Cocaine',
        'Caffeine',
    ],
    'Dissociatives': [
        'Ketamine',
        'MXE',
        'DXM',
        'Nitrous',
    ],
    'Depressants': [
        'Alcohol',
        'GHB/GBL',
        'Opioids',
        'Tramadol',
        'Benzodiazepines',
    ],
    'Anti-Depressants': [
        'MAOIs',
        'SSRIs',
    ],
}
# Two colours per drug class, taken from Bokeh's 20-colour Category20b
# palette. The palette is addressed here in groups of four consecutive
# entries; every class uses entries 1 and 3 of its own group.
_category20b = palettes.Category20b[20]
DRUG_COLORS = {
    klass: [_category20b[group * 4 + 1], _category20b[group * 4 + 3]]
    for klass, group in (
        ('Psychedelics', 1),
        ('Dissociatives', 4),
        ('Stimulants', 3),
        ('Depressants', 0),
        ('Anti-Depressants', 2),
    )
}
# Interaction categories, ordered from least to most significant. Because of
# that ordering, the rank stored in INTERACTION_LOOKUP can be compared
# directly: max() over several ranks picks the interaction to show when more
# than two drugs are combined.
INTERACTION_COLORS = {
    'Low Risk & No Synergy': colors.RGB(32, 103, 45, 0.25),
    'Low Risk & Decrease': colors.RGB(0, 0, 255, 0.25),
    'Low Risk & Synergy': colors.RGB(128, 0, 255, 0.25),
    'Caution': colors.RGB(255, 216, 68, 0.65),
    'Unsafe': colors.RGB(255, 128, 0, 0.75),
    'Dangerous': colors.RGB(255, 0, 0, 1),
}
# Colours in rank order (position == rank).
INTERACTION_COLORMAP = list(INTERACTION_COLORS.values())
# Category name -> rank (its position in INTERACTION_COLORS).
INTERACTION_LOOKUP = {name: rank for rank, name in enumerate(INTERACTION_COLORS)}
# The same colours, in the same order, converted to hex strings.
INTERACTION_COLORMAP_NOALPHA = [rgba.to_hex() for rgba in INTERACTION_COLORMAP]


# In[6]:


# Import data from every CSV file in DATA_DIR.
# Each file is read once and all frames are concatenated in a single call:
# growing a DataFrame with pd.concat inside the loop re-copies the rows
# accumulated so far on every iteration, and seeding it with an empty
# DataFrame relies on pandas ignoring empty entries when it resolves the
# result dtypes.
frames = [
    # The Date and Time columns are concatenated and parsed as a datetime
    # into a single Date_Time column.
    # NOTE(review): newer pandas releases deprecate the nested-list form of
    # parse_dates -- confirm against the pandas version in use.
    pd.read_csv(os.path.join(DATA_DIR, filename), parse_dates=[['Date', 'Time']])
    for filename in os.listdir(DATA_DIR)
    if filename.lower().endswith('.csv')
]
# With no CSV files present, keep the previous outcome: an empty frame.
data = pd.concat(frames) if frames else pd.DataFrame()


# In[7]:


# Put the rows in chronological order and index them by timestamp, while
# keeping Date_Time available as a regular column as well.
data = data.sort_values(by='Date_Time').set_index('Date_Time', drop=False)


# In[8]:


# Number of buttons pressed in each row: the sum across all drug columns.
drug_columns = data.loc[:, HEADER_DRUG_START:HEADER_DRUG_END]
data['num_pressed'] = drug_columns.sum(axis='columns')


# In[9]:


# "ms since last" on a row describes the time elapsed since the previous row,
# so shifting it up by one row yields ms_pressed: how long the given row's
# own state was held.
ms_until_next_row = data['ms since last'].shift(periods=-1)
data['ms_pressed'] = ms_until_next_row
len(data)


# In[10]:


# Approximate number of sessions: rows with nothing pressed for more than one
# minute are treated as breaks between sessions.
nothing_pressed = data['num_pressed'] == 0
over_a_minute = data['ms_pressed'] > 60*1000
len(data.loc[nothing_pressed & over_a_minute])


# In[11]:


# "Real" multi-presses: rows with more than one drug pressed that were held
# for longer than our arbitrary cutoff. Only the drug-press columns are kept.
several_pressed = data['num_pressed'] > 1
held_long_enough = data['ms_pressed'] > PRESS_CUTOFF_MS
real_multi_press = data.loc[
    several_pressed & held_long_enough,
    HEADER_DRUG_START:HEADER_DRUG_END,
]
len(real_multi_press)


# In[12]:


# Generate data shaped like bokeh.sampledata.les_mis.data.
# nodes: one entry per drug, tagged with its class from DRUG_CLASSES.
nodes = [
    {'name': drug, 'class': klass}
    for klass, members in DRUG_CLASSES.items()
    for drug in members
]
# drug_colormap: one colour per node, in node order. Within a class the
# colours cycle through that class's entries in DRUG_COLORS.
drug_colormap = [
    DRUG_COLORS[klass][position % len(DRUG_COLORS[klass])]
    for klass, members in DRUG_CLASSES.items()
    for position in range(len(members))
]


# In[13]:


# Then, links between each pair of drugs, with value equal to the number of
# real multi-presses in which both drugs were pressed together.
links = []
# Short alias; later cells use `rmp` as well.
rmp = real_multi_press
# One boolean "was pressed" mask per drug, computed once up front instead of
# being rebuilt for every pair inside the loops below.
pressed = {node['name']: rmp[node['name']] == 1 for node in nodes}
# Compare one drug...
for idx1, node1 in enumerate(nodes):
    name1 = node1['name']
    # ...with all other drugs after it, so every unordered pair appears once.
    # start= makes idx2 the position of node2 in the full node list.
    for idx2, node2 in enumerate(nodes[idx1 + 1:], start=idx1 + 1):
        name2 = node2['name']
        links.append({
            'source': idx1,
            'target': idx2,
            # Number of real presses where both node1 and node2 were pressed;
            # counting the True entries avoids building a filtered frame.
            'value': int((pressed[name1] & pressed[name2]).sum()),
            # Rank of the pair's interaction category (raises KeyError if
            # combos has no entry for this pair).
            'interaction': INTERACTION_LOOKUP[combos[name1][name2]],
            'source_name': name1,
            'target_name': name2,
        })
links_df = pd.DataFrame(links)
# Fix the column order explicitly.
links_df = links_df[['source', 'target', 'value', 'interaction', 'source_name', 'target_name']]
nodes_df = pd.DataFrame(nodes)
# Node dataset keyed on the frame's index, matching the integer
# source/target values stored in the links.
nodes_ds = hv.Dataset(nodes_df, 'index')


# In[14]:


# Chord diagram: one node per drug, one edge per drug pair. Nodes are
# labelled by name and coloured through drug_colormap; edges are coloured by
# interaction rank through INTERACTION_COLORMAP.
chord_options = {
    'label_index': 'name',
    'color_index': 'index',
    'edge_color_index': 'interaction',
    'cmap': drug_colormap,
    'edge_cmap': INTERACTION_COLORMAP,
}
chord = hv.Chord((links_df, nodes_ds)).options(**chord_options)
# This renderer object is reused by later cells to save the other plots.
renderer = hv.renderer('bokeh')
renderer.save(chord, 'chord')
chord


# In[15]:


# The 20 most frequently pressed pairs, each expressed as a percentage of
# the summed pair counts, together with the pair's interaction rank.
total_tests = links_df['value'].sum()
most_tested_pairs = links_df.sort_values('value', ascending=False).head(20)
top_combos = pd.DataFrame([
    {
        'Combination': '{}, {}'.format(pair['source_name'], pair['target_name']),
        'Percent of Tests': pair['value'] / total_tests * 100,
        'Interaction': INTERACTION_LOOKUP[combos[pair['source_name']][pair['target_name']]],
    }
    for _, pair in most_tested_pairs.iterrows()
])
# Fix the column order explicitly.
top_combos = top_combos[['Combination', 'Percent of Tests', 'Interaction']]


# In[16]:


# Horizontal bar chart of the top pairs, coloured by interaction rank.
tcbars_options = {
    'invert_axes': True,
    'invert_yaxis': True,
    'show_legend': True,
    'color_index': 'Interaction',
    'cmap': INTERACTION_COLORMAP_NOALPHA,
}
tcbars = hv.Bars(top_combos).options(**tcbars_options)
renderer.save(tcbars, 'top_combos')
tcbars


# In[17]:


# Among presses of more than one button, how many times was each drug
# pressed? One row per drug column, indexed by 'Substance'.
totals = real_multi_press.sum().to_frame(name='Tests')
totals.index.name = 'Substance'
# One colour per bar: for each class that lists the drug, take that class's
# colours and pick by the drug's position in totals.index (modulo the number
# of colours available for the class).
bars_cmap = [
    DRUG_COLORS[klass][position % len(DRUG_COLORS[klass])]
    for position, drug in enumerate(totals.index)
    for klass, members in DRUG_CLASSES.items()
    if drug in members
]


# In[18]:


# Horizontal bar chart of per-drug totals, one colour per substance.
totbars_options = {
    'invert_axes': True,
    'invert_yaxis': True,
    'color_index': 'Substance',
    'show_legend': True,
    'cmap': bars_cmap,
}
totbars = hv.Bars(totals).options(**totbars_options)
renderer.save(totbars, 'totals')
totbars


# In[19]:


# Every unordered triple of drugs: how often all three were pressed together
# in a real multi-press, and the most significant of the three pairwise
# interactions.
threes = []
num_nodes = len(nodes)
node_names = [node['name'] for node in nodes]
# One boolean "was pressed" mask per drug, computed once. The original
# rebuilt three masks for every one of the n-choose-3 triples.
was_pressed = {name: rmp[name] == 1 for name in node_names}
for idx1 in range(num_nodes):
    n1 = node_names[idx1]
    for idx2 in range(idx1 + 1, num_nodes):
        n2 = node_names[idx2]
        inter1 = INTERACTION_LOOKUP[combos[n1][n2]]
        # Rows where the first two drugs were both pressed; shared by every
        # triple that extends this pair.
        both_pressed = was_pressed[n1] & was_pressed[n2]
        for idx3 in range(idx2 + 1, num_nodes):
            n3 = node_names[idx3]
            inter2 = INTERACTION_LOOKUP[combos[n1][n3]]
            inter3 = INTERACTION_LOOKUP[combos[n2][n3]]
            # Ranks grow with significance, so max() is the worst pair.
            interaction = max(inter1, inter2, inter3)
            threes.append({
                'Combination': '{}, {}, {}'.format(n1, n2, n3),
                # Count the rows where all three were pressed.
                'Occurrences': int((both_pressed & was_pressed[n3]).sum()),
                'Interaction': interaction,
            })
threes_df = pd.DataFrame(threes)


# In[20]:


# The 20 most frequent triples, each as a percentage of the summed triple
# occurrences, with the interaction rank computed for the triple.
total_threes = threes_df['Occurrences'].sum()
most_tested_threes = threes_df.sort_values('Occurrences', ascending=False).head(20)
top_threes = pd.DataFrame([
    {
        'Combination': trio['Combination'],
        'Percent of Tests': trio['Occurrences'] / total_threes * 100,
        'Interaction': trio['Interaction'],
    }
    for _, trio in most_tested_threes.iterrows()
])
top_threes = top_threes[['Combination', 'Percent of Tests', 'Interaction']]
# Plotting workaround: when the colormap contains an entry whose rank does
# not occur in the Interaction column, the colours were observed to come out
# reordered. So tailor the colormap to only the ranks actually present.
ranks_present = top_threes['Interaction'].unique()
ugh_colormap = [
    hex_color
    for rank, hex_color in enumerate(INTERACTION_COLORMAP_NOALPHA)
    if rank in ranks_present
]


# In[21]:


# Horizontal bar chart of the top triples, using the tailored colormap.
t3bars_options = {
    'invert_axes': True,
    'invert_yaxis': True,
    'color_index': 'Interaction',
    'cmap': ugh_colormap,
}
t3bars = hv.Bars(top_threes).options(**t3bars_options)
renderer.save(t3bars, 'top_threes')
t3bars


# In[22]:


# For every clock hour that holds at least 100 real multi-presses, print the
# number of tests and the drug that appears in the largest share of them.
# NOTE(review): between_time includes both endpoints by default, so a row
# stamped exactly on the hour may be counted in two adjacent chunks --
# confirm whether that matters here.
for hour in range(24):
    start = datetime.time(hour)
    # Wrap around so the last chunk runs from 23:00 to 00:00.
    end = datetime.time((hour + 1) % 24)
    chunk = rmp.between_time(start, end)
    count = len(chunk)
    if count >= 100:
        presses_per_drug = chunk.sum()
        leader = presses_per_drug.idxmax()
        share = presses_per_drug[leader] / count * 100
        print('{} to {}: {:4} tests, {} = {:.0f}%'.format(start, end, count, leader, share))


# In[23]:


# Real multi-presses recorded during each festival, selected by slicing the
# datetime index between the festival's first and last date.
festival_dates = {
    'Plunge': ('2018-5-23', '2018-5-28'),
    'Priceless': ('2018-6-28', '2018-7-2'),
    'BurningMan': ('2018-8-26', '2018-9-1'),
}
festivals = {
    festival: rmp.loc[first_day:last_day]
    for festival, (first_day, last_day) in festival_dates.items()
}


# In[24]:


# Scratch cell: show the cartesian product of two small label lists.
from itertools import product
index = ['A', 'B']
groups = ['a', 'b']
keys = product(index, groups)
# Note: listing the iterator consumes it, so `keys` is empty afterwards.
list(keys)


# In[25]:


# For each festival, the percentage of its real multi-presses that included
# each drug. Drugs are listed from most to least pressed overall.
drug_order = totals.sort_values('Tests', ascending=False).index.tolist()
fest_data = []
for festival, presses in festivals.items():
    tests_at_festival = presses.shape[0]
    presses_per_drug = presses.sum()
    fest_data.extend(
        (drug, festival, presses_per_drug[drug] / tests_at_festival * 100)
        for drug in drug_order
    )
fest_data
    

# In[26]:


# Grouped horizontal bars: per drug, one bar per festival.
fest_bar_options = {
    'invert_axes': True,
    'invert_yaxis': True,
    'labelled': [],
}
bars = hv.Bars(fest_data, ['Drug', 'Festival'], 'Percent of Tests').options(**fest_bar_options)
bars


# In[28]:


# Hand-built legend for the interaction colours: a bar chart with one
# zero-height bar per interaction category, drawn only for its legend.
legend_colors = pd.DataFrame({'thing': list(INTERACTION_COLORS), 'val': 0})
legend_colors = legend_colors.set_index('thing').rename_axis('idx')
lgnd_options = {
    'color_index': 'idx',
    'cmap': INTERACTION_COLORMAP_NOALPHA,
    'show_legend': True,
}
lgnd = hv.Bars(legend_colors).options(**lgnd_options)
lgnd


# In[29]:


# Hand-built legend for the drug classes: a bar chart with one zero-height
# bar per class, coloured with the first colour of each class.
legend_colors = pd.DataFrame({'thing': list(DRUG_COLORS), 'val': 0})
legend_colors = legend_colors.set_index('thing').rename_axis('idx')
lgnd_options = {
    'color_index': 'idx',
    'cmap': [class_shades[0] for class_shades in DRUG_COLORS.values()],
    'show_legend': True,
}
lgnd = hv.Bars(legend_colors).options(**lgnd_options)
lgnd


# In[ ]: