#!/usr/bin/env python
# coding: utf-8

# In[1]:

import datetime
import json
import os
import pandas as pd
import holoviews as hv
import colorcet as cc
from bokeh import colors, palettes

# In[2]:

# combos.json was extracted from https://github.com/TripSit/combogen.git
# It is indexed further down as combos[drug_a][drug_b]; each value must be one
# of the INTERACTION_COLORS keys.
with open('combos.json', encoding='utf-8') as combos_file:
    combos = json.load(combos_file)

# In[3]:

# Show more rows when printing all data
pd.options.display.max_rows = 100

# Chord plot settings
hv.extension('bokeh')
try:
    get_ipython().run_line_magic('output', 'size=300')
except NameError:
    # get_ipython() only exists under IPython/Jupyter. When this export is run
    # as a plain script, skip the display magic instead of crashing.
    pass

# In[4]:

DATA_DIR = 'data'

# Highly arbitrary: for how long should a combination have been pressed
# to call it "real" and not just a transition to another desired state,
# accidental press, etc.? There doesn't seem to be a smooth cutoff anywhere...
PRESS_CUTOFF_MS = 500

# First/last drugs in the CSV header; used as the bounds of label slices that
# select every drug column.
HEADER_DRUG_START = 'LSD'
HEADER_DRUG_END = 'SSRIs'

# In[5]:

# Insertion order matters: it determines the order of the chord plot nodes.
DRUG_CLASSES = {
    'Psychedelics': [
        'LSD',
        'Mushrooms',
        'DMT',
        'Mescaline',
        'DOx',
        'NBOMes',
        '2C-x',
        '2C-T-x',
        '5-MeO-xxT',
        'Cannabis',
    ],
    # Keep Dissociatives-Depressants-Anti-Depressants adjacent;
    # they have the most dangerous interactions which are
    # easier to read closer together.
    'Stimulants': [
        'Amphetamines',
        'MDMA',
        'Cocaine',
        'Caffeine',
    ],
    'Dissociatives': [
        'Ketamine',
        'MXE',
        'DXM',
        'Nitrous',
    ],
    'Depressants': [
        'Alcohol',
        'GHB/GBL',
        'Opioids',
        'Tramadol',
        'Benzodiazepines',
    ],
    'Anti-Depressants': [
        'MAOIs',
        'SSRIs',
    ],
}


def _class_shades(group):
    """Return the two colours at offsets 1 and 3 of 4-colour group *group*.

    Colours are taken from bokeh's 20-colour ``Category20b`` palette, which is
    indexed here as ``group * 4 + offset``.
    """
    palette = palettes.Category20b[20]
    return [palette[group * 4 + 1], palette[group * 4 + 3]]


# Two alternating shades per drug class.
DRUG_COLORS = {
    'Psychedelics': _class_shades(1),
    'Dissociatives': _class_shades(4),
    'Stimulants': _class_shades(3),
    'Depressants': _class_shades(0),
    'Anti-Depressants': _class_shades(2),
}

# Order these from least to most significant; this makes it easy to
# pick max(INTERACTION_LOOKUP[]) as the interaction to show for >2 drugs.
INTERACTION_COLORS = {
    'Low Risk & No Synergy': colors.RGB(32, 103, 45, 0.25),
    'Low Risk & Decrease': colors.RGB(0, 0, 255, 0.25),
    'Low Risk & Synergy': colors.RGB(128, 0, 255, 0.25),
    'Caution': colors.RGB(255, 216, 68, 0.65),
    'Unsafe': colors.RGB(255, 128, 0, 0.75),
    'Dangerous': colors.RGB(255, 0, 0, 1),
}
# Colours in significance order, so list position == interaction rank.
INTERACTION_COLORMAP = list(INTERACTION_COLORS.values())
# Interaction name -> rank (0 = least significant).
INTERACTION_LOOKUP = {name: rank for rank, name in enumerate(INTERACTION_COLORS)}
# Same colours as hex strings (no alpha channel), for the bar charts.
INTERACTION_COLORMAP_NOALPHA = [color.to_hex() for color in INTERACTION_COLORMAP]

# In[6]:

# Import data from all CSVs in DATA_DIR.
# Sorting the listing makes the load order deterministic (os.listdir order is
# arbitrary).
csv_paths = [
    os.path.join(DATA_DIR, entry)
    for entry in sorted(os.listdir(DATA_DIR))
    if entry.lower().endswith('.csv')
]
if not csv_paths:
    raise FileNotFoundError('No CSV files found in {!r}'.format(DATA_DIR))

# Date, Time columns will be concatenated and parsed as a datetime into Date_Time.
# NOTE(review): the nested-list form of parse_dates is deprecated in newer
# pandas releases; when upgrading, build Date_Time with pd.to_datetime instead.
# Concatenate once rather than growing the frame inside the loop, which would
# re-copy every previously loaded row for each additional file.
data = pd.concat(
    [pd.read_csv(path, parse_dates=[['Date', 'Time']]) for path in csv_paths]
)

# In[7]:

# Sort by datetime and index by it
data = data.sort_values('Date_Time')
data.index = data['Date_Time']

# In[8]:

# Count number of buttons pressed in each row
data['num_pressed'] = data.loc[:, HEADER_DRUG_START:HEADER_DRUG_END].sum(axis=1)

# In[9]:

# Move "ms since last" up one row for ms_pressed: how long the given row was pressed.
# The final row has no successor, so its ms_pressed is NaN and it fails every
# "ms_pressed > x" comparison below.
data['ms_pressed'] = data['ms since last'].shift(-1)
# Notebook display: total number of rows
data.shape[0]

# In[10]:

# Rows with nothing pressed for more than one minute: call these breaks between sessions,
# giving an approximate number of sessions
data.loc[(data['num_pressed'] == 0) & (data['ms_pressed'] > 60*1000)].shape[0]

# In[11]:

real_multi_press = data.loc[
    # Select rows with more than one drug pressed, held for our arbitrary cutoff
    (data['num_pressed'] > 1) & (data['ms_pressed'] > PRESS_CUTOFF_MS),
    # Exclude all columns except drug presses
    HEADER_DRUG_START:HEADER_DRUG_END
]
# Notebook display: number of "real" multi-drug presses
real_multi_press.shape[0]

# In[12]:

# Generate data similar to bokeh.sampledata.les_mis.data
# First, a node for each drug with groups for DRUG_CLASSES
nodes = []
# While we're at it, build colormap: one entry per node, alternating between
# the shades of the drug's class by position within that class.
drug_colormap = []
for klass, class_drugs in DRUG_CLASSES.items():
    class_colors = DRUG_COLORS[klass]
    for didx, drug in enumerate(class_drugs):
        nodes.append({'name': drug, 'class': klass})
        drug_colormap.append(class_colors[didx % len(class_colors)])

# In[13]:

# Then, links between each pair of drugs with value equal to number of times pressed together
links = []
rmp = real_multi_press
# Compare one drug...
# Boolean "was pressed" mask per drug, computed once up front instead of being
# rebuilt for every pair inside the loops.
pressed = {node['name']: rmp[node['name']] == 1 for node in nodes}

for idx1, node1 in enumerate(nodes):
    name1 = node1['name']
    # ...with all other drugs after it, so each unordered pair is visited once.
    # start= keeps idx2 equal to the node's position in `nodes`.
    for idx2, node2 in enumerate(nodes[idx1 + 1:], start=idx1 + 1):
        name2 = node2['name']
        links.append({
            'source': idx1,
            'target': idx2,
            # Number of real presses where both node1 and node2 were pressed
            'value': int((pressed[name1] & pressed[name2]).sum()),
            # Rank of the pair's interaction (higher = more significant)
            'interaction': INTERACTION_LOOKUP[combos[name1][name2]],
            'source_name': name1,
            'target_name': name2,
        })

links_df = pd.DataFrame(links)
links_df = links_df[['source', 'target', 'value', 'interaction', 'source_name', 'target_name']]
nodes_df = pd.DataFrame(nodes)
nodes_ds = hv.Dataset(nodes_df, 'index')

# In[14]:

# Chord diagram: nodes coloured per drug, edges coloured by interaction rank.
chord = hv.Chord((links_df, nodes_ds)).options(
    label_index='name',
    color_index='index',
    edge_color_index='interaction',
    cmap=drug_colormap,
    edge_cmap=INTERACTION_COLORMAP)
renderer = hv.renderer('bokeh')
renderer.save(chord, 'chord')
chord

# In[15]:

# Top 20 pairs by count. The percentage is relative to the sum of all pair
# counts (a press of three drugs contributes three pairs to that sum).
total_tests = links_df['value'].sum()
top_combos = []
for idx, row in links_df.sort_values('value', ascending=False).head(20).iterrows():
    top_combos.append({
        'Combination': '{}, {}'.format(row['source_name'], row['target_name']),
        'Percent of Tests': row['value'] / total_tests * 100,
        # Already looked up when the link was built; reuse it.
        'Interaction': row['interaction'],
    })
top_combos = pd.DataFrame(top_combos)
top_combos = top_combos[['Combination', 'Percent of Tests', 'Interaction']]

# In[16]:

# Only pass the colours whose interaction rank actually occurs in the table:
# as noted for the top-threes chart in this file, a cmap entry whose rank is
# missing from the Interaction column makes holoviews/bokeh reorder all colours.
# When every rank is present this is the full INTERACTION_COLORMAP_NOALPHA.
top_combo_ranks = set(top_combos['Interaction'].unique())
top_combos_colormap = [
    color
    for rank, color in enumerate(INTERACTION_COLORMAP_NOALPHA)
    if rank in top_combo_ranks
]
tcbars = hv.Bars(top_combos).options(
    invert_axes=True,
    invert_yaxis=True,
    show_legend=True,
    color_index='Interaction',
    cmap=top_combos_colormap)
renderer.save(tcbars, 'top_combos')
tcbars

# In[17]:

# Given presses of more than one button,
# how many times was each drug pressed?
totals = real_multi_press.sum().to_frame()
totals.columns = ['Tests']
totals.index.name = 'Substance'

# One colour per bar. The shade is chosen by the drug's position *within its
# class*, exactly like drug_colormap for the chord plot, so a drug has the same
# colour in both figures. (Alternating on the row position in `totals` instead
# can give a different shade than the chord plot uses.)
drug_shades = {}
for klass, class_drugs in DRUG_CLASSES.items():
    dcolors = DRUG_COLORS[klass]
    for didx, drug in enumerate(class_drugs):
        drug_shades[drug] = dcolors[didx % len(dcolors)]
# Columns that belong to no class contribute no colour (unchanged behaviour);
# presumably every drug column is listed in DRUG_CLASSES - verify against the CSV header.
bars_cmap = [drug_shades[drug] for drug in totals.index if drug in drug_shades]

# In[18]:

totbars = hv.Bars(totals).options(
    invert_axes=True,
    invert_yaxis=True,
    color_index='Substance',
    show_legend=True,
    cmap=bars_cmap)
renderer.save(totbars, 'totals')
totbars

# In[19]:

# Count every combination of three drugs pressed together.
from itertools import combinations

node_names = [node['name'] for node in nodes]
# Boolean "was pressed" mask per drug, built once rather than per triple.
pressed_masks = {name: rmp[name] == 1 for name in node_names}

threes = []
# combinations() yields triples in the same order as three nested index loops.
for n1, n2, n3 in combinations(node_names, 3):
    # A triple is rated by the most significant of its three pairwise interactions.
    interaction = max(
        INTERACTION_LOOKUP[combos[n1][n2]],
        INTERACTION_LOOKUP[combos[n1][n3]],
        INTERACTION_LOOKUP[combos[n2][n3]],
    )
    all_three = pressed_masks[n1] & pressed_masks[n2] & pressed_masks[n3]
    threes.append({
        'Combination': '{}, {}, {}'.format(n1, n2, n3),
        'Occurrences': int(all_three.sum()),
        'Interaction': interaction,
    })
threes_df = pd.DataFrame(threes)

# In[20]:

# Top 20 triples, as a percentage of the sum of all triple counts.
total_threes = threes_df['Occurrences'].sum()
top_threes = []
for idx, row in threes_df.sort_values('Occurrences', ascending=False).head(20).iterrows():
    top_threes.append({
        'Combination': row['Combination'],
        'Percent of Tests': row['Occurrences'] / total_threes * 100,
        'Interaction': row['Interaction'],
    })
top_threes = pd.DataFrame(top_threes)
top_threes = top_threes[['Combination', 'Percent of Tests', 'Interaction']]

# WTF holoviews/bokeh/whatever?! If a given index in cmap isn't in the Interaction
# column, all colors are reordered, causing incredible frustration.
# So, custom-tailor the colormap to only include the colors actually needed/used.
# Keep only the colours whose interaction rank occurs in top_threes.
# The set of ranks is computed once instead of once per colormap entry.
used_interactions = set(top_threes['Interaction'].unique())
ugh_colormap = [
    color
    for idx, color in enumerate(INTERACTION_COLORMAP_NOALPHA)
    if idx in used_interactions
]

# In[21]:

t3bars = hv.Bars(top_threes).options(
    invert_axes=True,
    invert_yaxis=True,
    color_index='Interaction',
    cmap=ugh_colormap)
renderer.save(t3bars, 'top_threes')
t3bars

# In[22]:

# For each hour of the day with at least 100 tests, print the most-pressed drug.
# Rows are bucketed by the hour of their timestamp, so each row lands in exactly
# one bucket. (between_time(start, end) includes both endpoints by default, so a
# row stamped exactly on the hour would be counted in two adjacent buckets.)
row_hours = rmp.index.hour
for i in range(24):
    start = datetime.time(i)
    end = datetime.time((i + 1) % 24)
    chunk = rmp[row_hours == i]
    count = chunk.shape[0]
    if count < 100:
        continue
    chunk_sum = chunk.sum()
    idxmax = chunk_sum.idxmax()
    percent = chunk_sum[idxmax] / count * 100
    print('{} to {}: {:4} tests, {} = {:.0f}%'.format(start, end, count, idxmax, percent))

# In[23]:

# Date ranges (end date inclusive) of the events to compare.
festivals = {
    'Plunge': rmp.loc['2018-5-23':'2018-5-28'],
    'Priceless': rmp.loc['2018-6-28':'2018-7-2'],
    'BurningMan': rmp.loc['2018-8-26':'2018-9-1'],
}

# In[24]:

# Scratch cell: its names are not referenced by any later visible cell.
from itertools import product
index, groups = ['A', 'B'], ['a', 'b']
keys = product(index, groups)
list(keys)

# In[25]:

# For every festival: percentage of its tests that included each drug,
# with drugs ordered by overall test count (most tested first).
drug_order = list(totals.sort_values('Tests', ascending=False).index)
fest_data = []
for name, fest in festivals.items():
    fest_tests = fest.shape[0]
    fest_sum = fest.sum()
    for drug in drug_order:
        fest_data.append((drug, name, fest_sum[drug] / fest_tests * 100))
fest_data

# In[26]:

bars = hv.Bars(fest_data, ['Drug', 'Festival'], 'Percent of Tests').options(
    invert_axes=True,
    invert_yaxis=True,
    labelled=[])
bars

# In[28]:

# Holoviews sucks at legends; build ours manually.
# A bar chart with one zero-height bar per interaction name, shown only for its legend.
legend_colors = [{'thing': k, 'val': 0} for k in INTERACTION_COLORS.keys()]
legend_colors = pd.DataFrame(legend_colors)
legend_colors.index = legend_colors['thing']
legend_colors = legend_colors[['val']]
legend_colors.index.name = 'idx'
lgnd = hv.Bars(legend_colors).options(
    color_index='idx',
    cmap=INTERACTION_COLORMAP_NOALPHA,
    show_legend=True)
lgnd

# In[29]:

# Holoviews sucks at legends; build ours manually.
# A bar chart with one zero-height bar per drug class, shown only for its legend.
legend_colors = pd.DataFrame({'thing': list(DRUG_COLORS), 'val': 0})
legend_colors = legend_colors.set_index('thing')
legend_colors.index.name = 'idx'

# Each class is represented by the first of its shades.
class_legend_cmap = [shades[0] for shades in DRUG_COLORS.values()]

lgnd = hv.Bars(legend_colors).options(
    color_index='idx',
    cmap=class_legend_cmap,
    show_legend=True)
lgnd

# In[ ]: