import datetime
import json
import os
import pandas as pd
import holoviews as hv
import colorcet as cc
from bokeh import colors, palettes
# combos.json was extracted from https://github.com/TripSit/combogen.git
# It is indexed later as combos[drug_a][drug_b] to obtain the name of the
# interaction category for a pair of drugs.
# Decode explicitly as UTF-8 (the JSON interchange encoding) rather than
# relying on the platform's default text encoding.
with open('combos.json', encoding='utf-8') as combos_file:
    combos = json.load(combos_file)
# Show more rows when printing all data: raise pandas' display limit to
# 100 rows.
pd.options.display.max_rows = 100
# Chord plot settings: select bokeh as the holoviews plotting backend.
hv.extension('bokeh')
# NOTE(review): the next line is a notebook "%" magic, not Python syntax; it
# only works when run as a notebook cell. Presumably it is the holoviews
# output magic scaling plots to 300% -- confirm.
%output size=300
# Directory that is scanned for the button-press CSV logs.
DATA_DIR = 'data'

# Minimum time, in milliseconds, a combination must stay pressed before it
# is counted as a "real" test rather than a transition towards another
# combination or an accidental press. The value is admittedly arbitrary:
# the data shows no obvious smooth cutoff.
PRESS_CUTOFF_MS = 500

# Labels of the first and last drug columns in the CSV header; every column
# between them (inclusive) is treated as a drug button.
HEADER_DRUG_START, HEADER_DRUG_END = 'LSD', 'SSRIs'
# Drug class -> member drugs. Both the order of the classes and the order of
# the drugs within each class are significant: nodes are generated by
# walking this mapping in order.
#
# Dissociatives, Depressants and Anti-Depressants are kept next to each
# other on purpose: they have the most dangerous interactions, which are
# easier to read when they sit close together.
DRUG_CLASSES = {
    'Psychedelics': [
        'LSD', 'Mushrooms', 'DMT', 'Mescaline', 'DOx',
        'NBOMes', '2C-x', '2C-T-x', '5-MeO-xxT', 'Cannabis',
    ],
    'Stimulants': ['Amphetamines', 'MDMA', 'Cocaine', 'Caffeine'],
    'Dissociatives': ['Ketamine', 'MXE', 'DXM', 'Nitrous'],
    'Depressants': ['Alcohol', 'GHB/GBL', 'Opioids', 'Tramadol', 'Benzodiazepines'],
    'Anti-Depressants': ['MAOIs', 'SSRIs'],
}
# Drug class -> two colors used alternately for the drugs of that class.
# Each class is assigned one group number; its colors are the entries at
# offsets 1 and 3 within that group of four in bokeh's 20-color
# Category20b palette.
DRUG_COLORS = {
    klass: [
        palettes.Category20b[20][group * 4 + 1],
        palettes.Category20b[20][group * 4 + 3],
    ]
    for klass, group in (
        ('Psychedelics', 1),
        ('Dissociatives', 4),
        ('Stimulants', 3),
        ('Depressants', 0),
        ('Anti-Depressants', 2),
    )
}
# Interaction category -> display color (RGB plus alpha).
# The entries are listed from least to most significant so that a category's
# position doubles as its severity rank: for more than two drugs the
# interaction to show is simply the max() of the pairwise ranks.
INTERACTION_COLORS = {
    'Low Risk & No Synergy': colors.RGB(32, 103, 45, 0.25),
    'Low Risk & Decrease': colors.RGB(0, 0, 255, 0.25),
    'Low Risk & Synergy': colors.RGB(128, 0, 255, 0.25),
    'Caution': colors.RGB(255, 216, 68, 0.65),
    'Unsafe': colors.RGB(255, 128, 0, 0.75),
    'Dangerous': colors.RGB(255, 0, 0, 1),
}
# Colors in rank order (position i belongs to the category with rank i).
INTERACTION_COLORMAP = list(INTERACTION_COLORS.values())
# Category name -> integer rank (its position in INTERACTION_COLORS).
INTERACTION_LOOKUP = {name: rank for rank, name in enumerate(INTERACTION_COLORS)}
# The same colors, in the same order, converted with to_hex().
INTERACTION_COLORMAP_NOALPHA = [rgba.to_hex() for rgba in INTERACTION_COLORMAP]
# Import data from every CSV file in DATA_DIR.
# The frames are collected first and concatenated once; concatenating inside
# the loop would re-copy the accumulated data for every file.
csv_frames = []
# sorted() makes the load order (and so the order of rows with identical
# timestamps coming from different files) reproducible between runs.
for csv_name in sorted(os.listdir(DATA_DIR)):
    if not csv_name.lower().endswith('.csv'):
        continue
    csv_path = os.path.join(DATA_DIR, csv_name)
    # Date, Time columns will be concatenated and parsed as a datetime into Date_Time
    # NOTE(review): recent pandas releases deprecate the nested-list form of
    # parse_dates -- confirm against the installed pandas version.
    csv_frames.append(pd.read_csv(csv_path, parse_dates=[['Date', 'Time']]))
if not csv_frames:
    # Fail with a clear message instead of an obscure KeyError further down.
    raise FileNotFoundError('No CSV files found in {!r}'.format(DATA_DIR))
data = pd.concat(csv_frames)
# Sort by datetime. The sort must be stable: rows that share a timestamp keep
# their original order, which the shift(-1) below depends on.
data = data.sort_values('Date_Time', kind='mergesort')
data.index = data['Date_Time']
# Count number of buttons pressed in each row
data['num_pressed'] = data.loc[:, HEADER_DRUG_START:HEADER_DRUG_END].sum(axis=1)
# Move "ms since last" up one row for ms_pressed: how long the given row was pressed
# (the final row has no successor, so its ms_pressed is NaN)
data['ms_pressed'] = data['ms since last'].shift(-1)
# Notebook display: total number of rows loaded
data.shape[0]
54463
# Approximate number of sessions: count the rows where no button was pressed
# and that state lasted for more than one minute, treating each such row as
# a break between sessions.
len(data.loc[(data['ms_pressed'] > 60 * 1000) & (data['num_pressed'] == 0)])
519
# "Real" multi-presses: rows with more than one drug pressed that were held
# for longer than our arbitrary cutoff. Only the drug-press columns are kept.
several_pressed = data['num_pressed'] > 1
held_long_enough = data['ms_pressed'] > PRESS_CUTOFF_MS
real_multi_press = data.loc[
    several_pressed & held_long_enough,
    HEADER_DRUG_START:HEADER_DRUG_END,
]
# Notebook display: number of real multi-presses
real_multi_press.shape[0]
6467
# Build chord-plot inputs shaped like bokeh.sampledata.les_mis.data.
# nodes: one entry per drug, tagged with its class, in DRUG_CLASSES order.
# drug_colormap: one color per node, cycling through the colors of the
# drug's class by the drug's position inside that class.
nodes = []
drug_colormap = []
for klass, members in DRUG_CLASSES.items():
    class_colors = DRUG_COLORS[klass]
    for position, drug in enumerate(members):
        nodes.append({'name': drug, 'class': klass})
        drug_colormap.append(class_colors[position % len(class_colors)])
# One link per unordered pair of drugs. Its value is the number of real
# multi-presses in which both drugs were pressed, and its interaction is the
# rank of the pair's category from combos.json.
links = []
rmp = real_multi_press
for idx1, node1 in enumerate(nodes):
    name1 = node1['name']
    # Pair this drug only with the drugs listed after it; start=idx1 + 1
    # keeps idx2 equal to the partner's position in the full nodes list.
    for idx2, node2 in enumerate(nodes[idx1 + 1:], start=idx1 + 1):
        name2 = node2['name']
        pressed_together = (rmp[name1] == 1) & (rmp[name2] == 1)
        links.append({
            'source': idx1,
            'target': idx2,
            'value': int(pressed_together.sum()),
            'interaction': INTERACTION_LOOKUP[combos[name1][name2]],
            'source_name': name1,
            'target_name': name2,
        })
# Tabulate the links with a fixed column order, and the nodes as a
# holoviews Dataset keyed on 'index'.
links_df = pd.DataFrame(links)[
    ['source', 'target', 'value', 'interaction', 'source_name', 'target_name']
]
nodes_df = pd.DataFrame(nodes)
nodes_ds = hv.Dataset(nodes_df, 'index')
# Chord diagram: nodes are labelled by drug name and colored per node from
# drug_colormap; edges are colored by interaction rank.
chord = hv.Chord((links_df, nodes_ds))
chord = chord.options(
    label_index='name',
    color_index='index',
    edge_color_index='interaction',
    cmap=drug_colormap,
    edge_cmap=INTERACTION_COLORMAP,
)
# Save the plot under the base name 'chord' with the bokeh renderer, then
# display it in the notebook.
renderer = hv.renderer('bokeh')
renderer.save(chord, 'chord')
chord
# The 20 most frequently pressed pairs. Percentages are relative to the sum
# of all pairwise co-press counts, not to the number of rows.
total_tests = links_df['value'].sum()
busiest_pairs = links_df.sort_values('value', ascending=False).head(20)
top_combos = [
    {
        'Combination': '{}, {}'.format(pair['source_name'], pair['target_name']),
        'Percent of Tests': pair['value'] / total_tests * 100,
        'Interaction': INTERACTION_LOOKUP[combos[pair['source_name']][pair['target_name']]],
    }
    for _, pair in busiest_pairs.iterrows()
]
top_combos = pd.DataFrame(top_combos)[['Combination', 'Percent of Tests', 'Interaction']]
# Horizontal bars colored by interaction rank; save as 'top_combos', then
# display in the notebook.
tcbars = hv.Bars(top_combos).options(
    invert_axes=True,
    invert_yaxis=True,
    show_legend=True,
    color_index='Interaction',
    cmap=INTERACTION_COLORMAP_NOALPHA,
)
renderer.save(tcbars, 'top_combos')
tcbars
# Among presses of more than one button, how many times was each drug
# pressed? One row per drug column, indexed by 'Substance'.
totals = real_multi_press.sum().to_frame(name='Tests')
totals.index.name = 'Substance'
# One color per bar: look up the class(es) containing each drug and pick
# from that class's colors using the drug's position in the totals index.
bars_cmap = [
    DRUG_COLORS[klass][position % len(DRUG_COLORS[klass])]
    for position, substance in enumerate(totals.index)
    for klass, members in DRUG_CLASSES.items()
    if substance in members
]
# Horizontal bars; save as 'totals', then display in the notebook.
totbars = hv.Bars(totals).options(
    invert_axes=True,
    invert_yaxis=True,
    color_index='Substance',
    show_legend=True,
    cmap=bars_cmap,
)
renderer.save(totbars, 'totals')
totbars
# Every unordered triple of drugs: how often all three were pressed together
# in a real multi-press, and the most significant (highest-ranked) of the
# three pairwise interactions.
threes = []
drug_names = [node['name'] for node in nodes]
num_nodes = len(drug_names)
for idx1, n1 in enumerate(drug_names):
    for idx2 in range(idx1 + 1, num_nodes):
        n2 = drug_names[idx2]
        inter1 = INTERACTION_LOOKUP[combos[n1][n2]]
        for n3 in drug_names[idx2 + 1:]:
            inter2 = INTERACTION_LOOKUP[combos[n1][n3]]
            inter3 = INTERACTION_LOOKUP[combos[n2][n3]]
            all_pressed = (rmp[n1] == 1) & (rmp[n2] == 1) & (rmp[n3] == 1)
            threes.append({
                'Combination': '{}, {}, {}'.format(n1, n2, n3),
                'Occurrences': int(all_pressed.sum()),
                'Interaction': max(inter1, inter2, inter3),
            })
threes_df = pd.DataFrame(threes)
# The 20 most frequent triples, each as a percentage of the summed
# occurrence counts of all triples.
total_threes = threes_df['Occurrences'].sum()
busiest_threes = threes_df.sort_values('Occurrences', ascending=False).head(20)
top_threes = [
    {
        'Combination': triple['Combination'],
        'Percent of Tests': triple['Occurrences'] / total_threes * 100,
        'Interaction': triple['Interaction'],
    }
    for _, triple in busiest_threes.iterrows()
]
top_threes = pd.DataFrame(top_threes)[['Combination', 'Percent of Tests', 'Interaction']]
# Plotting quirk: if some rank in cmap does not occur in the Interaction
# column, the colors end up assigned to the wrong categories. Work around it
# by trimming the colormap to the ranks that are actually present.
used_ranks = top_threes['Interaction'].unique()
ugh_colormap = [
    hex_color
    for rank, hex_color in enumerate(INTERACTION_COLORMAP_NOALPHA)
    if rank in used_ranks
]
# Horizontal bars colored by interaction rank; save as 'top_threes', then
# display in the notebook.
t3bars = hv.Bars(top_threes).options(
    invert_axes=True,
    invert_yaxis=True,
    color_index='Interaction',
    cmap=ugh_colormap,
)
renderer.save(t3bars, 'top_threes')
t3bars
# For each hour of the day, report the number of real multi-presses and the
# drug that appears in the largest share of them. Hours with fewer than 100
# tests are skipped.
for hour in range(24):
    start = datetime.time(hour)
    # The modulo wraps the final window so that it ends at 00:00.
    end = datetime.time((hour + 1) % 24)
    chunk = rmp.between_time(start, end)
    count = len(chunk)
    if count >= 100:
        chunk_sum = chunk.sum()
        idxmax = chunk_sum.idxmax()
        percent = chunk_sum.max() / count * 100
        print('{} to {}: {:4} tests, {} = {:.0f}%'.format(start, end, count, idxmax, percent))
00:00:00 to 01:00:00:  920 tests, LSD = 24%
01:00:00 to 02:00:00:  382 tests, Alcohol = 26%
02:00:00 to 03:00:00:  456 tests, Alcohol = 30%
03:00:00 to 04:00:00:  509 tests, Alcohol = 28%
04:00:00 to 05:00:00:  200 tests, MDMA = 30%
05:00:00 to 06:00:00:  147 tests, Ketamine = 37%
19:00:00 to 20:00:00:  288 tests, Cannabis = 23%
20:00:00 to 21:00:00:  274 tests, LSD = 21%
21:00:00 to 22:00:00:  512 tests, Cannabis = 25%
22:00:00 to 23:00:00: 1166 tests, Alcohol = 26%
23:00:00 to 00:00:00: 1264 tests, LSD = 23%
# Festival name -> the real multi-presses recorded within its date range.
# The bounds are date strings used to slice rmp by its datetime index.
festivals = {
    fest_name: rmp.loc[first_day:last_day]
    for fest_name, first_day, last_day in (
        ('Plunge', '2018-5-23', '2018-5-28'),
        ('Priceless', '2018-6-28', '2018-7-2'),
        ('BurningMan', '2018-8-26', '2018-9-1'),
    )
}
# Scratch cell: show how itertools.product pairs every element of one list
# with every element of another.
from itertools import product

index = ['A', 'B']
groups = ['a', 'b']
keys = product(index, groups)
# Notebook display; note that this consumes the `keys` iterator.
list(keys)
[('A', 'a'), ('A', 'b'), ('B', 'a'), ('B', 'b')]
# Drugs ordered from most to least pressed overall.
drug_order = totals.sort_values('Tests', ascending=False).index.tolist()
# (drug, festival, percent) tuples: the share of each festival's real
# multi-presses that included the drug, grouped festival by festival.
fest_data = []
for name, fest in festivals.items():
    fest_tests = fest.shape[0]
    fest_sum = fest.sum()
    fest_data.extend(
        (drug, name, fest_sum[drug] / fest_tests * 100) for drug in drug_order
    )
# Notebook display
fest_data
[('LSD', 'Plunge', 23.64864864864865), ('Alcohol', 'Plunge', 30.067567567567565), ('MDMA', 'Plunge', 21.70608108108108), ('Cannabis', 'Plunge', 20.18581081081081), ('Mushrooms', 'Plunge', 17.22972972972973), ('Cocaine', 'Plunge', 19.763513513513516), ('Ketamine', 'Plunge', 10.050675675675675), ('Caffeine', 'Plunge', 12.58445945945946), ('Nitrous', 'Plunge', 12.753378378378377), ('Amphetamines', 'Plunge', 8.783783783783784), ('DMT', 'Plunge', 6.25), ('SSRIs', 'Plunge', 7.179054054054054), ('GHB/GBL', 'Plunge', 5.320945945945946), ('Benzodiazepines', 'Plunge', 6.672297297297297), ('Mescaline', 'Plunge', 7.094594594594595), ('Opioids', 'Plunge', 6.756756756756757), ('Tramadol', 'Plunge', 5.743243243243244), ('MAOIs', 'Plunge', 5.574324324324325), ('2C-x', 'Plunge', 4.054054054054054), ('MXE', 'Plunge', 3.5472972972972974), ('2C-T-x', 'Plunge', 3.040540540540541), ('5-MeO-xxT', 'Plunge', 2.7871621621621623), ('NBOMes', 'Plunge', 3.6317567567567566), ('DXM', 'Plunge', 2.5337837837837838), ('DOx', 'Plunge', 3.3783783783783785), ('LSD', 'Priceless', 21.057692307692307), ('Alcohol', 'Priceless', 21.153846153846153), ('MDMA', 'Priceless', 19.807692307692307), ('Cannabis', 'Priceless', 17.78846153846154), ('Mushrooms', 'Priceless', 12.788461538461537), ('Cocaine', 'Priceless', 10.096153846153847), ('Ketamine', 'Priceless', 13.557692307692307), ('Caffeine', 'Priceless', 10.192307692307692), ('Nitrous', 'Priceless', 11.25), ('Amphetamines', 'Priceless', 7.115384615384615), ('DMT', 'Priceless', 8.75), ('SSRIs', 'Priceless', 11.153846153846155), ('GHB/GBL', 'Priceless', 7.403846153846154), ('Benzodiazepines', 'Priceless', 5.096153846153846), ('Mescaline', 'Priceless', 4.519230769230769), ('Opioids', 'Priceless', 5.288461538461538), ('Tramadol', 'Priceless', 6.25), ('MAOIs', 'Priceless', 6.346153846153846), ('2C-x', 'Priceless', 3.653846153846154), ('MXE', 'Priceless', 5.288461538461538), ('2C-T-x', 'Priceless', 2.8846153846153846), ('5-MeO-xxT', 'Priceless', 3.75), ('NBOMes', 
'Priceless', 1.7307692307692308), ('DXM', 'Priceless', 2.307692307692308), ('DOx', 'Priceless', 1.6346153846153848), ('LSD', 'BurningMan', 24.231678486997634), ('Alcohol', 'BurningMan', 21.77304964539007), ('MDMA', 'BurningMan', 19.26713947990544), ('Cannabis', 'BurningMan', 18.037825059101657), ('Mushrooms', 'BurningMan', 14.444444444444443), ('Cocaine', 'BurningMan', 13.14420803782506), ('Ketamine', 'BurningMan', 12.056737588652481), ('Caffeine', 'BurningMan', 10.141843971631205), ('Nitrous', 'BurningMan', 9.1725768321513), ('Amphetamines', 'BurningMan', 10.8274231678487), ('DMT', 'BurningMan', 9.71631205673759), ('SSRIs', 'BurningMan', 5.460992907801419), ('GHB/GBL', 'BurningMan', 6.524822695035461), ('Benzodiazepines', 'BurningMan', 6.193853427895981), ('Mescaline', 'BurningMan', 5.957446808510639), ('Opioids', 'BurningMan', 5.791962174940898), ('Tramadol', 'BurningMan', 4.775413711583925), ('MAOIs', 'BurningMan', 4.184397163120567), ('2C-x', 'BurningMan', 3.309692671394799), ('MXE', 'BurningMan', 2.624113475177305), ('2C-T-x', 'BurningMan', 3.2387706855791962), ('5-MeO-xxT', 'BurningMan', 2.931442080378251), ('NBOMes', 'BurningMan', 2.5295508274231677), ('DXM', 'BurningMan', 2.458628841607565), ('DOx', 'BurningMan', 2.3640661938534278)]
# Bar chart of the per-festival percentages, keyed by drug and festival;
# drawn horizontally, with labelled=[] passed as a plot option.
bars = hv.Bars(fest_data, kdims=['Drug', 'Festival'], vdims='Percent of Tests')
bars = bars.options(invert_axes=True, invert_yaxis=True, labelled=[])
bars
# Hand-built legend for the interaction categories: a bar chart whose bars
# all have height 0, one per category, so that in effect only the legend
# carries information.
legend_colors = pd.DataFrame(
    {'val': [0] * len(INTERACTION_COLORS)},
    index=pd.Index(list(INTERACTION_COLORS), name='idx'),
)
lgnd = hv.Bars(legend_colors).options(
    color_index='idx',
    cmap=INTERACTION_COLORMAP_NOALPHA,
    show_legend=True,
)
lgnd
# Hand-built legend for the drug classes: a bar chart whose bars all have
# height 0, one per class, colored with the first color of each class.
legend_colors = pd.DataFrame(
    {'val': [0] * len(DRUG_COLORS)},
    index=pd.Index(list(DRUG_COLORS), name='idx'),
)
lgnd = hv.Bars(legend_colors).options(
    color_index='idx',
    cmap=[shades[0] for shades in DRUG_COLORS.values()],
    show_legend=True,
)
lgnd