import json
import matplotlib.pyplot
import pandas
import numpy
import seaborn
import mpld3
%matplotlib inline
# Load the metapath records from JSON. The encoding is given explicitly so
# the read does not depend on the platform's locale default.
path = 'data/all-features/metapaths.json'
with open(path, encoding='utf-8') as fp:
    metapaths = json.load(fp)

# Read the tab-separated per-metapath AUROC table and preview the first rows.
auroc_df = pandas.read_table('data/all-features/auroc.tsv')
auroc_df.head(2)
| | metapath | nonzero | seconds_per_query | auroc | auroc_permuted | delta_auroc | pval_auroc | length |
---|---|---|---|---|---|---|---|---|
0 | CbGaD | 0.312 | 0.0145 | 0.715 | 0.580 | 0.13500 | 0.000003 | 2 |
1 | CbGdD | 0.149 | 0.0136 | 0.512 | 0.515 | -0.00332 | 0.921000 | 2 |
# Build one row per metapath: its abbreviation followed by the three
# complexity values stored on each metapath record.
cols = ['sequential_complexity', 'optimal_join_complexity', 'midpoint_join_complexity']
rows = [[item['abbreviation']] + [item[col] for col in cols] for item in metapaths]
complexity_df = pandas.DataFrame(rows, columns=['metapath'] + cols)

# Join on the metapath abbreviation explicitly. Without `on`, merge uses every
# column name the two frames happen to share, which would silently change the
# join if either table later gained an overlapping column.
complexity_df = auroc_df.merge(complexity_df, on='metapath')

# Query time is plotted on a log10 scale in the figures that follow.
complexity_df['log10_seconds_per_query'] = numpy.log10(complexity_df['seconds_per_query'])
complexity_df.head(2)
| | metapath | nonzero | seconds_per_query | auroc | auroc_permuted | delta_auroc | pval_auroc | length | sequential_complexity | optimal_join_complexity | midpoint_join_complexity | log10_seconds_per_query |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | CbGaD | 0.312 | 0.0145 | 0.715 | 0.580 | 0.13500 | 0.000003 | 2 | 0.620478 | 0.713766 | 0.876638 | -1.838632 |
1 | CbGdD | 0.149 | 0.0136 | 0.512 | 0.515 | -0.00332 | 0.921000 | 2 | 1.206737 | 0.966103 | 0.966103 | -1.866461 |
# Interactive scatter of log10 query time against sequential complexity,
# with a LOWESS trend line and a per-point tooltip naming the metapath.
x_col, y_col = 'sequential_complexity', 'log10_seconds_per_query'
matplotlib.pyplot.figure(figsize=(10, 7))
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally. ci=None is the documented way to request no confidence band.
ax = seaborn.regplot(
    x=x_col, y=y_col, data=complexity_df,
    lowess=True, scatter_kws={'alpha': 0.5}, line_kws={'color': 'black'}, ci=None,
)
points = ax.collections[0]
# regplot drops rows with a missing x or y before drawing, so take the labels
# from the same subset to keep each tooltip attached to the right point.
labels = complexity_df.dropna(subset=[x_col, y_col]).metapath.tolist()
tooltip = mpld3.plugins.PointLabelTooltip(points, labels)
mpld3.plugins.connect(ax.figure, tooltip)
mpld3.display()
# Interactive scatter of log10 query time against optimal-join complexity,
# with a LOWESS trend line and a per-point tooltip naming the metapath.
x_col, y_col = 'optimal_join_complexity', 'log10_seconds_per_query'
matplotlib.pyplot.figure(figsize=(10, 7))
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally. ci=None is the documented way to request no confidence band.
ax = seaborn.regplot(
    x=x_col, y=y_col, data=complexity_df,
    lowess=True, scatter_kws={'alpha': 0.5}, line_kws={'color': 'black'}, ci=None,
)
points = ax.collections[0]
# regplot drops rows with a missing x or y before drawing, so take the labels
# from the same subset to keep each tooltip attached to the right point.
labels = complexity_df.dropna(subset=[x_col, y_col]).metapath.tolist()
tooltip = mpld3.plugins.PointLabelTooltip(points, labels)
mpld3.plugins.connect(ax.figure, tooltip)
mpld3.display()
# Interactive scatter of log10 query time against midpoint-join complexity,
# with a LOWESS trend line and a per-point tooltip naming the metapath.
x_col, y_col = 'midpoint_join_complexity', 'log10_seconds_per_query'
matplotlib.pyplot.figure(figsize=(10, 7))
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally. ci=None is the documented way to request no confidence band.
ax = seaborn.regplot(
    x=x_col, y=y_col, data=complexity_df,
    lowess=True, scatter_kws={'alpha': 0.5}, line_kws={'color': 'black'}, ci=None,
)
points = ax.collections[0]
# regplot drops rows with a missing x or y before drawing, so take the labels
# from the same subset to keep each tooltip attached to the right point.
labels = complexity_df.dropna(subset=[x_col, y_col]).metapath.tolist()
tooltip = mpld3.plugins.PointLabelTooltip(points, labels)
mpld3.plugins.connect(ax.figure, tooltip)
mpld3.display()