# inside docker
!python benchmark.py --frameworks dl4j tensorflow gensim originalc --file /benmark_nn_frameworks/data/text8-split --epochs 4 --batch_size 32 --workers 7 --size 100 --platform local
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
from plotly.graph_objs import *
import json
init_notebook_mode()
# Load the benchmark report produced by benchmark.py for the "local" platform.
with open('./local-report.json', 'r') as f:
    # json.load reads straight from the file object — no need for f.read() + loads.
    report = json.load(f)
# print() call form works identically under Python 2 and 3 for a single argument.
print(report['systeminfo'])
CPU INFO Model name:Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz, Architecture:x86_64, CPU(s):8, MemTotal:16382532 kB, GPU INFO Model Name : GeForce GTX 1060, Total FB Memory : 3011 MiB, CUDA Version : CUDA Version 8.0.61
print json.dumps(report['trainingparams'], indent=2)
{ "epochs": 4, "frameworks": [ "gensim", "originalc", "tensorflow", "dl4j" ], "workers": 3, "negative": 5, "batch_size": 32, "sample": 0.001, "platform": "local", "window": 5, "fname": "/benmark_nn_frameworks/data/text8-split", "min_count": 5, "alpha": 0.025, "sg": 1, "size": 100 }
Time to train (in seconds) and peak memory (in MiB) results
def _plot_bar_report(values_by_framework, title, ytitle):
    """Render a vertical bar chart of one value per framework, with labels.

    values_by_framework: dict mapping framework name -> numeric value
        (e.g. report['time'] or report['memory']).
    title: chart title.
    ytitle: y-axis label.

    Side effect: draws the figure inline via plotly's iplot().
    """
    frameworks = list(values_by_framework.keys())
    values = list(values_by_framework.values())
    data = [Bar(x=frameworks, y=values, orientation='v')]
    layout = Layout(
        title=title,
        xaxis=dict(title='Framework'),
        yaxis=dict(title=ytitle),
        # Print each bar's numeric value just above its top.
        annotations=[
            dict(x=xi, y=yi, text=str(yi),
                 xanchor='center', yanchor='bottom', showarrow=False)
            for xi, yi in zip(frameworks, values)
        ],
    )
    iplot(dict(data=data, layout=layout))


# The time and memory charts were two near-identical 20-line stanzas;
# both are now drawn by the shared helper above.
_plot_bar_report(report['time'], 'Time Report', 'Training time (in seconds)')
_plot_bar_report(report['memory'], 'Memory Report', 'Peak memory (in MB)')
Results of evaluation on the popular Word Similarities task. This task measures how well the notion of word similarity according to humans is captured by the word vector representations. Two lists are obtained by sorting the word pairs according to human similarity and vector-space similarity. Spearman's correlation/rho between these ranked lists is then used to signify how well the vector space agrees with human judgement.
# One grouped bar trace per framework for the word-similarity evaluation:
# x = dataset names, y = Spearman's rho for that framework on each dataset.
# Each entry in report['wordpairs'][framework] is a (dataset, rho) pair.
data = [
    Bar(
        x=[pair[0] for pair in report['wordpairs'][framework]],
        y=[pair[1] for pair in report['wordpairs'][framework]],
        name=framework,
    )
    for framework in report['frameworks']
]
layout = Layout(
    title='Word Pairs Evaluation Report',
    xaxis=dict(title='Dataset', tickangle=-45),
    yaxis=dict(title="Spearman's Rho"),
    barmode='group',
)
fig = dict(data=data, layout=layout)
iplot(fig)
Results of evaluation on the popular Word Analogy task. The aim of this task is to find the missing word b' in the relation: a is to a' as b is to b'. In other words, we look at the word vector most similar to b' (= a' + b - a), compare it with the human analogy, and report the accuracy.
# Grouped accuracy bars for the word-analogy (questions & answers) task.
# Each entry in report['qa'][framework] is a (section, accuracy) pair.
data = []
for framework in report['frameworks']:
    pairs = report['qa'][framework]
    data.append(Bar(
        x=[pair[0] for pair in pairs],
        y=[pair[1] for pair in pairs],
        name=framework,
    ))
layout = Layout(
    title='Analogies Task(Questions&Answers) Report',
    xaxis=dict(tickangle=-45),
    yaxis=dict(title='Accuracy (in %)'),
    barmode='group',
)
fig = dict(data=data, layout=layout)
iplot(fig)