This notebook is a behind-the-scenes benchmarking notebook, mainly for use by developers. The recommended way for users to interact with the dataset is via the Measurement
object and its associated context manager. See the corresponding notebook for a comprehensive tutorial on how to use those.
%matplotlib inline
from pathlib import Path
import numpy as np
import qcodes as qc
from qcodes.dataset import (
ParamSpec,
initialise_or_create_database_at,
load_or_create_experiment,
new_data_set,
)
qc.config.core.db_location
'~/experiments.db'
# Create (or open) a dedicated database file for this benchmark and make
# sure there is an experiment to attach the benchmark datasets to.
initialise_or_create_database_at(
    Path.cwd().parent / "example_output" / "benchmarking.db"
)
exp = load_or_create_experiment("benchmarking", sample_name="the sample is a lie")
exp
benchmarking#the sample is a lie#1@/home/runner/work/Qcodes/Qcodes/docs/examples/example_output/benchmarking.db ---------------------------------------------------------------------------------------------------------------
Now we can create a dataset. Note two things:
- if we don't specify an exp_id, but we have an experiment in the experiment container, the dataset will go into that one.
- dataset can be created from the experiment object
dataSet = new_data_set("benchmark_data", exp_id=exp.exp_id)
exp
benchmarking#the sample is a lie#1@/home/runner/work/Qcodes/Qcodes/docs/examples/example_output/benchmarking.db --------------------------------------------------------------------------------------------------------------- 1-benchmark_data-1-None-0
In this benchmark we will assume that we are doing a 2D loop and investigate the performance implications of writing to the dataset
# Size of the simulated 2D sweep: x_shape * y_shape points in total.
x_shape = 100
y_shape = 100
%%time
# Baseline: generate the random data without storing it anywhere, to
# measure the cost of data generation alone.
for x in range(x_shape):
    for y in range(y_shape):
        z = np.random.random_sample(1)
CPU times: user 18.6 ms, sys: 0 ns, total: 18.6 ms Wall time: 18.3 ms
Next, we run the same loop but store the generated data in in-memory numpy arrays.
# Preallocate in-memory buffers covering the full 2D grid.
x_data = np.zeros((x_shape, y_shape))
y_data = np.zeros((x_shape, y_shape))
z_data = np.zeros((x_shape, y_shape))
%%time
# Store every point into the preallocated numpy arrays (no database involved).
for x in range(x_shape):
    for y in range(y_shape):
        x_data[x, y] = x
        y_data[x, y] = y
        z_data[x, y] = np.random.random_sample()
CPU times: user 15.1 ms, sys: 2 μs, total: 15.1 ms Wall time: 14.9 ms
# Dataset with scalar ("numeric") parameters: each result row holds a
# single (x, y, z) point.
double_dataset = new_data_set(
    "doubledata",
    exp_id=exp.exp_id,
    specs=[
        ParamSpec("x", "numeric"),
        ParamSpec("y", "numeric"),
        ParamSpec("z", "numeric"),
    ],
)
double_dataset.mark_started()
Note that this is so slow that we are only doing a 10th of the computation
%%time
# Worst case: one add_results call (i.e. one DB write) per point.
# Note the x range is reduced to a tenth because this is so slow.
for x in range(x_shape // 10):
    for y in range(y_shape):
        double_dataset.add_results([{"x": x, "y": y, "z": np.random.random_sample()}])
CPU times: user 189 ms, sys: 74.4 ms, total: 263 ms Wall time: 702 ms
# Dataset with "array" parameters so that a whole row of points can be
# inserted per add_results call.
single_dataset = new_data_set(
    "singledata",
    exp_id=exp.exp_id,
    specs=[ParamSpec("x", "array"), ParamSpec("y", "array"), ParamSpec("z", "array")],
)
single_dataset.mark_started()
# 1D buffers holding one row (fixed x) of the sweep at a time.
x_data = np.zeros(y_shape)
y_data = np.zeros(y_shape)
z_data = np.zeros(y_shape)
%%time
for x in range(x_shape):
for y in range(y_shape):
x_data[y] = x
y_data[y] = y
z_data[y] = np.random.random_sample(1)
single_dataset.add_results([{"x": x_data, "y": y_data, "z": z_data}])
CPU times: user 43.5 ms, sys: 5.03 ms, total: 48.5 ms Wall time: 64.1 ms
<timed exec>:5: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
# Dataset with "array" parameters for inserting the whole grid at once.
zero_dataset = new_data_set(
    "zerodata",
    exp_id=exp.exp_id,
    specs=[ParamSpec("x", "array"), ParamSpec("y", "array"), ParamSpec("z", "array")],
)
zero_dataset.mark_started()
# Full 2D buffers: the entire grid will be inserted in a single call.
x_data = np.zeros((x_shape, y_shape))
y_data = np.zeros((x_shape, y_shape))
z_data = np.zeros((x_shape, y_shape))
%%time
for x in range(x_shape):
for y in range(y_shape):
x_data[x, y] = x
y_data[x, y] = y
z_data[x, y] = np.random.random_sample(1)
zero_dataset.add_results([{"x": x_data, "y": y_data, "z": z_data}])
CPU times: user 18.7 ms, sys: 897 μs, total: 19.6 ms Wall time: 19.8 ms
<timed exec>:5: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
# Dataset for inserting 1D arrays (one row of the sweep per result).
array1D_dataset = new_data_set(
    "array1Ddata",
    exp_id=exp.exp_id,
    specs=[ParamSpec("x", "array"), ParamSpec("y", "array"), ParamSpec("z", "array")],
)
array1D_dataset.mark_started()
# Shared y-axis setpoints, reused for every row.
y_setpoints = np.arange(y_shape)
%%timeit
# One add_results call per x value, passing 1D arrays of length y_shape;
# the random z row is generated in a single vectorized call.
for x in range(x_shape):
    x_data[x, :] = x
    array1D_dataset.add_results(
        [{"x": x_data[x, :], "y": y_setpoints, "z": np.random.random_sample(y_shape)}]
    )
43.6 ms ± 1.89 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
# Fresh full-grid buffers and y-axis setpoints for the final variant.
x_data = np.zeros((x_shape, y_shape))
y_data = np.zeros((x_shape, y_shape))
z_data = np.zeros((x_shape, y_shape))
y_setpoints = np.arange(y_shape)
# Dataset for inserting the whole grid with one call after row-wise,
# vectorized generation.
array0D_dataset = new_data_set(
    "array0Ddata",
    exp_id=exp.exp_id,
    specs=[ParamSpec("x", "array"), ParamSpec("y", "array"), ParamSpec("z", "array")],
)
array0D_dataset.mark_started()
%%timeit
# Fill the complete 2D grid row by row (vectorized per row), then insert
# everything with a single add_results call.
for x in range(x_shape):
    x_data[x, :] = x
    y_data[x, :] = y_setpoints
    z_data[x, :] = np.random.random_sample(y_shape)
array0D_dataset.add_results([{"x": x_data, "y": y_data, "z": z_data}])
1.86 ms ± 29.1 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# Pre-build all 100 x 100 result rows as a list of dicts, so that the
# insertion cost below can be measured separately from data generation.
# A list comprehension replaces the manual append loop.
data = [
    {"x": i, "y": j, "z": np.random.random_sample()}
    for i in range(100)
    for j in range(100)
]
# Dataset with scalar parameters, used to benchmark inserting many rows
# in one add_results call.
many_Data = new_data_set(
    "many_data",
    exp_id=exp.exp_id,
    specs=[
        ParamSpec("x", "numeric"),
        ParamSpec("y", "numeric"),
        ParamSpec("z", "numeric"),
    ],
)
many_Data.mark_started()
%%timeit
# Insert all 10000 pre-built rows with a single add_results call.
many_Data.add_results(data)
17.8 ms ± 544 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)