#!/usr/bin/env python
# coding: utf-8

# # Data Sets Tutorial

# This tutorial demonstrates how to create and use `DataSet` objects.  At its core, Gate Set Tomography finds a gate set which best fits some experimental data, and in pyGSTi a `DataSet` is used to hold that data.  `DataSet`s are essentially nested dictionaries which associate a count (a number, typically an integer) with (gate string, SPAM label) pairs so that `dataset[gateString][spamLabel]` can be used to read & write the number of `spamLabel` outcomes of the experiment given by the sequence `gateString`.
#
# There are a few important differences between a `DataSet` and a dictionary-of-dictionaries:
# - `DataSet` objects can be in one of two modes: *static* or *non-static*.  When in *non-static* mode, data can be freely modified within the set, making this the mode to use during data entry.  In the *static* mode, data cannot be modified and the `DataSet` is essentially read-only.  The `done_adding_data` method of a `DataSet` switches from non-static to static mode, and should be called, as the name implies, once all desired data has been added (or modified).  Once a `DataSet` is static, it is read-only for the rest of its life; to modify its data the best one can do is make a non-static *copy* via the `copy_nonstatic` member and modify the copy.
#
# - When data for a gate string is present in a `DataSet`, counts must exist for *all* SPAM labels.  That is, for a given gate string, you cannot store counts for only a subset of the SPAM labels.  Because of this condition, dictionary-access syntax of the SPAM label (i.e. `dataset[gateString][spamLabel]`) *cannot* be used to write counts for new `gateString` keys; one must either assign an entire dictionary of SPAM label-count pairs to `dataset[gateString]` or use the `add_`*xxx* methods (these methods add data for *all* SPAM labels at once).
#
# Once a `DataSet` is constructed, filled with data, and made *static*, it is typically passed as a parameter to one of pyGSTi's algorithm or driver routines to find a `GateSet` estimate based on the data.  This tutorial focuses on how to construct a `DataSet` and modify its data.  Later tutorials will demonstrate the different GST algorithms.

# In[ ]:

from __future__ import print_function

# In[ ]:

import pygsti

# ## Creating a `DataSet`

# There are three basic ways to create `DataSet` objects in `pygsti`:
# * By creating an empty `DataSet` object and manually adding counts corresponding to gate strings.  Remember that the `add_`*xxx* methods must be used to add data for gate strings not yet in the `DataSet`.  Once the data is added, be sure to call `done_adding_data`, as this restructures the internal storage of the `DataSet` to optimize the access operations used by algorithms.
# * By loading from a text-format dataset file via `pygsti.io.load_dataset`.  The result is a ready-to-use-in-algorithms *static* `DataSet`, so there's no need to call `done_adding_data` this time.
# * By using a `GateSet` to generate "fake" data via `generate_fake_data`.  This can be useful for doing simulations of GST, and comparing to your experimental results.
#
# We do each of these in turn in the cells below.

# In[ ]:

#1) Creating a data set from scratch
#    Note that tuples may be used in lieu of GateString objects
ds1 = pygsti.objects.DataSet(spamLabels=['plus', 'minus'])
ds1.add_count_dict(('Gx',), {'plus': 10, 'minus': 90})
ds1.add_count_dict(('Gx', 'Gy'), {'plus': 40, 'minus': 60})
ds1[('Gy',)] = {'plus': 10, 'minus': 90}  # dictionary assignment

#Modify existing data using dictionary-like access
ds1[('Gx',)]['plus'] = 15
ds1[('Gx',)]['minus'] = 85

#GateString objects can be used.
gs = pygsti.objects.GateString(('Gx', 'Gy'))
ds1[gs]['plus'] = 45
ds1[gs]['minus'] = 55

# Switch ds1 to static mode now that all of its data has been entered.
ds1.done_adding_data()

# In[ ]:

#2) By creating and loading a text-format dataset file.  The first
#   row is a directive which specifies what the columns (after the
#   first one) hold.  Other allowed values are "plus frequency",
#   "minus count", etc.  Note that "plus" and "minus" are the
#   SPAM labels and must match those of any GateSet used in
#   conjunction with this DataSet.
#
#   The text is row-oriented: the "## Columns" directive sits on its
#   own row and each gate string follows on a row of its own, so the
#   line breaks inside this literal are significant.
dataset_txt = """## Columns = plus count, count total
{} 0 100
Gx 10 90
GxGy 40 60
Gx^4 20 90
"""
with open("tutorial_files/Example_TinyDataset.txt", "w") as tinydataset:
    tinydataset.write(dataset_txt)
ds2 = pygsti.io.load_dataset("tutorial_files/Example_TinyDataset.txt")

# In[ ]:

#3) By generating fake data (using our example gate list string from the previous tutorial)

#Load the example gate set from Tutorial 01
gateset = pygsti.io.load_gateset("tutorial_files/Example_Gateset.txt")

#Depolarize it (Tutorial 01)
depol_gateset = gateset.depolarize(gate_noise=0.1)

#Load the example gatestring list from Tutorial 02
gatestring_list = pygsti.io.load_gatestring_list("tutorial_files/Example_GatestringList.txt")

#Generate fake data (Tutorial 00)
# ds3 and ds3b use the same gate set, gate strings and seed; only the
# number of samples per gate string differs (1000 vs. 50).
ds3 = pygsti.construction.generate_fake_data(depol_gateset, gatestring_list, nSamples=1000,
                                             sampleError='binomial', seed=100)
ds3b = pygsti.construction.generate_fake_data(depol_gateset, gatestring_list, nSamples=50,
                                              sampleError='binomial', seed=100)

#Write the ds3 and ds3b datasets to a file for later tutorials
pygsti.io.write_dataset("tutorial_files/Example_Dataset.txt", ds3, spamLabelOrder=['plus', 'minus'])
pygsti.io.write_dataset("tutorial_files/Example_Dataset_LowCnts.txt", ds3b)

# ## Viewing `DataSets`

# In[ ]:

#It's easy to just print them:
print("Dataset1:\n", ds1)
print("Dataset2:\n", ds2)
print("Dataset3 is too big to print, so here it is truncated to Dataset2's strings\n",
      ds3.truncate(ds2.keys()))

# ## Iteration over data sets

# In[ ]:

# A DataSet's keys() method returns a list of GateString objects
# (a bare expression: shown as the cell output in a notebook, while its
# value is simply discarded when this file is run as a plain script).
ds1.keys()

# In[ ]:

# There are many ways to iterate over a DataSet.  Here's one:
for gatestring in ds1.keys():
    dsRow = ds1[gatestring]
    for spamlabel in dsRow.keys():
        # Pad the two labels so the printed columns line up.
        print("Gatestring = %s, SPAM label = %s, count = %d" %
              (str(gatestring).ljust(5), str(spamlabel).ljust(6), dsRow[spamlabel]))

# In[ ]: