#!/usr/bin/env python
# coding: utf-8

# # Exporting Burst Data
#
# *This notebook is part of a [tutorial series](https://github.com/OpenSMFS/FRETBursts_notebooks) for the [FRETBursts](http://opensmfs.github.io/FRETBursts/) burst analysis software.*
#
# > In this notebook we show a few examples of how to export [FRETBursts](http://opensmfs.github.io/FRETBursts/)
# > burst data to a file.
#
# Please cite FRETBursts in publications or presentations!
# # Loading the software
#
# We start by loading **`FRETBursts`**:

# In[1]:

from fretbursts import *

# In[2]:

sns = init_notebook()

# # Downloading the data file
#
# The full list of smFRET measurements used in the [FRETBursts tutorials](https://github.com/OpenSMFS/FRETBursts_notebooks)
# can be found on [Figshare](http://dx.doi.org/10.6084/m9.figshare.1456362).
#
# This is the file we will download:

# In[3]:

url = 'http://files.figshare.com/2182601/0023uLRpitc_NTP_20dT_0.5GndCl.hdf5'
# You can change the `url` variable above to download your own data file.
# This is useful if you are executing FRETBursts online and want to analyze
# your own data. See the *First Steps* tutorial for details.
# Here, we download the data file and put it in a folder named `data`,
# inside the notebook folder:

# In[4]:

download_file(url, save_dir='./data')

# > **NOTE**: If you modified the `url` variable and provided an invalid URL,
# > the previous command will fail. In this case, edit the cell containing
# > the `url` and retry the download.

# # Loading the data file
#
# Here, we can directly define the file name to be loaded:

# In[5]:

filename = "./data/0023uLRpitc_NTP_20dT_0.5GndCl.hdf5"
filename

# Let's check that the file exists:

# In[6]:

import os
if os.path.isfile(filename):
    print("Perfect, file found!")
else:
    print("Sorry, file:\n%s not found" % filename)

# In[7]:

d = loader.photon_hdf5(filename)

# # μs-ALEX parameters
#
# At this point, timestamps and detector numbers are contained in the
# `ph_times_t` and `det_t` attributes of `d`. Let's print them:

# In[8]:

d.ph_times_t, d.det_t

# We need to define some ALEX parameters:

# In[9]:

d.add(det_donor_accept=(0, 1),
      alex_period=4000,
      offset=700,
      D_ON=(2180, 3900),
      A_ON=(200, 1800))

# Here the parameters are:
#
# - `det_donor_accept`: donor and acceptor channels
# - `alex_period`: length of the excitation alternation period (in timestamp units)
# - `D_ON` and `A_ON`: donor and acceptor excitation windows
# - `offset`: the offset between the start of the alternation and the start of the timestamping
#   (see also [Definition of alternation periods](http://photon-hdf5.readthedocs.org/en/latest/phdata.html#definition-of-alternation-periods)).
#
# To check that the above parameters are correct, we need to plot the histogram
# of timestamps (modulo the alternation period) and superimpose the two
# excitation period definitions on it:

# In[10]:

bpl.plot_alternation_hist(d)

# If the previous alternation histogram looks correct,
# the corresponding definitions of the excitation periods can be applied to
# the data using the following command:

# In[11]:

loader.alex_apply_period(d)

# If the previous histogram does not look right, modify the parameters in the
# `d.add(...)` cell and check them by re-running the histogram plot cell until
# everything looks fine. Don't forget to apply the parameters with
# `loader.alex_apply_period(d)` as a last step.
#
# > **NOTE:** After applying the ALEX parameters a new array of
# > timestamps containing only photons inside the excitation periods
# > is created (named `d.ph_times_m`). To save memory, by default,
# > the old timestamps array (`d.ph_times_t`) is deleted. Therefore,
# > in the following, when we talk about the all-photon selection we always
# > refer to all photons inside both excitation periods.

# # Background and burst search

# In[12]:

d.calc_bg(bg.exp_fit, time_s=30, tail_min_us='auto', F_bg=1.7)

# In[13]:

d.burst_search(L=10, m=10, F=6)

# First we filter the bursts to avoid creating big files:

# In[14]:

ds = d.select_bursts(select_bursts.size, th1=60)

# # Exporting Burst Data
#
# By burst data we mean all the scalar burst parameters, e.g. size,
# duration, background, etc.
#
# We can easily get a table (a pandas DataFrame) with all the burst data
# as follows:

# In[15]:

bursts = bext.burst_data(ds, include_bg=True, include_ph_index=True)
bursts.head(5)  # display first 5 bursts

# Once we have the DataFrame, saving it to disk in CSV format is trivial:

# In[16]:

bursts.to_csv('%s_burst_data.csv' % filename.replace('.hdf5', ''))
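# As a quick round-trip check (a minimal sketch, not part of the original
# tutorial), we can read the CSV just written back with pandas. The first
# CSV column is the DataFrame index written by `to_csv`, so we pass
# `index_col=0`:

# In[ ]:

import pandas as pd

csv_fname = '%s_burst_data.csv' % filename.replace('.hdf5', '')
bursts_csv = pd.read_csv(csv_fname, index_col=0)  # restore the burst index
assert list(bursts_csv.columns) == list(bursts.columns)  # same columns as exported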
# # Exporting Burst Timestamps
#
# A convenient way to deal with the timestamps (and nanotimes, if available)
# of photons inside bursts is exporting them as a DataFrame.
# The function `bext.burst_photons` ([documentation](http://fretbursts.readthedocs.io/en/latest/plugins.html#fretbursts.burstlib_ext.burst_photons))
# will export this "photon data" with one row per photon. The columns are
# `timestamp` and, if available, `nanotime`.

# In[17]:

burstph = bext.burst_photons(ds)
burstph.head()

# This DataFrame has a two-level index (the first two columns): one is
# the burst number and the other is the photon number within each burst.
# The photon number starts at 0 for the first photon of each burst.
#
# As with any DataFrame, saving to disk is trivial:

# In[18]:

burstph.to_csv('photon_data.csv')

# To read the data back use:

# In[19]:

import pandas as pd

# In[20]:

burstph2 = pd.read_csv('photon_data.csv', index_col=(0, 1))
burstph2.head()

# Let's verify that the data we read back is the same:

# In[21]:

(burstph2 == burstph).all()

# ## Using the timestamps DataFrame
#
# Operations on the photon-data DataFrame are very simple.
#
# ### Transform the timestamps
#
# An example of a transformation is rescaling to get the timestamps in seconds:

# In[22]:

burstph['times_s'] = burstph.timestamp * d.clk_p
burstph.head()

# ### Compute burst-wise quantities
#
# We can compute burst-wise quantities in one step, without looping,
# using standard [pandas group-by/apply](https://pandas.pydata.org/pandas-docs/stable/groupby.html) operations.
#
# To do that, we group the rows (photons) by `'burst'` and compute an arbitrary
# function on each burst (for example the mean time):

# In[23]:

burstph.groupby('burst')['times_s'].mean()

# # Another example of exporting burst timestamps
#
# > **NOTE**: This section is provided as an example. Exporting timestamps
# > in this way is not recommended: use the approach in the previous section
# > instead. This old example is kept for educational purposes only.
#
# Exporting timestamps and other photon data for each burst is a little trickier
# because the data is less uniform (i.e. each burst has a different number of photons).
# In the following example, we will save a `csv` file with variable-length columns.
# Each burst is represented by two lines: one line for the timestamps and one line
# for the photon stream (excitation/emission period) the timestamps belong to.
#
# Let's start by creating an array of photon streams containing
# one of the values 0, 1, 2 or 3 for each timestamp.
# These values correspond to the DexDem, DexAem, AexDem, AexAem
# photon streams respectively.

# In[24]:

ds.A_ex

# In[25]:

# {0: DexDem, 1: DexAem, 2: AexDem, 3: AexAem}

# In[26]:

(ds.A_ex[0].view('int8') << 1) + ds.A_em[0].view('int8')
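# To make the stream encoding above concrete, here is a minimal, self-contained
# sketch (using made-up boolean masks, not measurement data) of how the two
# masks are packed into a single stream code:

# In[ ]:

import numpy as np

A_ex_demo = np.array([False, False, True, True])  # hypothetical acceptor-excitation mask
A_em_demo = np.array([False, True, False, True])  # hypothetical acceptor-emission mask
stream_demo = (A_ex_demo.view('int8') << 1) + A_em_demo.view('int8')
print(stream_demo)  # -> [0 1 2 3], i.e. DexDem, DexAem, AexDem, AexAem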
# Now we define a header documenting the file format. We will also include
# the filename of the measurement.
#
# This is just an example including nanotimes:

# In[27]:

header = """\
# BPH2CSV: %s
# Lines per burst: 3
# - timestamps (int64): in 12.5 ns units
# - nanotimes (int16): in raw TCSPC units (3.3 ps)
# - stream (uint8): the photon stream according to the mapping {0: DexDem, 1: DexAem, 2: AexDem, 3: AexAem}
""" % filename
print(header)

# And this is the header we are going to use:

# In[28]:

header = """\
# BPH2CSV: %s
# Lines per burst: 2
# - timestamps (int64): in 12.5 ns units
# - stream (uint8): the photon stream according to the mapping {0: DexDem, 1: DexAem, 2: AexDem, 3: AexAem}
""" % filename
print(header)

# We can now save the data to disk:

# In[29]:

out_fname = '%s_burst_timestamps.csv' % filename.replace('.hdf5', '')

dx = ds
ich = 0
bursts = dx.mburst[ich]
timestamps = dx.ph_times_m[ich]
stream = (dx.A_ex[ich].view('int8') << 1) + dx.A_em[ich].view('int8')

with open(out_fname, 'wt') as f:
    f.write(header)
    for times, period in zip(bl.iter_bursts_ph(timestamps, bursts),
                             bl.iter_bursts_ph(stream, bursts)):
        times.tofile(f, sep=',')
        f.write('\n')
        period.tofile(f, sep=',')
        f.write('\n')

# ## Read the file back
#
# As a consistency check, we can read back the data we just saved.
# As an exercise, we will put the results in a pandas DataFrame,
# which is more convenient than an array for holding this kind of data.

# In[30]:

import pandas as pd

# We start by reading the header and computing
# some file-specific constants:

# In[31]:

with open(out_fname) as f:
    lines = []
    lines.append(f.readline())
    while lines[-1].startswith('#'):
        lines.append(f.readline())
header = ''.join(lines[:-1])
print(header)

# In[32]:

stream_map = {0: 'DexDem', 1: 'DexAem', 2: 'AexDem', 3: 'AexAem'}
nrows = int(header.split('\n')[1].split(':')[1].strip())
header_len = len(header.split('\n')) - 1
header_len, nrows

# As a test, we load the data for the first burst into a DataFrame, converting
# the numerical column `stream` into photon-stream names (strings). The new
# column has a categorical dtype, so it takes very little space:

# In[33]:

burstph = (pd.read_csv(out_fname, skiprows=header_len, nrows=nrows, header=None).T
             .rename(columns={0: 'timestamp', 1: 'stream'}))
StreamCategorical = pd.api.types.CategoricalDtype(
    categories=['DexDem', 'DexAem', 'AexDem', 'AexAem'], ordered=True)
burstph.stream = (burstph.stream
                  .apply(lambda x: stream_map[pd.to_numeric(x)])
                  .astype(StreamCategorical))
burstph
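# As an extra consistency check (a sketch, not part of the original tutorial),
# we can compare the first burst read back from the CSV with the original
# arrays. This assumes the export cell above ran in the same session, so
# `timestamps`, `bursts` and `bl` are still defined:

# In[ ]:

t_first = next(iter(bl.iter_bursts_ph(timestamps, bursts)))  # first burst's timestamps
assert (burstph.timestamp.values == t_first).all()  # round-trip preserved the data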
# For reading the whole file we use a different approach. First, we load the
# entire file into two lists of lists (one for the timestamps and one for the
# stream). Then, we create a single DataFrame with a third column indicating
# the burst index.

# In[34]:

import csv
from builtins import int  # python 2 workaround, can be removed on python 3

# Read the data into two lists of lists
t_list, s_list = [], []
with open(out_fname) as f:
    for i in range(header_len):
        f.readline()
    csvreader = csv.reader(f)
    for row in csvreader:
        t_list.append([int(v) for v in row])
        s_list.append([int(v) for v in next(csvreader)])

# Turn the inner lists into DataFrames
d_list = []
for ib, (t, s) in enumerate(zip(t_list, s_list)):
    d_list.append(
        pd.DataFrame({'timestamp': t, 'stream': s}, columns=['timestamp', 'stream'])
        .assign(iburst=ib)
    )

# Concatenate the DataFrames
burstph = pd.concat(d_list, ignore_index=True)

# Convert the stream column to a categorical dtype
StreamCategorical = pd.api.types.CategoricalDtype(
    categories=['DexDem', 'DexAem', 'AexDem', 'AexAem'], ordered=True)
burstph.stream = (burstph.stream
                  .apply(lambda x: stream_map[pd.to_numeric(x)])
                  .astype(StreamCategorical))
burstph

# In[35]:

burstph.dtypes

# This was just an example. There are certainly more efficient ways
# to read the file into a DataFrame. Please feel free to contribute
# new methods illustrating a different (more efficient or simpler)
# approach.
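# For instance, here is a sketch of one possible alternative (not part of the
# original tutorial): concatenate the per-burst lists with numpy first and
# build a single DataFrame, instead of creating one small DataFrame per burst:

# In[ ]:

import numpy as np

lengths = [len(t) for t in t_list]  # number of photons in each burst
burstph_alt = pd.DataFrame({
    'timestamp': np.concatenate(t_list),
    'stream': np.concatenate(s_list),
    # repeat each burst index once per photon in that burst
    'iburst': np.repeat(np.arange(len(t_list)), lengths),
})
assert (burstph_alt.timestamp == burstph.timestamp).all()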