#!/usr/bin/env python
# coding: utf-8
# # Exporting Burst Data
#
# *This notebook is part of a [tutorial series](https://github.com/OpenSMFS/FRETBursts_notebooks) for the [FRETBursts](http://opensmfs.github.io/FRETBursts/) burst analysis software.*
# > In this notebook, we show a few examples of how to export [FRETBursts](http://opensmfs.github.io/FRETBursts/)
# > burst data to a file.
#
#
# Please cite FRETBursts in publications or presentations!
#
# # Loading the software
# We start by loading **`FRETBursts`**:
# In[1]:
from fretbursts import *
# In[2]:
sns = init_notebook()
# # Downloading the data file
# The full list of smFRET measurements used in the [FRETBursts tutorials](https://github.com/OpenSMFS/FRETBursts_notebooks)
# can be found on [Figshare](http://dx.doi.org/10.6084/m9.figshare.1456362).
#
# This is the file we will download:
# In[3]:
url = 'http://files.figshare.com/2182601/0023uLRpitc_NTP_20dT_0.5GndCl.hdf5'
#
# You can change the `url` variable above to download your own data file.
# This is useful if you are executing FRETBursts online and you want to use
# your own data file. See also the *First Steps* tutorial notebook.
#
#
# Here, we download the data file and put it in a folder named `data`,
# inside the notebook folder:
# In[4]:
download_file(url, save_dir='./data')
# > **NOTE**: If you modified the `url` variable above and provided an invalid URL,
# > the previous command will fail. In this case, edit the cell containing
# > the `url` and re-try the download.
# # Loading the data file
# Here, we can directly define the file name to be loaded:
# In[5]:
filename = "./data/0023uLRpitc_NTP_20dT_0.5GndCl.hdf5"
filename
# Let's check that the file exists:
# In[6]:
import os
if os.path.isfile(filename):
    print("Perfect, file found!")
else:
    print("Sorry, file:\n%s not found" % filename)
# In[7]:
d = loader.photon_hdf5(filename)
# # μs-ALEX parameters
#
# At this point, timestamps and detector numbers are contained in the `ph_times_t` and `det_t` attributes of `d`. Let's print them:
# In[8]:
d.ph_times_t, d.det_t
# We need to define some ALEX parameters:
# In[9]:
d.add(det_donor_accept=(0, 1),
      alex_period=4000,
      offset=700,
      D_ON=(2180, 3900),
      A_ON=(200, 1800))
# Here the parameters are:
#
# - `det_donor_accept`: donor and acceptor channels
# - `alex_period`: length of the excitation alternation period (in timestamp units)
# - `D_ON` and `A_ON`: donor and acceptor excitation windows
# - `offset`: the offset between the start of alternation and start of timestamping
# (see also [Definition of alternation periods](http://photon-hdf5.readthedocs.org/en/latest/phdata.html#definition-of-alternation-periods)).
#
# To check that the above parameters are correct, we plot the histogram of timestamps (modulo the alternation period) and superimpose the two excitation-period definitions on it:
# In[10]:
bpl.plot_alternation_hist(d)
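# As a rough cross-check, the same histogram can also be computed manually with
# numpy. This is only a sketch: it assumes the raw timestamps of the first channel
# are in `d.ph_times_t[0]` and that `d.alex_period` holds the value set above;
# the bin count is an arbitrary choice.
# In[ ]:
import numpy as np
# Timestamps folded onto one alternation period
ph_mod = d.ph_times_t[0] % d.alex_period
counts, edges = np.histogram(ph_mod, bins=100)
counts[:10]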
# If the previous alternation histogram looks correct,
# the corresponding definitions of the excitation periods can be applied to the data using the following command:
# In[11]:
loader.alex_apply_period(d)
# If the previous histogram does not look right, you can modify the parameters in the `d.add(...)` cell and re-run the histogram plot cell until everything looks fine. Don't forget to apply the
# parameters with `loader.alex_apply_period(d)` as a last step.
#
# > **NOTE:** After applying the ALEX parameters a new array of
# > timestamps containing only photons inside the excitation periods
# > is created (named `d.ph_times_m`). To save memory, by default,
# > the old timestamps array (`d.ph_times_t`) is deleted. Therefore,
# > in the following, when we talk about all-photon selection we always
# > refer to all photons inside both excitation periods.
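# As a quick sanity check (a minimal example), the new timestamps array is now
# available; here we look at the first few values for the first channel:
# In[ ]:
d.ph_times_m[0][:5]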
# # Background and burst search
# In[12]:
d.calc_bg(bg.exp_fit, time_s=30, tail_min_us='auto', F_bg=1.7)
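# To get a feel for the estimated background, we can plot the background rates as a
# function of time. This is just an optional sanity check using the standard
# FRETBursts `dplot`/`timetrace_bg` plot:
# In[ ]:
dplot(d, timetrace_bg)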
# In[13]:
d.burst_search(L=10, m=10, F=6)
# First we filter the bursts to avoid creating big files:
# In[14]:
ds = d.select_bursts(select_bursts.size, th1=60)
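# As a quick check, we can compare the number of bursts before and after the size
# selection (`num_bursts` holds one value per excitation spot/channel):
# In[ ]:
d.num_bursts, ds.num_bursts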
# # Exporting Burst Data
# By burst data we mean all the scalar burst parameters, e.g. size, duration, background, etc.
#
# We can easily get a table (a pandas DataFrame) with all the burst data as follows:
# In[15]:
bursts = bext.burst_data(ds, include_bg=True, include_ph_index=True)
bursts.head(5) # display first 5 bursts
# Once we have the DataFrame, saving it to disk in CSV format is trivial:
# In[16]:
bursts.to_csv('%s_burst_data.csv' % filename.replace('.hdf5', ''))
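# If needed, the same CSV can be loaded back with pandas. This is a minimal sketch:
# the first column written by `to_csv` is the burst index, so we use it as index:
# In[ ]:
import pandas as pd
bursts2 = pd.read_csv('%s_burst_data.csv' % filename.replace('.hdf5', ''), index_col=0)
bursts2.head()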
# # Exporting Bursts Timestamps
# A convenient way to deal with timestamps (and nanotimes if available)
# of photons inside bursts is exporting them as a DataFrame.
# The function `bext.burst_photons` ([documentation](http://fretbursts.readthedocs.io/en/latest/plugins.html#fretbursts.burstlib_ext.burst_photons))
# will export this "photon data"
# with one row per photon. The columns are `timestamp` and, if available,
# `nanotime`.
# In[17]:
burstph = bext.burst_photons(ds)
burstph.head()
# This DataFrame has a two-level index (the first two columns): one is
# the burst number and the other is the photon number within each burst.
# The photon number starts at 0 for the first photon of each burst.
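# Thanks to this two-level index, the photons of a single burst can be selected
# with `.loc`. For example, the photons of the first burst (burst 0):
# In[ ]:
burstph.loc[0].head()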
#
# As with any DataFrame, saving to disk is trivial:
# In[18]:
burstph.to_csv('photon_data.csv')
# To read the data back use:
# In[19]:
import pandas as pd
# In[20]:
burstph2 = pd.read_csv('photon_data.csv', index_col=(0, 1))
burstph2.head()
# Let's verify that the data we read back is the same:
# In[21]:
(burstph2 == burstph).all()
# ## Using the timestamps DataFrame
#
# Operations on the photon-data DataFrame are very simple.
#
# ### Transform the timestamps
#
# An example of a transformation is rescaling to get timestamps in seconds:
# In[22]:
burstph['times_s'] = burstph.timestamp * d.clk_p
burstph.head()
# ### Compute burst-wise quantities
#
# We can compute burst-wise quantities in one step without looping
# using standard [pandas group-by/apply](https://pandas.pydata.org/pandas-docs/stable/groupby.html) operations.
#
# To do that, we group rows (photons) by `'burst'` and compute an arbitrary
# function on each burst (for example the mean time):
# In[23]:
burstph.groupby('burst')['times_s'].mean()
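# The same group-by pattern works for other burst-wise quantities. For example,
# a rough per-burst duration (time between the first and last photon, in seconds)
# can be computed as follows (a sketch using only the `times_s` column defined above):
# In[ ]:
burstph.groupby('burst')['times_s'].agg(lambda t: t.max() - t.min())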
# # Another example of exporting bursts timestamps
#
# > **NOTE**: This section is provided as an example. Exporting timestamps in this way
# > is not recommended; use the approach in the previous section instead.
# > This older example is kept for educational purposes only.
# Exporting timestamps and other photon data for each burst is a little trickier
# because the data is less uniform (i.e. each burst has a different number of photons).
# In the following example, we will save a `csv` file in which each line has a variable number of values.
# Each burst is represented by two lines: one line for the timestamps and one line for the
# photon stream (excitation/emission period) each timestamp belongs to.
#
# Let's start by creating an array of photon streams containing
# one of the values 0, 1, 2 or 3 for each timestamp.
# These values correspond to the DexDem, DexAem, AexDem and AexAem
# photon streams respectively.
# In[24]:
ds.A_ex
# In[25]:
# {0: DexDem, 1: DexAem, 2: AexDem, 3: AexAem}
# In[26]:
(ds.A_ex[0].view('int8') << 1) + ds.A_em[0].view('int8')
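# As a quick check of the encoding, we can count how many photons fall in each of
# the four streams with numpy's `bincount` (just an illustrative sketch):
# In[ ]:
import numpy as np
stream_codes = (ds.A_ex[0].view('int8') << 1) + ds.A_em[0].view('int8')
np.bincount(stream_codes)  # counts for codes 0 (DexDem), 1 (DexAem), 2 (AexDem), 3 (AexAem)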
# Now we define a header documenting the file format. We also include the
# filename of the measurement.
#
# This is just an example including nanotimes:
# In[27]:
header = """\
# BPH2CSV: %s
# Lines per burst: 3
# - timestamps (int64): in 12.5 ns units
# - nanotimes (int16): in raw TCSPC unit (3.3ps)
# - stream (uint8): the photon stream according to the mapping {0: DexDem, 1: DexAem, 2: AexDem, 3: AexAem}
""" % filename
print(header)
# And this is the header we are going to use:
# In[28]:
header = """\
# BPH2CSV: %s
# Lines per burst: 2
# - timestamps (int64): in 12.5 ns units
# - stream (uint8): the photon stream according to the mapping {0: DexDem, 1: DexAem, 2: AexDem, 3: AexAem}
""" % filename
print(header)
# We can now save the data to disk:
# In[29]:
out_fname = '%s_burst_timestamps.csv' % filename.replace('.hdf5', '')
dx = ds
ich = 0
bursts = dx.mburst[ich]
timestamps = dx.ph_times_m[ich]
stream = (dx.A_ex[ich].view('int8') << 1) + dx.A_em[ich].view('int8')
with open(out_fname, 'wt') as f:
    f.write(header)
    for times, period in zip(bl.iter_bursts_ph(timestamps, bursts),
                             bl.iter_bursts_ph(stream, bursts)):
        times.tofile(f, sep=',')
        f.write('\n')
        period.tofile(f, sep=',')
        f.write('\n')
# ## Read the file back
#
# As a consistency check, we can read back the data we just saved.
# As an exercise, we will put the results in a pandas DataFrame,
# which is more convenient than a plain array for holding this kind of data.
# In[30]:
import pandas as pd
# We start by reading the header and computing
# some file-specific constants.
# In[31]:
with open(out_fname) as f:
    lines = []
    lines.append(f.readline())
    while lines[-1].startswith('#'):
        lines.append(f.readline())
header = ''.join(lines[:-1])
print(header)
# In[32]:
stream_map = {0: 'DexDem', 1: 'DexAem', 2: 'AexDem', 3: 'AexAem'}
nrows = int(header.split('\n')[1].split(':')[1].strip())
header_len = len(header.split('\n')) - 1
header_len, nrows
# As a test, we load the data for the first burst into a DataFrame, converting the numerical column `stream`
# into photon-stream names (strings). The new column is of categorical type, so it takes very little space:
# In[33]:
burstph = (pd.read_csv(out_fname, skiprows=header_len, nrows=nrows, header=None).T
           .rename(columns={0: 'timestamp', 1: 'stream'}))
StreamCategorical = pd.api.types.CategoricalDtype(categories=['DexDem', 'DexAem', 'AexDem', 'AexAem'], ordered=True)
burstph.stream = (burstph.stream
                  .apply(lambda x: stream_map[pd.to_numeric(x)])
                  .astype(StreamCategorical))
burstph
# To read the whole file we use a different approach. First, we load the entire file
# into two lists of lists (one for the timestamps and one for the streams). Next, we create
# a single DataFrame with a third column indicating the burst index.
# In[34]:
import csv
from builtins import int # python 2 workaround, can be removed on python 3
# Read data in two list of lists
t_list, s_list = [], []
with open(out_fname) as f:
    for i in range(header_len):
        f.readline()
    csvreader = csv.reader(f)
    for row in csvreader:
        t_list.append([int(v) for v in row])
        s_list.append([int(v) for v in next(csvreader)])
# Turn the inner lists into one pandas.DataFrame per burst
d_list = []
for ib, (t, s) in enumerate(zip(t_list, s_list)):
    d_list.append(
        pd.DataFrame({'timestamp': t, 'stream': s}, columns=['timestamp', 'stream'])
        .assign(iburst=ib)
    )
# Concatenate dataframes
burstph = pd.concat(d_list, ignore_index=True)
# Convert stream column into categorical
StreamCategorical = pd.api.types.CategoricalDtype(categories=['DexDem', 'DexAem', 'AexDem', 'AexAem'], ordered=True)
burstph.stream = (burstph.stream
                  .apply(lambda x: stream_map[pd.to_numeric(x)])
                  .astype(StreamCategorical))
burstph
# In[35]:
burstph.dtypes
# This was just an example. There are certainly more efficient ways
# to read the file into a DataFrame. Please feel free to contribute
# new methods to illustrate a different (more efficient or simpler)
# approach.
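# As one possible variation (an illustrative sketch, using only numpy and the
# `t_list`/`s_list` built above), the columns can be assembled directly instead of
# concatenating one DataFrame per burst:
# In[ ]:
import numpy as np
# One burst-index value repeated for each photon in that burst
iburst = np.repeat(np.arange(len(t_list)), [len(t) for t in t_list])
burstph3 = pd.DataFrame({'timestamp': np.concatenate(t_list),
                         'stream': np.concatenate(s_list),
                         'iburst': iburst})
burstph3.head()  # the stream column is still numeric and can be made categorical as above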