This notebook is part of PyBroMo a python-based single-molecule Brownian motion diffusion simulator that simulates confocal smFRET experiments.
In this notebook we show how to generated smFRET data files from raw timestamps.
%matplotlib inline
from pathlib import Path
import numpy as np
import tables
import matplotlib.pyplot as plt
import seaborn as sns
import pybromo as pbm
print('Numpy version:', np.__version__)
print('PyTables version:', tables.__version__)
print('PyBroMo version:', pbm.__version__)
In this section we show how to save a single smFRET data file. In the next section we will perform the same steps in a loop to generate a sequence of smFRET data files.
The start by loading the timestamps for donor and acceptor channel. The FRET efficiency is determined by the max emission rate ratio (k). We also need to choose the background rate.
As a memo, let's write some formulas related to the FRET efficiency:
$$ k = \frac{F_a}{F_d} \quad,\qquad E = \frac{k}{k+1} \qquad\Rightarrow\qquad k = \frac{E}{1-E}$$S = pbm.ParticlesSimulation.from_datafile('0168', mode='w')
#S = pbm.ParticlesSimulation.from_datafile('0168', mode)
def em_rates_DA_from_E(em_rate_tot, E_values):
E_values = np.asarray(E_values)
em_rates_a = E_values * em_rate_tot
em_rates_d = em_rate_tot - em_rates_a
return em_rates_d, em_rates_a
def em_rates_from_E(em_rate_tot, E_values):
em_rates_d, em_rates_a = em_rates_DA_from_E(em_rate_tot, E_values)
return np.unique(np.hstack([em_rates_d, em_rates_a]))
em_rate_tot = 300e3
E_list = np.array([0, 0.2, 0.3, 0.4, 0.49, 0.6, 0.7, 0.8])
em_rate_list = em_rates_from_E(em_rate_tot, E_list)
em_rate_list
bg_rate_d, bg_rate_a = 900, 700
bg_rates = [bg_rate_a, bg_rate_d]
rs = np.random.RandomState(123)
for bg in bg_rates:
for em_rate in em_rate_list:
print("- Simulating timestamps @%3d kcps, background %.1f kcps" %(
em_rate*1e-3, bg*1e-3), flush=True)
S.simulate_timestamps_mix(max_rates=(em_rate,), populations=(slice(0, 20),),
bg_rate=bg, rs=rs, overwrite=True)
for k in S.ts_store.h5file.root.timestamps._v_children.keys():
if not k.endswith('_par'):
print(k)
E_sim = 0.49
em_rate_d, em_rate_a = em_rates_DA_from_E(em_rate_tot, E_sim)
em_rate_d, em_rate_a
donor_names = S.timestamps_match_mix((em_rate_d,), populations=(slice(0, 20),), bg_rate=bg_rate_d)
donor_names
acceptor_names = S.timestamps_match_mix((em_rate_a,), populations=(slice(0, 20),), bg_rate=bg_rate_a)
acceptor_names
ts_d, ts_par_d = S.get_timestamps_part(donor_names[0])
ts_a, ts_par_a = S.get_timestamps_part(acceptor_names[0])
ts_d.attrs['clk_p']
Now, we need to create a single array with donor + acceptor timestamps:
ts, a_ch, part = pbm.timestamps.merge_da(ts_d, ts_par_d, ts_a, ts_par_a)
ts.shape, a_ch.shape, part
Perform some safety checks and plot:
assert a_ch.sum() == ts_a.shape[0]
assert (-a_ch).sum() == ts_d.shape[0]
assert a_ch.size == ts_a.shape[0] + ts_d.shape[0]
plt.plot(ts)
bins = np.arange(0, 1, 1e-3)
plt.hist(ts*ts_d.attrs['clk_p'], bins=bins, histtype='step');
bins = np.arange(0, 1, 1e-3)
counts_d, _ = np.histogram(ts[~a_ch]*ts_d.attrs['clk_p'], bins=bins)
counts_a, _ = np.histogram(ts[a_ch]*ts_d.attrs['clk_p'], bins=bins)
plt.plot(bins[:-1], counts_d, 'g')
plt.plot(bins[:-1], -counts_a, 'r')
To save the data in Photon-HDF5 format we use the library phconvert:
import phconvert as phc
print('Phconvert version: ', phc.__version__)
We neeed a file name. We could use a random name, but it is better to generate it programmatically, by joining the filename of the browniam motion simulation with specific FRET simulation info:
fret_string = '_E%03d_EmD%dk_EmA%03dk_BgD%d_BgA%d' %\
(E_sim*100, em_rate_d*1e-3, em_rate_a*1e-3,
bg_rate_d, bg_rate_a)
fret_string
filename_smfret = S.store.filepath.stem.replace('pybromo', 'smFRET') + fret_string + '.hdf5'
filename_smfret
fret_sim_fname = Path(filename_smfret)
fret_sim_fname
# inputs: E_sim, ts, a_ch, ts_d (clk_p), S.ts_store.filename, S.t_max
photon_data = dict(
timestamps = ts,
timestamps_specs = dict(timestamps_unit=ts_d.attrs['clk_p']),
detectors = a_ch,
measurement_specs = dict(
measurement_type = 'smFRET',
detectors_specs = dict(spectral_ch1 = np.atleast_1d(False),
spectral_ch2 = np.atleast_1d(True))))
setup = dict(
num_pixels = 2,
num_spots = 1,
num_spectral_ch = 2,
num_polarization_ch = 1,
num_split_ch = 1,
modulated_excitation = False,
lifetime = False)
provenance = dict(filename=S.ts_store.filename,
software='PyBroMo', software_version=pbm.__version__)
identity = dict(
author='Author Name',
author_affiliation='Research Institution or Company')
description = 'Simulated freely-diffusing smFRET experiment, E = %.2f%%' % E_sim
acquisition_duration = S.t_max
data = dict(
acquisition_duration = round(acquisition_duration),
description = description,
photon_data = photon_data,
setup=setup,
provenance=provenance,
identity=identity)
phc.hdf5.save_photon_hdf5(data, h5_fname=str(fret_sim_fname), overwrite=True)
h5file = tables.open_file(str(fret_sim_fname))
phc.hdf5.print_children(h5file.root.photon_data)
h5file.close()
We have seen how to create a single smFRET file. In this section we generate a sequence of smFRET files for different FRET efficiencies.
def make_photon_hdf5(ts, a_ch, clk_p, E_sim):
# globals: S.ts_store.filename, S.t_max
photon_data = dict(
timestamps = ts,
timestamps_specs = dict(timestamps_unit=clk_p),#ts_d.attrs['clk_p']),
detectors = a_ch,
measurement_specs = dict(
measurement_type = 'smFRET',
detectors_specs = dict(spectral_ch1 = np.atleast_1d(False),
spectral_ch2 = np.atleast_1d(True))))
setup = dict(
num_pixels = 2,
num_spots = 1,
num_spectral_ch = 2,
num_polarization_ch = 1,
num_split_ch = 1,
modulated_excitation = False,
lifetime = False)
provenance = dict(filename=S.ts_store.filename,
software='PyBroMo', software_version=pbm.__version__)
identity = dict(
author='Author Name',
author_affiliation='Research Institution or Company')
description = 'Simulated freely-diffusing smFRET experiment, E = %.2f%%' % E_sim
acquisition_duration = S.t_max
data = dict(
acquisition_duration = round(acquisition_duration),
description = description,
photon_data = photon_data,
setup=setup,
provenance=provenance,
identity=identity)
return data
E_list
em_rates_d, em_rates_a = em_rates_DA_from_E(em_rate_tot, E_list)
em_rates_d, em_rates_a
%%timeit -n1 -r1
for E_sim, em_d, em_a in zip(E_list, em_rates_d, em_rates_a):
print('E = %d%%, em_d = %6.1f, em_a = %6.1f' % \
(E_sim*100, em_d, em_a))
# Build the file name
fret_string = '_E%03d_EmD%dk_EmA%03dk_BgD%d_BgA%d' %\
(E_sim*100, em_rate_d*1e-3, em_rate_a*1e-3,
bg_rate_d, bg_rate_a)
filename_smfret = S.store.filepath.stem.replace('pybromo', 'smFRET') + fret_string + '.hdf5'
fret_sim_fname = Path(filename_smfret)
# Merge D and A timestamps
donor_name = S.timestamps_match_mix((em_rate_d,), populations=(slice(0, 20),), bg_rate=bg_rate_d)[0]
accept_name = S.timestamps_match_mix((em_rate_a,), populations=(slice(0, 20),), bg_rate=bg_rate_a)[0]
ts_d, ts_par_d = S.get_timestamps_part(donor_name)
ts_a, ts_par_a = S.get_timestamps_part(accept_name)
ts, a_ch, ts_part = pbm.timestamps.merge_da(ts_d, ts_par_d, ts_a, ts_par_a)
assert a_ch.sum() == ts_a.shape[0]
assert (-a_ch).sum() == ts_d.shape[0]
assert a_ch.size == ts_a.shape[0] + ts_d.shape[0]
# Save to Photon-HDF5
data = make_photon_hdf5(ts, a_ch, ts_d.attrs['clk_p'], E_sim)
phc.hdf5.save_photon_hdf5(data, h5_fname=str(fret_sim_fname), overwrite=True)
S.ts_store.close()
As a final check we analyze the created files with FRETBursts smFRET burst analysis program.
import fretbursts as fb
filepath = list(Path('./').glob('smFRET_016*E020*'))[0]
str(filepath)
d = fb.loader.photon_hdf5(str(filepath))
d
d.A_em
fb.dplot(d, fb.timetrace);
d.calc_bg(fun=fb.bg.exp_fit, tail_min_us='auto', F_bg=1.7)
d.bg_dd, d.bg_ad
d.burst_search(F=7)
d.num_bursts
ds = d.select_bursts(fb.select_bursts.size, th1=200)
ds.num_bursts
fb.dplot(ds, fb.hist_fret)
plt.axvline(0.2);
fb.dplot(ds, fb.timetrace, bursts=True);
plt.ylim(-100, 150);
plt.xlim(0.25, 0.5);
fb.bext.burst_data(ds)