#!/usr/bin/env python # coding: utf-8 # We are looking for a sample size such that the probability that the effect size is greater than zero in 95%. # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import numpy as np import pymc3 as pm import matplotlib.pylab as plt # In[185]: titers = np.arange(10, 320) # In[186]: invlogit = lambda x: 1/(1+np.exp(-x)) logit = lambda p: np.log(p/(1-p)) # Find parameters that result in neutralization of 80 # In[187]: β_1_true = np.array([3, -0.0323]) p1 = invlogit(β_1_true[0] + β_1_true[1]*titers) (logit(0.6) - β_1_true[0]) / β_1_true[1] # Find parameters that result in neutralization of 160 # In[188]: β_2_true = np.array([3, -0.0162]) p2 = invlogit(β_2_true[0] + β_2_true[1]*titers) (logit(0.6) - β_2_true[0]) / β_2_true[1] # This is what the "true" curves look like for both groups # In[190]: plt.plot(titers, p1, label='80') plt.plot(titers, p2, label='160') plt.ylim(0, 1) plt.xlabel('dilution') plt.ylabel('% neutralization') plt.legend() # These parameters can then be used to simulate data: # In[203]: dilutions = np.array([10, 20, 40, 80, 160, 320, 640]) n = 30 x1 = np.random.binomial(n, invlogit(β_1_true[0] + β_1_true[1]*dilutions)) x2 = np.random.binomial(n, invlogit(β_2_true[0] + β_2_true[1]*dilutions)) # Here is one simulated dataset: # In[204]: plt.plot(dilutions, x1/n, 'bo', label='group 1') plt.plot(dilutions, x2/n, 'ro', label='group 2') plt.ylim(0, 1) plt.xlabel('dilution') plt.ylabel('% neutralization') plt.legend() # Now, lets simulate 100 datasets, and see how often we are able to detect a difference, where "detect a difference" means that the probability of group 2 being greater than group 1 is greater than 95%. # In[217]: n_sim = 100 probs = np.empty(n_sim) for sim in range(n_sim): # Simulate data x1 = np.random.binomial(n, invlogit(β_1_true[0] + β_1_true[1]*dilutions)) x2 = np.random.binomial(n, invlogit(β_2_true[0] + β_2_true[1]*dilutions)) # Estimate model with pm.Model() as mod: β1 = pm.Normal('β1', 0, sd=10, shape=2) β2 = pm.Normal('β2', 0, sd=10, shape=2) # Number of times β1 is greater than β2 (i.e. false positive) p = pm.Deterministic('p', β1[1]>β2[1]) pm.Binomial('x1', n=30, p=invlogit(β1[0] + β1[1]*dilutions), observed=x1) pm.Binomial('x2', n=30, p=invlogit(β2[0] + β2[1]*dilutions), observed=x2) # Run model tr = pm.sample(2000, init=None, step=pm.NUTS(), progressbar=None) # Tally count probs[sim] = tr['p', 1000:].mean() # Here is the estimated power, based on these simulations: # In[219]: (probs<0.05).mean()