#!/usr/bin/env python
# coding: utf-8

# We are looking for a sample size such that the probability that the effect size is greater than zero in 95%.

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np
import pymc3 as pm
import matplotlib.pylab as plt


# In[185]:


titers = np.arange(10, 320)


# In[186]:


invlogit = lambda x: 1/(1+np.exp(-x))
logit = lambda p: np.log(p/(1-p))


# Find parameters that result in neutralization of 80

# In[187]:


β_1_true = np.array([3, -0.0323])
p1 = invlogit(β_1_true[0] + β_1_true[1]*titers)
(logit(0.6) - β_1_true[0]) / β_1_true[1]


# Find parameters that result in neutralization of 160

# In[188]:


β_2_true = np.array([3, -0.0162])
p2 = invlogit(β_2_true[0] + β_2_true[1]*titers)
(logit(0.6) - β_2_true[0]) / β_2_true[1]


# This is what the "true" curves look like for both groups

# In[190]:


plt.plot(titers, p1, label='80')
plt.plot(titers, p2, label='160')
plt.ylim(0, 1)
plt.xlabel('dilution')
plt.ylabel('% neutralization')
plt.legend()


# These parameters can then be used to simulate data:

# In[203]:


dilutions = np.array([10, 20, 40, 80, 160, 320, 640])

n = 30
x1 = np.random.binomial(n, invlogit(β_1_true[0] + β_1_true[1]*dilutions))
x2 = np.random.binomial(n, invlogit(β_2_true[0] + β_2_true[1]*dilutions))


# Here is one simulated dataset:

# In[204]:


plt.plot(dilutions, x1/n, 'bo', label='group 1')
plt.plot(dilutions, x2/n, 'ro', label='group 2')
plt.ylim(0, 1)
plt.xlabel('dilution')
plt.ylabel('% neutralization')
plt.legend()


# Now, lets simulate 100 datasets, and see how often we are able to detect a difference, where "detect a difference" means that the probability of group 2 being greater than group 1 is greater than 95%.

# In[217]:


n_sim = 100

probs = np.empty(n_sim)
for sim in range(n_sim):

    # Simulate data
    x1 = np.random.binomial(n, invlogit(β_1_true[0] + β_1_true[1]*dilutions))
    x2 = np.random.binomial(n, invlogit(β_2_true[0] + β_2_true[1]*dilutions))

    # Estimate model
    with pm.Model() as mod:

        β1 = pm.Normal('β1', 0, sd=10, shape=2)
        β2 = pm.Normal('β2', 0, sd=10, shape=2)

        # Number of times β1 is greater than β2 (i.e. false positive)
        p = pm.Deterministic('p',  β1[1]>β2[1])

        pm.Binomial('x1', n=30, p=invlogit(β1[0] + β1[1]*dilutions), observed=x1)
        pm.Binomial('x2', n=30, p=invlogit(β2[0] + β2[1]*dilutions), observed=x2)
        
        # Run model
        tr = pm.sample(2000, init=None, step=pm.NUTS(), progressbar=None)
        
        # Tally count
        probs[sim] = tr['p', 1000:].mean()


# Here is the estimated power, based on these simulations:

# In[219]:


(probs<0.05).mean()