#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')

import numpy as np
import pymc3 as pm
import arviz as az

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)


# Some fake data

# In[2]:


MALE, FEMALE = 0, 1

y = np.array([34, 42])             # affected counts per group
gender = np.array([MALE, FEMALE])
N = np.array([104, 110])           # group sizes


# Here's the simplest model -- two independent probabilities, with uniform
# Beta(1, 1) priors and the group difference tracked as a deterministic node.

# In[3]:


with pm.Model() as affection_status:
    π = pm.Beta('π', 1, 1, shape=2)               # one probability per group
    δ = pm.Deterministic('δ', π[1] - π[0])        # female - male difference
    like = pm.Binomial('like', N, π, observed=y)


# In[4]:


with affection_status:
    trace = pm.sample(1000, cores=2)


# Here's the posterior distribution of the probability difference:

# In[11]:


az.plot_posterior(trace, var_names=['δ'], ref_val=0);


# Parameter estimates:

# In[6]:


pm.summary(trace).round(2)


# Another approach is a GLM: model the affection probability on the logit
# scale, with μ as the baseline (male) log-odds and θ as the female-male shift.

# In[7]:


with pm.Model() as affection_status_glm:
    μ = pm.Normal('μ', 0, sigma=5)
    θ = pm.Normal('θ', 0, sigma=1)
    π = pm.math.invlogit(μ + θ*gender)
    like = pm.Binomial('like', N, π, observed=y)


# In[8]:


with affection_status_glm:
    trace_glm = pm.sample(1000, cores=2)


# Here you get an effect size, but on the logit scale (so perhaps harder to
# interpret):

# In[10]:


az.plot_posterior(trace_glm, var_names=['θ'], ref_val=0);
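
# The logit-scale effect can be put back on the probability scale by pushing
# the posterior samples of μ and θ through the inverse logit and differencing.
# A quick sketch, added here for illustration (scipy's `expit` is the inverse
# logit); the result is directly comparable to δ from the first model.

# In[ ]:


from scipy.special import expit  # inverse logit

p_male = expit(trace_glm['μ'])                     # baseline probability
p_female = expit(trace_glm['μ'] + trace_glm['θ'])  # shifted by the gender effect
δ_glm = p_female - p_male

# [None, :] adds a leading chain dimension so arviz reads the samples as
# one chain of draws
az.plot_posterior({'δ_glm': δ_glm[None, :]}, ref_val=0);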