#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd


# # Logistic Regression = Binomial regression with logit link
#
# This notebook shows (empirically) that a logistic regression on
# ungrouped binary data is equivalent to a binomial regression with a
# logit link on the same data grouped into counts: both yield the same
# coefficient estimates and standard errors.

# In[23]:


# Ungrouped data: one row per beetle, with log dose `x` and a binary
# outcome `y` (1 = dead).
ungrouped_data = pd.read_csv("../data/Beetles.dat", sep="\t")
ungrouped_data


# In[34]:


# Grouped data: one row per dose level, with the group size `n` and the
# number of deaths `dead`; derive the number of survivors.
grouped_data = pd.read_csv("../data/Beetles2.dat", sep="\t")
grouped_data['alive'] = grouped_data['n'] - grouped_data['dead']
grouped_data


# # Logistic

# In[25]:


import statsmodels.api as sm

logit_mod = sm.Logit(ungrouped_data['y'], sm.add_constant(ungrouped_data['x']))
logit_res = logit_mod.fit()


# In[26]:


print(logit_res.summary())


# # Binomial

# In[27]:


# Grouped binomial fit, take 1: the endog is the observed proportion
# dead/n, and `var_weights` carries the group sizes. This is the
# documented way to give statsmodels a binomial GLM in proportion form.
# (The logit link is the Binomial default, so naming it is optional.)
glm_model = sm.GLM(grouped_data['dead'] / grouped_data['n'],
                   sm.add_constant(grouped_data['logdose']),
                   family=sm.families.Binomial(link=sm.families.links.Logit()),
                   var_weights=grouped_data['n'])
glm_fit = glm_model.fit()
print(glm_fit.summary())


# In[35]:


# Grouped binomial fit, take 2: the endog is a two-column array of
# (successes, failures) counts, which statsmodels also accepts for the
# Binomial family. This fits the same model as take 1.
glm_model = sm.GLM(grouped_data[['dead', 'alive']],
                   sm.add_constant(grouped_data['logdose']),
                   family=sm.families.Binomial(link=sm.families.links.Logit()))
glm_fit = glm_model.fit()
print(glm_fit.summary())


# In[16]:


# Binomial GLM on the ungrouped binary data (n = 1 per row). This is the
# case that should reproduce sm.Logit exactly.
glm_model = sm.GLM(ungrouped_data['y'],
                   sm.add_constant(ungrouped_data['x']),
                   family=sm.families.Binomial(link=sm.families.links.Logit()))
glm_fit = glm_model.fit()
print(glm_fit.summary())
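

# # Checking the equivalence
#
# A quick numerical check, as a minimal sketch: it assumes the cells
# above were run in order, so `logit_res` is the sm.Logit fit and
# `glm_fit` is the binomial GLM fit to the same ungrouped data. Their
# coefficient estimates should then agree to numerical precision,
# because the Bernoulli and binomial log-likelihoods differ only by the
# log binomial coefficients, which do not depend on the parameters.

# In[ ]:


import numpy as np

# Put the two sets of estimates side by side and report the largest
# absolute discrepancy between them.
params = pd.DataFrame({'sm.Logit': logit_res.params,
                       'sm.GLM Binomial': glm_fit.params})
print(params)
print('max abs difference:', np.abs(logit_res.params - glm_fit.params).max())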