In [1]:
%pylab inline
import os
import os.path as op
import glob
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from statsmodels.formula.api import ols

# some possibly relevant links/sources:
# http://stats.stackexchange.com/questions/24314/when-is-a-repeated-measures-anova-preferred-over-a-mixed-effects-model
# http://nbviewer.ipython.org/urls/umich.box.com/shared/static/6tfc1e0q6jincsv5pgfa.ipynb
# http://jpktd.blogspot.jp/2013/03/multiple-comparison-and-tukey-hsd-or_25.html
# http://statsmodels.sourceforge.net/devel/anova.html

resdir = '/Users/cplanting/ABR-vis/'
data_bera = pd.read_csv(op.join(resdir,'amp.csv'), sep=';',header=0)
print data_bera.head()

#bin data source: http://stackoverflow.com/questions/16947336/binning-a-dataframe-in-pandas-in-python
bins = np.linspace(5, 85, 9)
int_binned = data_bera.groupby(pd.cut(data_bera.intensity, bins))
def int_round(x,mult):
    #round to closest multiple of 10; 5.0 -> 0.0; 5.1 -> 10.0
    return np.round(x / mult) * mult

data_bera['intensity (dB SPL)'] = int_round(data_bera['intensity'],10.0)
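# (sketch) the binned groupby above is not used further; one way to put it to work would be
# a per-bin summary of the amplitudes, e.g. mean, SD and n per intensity bin:
amp_by_bin = int_binned['amplitude'].agg(['mean', 'std', 'count'])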
g0 = sns.factorplot("intensity (dB SPL)", "amplitude", "stimulus_type", kind='bar',
                    ci=68, n_boot=50000, estimator=np.mean,
                    data=data_bera, size=6, palette="Set1")
Populating the interactive namespace from numpy and matplotlib
   sub_id  intensity initials     ear stimulus_type  latency  amplitude
0       1         80       JM  Rechts         Chirp     5.77        620
1       1         80       JM  Rechts        Clicks     5.57        450
2       1         80       JM   Links         Chirp     5.57        725
3       1         80       JM   Links        Clicks     5.73        500
4       1         70       JM  Rechts        Clicks     5.93        475
In [5]:
g = sns.lmplot(x="intensity (dB SPL)", y="amplitude", hue="stimulus_type", x_estimator=np.mean, data=data_bera,
               y_jitter=10,palette="Set1")
In [16]:
g = sns.lmplot(x="intensity (dB SPL)", y="latency", hue="stimulus_type", x_estimator=np.mean, data=data_bera,
               x_jitter=4,palette="Set1")
level = [10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90]
lat = [8.73,8.42,8.12,7.82,7.52,7.24,6.98,6.74,6.52,6.33,6.16,6.01,5.9,5.8,5.71,5.64,5.58]
level2 = [20,30,40,50,60,70,80,90]
lat2 = [7.51, 7.13, 6.59, 6.17, 6.01, 5.8, 5.62, 5.55]
lat2_sd = [0.42, 0.37, 0.43, 0.27, 0.27, 0.2, 0.12,0.12]
plot(level,lat,'k-',lw=3,label='Norm Click (Prosser, 1988)')
plot(level2,lat2,'g-',lw=3,label='UMCG norm Click')

lev_chirp = [10,20,30,40,60,80]
lat_chirp = [7.01, 6.42, 5.79, 5.17, 3.93, 2.85]
sd_chirp = [0.49, 0.36, 0.35, 0.40, 0.62, 0.84]
errorbar(lev_chirp,lat_chirp,yerr=sd_chirp,color='k',label='Norm Chirp (Elberling, 2010)')
legend()
g.ax.set_ylim([0,12])
Out[16]:
(0, 12)
In [11]:
model = sm.MixedLM.from_formula("amplitude ~ intensity * stimulus_type", tmp, groups=tmp["sub_id"])
result = model.fit()
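# (sketch) besides the summary, the fitted object exposes the estimates directly:
# result.fe_params holds the fixed-effect coefficients, and result.random_effects
# the per-subject random intercepts (a dict keyed by sub_id)
fixed_effects = result.fe_params
subject_intercepts = result.random_effects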
print result.summary()
                       Mixed Linear Model Regression Results
==================================================================================
Model:                      MixedLM         Dependent Variable:         amplitude 
No. Observations:           291             Method:                     REML      
No. Groups:                 16              Scale:                      15421.7194
Min. group size:            3               Likelihood:                 -1834.6913
Max. group size:            29              Converged:                  Yes       
Mean group size:            18.2                                                  
----------------------------------------------------------------------------------
                                    Coef.   Std.Err.   z    P>|z|  [0.025   0.975]
----------------------------------------------------------------------------------
Intercept                            95.938   53.345  1.798 0.072   -8.616 200.492
stimulus_type[T.Clicks]            -108.567   35.500 -3.058 0.002 -178.145 -38.989
intensity                             8.538    0.471 18.112 0.000    7.614   9.462
intensity:stimulus_type[T.Clicks]    -1.245    0.683 -1.824 0.068   -2.584   0.093
Intercept RE                      36195.683  117.297                              
==================================================================================

In [28]:
model = sm.MixedLM.from_formula("latency ~ intensity * stimulus_type", tmp, groups=tmp["sub_id"])
result = model.fit()
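# (sketch) quick sanity check against the lmplot above: the predicted wave-V latency for a
# chirp at 60 dB from the fixed effects is roughly Intercept + 60 * intensity-slope
# (about 10.0 - 0.053 * 60 ~ 6.8 ms)
predicted_chirp_60 = result.fe_params['Intercept'] + 60 * result.fe_params['intensity']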
print result.summary()
                    Mixed Linear Model Regression Results
=============================================================================
Model:                    MixedLM        Dependent Variable:        latency  
No. Observations:         291            Method:                    REML     
No. Groups:               16             Scale:                     0.1906   
Min. group size:          3              Likelihood:                -214.5807
Max. group size:          29             Converged:                 Yes      
Mean group size:          18.2                                               
-----------------------------------------------------------------------------
                                  Coef.  Std.Err.    z    P>|z| [0.025 0.975]
-----------------------------------------------------------------------------
Intercept                          9.995    0.205  48.702 0.000  9.592 10.397
stimulus_type[T.Clicks]           -0.587    0.125  -4.706 0.000 -0.832 -0.343
intensity                         -0.053    0.002 -31.782 0.000 -0.056 -0.049
intensity:stimulus_type[T.Clicks]  0.009    0.002   3.592 0.000  0.004  0.013
Intercept RE                       0.559    0.509                            
=============================================================================

In [26]:
model_anova = ols('latency ~ stimulus_type*intensity',data=tmp).fit()
table = sm.stats.anova_lm(model_anova, typ=2) # Type 2 ANOVA DataFrame
print model_anova.summary()
print table
#interestingly, with a plain ANOVA (no repeated measures / random subject term), the effects
#dissolve into the between-subject variance; therefore use MixedLM (a quick check is sketched below)
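# (sketch) one way to see this: adding subject as a fixed factor pulls the between-subject
# variance out of the residual, much like a repeated-measures ANOVA would
model_anova_subj = ols('latency ~ stimulus_type*intensity + C(sub_id)', data=tmp).fit()
table_subj = sm.stats.anova_lm(model_anova_subj, typ=2)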
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                latency   R-squared:                       0.561
Model:                            OLS   Adj. R-squared:                  0.556
Method:                 Least Squares   F-statistic:                     122.1
Date:                Thu, 03 Dec 2015   Prob (F-statistic):           5.73e-51
Time:                        21:36:15   Log-Likelihood:                -354.79
No. Observations:                 291   AIC:                             717.6
Df Residuals:                     287   BIC:                             732.3
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
=====================================================================================================
                                        coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------------------------
Intercept                             9.6101      0.150     64.016      0.000         9.315     9.906
stimulus_type[T.Clicks]              -0.7803      0.234     -3.329      0.001        -1.242    -0.319
intensity                            -0.0473      0.003    -15.697      0.000        -0.053    -0.041
stimulus_type[T.Clicks]:intensity     0.0131      0.005      2.918      0.004         0.004     0.022
==============================================================================
Omnibus:                       49.017   Durbin-Watson:                   0.507
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               68.644
Skew:                           1.122   Prob(JB):                     1.24e-15
Kurtosis:                       3.788   Cond. No.                         320.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
                             sum_sq   df           F        PR(>F)
stimulus_type              1.792566    1    2.635921  1.055693e-01
intensity                232.944801    1  342.539073  7.076573e-51
stimulus_type:intensity    5.791957    1    8.516917  3.797030e-03
Residual                 195.175275  287         NaN           NaN
In [22]:
spect_file = '/Users/cplanting/ProjectsScience/ABR/raw_data/spectrum_click_chirp_bone.csv'
spectrum_df = pd.read_csv(spect_file,sep=';',header=0,decimal=',')
spectrum_df  # show the full spectrum table (white noise, click, chirp)
Out[22]:
          f  WN  click  chirp
0      12.5  15   12.5   20.0
1      16.0   6    3.5   13.0
2      20.0   6   -3.0    0.0
3      25.0  -1   -8.5   -7.0
4      31.5  -5   -5.0   -7.0
5      40.0   2    1.5   -5.0
6      50.0   3   -7.0   -9.0
7      63.0   7  -12.0  -10.0
8      80.0  15    3.0  -10.0
9     100.0  19   -3.2  -12.0
10    125.0  25   10.5  -10.0
11    160.0  30   15.8   -9.0
12    200.0  35   20.0    0.0
13    250.0  41   26.8    9.0
14    315.0  48   33.3   30.0
15    400.0  55   41.5   43.0
16    500.0  57   43.5   46.0
17    630.0  55   40.3   43.0
18    800.0  53   38.1   40.5
19   1000.0  52   37.4   40.5
20   1250.0  52   38.2   42.0
21   1600.0  53   39.1   42.0
22   2000.0  50   36.8   40.0
23   2500.0  48   33.3   36.0
24   3150.0  47   31.0   35.0
25   4000.0  45   29.5   35.0
26   5000.0  36   19.0   24.0
27   6300.0  30   10.5   17.0
28   8000.0  27    6.0   15.0
29  10000.0  24    5.5   12.0
30  12500.0  16    6.5    7.0
31  16000.0   9    7.5    7.5
32  20000.0   8    8.5    8.5
In [23]:
fig, ax = plt.subplots(figsize=(8, 6))
semilogx(spectrum_df.f,spectrum_df.WN,'k--',lw=2,label='White Noise')
semilogx(spectrum_df.f,spectrum_df.click,'b-',lw=3,label='click')
semilogx(spectrum_df.f,spectrum_df.chirp,'r-',lw=3,label='chirp')
ax.set_xlabel('Frequency (Hz)')
ax.set_ylabel(r'dB re 1 $\mu$N')
plt.legend()
Out[23]:
<matplotlib.legend.Legend at 0x10b51e1d0>
In [4]:
# now the same, but with the individual data points shown
g = sns.lmplot(x="intensity (dB SPL)", y="amplitude", hue="stimulus_type", order=1,data=data_bera,
               y_jitter=10,palette="Set1")
In [3]:
g = sns.lmplot(x="intensity (dB SPL)", y="latency", hue="stimulus_type", order=2,data=data_bera,
               y_jitter=1,palette="Set1")

#Prosser & Arslan, 1988
level = [10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90]
lat = [8.73,8.42,8.12,7.82,7.52,7.24,6.98,6.74,6.52,6.33,6.16,6.01,5.9,5.8,5.71,5.64,5.58]
level2 = [20,30,40,50,60,70,80,90]

#http://www.baaudiology.org/files/9313/8597/9894/R_04_Abeir_Osman_-_Characteristics_of_Auditory_Brainstem_Response.pdf
lat2 = [7.51, 7.13, 6.59, 6.17, 6.01, 5.8, 5.62, 5.55]
lat2_sd = [0.42, 0.37, 0.43, 0.27, 0.27, 0.2, 0.12,0.12]
plot(level,lat,'k-',lw=3,label='Prosser, 1988')
Out[3]:
[<matplotlib.lines.Line2D at 0x103e5b950>]