import os
import pandas as pd
import numpy as np
%load_ext rpy2.ipython
%R library(IAT)
array(['IAT', 'tools', 'stats', 'graphics', 'grDevices', 'utils', 'datasets', 'methods', 'base'], dtype='<U9')
import pyiat
Example data from the Death Implicit Association Test
Nock, M.K., Park, J.M., Finn, C.T., Deliberto, T.L., Dour, H.J., & Banaji, M.R. (2010). Measuring the suicidal mind: Implicit cognition predicts suicidal behavior. Psychological Science, 21(4), 511–517. https://doi.org/10.1177/0956797610364762
pyiat will work with any IAT data.
# Load the example IAT trial data; the first CSV column is used as the index.
d = pd.read_csv('iat_data.csv', index_col=0)
d.head()
block | condition | trial_word | latency | errors | correct | response | subjnum | |
---|---|---|---|---|---|---|---|---|
0 | 0 | Death,Life | Dead | 809 | 0 | 1 | Death | 1 |
1 | 0 | Death,Life | Deceased | 852 | 0 | 1 | Death | 1 |
2 | 0 | Death,Life | Alive | 606 | 0 | 1 | Life | 1 |
3 | 0 | Death,Life | Living | 420 | 0 | 1 | Life | 1 |
4 | 0 | Death,Life | Suicide | 1347 | 0 | 1 | Death | 1 |
# Trial count for each subject (first five subjects shown).
# Note that subject 1 has too few trials.
d.groupby('subjnum')['subjnum'].count().head()
subjnum 1 40 2 200 3 200 4 200 5 200 Name: subjnum, dtype: int64
# Subject IDs present in this data set
d['subjnum'].unique()
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21])
# Condition labels that occur in the data
d['condition'].unique()
array(['Death,Life', 'Not Me,Me', 'Life,Death', 'Life/Not Me,Death/Me', 'Death/Not Me,Life/Me'], dtype=object)
# Block numbers that occur in the data
d['block'].unique()
array([0, 1, 2, 3, 4, 5, 6])
# The "correct" column codes correct responses as 1 and errors as 0.
d['correct'].unique()
array([1, 0])
Blocks 0, 1 and 4 - which contain the conditions 'Death,Life', 'Not Me,Me' and 'Life,Death' - are practice blocks: they do not contrast the different categories, so they do not contain data relevant to scoring.
Therefore, we will enter blocks 2,3,5,6 and conditions 'Life/Not Me,Death/Me', 'Death/Not Me,Life/Me' into analyze_iat.
We are entering the "correct" column, which contains 1 for correct and 0 for errors. Alternatively, we could enter the "errors" column and set the error_or_correct argument to 'error'.
Finally, we have the option to return the total number and percentage of trials that are removed because they are either too fast (default: 400ms) or too slow (default: 10000ms). These are reported both across all subjects and across only those subjects who did not receive a flag indicating poor performance on some metric.
# Score the critical blocks (2, 3, 5, 6) for the two combined conditions and
# also request the summary of fast/slow trials (returned as fs1).
d1, fs1 = pyiat.analyze_iat(
    d,
    subject='subjnum',
    rt='latency',
    condition='condition',
    correct='correct',
    cond1='Death/Not Me,Life/Me',
    cond2='Life/Not Me,Death/Me',
    block='block',
    blocks=[2, 3, 5, 6],
    fastslow_stats=True,
)
# First 14 columns: trial counts including and excluding fast/slow trials.
d1.iloc[:, 0:14].head()
overall_num_trls_incl_fastslow_rt | Death/Not Me,Life/Me_num_trls_incl_fastslow_rt | Life/Not Me,Death/Me_num_trls_incl_fastslow_rt | Death/Not Me,Life/Me_bl1_num_trls_incl_fastslow_rt | Life/Not Me,Death/Me_bl1_num_trls_incl_fastslow_rt | Death/Not Me,Life/Me_bl2_num_trls_incl_fastslow_rt | Life/Not Me,Death/Me_bl2_num_trls_incl_fastslow_rt | overall_num_trls_excl_fastslow_rt | Death/Not Me,Life/Me_num_trls_excl_fastslow_rt | Life/Not Me,Death/Me_num_trls_excl_fastslow_rt | Death/Not Me,Life/Me_bl1_num_trls_excl_fastslow_rt | Life/Not Me,Death/Me_bl1_num_trls_excl_fastslow_rt | Death/Not Me,Life/Me_bl2_num_trls_excl_fastslow_rt | Life/Not Me,Death/Me_bl2_num_trls_excl_fastslow_rt | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
subjnum | ||||||||||||||
2 | 120 | 60 | 60 | 20 | 40 | 20 | 40 | 119 | 59 | 60 | 20 | 39 | 20 | 40 |
3 | 120 | 60 | 60 | 20 | 40 | 20 | 40 | 120 | 60 | 60 | 20 | 40 | 20 | 40 |
4 | 120 | 60 | 60 | 20 | 40 | 20 | 40 | 118 | 58 | 60 | 19 | 39 | 20 | 40 |
5 | 120 | 60 | 60 | 20 | 40 | 20 | 40 | 120 | 60 | 60 | 20 | 40 | 20 | 40 |
6 | 120 | 60 | 60 | 20 | 40 | 20 | 40 | 119 | 59 | 60 | 20 | 39 | 20 | 40 |
# Columns 14-20: error rates (overall, per condition, per condition and block).
d1.iloc[:, 14:21].head()
overall_error_rate | Death/Not Me,Life/Me_error_rate | Life/Not Me,Death/Me_error_rate | Death/Not Me,Life/Me_bl1_error_rate | Life/Not Me,Death/Me_bl1_error_rate | Death/Not Me,Life/Me_bl2_error_rate | Life/Not Me,Death/Me_bl2_error_rate | |
---|---|---|---|---|---|---|---|
subjnum | |||||||
2 | 0.083333 | 0.083333 | 0.083333 | 0.10 | 0.075 | 0.05 | 0.10 |
3 | 0.058333 | 0.033333 | 0.083333 | 0.05 | 0.025 | 0.05 | 0.10 |
4 | 0.041667 | 0.050000 | 0.033333 | 0.00 | 0.075 | 0.00 | 0.05 |
5 | 0.125000 | 0.066667 | 0.183333 | 0.05 | 0.075 | 0.15 | 0.20 |
6 | 0.150000 | 0.100000 | 0.200000 | 0.05 | 0.125 | 0.20 | 0.20 |
# Columns 21-27: rates of fast responses (columns suffixed "_fast_rt_rate_400ms").
d1.iloc[:, 21:28].head()
overall_fast_rt_rate_400ms | Death/Not Me,Life/Me_fast_rt_rate_400ms | Life/Not Me,Death/Me_fast_rt_rate_400ms | Death/Not Me,Life/Me_bl1_fast_rt_rate_400ms | Life/Not Me,Death/Me_bl1_fast_rt_rate_400ms | Death/Not Me,Life/Me_bl2_fast_rt_rate_400ms | Life/Not Me,Death/Me_bl2_fast_rt_rate_400ms | |
---|---|---|---|---|---|---|---|
subjnum | |||||||
2 | 0.008333 | 0.016667 | 0.0 | 0.00 | 0.025 | 0.0 | 0.0 |
3 | 0.000000 | 0.000000 | 0.0 | 0.00 | 0.000 | 0.0 | 0.0 |
4 | 0.016667 | 0.033333 | 0.0 | 0.05 | 0.025 | 0.0 | 0.0 |
5 | 0.000000 | 0.000000 | 0.0 | 0.00 | 0.000 | 0.0 | 0.0 |
6 | 0.008333 | 0.016667 | 0.0 | 0.00 | 0.025 | 0.0 | 0.0 |
# Columns 28-34: rates of slow responses (columns suffixed "_slow_rt_rate_10000ms").
d1.iloc[:, 28:35].head()
overall_slow_rt_rate_10000ms | Death/Not Me,Life/Me_slow_rt_rate_10000ms | Life/Not Me,Death/Me_slow_rt_rate_10000ms | Death/Not Me,Life/Me_bl1_slow_rt_rate_10000ms | Life/Not Me,Death/Me_bl1_slow_rt_rate_10000ms | Death/Not Me,Life/Me_bl2_slow_rt_rate_10000ms | Life/Not Me,Death/Me_bl2_slow_rt_rate_10000ms | |
---|---|---|---|---|---|---|---|
subjnum | |||||||
2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
# Column 35: num_blocks, selected with a list so the result is a one-column frame.
d1.iloc[:, [35]].head()
num_blocks | |
---|---|
subjnum | |
2 | 4 |
3 | 4 |
4 | 4 |
5 | 4 |
6 | 4 |
# Columns 36-57: the per-metric "_flag" columns, ending with num_blocks_flag.
d1.iloc[:, 36:58].head()
overall_error_rate_flag | Death/Not Me,Life/Me_error_rate_flag | Life/Not Me,Death/Me_error_rate_flag | Death/Not Me,Life/Me_bl1_error_rate_flag | Life/Not Me,Death/Me_bl1_error_rate_flag | Death/Not Me,Life/Me_bl2_error_rate_flag | Life/Not Me,Death/Me_bl2_error_rate_flag | overall_fast_rt_rate_400ms_flag | Death/Not Me,Life/Me_fast_rt_rate_400ms_flag | Life/Not Me,Death/Me_fast_rt_rate_400ms_flag | ... | Death/Not Me,Life/Me_bl2_fast_rt_rate_400ms_flag | Life/Not Me,Death/Me_bl2_fast_rt_rate_400ms_flag | overall_slow_rt_rate_10000ms_flag | Death/Not Me,Life/Me_slow_rt_rate_10000ms_flag | Life/Not Me,Death/Me_slow_rt_rate_10000ms_flag | Death/Not Me,Life/Me_bl1_slow_rt_rate_10000ms_flag | Life/Not Me,Death/Me_bl1_slow_rt_rate_10000ms_flag | Death/Not Me,Life/Me_bl2_slow_rt_rate_10000ms_flag | Life/Not Me,Death/Me_bl2_slow_rt_rate_10000ms_flag | num_blocks_flag | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
subjnum | |||||||||||||||||||||
2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 22 columns
# Column 58: iat_flag, selected with a list so the result is a one-column frame.
d1.iloc[:, [58]].head()
iat_flag | |
---|---|
subjnum | |
2 | 0 |
3 | 0 |
4 | 0 |
5 | 0 |
6 | 0 |
# Columns 59-61: dscore1, dscore2 and the final dscore.
d1.iloc[:, 59:62].head()
dscore1 | dscore2 | dscore | |
---|---|---|---|
subjnum | |||
2 | -0.664655 | 0.055078 | -0.304788 |
3 | 0.535351 | -0.305440 | 0.114955 |
4 | -0.074985 | 0.052010 | -0.011487 |
5 | 0.361880 | 0.032090 | 0.196985 |
6 | -0.035555 | -0.212646 | -0.124100 |
# Prepare the data for the R package: the block must be a string column, and the
# subjects must be split into two separate dataframes - one for people who
# received "Death,Me" first and one for those who received "Life,Me" first.
# The split is made on which combined condition a subject has in blocks > 4.
d['block_str'] = d['block'].astype(str)

d1_r_subn = d.loc[
    (d['condition'] == 'Death/Not Me,Life/Me') & (d['block'] > 4), 'subjnum'
].unique()
d1_r = d[d['subjnum'].isin(d1_r_subn)]

d2_r_subn = d.loc[
    (d['condition'] == 'Life/Not Me,Death/Me') & (d['block'] > 4), 'subjnum'
].unique()
d2_r = d[d['subjnum'].isin(d2_r_subn)]
%R -i d1_r
%R -i d2_r
%%R
# Score each order group separately with cleanIAT from the IAT package.
# Both calls use the same settings: the string block column "block_str",
# blocks "2", "3", "5", "6", subject id "subjnum", latency column "latency"
# and the "errors" column as the error indicator.
# NOTE(review): v_error / v_extreme / v_std select scoring variants; see
# ?cleanIAT for their exact meaning - confirm these match the intended algorithm.
dscore_first <- cleanIAT(my_data = d1_r,
block_name = "block_str",
trial_blocks = c("2","3", "5", "6"),
session_id = "subjnum",
trial_latency = "latency",
trial_error = "errors",
v_error = 1, v_extreme = 2, v_std = 1)
dscore_second <- cleanIAT(my_data = d2_r,
block_name = "block_str",
trial_blocks = c("2","3", "5", "6"),
session_id = "subjnum",
trial_latency = "latency",
trial_error = "errors",
v_error = 1, v_extreme = 2, v_std = 1)
# Stack both result frames on the R side.
r_dsc <- rbind(dscore_first, dscore_second)
%R -o dscore_first
%R -o dscore_second
# Combine the two R result frames into one.
# The scores in the second frame are flipped relative to the first, so flip
# their sign back before combining.
dscore_second['IAT'] = -dscore_second['IAT']
iat_r_dsc = (
    pd.concat([dscore_first, dscore_second])
    .set_index('subjnum', drop=False)
    .sort_index()
)
# Put the pyiat D score and the R package score side by side, aligned on subject.
py_r_iat = pd.concat([d1['dscore'], iat_r_dsc['IAT']], axis=1)
py_r_iat.head()
dscore | IAT | |
---|---|---|
subjnum | ||
2 | -0.304788 | -0.304788 |
3 | 0.114955 | 0.114955 |
4 | -0.011487 | -0.011487 |
5 | 0.196985 | 0.196985 |
6 | -0.124100 | -0.124100 |
# Correlation between the pyiat score (dscore) and the R package score (IAT);
# the two agree exactly, so the correlation is 1.
py_r_iat.corr()
dscore | IAT | |
---|---|---|
dscore | 1.0 | 1.0 |
IAT | 1.0 | 1.0 |
# Counts and proportions of too-fast and too-slow trials, reported for all
# subjects and for included subjects only.
fs1
fast_slow_rt | |
---|---|
fast_rt_count_all_subs | 28.000000 |
fast_rt_pct_all_subs | 0.012281 |
slow_rt_count_all_subs | 0.000000 |
slow_rt_pct_all_subs | 0.000000 |
fast_rt_count_included_subs | 26.000000 |
fast_rt_pct_included_subs | 0.012037 |
slow_rt_count_included_subs | 0.000000 |
slow_rt_pct_included_subs | 0.000000 |
# Same analysis as before, but with each_stim=True so that a score is returned
# for every stimulus word in the 'trial_word' column.
d2, fs2 = pyiat.analyze_iat(
    d,
    subject='subjnum',
    rt='latency',
    condition='condition',
    correct='correct',
    cond1='Death/Not Me,Life/Me',
    cond2='Life/Not Me,Death/Me',
    block='block',
    blocks=[2, 3, 5, 6],
    fastslow_stats=True,
    each_stim=True,
    stimulus='trial_word',
)
# Columns from position 59 onward are the per-word scores.
d2.iloc[:, 59:].head()
Alive | Breathing | Dead | Deceased | Die | I | Living | Mine | Myself | Other | Self | Suicide | Their | Them | They | Thrive | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
subjnum | ||||||||||||||||
2 | -0.985360 | 0.888583 | -0.928888 | 0.849257 | -0.583566 | 0.095183 | 0.912821 | -0.271138 | -0.582636 | -0.656637 | -0.915718 | 1.180729 | -0.260372 | 0.409289 | 0.346441 | -0.886016 |
3 | -0.228326 | -1.093680 | 0.546770 | -0.725671 | -0.674954 | 0.234189 | 0.124746 | 0.601094 | 0.263791 | 0.626292 | 0.204474 | 1.102111 | -0.066274 | 0.106796 | 0.528139 | 0.376461 |
4 | 1.287682 | 1.170938 | -0.084933 | 0.705147 | 0.084087 | -1.290731 | -0.473819 | NaN | -1.231712 | -0.134547 | -0.027644 | 0.269901 | 0.797983 | -0.611150 | 1.367861 | 0.225041 |
5 | 0.670642 | -1.481677 | -0.131380 | 0.218148 | -0.085340 | -0.711548 | 0.285979 | 0.043037 | -0.705098 | -0.186102 | 1.121443 | 1.547784 | 0.703224 | -0.342463 | -1.156475 | -0.176667 |
6 | 0.331365 | 0.955485 | 1.175333 | -1.099263 | 0.200367 | 0.176193 | -1.653920 | 0.317128 | -1.264377 | -0.748313 | -0.923371 | 1.169169 | -1.157834 | 0.025647 | -0.730981 | 1.373918 |
The unweighted algorithm does not require the 'block' or 'blocks' arguments.
# Unweighted scoring: weighted=False, with no 'block' or 'blocks' arguments.
d3, fs3 = pyiat.analyze_iat(
    d,
    subject='subjnum',
    rt='latency',
    condition='condition',
    correct='correct',
    cond1='Death/Not Me,Life/Me',
    cond2='Life/Not Me,Death/Me',
    fastslow_stats=True,
    weighted=False,
)
# Columns from position 24 onward: iat_flag and dscore.
d3.iloc[:, 24:].head()
iat_flag | dscore | |
---|---|---|
subjnum | ||
2 | 0 | -0.186741 |
3 | 0 | -0.028275 |
4 | 0 | 0.014719 |
5 | 0 | 0.134814 |
6 | 0 | -0.169741 |
# Unweighted scoring with a score for every stimulus word in 'trial_word'.
d4, fs4 = pyiat.analyze_iat(
    d,
    subject='subjnum',
    rt='latency',
    condition='condition',
    correct='correct',
    cond1='Death/Not Me,Life/Me',
    cond2='Life/Not Me,Death/Me',
    fastslow_stats=True,
    each_stim=True,
    stimulus='trial_word',
    weighted=False,
)
# NOTE(review): this slice starts at 'Breathing', while the weighted per-word
# table starts at 'Alive'. The slice may need to begin at 25 to include the
# 'Alive' column - confirm against d4.columns.
d4.iloc[:, 26:].head()
Breathing | Dead | Deceased | Die | I | Living | Mine | Myself | Other | Self | Suicide | Their | Them | They | Thrive | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
subjnum | |||||||||||||||
2 | 0.255883 | -1.014222 | 0.509232 | -0.390387 | 1.295332 | 0.737379 | 0.339328 | -0.741623 | -0.449167 | -0.959577 | 0.780971 | -0.360803 | -0.347474 | 0.392907 | -0.369755 |
3 | -0.737611 | 0.064561 | -1.067423 | -0.224777 | -0.587997 | -0.678621 | 0.402110 | -0.183775 | 0.587851 | 0.219246 | 0.949326 | 0.718642 | 1.212748 | 0.437229 | 0.442829 |
4 | 0.875262 | -0.095717 | 0.501377 | -0.825456 | -1.028155 | 0.087735 | 0.979749 | -0.947307 | -0.509266 | 0.937908 | -0.645969 | 0.368724 | 0.017880 | 0.718572 | -0.223557 |
5 | -1.503671 | 0.865254 | -0.596999 | 0.793221 | -0.866229 | -0.337516 | 0.981948 | 0.018283 | -0.019362 | 0.679210 | 1.026762 | 0.506938 | 0.476360 | -0.821311 | -0.582995 |
6 | 0.789436 | 0.954122 | -1.141469 | 0.270374 | -0.756038 | -1.658917 | 0.891638 | -0.968083 | -0.980864 | -0.731248 | 1.212799 | -0.793774 | -0.172779 | -0.712242 | 1.247653 |
pyiat can also produce D scores and poor-performance flags for the Brief IAT (BIAT), following the Nosek et al. (2014) scoring procedures (trials slower than 2 sec are changed to 2 sec, trials faster than 400ms are changed to 400ms), with some options. You can choose how many trials, if any, are removed from the beginning of each block (default: the first 4). You can also set the pct cutoffs for the flags. One limitation of BIAT flags in pyiat is that the flags for fast and slow trials currently share the same cutoff pct. The recommended scoring procedures (Nosek et al., 2014) call for a flag for fast trials but not for slow trials; this is not currently possible in pyiat. However, you can see the pct of slow and fast trials and create your own flags from this information.
The BIAT code can take either 2, 4, or 6 blocks (depending what is listed in the blocks argument) and will dynamically adjust to however many blocks it is given.
Results of pyiat BIAT D scores were checked against D scores sent to me from U of Virginia.
# Load the example BIAT trial data; the first CSV column is used as the index.
bd = pd.read_csv('biat_data.csv', index_col=0)
bd.head()
block_num | pair | trl_number | word | resp | RT | errors | subn | |
---|---|---|---|---|---|---|---|---|
20 | 0 | (unnamed)/Life,Me/Death | 0 | Die | Me/Death | 771 | 0 | 1 |
21 | 0 | (unnamed)/Life,Me/Death | 1 | Suicide | Me/Death | 393 | 0 | 1 |
22 | 0 | (unnamed)/Life,Me/Death | 2 | Alive | (unnamed)/Life | 859 | 0 | 1 |
23 | 0 | (unnamed)/Life,Me/Death | 3 | Thrive | (unnamed)/Life | 809 | 0 | 1 |
24 | 0 | (unnamed)/Life,Me/Death | 4 | Living | (unnamed)/Life | 585 | 0 | 1 |
# BIAT scoring over all six blocks. The 'errors' column is passed as the
# accuracy column, so error_or_correct is set to 'error'. The first 4 trials
# of each block are removed, using 'trl_number' as the trial counter.
biatd1, biatfsl = pyiat.analyze_iat(
    bd,
    subject='subn',
    rt='RT',
    condition='pair',
    correct='errors',
    error_or_correct='error',
    cond2='(unnamed)/Death,Me/Life',
    cond1='(unnamed)/Life,Me/Death',
    block='block_num',
    blocks=[0, 1, 2, 3, 4, 5],
    biat=True,
    biat_rmv_xtrls=4,
    biat_trl_num='trl_number',
    fastslow_stats=True,
)
# Last five columns: iat_flag, the three block-pair D scores and the final dscore.
biatd1.iloc[:, -5:].head()
iat_flag | dscore1 | dscore2 | dscore3 | dscore | |
---|---|---|---|---|---|
subn | |||||
1 | 2 | 0.390122 | NaN | NaN | NaN |
2 | 0 | -0.224629 | -0.566525 | -0.306011 | -0.365722 |
3 | 2 | 0.367767 | -0.031676 | -0.279295 | 0.018932 |
4 | 4 | -0.288485 | 0.420364 | -0.402823 | -0.090315 |
5 | 1 | -0.616905 | 0.714779 | NaN | NaN |
D scores can be obtained for each word in the BIAT as well, but if you choose weighted, it will result in odd/repeated values, presumably because each word is presented only a single time in a block.
# BIAT scoring with an unweighted score for every stimulus word in 'word'.
biatd1stim, biatfslstim = pyiat.analyze_iat(
    bd,
    subject='subn',
    rt='RT',
    condition='pair',
    correct='errors',
    error_or_correct='error',
    cond2='(unnamed)/Death,Me/Life',
    cond1='(unnamed)/Life,Me/Death',
    block='block_num',
    blocks=[0, 1, 2, 3, 4, 5],
    biat=True,
    biat_rmv_xtrls=4,
    biat_trl_num='trl_number',
    fastslow_stats=True,
    each_stim=True,
    stimulus='word',
    weighted=False,
)
# The first subject had only one block, which shows up as repeated numbers.
# NOTE(review): the last 15 columns start at 'Breathing'; the IAT per-word table
# has 16 words starting with 'Alive', so -16 may be intended - confirm against
# biatd1stim.columns.
biatd1stim.iloc[:, -15:].head()
Breathing | Dead | Deceased | Die | I | Living | Mine | Myself | Other | Self | Suicide | Their | Them | They | Thrive | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
subn | |||||||||||||||
1 | 1.414214 | 1.414214 | 1.414214 | 1.414214 | -1.414214 | 1.414214 | 1.414214 | -1.414214 | -1.414214 | 1.414214 | 1.414214 | -1.414214 | 1.414214 | 1.414214 | -1.414214 |
2 | -0.856718 | 0.581147 | -0.721409 | -0.137757 | -1.248524 | -1.285648 | 0.077723 | 0.234020 | 0.830699 | 0.246106 | 0.283150 | -0.460540 | -1.554008 | -0.419208 | -1.388049 |
3 | -1.193973 | 0.638540 | 0.026860 | -0.483864 | -0.771323 | 1.165362 | 0.273211 | -0.869386 | -0.863290 | -0.675966 | 0.481588 | 0.292579 | 0.312901 | -0.940451 | 0.669422 |
4 | -0.424451 | 0.612571 | -0.386662 | -0.720745 | -0.253506 | -0.461442 | -0.387281 | 1.044736 | 0.304453 | -1.085809 | -0.314566 | 0.220312 | 0.074056 | 0.112152 | 0.327524 |
5 | -0.987824 | 0.933022 | 1.239651 | 0.241605 | 1.054212 | 1.450163 | -0.970743 | 0.835395 | -0.417985 | -1.134178 | 0.518189 | 0.993871 | 0.608800 | -1.163136 | 1.319345 |