In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sb
import numpy as np
import pandas as pd
import pymc as pm
from scipy.misc import comb
In [2]:
np.random.seed(42)

Data import and cleaning

In [3]:
# Toggle between pre-exported CSV snapshots of the two worksheets and
# reading the original Excel workbook directly.
excel_sucks = True

if excel_sucks:
    # CSV exports: '-' and 'NR' mark missing study characteristics,
    # 'ND'/'NR' mark missing outcomes.
    study_char = pd.read_csv("study_characteristics.csv",
                             index_col='RefID', na_values=['-', 'NR'])
    outcomes = pd.read_csv("outcomes.csv", na_values=['ND', 'NR'])
else:
    # Read both sheets straight from the source workbook.
    data_file = 'DBD Data for Meta Analyses_4-7-15_withNRCTs_RAE.xlsx'
    study_char = pd.read_excel(data_file, "Study Characteristics",
                               index_col='RefID', na_values=['-', 'NR'])
    outcomes = pd.read_excel(data_file, "Outcomes", na_values=['ND', 'NR'])
In [4]:
study_char.tail()
Out[4]:
Pubmed ID Rel Incl1 Rel Incl2 Rel Incl3 Rel Excl Family NCT Title Year Author ... Screened (N) Randomized (N) Analyzed (N) Proportion Male (%) Direction of Effect DOE_Comments Other comments Drugs(s) - Unnamed: 46
RefID
8385 na NaN NaN NaN NaN NaN NaN Reducing Adolescent Oppositional and Conduct D... 2011 Sells SP, K. W. Early and T. E. Smith ... NaN 38 NaN 57 NaN NaN NaN NaN NaN NaN
8401 22820873 NaN NaN NaN NaN NaN NaN "Tuning into Kids": reducing young children's ... 2013 Havighurst SS, K. R. Wilson, A. E. Harley, C. ... ... 92 63 54 78 NaN NaN NaN NaN NaN NaN
NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 46 columns

Data cleaning

In [5]:
# Ensure the last-follow-up summary statistics are numeric (they may be
# read as object dtype because of the NA sentinels)
for numeric_col in ['Last FU Mean', 'Last FU SD']:
    outcomes[numeric_col] = outcomes[numeric_col].astype(float)
In [6]:
# Recode age category strings to ordinal codes (SCHOOL=0, PRE-K=1, TEEN=2)
age_codes = {'PRE-K': 1, 'SCHOOL': 0, 'TEEN': 2}
study_char['age_cat'] = study_char['AgeCat'].replace(age_codes)
In [7]:
# Normalize data-entry typos in the measure labels and units
label_fixes = {
    'Eyberg Child Behaviour Inventory, problem Subscale':
        'Eyberg Child Behaviour Inventory, Problem Subscale',
}
outcomes['Measure Instrument'] = outcomes['Measure Instrument'].replace(label_fixes)
outcomes['Units'] = outcomes['Units'].replace({'scale': 'Scale'})
In [8]:
# Parse follow-up times ("<number> <unit>", e.g. "6 weeks") and express
# everything in months.
has_fut = outcomes['Last FU Time'].notnull()
split_fut = outcomes.loc[has_fut, 'Last FU Time'].apply(lambda x: str(x).split(' ')[:2])
# BUG FIX: the original computed `weeks / 52`, which converts weeks to YEARS,
# not months; weeks -> months is weeks * (12 / 52). It also relied on an
# `A or B` short-circuit trick that breaks whenever the first term is 0.0.
fut_months = [float(time) * 12 / 52 if unit == 'weeks' else float(time)
              for time, unit in split_fut]
outcomes.loc[has_fut, 'Last FU Time'] = fut_months

We are assuming all CBC Externalizing values over 50 are T-scores, and those under 50 are raw scores. This recodes those observations.

In [9]:
cbce_ind = outcomes['Measure Instrument'].apply(lambda x: x.startswith('Child Behavior Checklist, Externalizing'))
under_50 = outcomes['BL Mean']<50
outcomes.loc[cbce_ind & (under_50^True), 'Measure Instrument'] = 'Child Behavior Checklist, Externalizing (T Score)'
outcomes.loc[cbce_ind & under_50, 'Measure Instrument'] = 'Child Behavior Checklist, Externalizing'

Recode measure instrument variables

In [10]:
def _parse_measure_label(label):
    """Split a 'Measure Instrument' label into (instrument, subtype, units).

    Labels look like 'Instrument, Subtype (Units)' or 'Instrument- Subtype'.
    When no parenthesized unit is present, 'Scale' or 'Score' is inferred
    from the subtype's suffix. Mirrors the original inline loop exactly,
    including the assumption that the separator is followed by one space.
    """
    separator = label.find(',')
    if separator == -1:
        separator = label.find('-')
    instrument_name = label[:separator]
    remainder = label[separator + 2:]
    paren = remainder.find('(')
    if paren > -1:
        # e.g. 'Externalizing (T Score)' -> ('Externalizing', 'T Score')
        return instrument_name, remainder[:paren - 1], remainder[paren + 1:-1]
    units_label = 'Scale' if remainder.endswith('scale') else 'Score'
    return instrument_name, remainder, units_label

# Parse every outcome row's label into three parallel lists
parsed = [_parse_measure_label(label) for label in outcomes['Measure Instrument']]
instrument, subtype, units = (list(col) for col in zip(*parsed))

new_cols = pd.DataFrame({'instrument': instrument, 'subtype': subtype, 
                         'units': units}, index=outcomes.index)
In [11]:
outcomes['Measure Instrument'].value_counts()
Out[11]:
Eyberg Child Behaviour Inventory, Intensity Subscale            79
Eyberg Child Behaviour Inventory, Problem Subscale              57
Child Behavior Checklist, Externalizing (T Score)               46
Child Behavior Checklist, Externalizing                         11
Strengths and Difficulties Questionnaire- Conduct Problems Scale    10
Eyberg Child Behaviour Inventory, Intensity Subscale (T Score)    10
Strengths and Difficulties Questionnaire, Total Score           10
Child Behavior Checklist, Conduct Problems (T Score)             6
Child Behavior Checklist, Aggression                             6
Strengths and Difficulties Questionnaire- Emotional Symptoms Scale     4
Child Behavior Checklist, Delinquent Subscale                    4
Eyberg Child Behaviour Inventory, Problem Subscale (T Score)     4
Child Behavior Checklist, Rulebreaking                           2
Child Behavior Checklist, Social problems                        2
Strengths and Difficulties Questionnaire- Impact Score           2
Child Behavior Checklist, Conduct Problems                       2
Strengths and Difficulties Questionnaire- Hyperactivity Scale     2
Strengths and Difficulties Questionnaire, Percent Below Clinical Cutoff     2
DSM IV                                                           2
Childrens Global Assessment Scale                                2
dtype: int64
In [12]:
new_cols.head()
Out[12]:
instrument subtype units
0 Eyberg Child Behaviour Inventory Intensity Subscale T Score
1 Eyberg Child Behaviour Inventory Intensity Subscale T Score
2 Eyberg Child Behaviour Inventory Problem Subscale T Score
3 Eyberg Child Behaviour Inventory Problem Subscale T Score
4 Child Behavior Checklist Externalizing Score
In [13]:
# Append the parsed columns. Assigning by column name (rather than
# DataFrame.join) keeps this cell idempotent: re-running it overwrites the
# columns instead of raising on overlapping column names.
outcomes[['instrument', 'subtype', 'units']] = new_cols
In [14]:
outcomes.intvn.value_counts()
Out[14]:
wlc              47
tau              39
iypt             32
pcit             16
TAU              10
iyptndiyct        8
WLC               7
pppsd             7
PCIT              5
MTP               5
mst               5
modularndn        4
iyct              4
ppcp              4
it                4
pppo              4
spokes            3
pmtndp            3
CT                3
PLL               3
pmto              3
snap              3
pcitc             3
FBT               3
pppe              3
pcitabb           2
hncte             2
pmtsd             2
hncstd            2
TIK               2
pmtpa             2
setpc             2
projndsupport     2
RST               2
pmtnds            2
itpt              1
cbt               1
sst               1
modularndcomm     1
modularndclin     1
pppstd            1
hitkashrut        1
iyptadv           1
scip              1
mcfi              1
cpp               1
hnc               1
pstnds            1
kitkashrut        1
coaching          1
dtype: int64
In [15]:
outcomes['Ref ID'].unique()
Out[15]:
array([  23,  100,  103,  371,  441,  475,  539,  564,  757,  836,  870,
        899,  993,  995, 1018, 1096, 1201, 1215, 1236, 1245, 1292, 1386,
       1511, 1521, 1585, 1875, 1951, 2092, 2117, 2219, 2239, 2347, 3211,
       3225, 3397, 3399, 3495, 3687, 3716, 3766, 3915, 3960, 7109, 7723,
       8192, 8231, 8289, 8292, 8385, 8401, 7848])

Data summaries

Cross-tabulation of the outcome counts by measure instrument

In [16]:
pd.crosstab(outcomes['instrument'], outcomes['Outcome'])
Out[16]:
Outcome 01 Behavior, disruptive 02 Behavior, aggression 06 Behavior, fighting, destruction, violation 08 Behavior, other 11 Function, interpersonal and social
instrument
Child Behavior Checklist 65 4 4 4 2
Childrens Global Assessment Scal 0 0 0 0 2
DSM I 2 0 0 0 0
Eyberg Child Behaviour Inventory 148 0 0 2 0
Strengths and Difficulties Questionnaire 10 0 0 20 0

Distribution of age categories

In [17]:
study_char.AgeCat.value_counts()
Out[17]:
SCHOOL    44
PRE-K     31
TEEN      18
dtype: int64

Frequencies of various intervention types

In [18]:
study_char['Intervention Type'].value_counts()
Out[18]:
PHARM                                                           20
IY-PT                                                           10
MST                                                              9
PCIT                                                             7
IY-PT + IY-CT                                                    4
Triple P (enhanced)                                              3
PMTO                                                             3
BSFT                                                             3
IY-PT (nurse led)                                                2
UCPP                                                             2
PT                                                               2
OTH: Intensive treatment                                         2
OTH: Modular treatment                                           2
Triple-P (self-directed)                                         2
CBT                                                              2
CPS                                                              2
PCIT-ABB                                                         2
TIK                                                              1
Family Therapy                                                   1
RST                                                              1
IY-PT + IY-CT + IY-TT                                            1
SNAP Under 12 \nOutReach Project(ORP)                            1
OTH: Modular treatment (community)                               1
PCIT (modified)                                                  1
OTH: Modular (nurse administered)                                1
SCIP (Social cognitive (Dodge's))                                1
IY-CT + IY-PT                                                    1
OTH: Family therapy                                              1
IY-PT (brief)                                                    1
OTH: Community Parent Education Program                          1
PHARM1 + PHARM2                                                  1
OTH: Booster                                                     1
OTH: Parents Plus Children's Program                             1
OTH: Project support                                             1
Coping Power (cultural adaptation)                               1
PONI                                                             1
OTH: Instrumental, emotional support & child management skills     1
SET-PC                                                           1
Triple-P (enhanced)                                              1
Triple-P (online)                                                1
Parenting Group (SPOKES)                                         1
MF-PEP + TAU                                                     1
PMT (perceptive)                                                 1
PMT (practitioner assisted)                                      1
HNC                                                              1
HNC (technology enhanced)                                        1
OTH: MTP                                                         1
SNAP Under 12 OutReach Project                                   1
PLL                                                              1
CBFI                                                             1
OTH: FFT                                                         1
IY-PT + ADVANCE                                                  1
SNAP Under 12 OutReach Project (enhanced)                        1
IY                                                               1
OTH: Child only treatment                                        1
Length: 55, dtype: int64

Extract variables of interest and merge tables

In [19]:
KQ1 = study_char[study_char.KQ=='KQ1']
In [20]:
# Study-level covariates of interest, renamed to analysis-friendly identifiers
column_map = {'Geographic setting': 'country',
              'Age mean (years) ': 'age_mean',   # note trailing space in source column
              'Age SD (years)': 'age_sd',
              'Age min (years)': 'age_min',
              'Age max (years)': 'age_max',
              'Proportion Male (%)': 'p_male'}

study_varnames = ['Year', 'age_cat', 'Geographic setting', 'Age mean (years) ', 'Age SD (years)', 
              'Age min (years)', 'Age max (years)', 'Proportion Male (%)']

study_vars = KQ1[study_varnames].rename(columns=column_map)
In [21]:
study_vars.head()
Out[21]:
Year age_cat country age_mean age_sd age_min age_max p_male
RefID
23 2013 1 USA 2.80 0.61 2 4 62
100 2013 2 USA 14.60 1.30 11 18 83
103 2013 1 USA 5.67 1.72 3 8 53
141 2013 0 USA 9.90 1.30 8 11 73
156 2013 2 Netherlands 16.00 1.31 12 18 73
In [22]:
study_vars.p_male.hist()
Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x11224bba8>

Proportion missing

In [23]:
study_vars.isnull().mean(0).round(2)
Out[23]:
Year        0.00
age_cat     0.00
country     0.00
age_mean    0.10
age_sd      0.19
age_min     0.06
age_max     0.07
p_male      0.02
dtype: float64

Will assume the mean age for those which are missing is simply the midpoint between minimum and maximum values

In [24]:
# Impute missing mean ages with the midpoint of the reported age range.
# Vectorized column arithmetic replaces the original row-wise apply(axis=1),
# which computed the identical midpoint one row at a time.
missing_mean = study_vars.age_mean.isnull()
midpoints = study_vars.age_min + (study_vars.age_max - study_vars.age_min) / 2
study_vars.loc[missing_mean, 'age_mean'] = midpoints[missing_mean]

# Studies still lacking a mean age (min and/or max also missing)
study_vars.age_mean.isnull().sum()
Out[24]:
2
In [25]:
# Outcome-table columns to carry into the analysis, grouped by role:
# identifiers/labels, component flags, then the summary statistics at
# baseline, end of treatment, last follow-up, and change-score.
outcomes_varnames = [
    'Ref ID', 'Measure Instrument', 'instrument', 'subtype', 'units',
    'intvn', 'cc', 'pc', 'fc',
    'BL N', 'BL Mean', 'BL SD',
    'EOT \nN', 'EOT Mean', 'EOT \nSD',
    'Last FU Time', 'Last FU N', 'Last FU Mean', 'Last FU SD',
    'CS Group N', 'CS Mean', 'CS SD',
]
In [26]:
# Rename the raw outcome columns (embedded newlines and all) to clean
# snake_case identifiers.
outcome_column_map = {
    'Ref ID': 'RefID',
    'Measure Instrument': 'measure_instrument',
    'cc': 'child_component',
    'pc': 'parent_component',
    'fc': 'family_component',
    'oc': 'other_component',   # inert: 'oc' is not among the selected columns
    'BL N': 'baseline_n',
    'BL Mean': 'baseline_mean',
    'BL SD': 'baseline_sd',
    'EOT \nN': 'end_treat_n',
    'EOT Mean': 'end_treat_mean',
    'EOT \nSD': 'end_treat_sd',
    'Last FU Time': 'followup_time',
    'Last FU N': 'followup_n',
    'Last FU Mean': 'followup_mean',
    'Last FU SD': 'followup_sd',
    'CS Group N': 'change_n',
    'CS Mean': 'change_mean',
    'CS SD': 'change_sd',
}
outcomes_vars = outcomes[outcomes_varnames].rename(columns=outcome_column_map)

Recode intervention classification

In [27]:
outcomes_vars.isnull().sum()
Out[27]:
RefID                   0
measure_instrument      0
instrument              0
subtype                 0
units                   0
intvn                   0
child_component         0
parent_component        0
family_component        0
baseline_n              4
baseline_mean          17
baseline_sd            25
end_treat_n            74
end_treat_mean         67
end_treat_sd           74
followup_time          32
followup_n            103
followup_mean         118
followup_sd           126
change_n              251
change_mean           258
change_sd             263
dtype: int64
In [28]:
# Classify arms by which treatment components they include. The component
# flags are 0/1 integers, so we cast to bool before negating — the original
# used the obscure `^ True` XOR trick; note plain `~` on an int Series would
# be bitwise NOT (-1, -2, ...) and give wrong answers.
has_child = outcomes_vars.child_component.astype(bool)
has_parent = outcomes_vars.parent_component.astype(bool)
has_family = outcomes_vars.family_component.astype(bool)

# Control arms: none of the three components
control = (~has_child & ~has_parent & ~has_family).astype(int)
# Child-only arms
child_only = (has_child & ~has_parent & ~has_family).astype(int)
# Parent-only arms
parent_only = (~has_child & has_parent & ~has_family).astype(int)
# FIX: `.ix` is deprecated (removed in pandas >= 1.0); `.loc` with a boolean
# mask is the supported equivalent.
outcomes_vars.loc[child_only.astype(bool), ['child_component', 'parent_component', 'family_component']]
Out[28]:
child_component parent_component family_component
125 1 0 0
126 1 0 0
128 1 0 0
129 1 0 0
162 1 0 0
188 1 0 0
194 1 0 0
195 1 0 0
234 1 0 0
236 1 0 0
238 1 0 0
In [29]:
# Multi-component arms: any treated arm that is neither control, child-only,
# nor parent-only. Cast to bool before `~` (the flags are int 0/1; the
# original used `^ True` XOR to negate them).
multi_component = (~parent_only.astype(bool) &
                   ~child_only.astype(bool) &
                   ~control.astype(bool)).astype(int)

outcomes_vars['child_only'] = child_only
outcomes_vars['parent_only'] = parent_only
outcomes_vars['multi_component'] = multi_component

If requested, change PCIT to parent-only.

In [30]:
# Analysis option: treat PCIT as a multi-component intervention (default),
# or reclassify it as parent-only.
pcit_as_multicomponent = True

if not pcit_as_multicomponent:
    # BUG FIX: the original wrote to a misspelled 'parenr_only' column, which
    # silently CREATED a new column instead of updating 'parent_only'.
    outcomes_vars.loc[outcomes_vars.intvn == 'pcit', 'parent_only'] = 1
    outcomes_vars.loc[outcomes_vars.intvn == 'pcit', 'multi_component'] = 0

Obtain subset with non-missing EOT data

In [31]:
eot_subset = outcomes_vars[outcomes_vars.end_treat_mean.notnull() & outcomes_vars.end_treat_sd.notnull()].copy()

Calculate EOT difference

In [32]:
eot_subset['eot_diff_mean'] = eot_subset.end_treat_mean - eot_subset.baseline_mean
In [33]:
eot_subset['eot_diff_sd'] = eot_subset.baseline_sd + eot_subset.end_treat_sd
In [34]:
eot_subset['eot_diff_n'] = eot_subset[['baseline_n', 'end_treat_n']].min(1)

Distribution of baseline means among outcome metrics

In [35]:
# Baseline-mean histograms, by subtype, for the three main instruments
main_instruments = ('Eyberg Child Behaviour Inventory', 
                    'Child Behavior Checklist', 
                    'Strengths and Difficulties Questionnaire')
for instrument in main_instruments:
    instrument_rows = eot_subset[eot_subset.instrument == instrument]
    instrument_rows['baseline_mean'].hist(by=eot_subset['subtype'], sharex=True)
    plt.suptitle(instrument);
In [36]:
eot_subset.instrument.value_counts()
Out[36]:
Eyberg Child Behaviour Inventory            116
Child Behavior Checklist                     48
Strengths and Difficulties Questionnaire     22
Childrens Global Assessment Scal              2
dtype: int64
In [37]:
eot_subset.instrument.value_counts()
Out[37]:
Eyberg Child Behaviour Inventory            116
Child Behavior Checklist                     48
Strengths and Difficulties Questionnaire     22
Childrens Global Assessment Scal              2
dtype: int64
In [38]:
eot_subset[eot_subset.RefID==441]
Out[38]:
RefID measure_instrument instrument subtype units intvn child_component parent_component family_component baseline_n ... followup_sd change_n change_mean change_sd child_only parent_only multi_component eot_diff_mean eot_diff_sd eot_diff_n
14 441 Eyberg Child Behaviour Inventory, Intensity Su... Eyberg Child Behaviour Inventory Intensity Subscale Scale iypt 0 1 0 32 ... NaN NaN NaN NaN 0 1 0 -31.40 46.80 32
15 441 Eyberg Child Behaviour Inventory, Intensity Su... Eyberg Child Behaviour Inventory Intensity Subscale Scale wlc 0 0 0 20 ... NaN NaN NaN NaN 0 0 0 -5.80 49.60 20
16 441 Eyberg Child Behaviour Inventory, Problem Subs... Eyberg Child Behaviour Inventory Problem Subscale Scale iypt 0 1 0 24 ... NaN NaN NaN NaN 0 1 0 -9.70 12.02 24
17 441 Eyberg Child Behaviour Inventory, Problem Subs... Eyberg Child Behaviour Inventory Problem Subscale Scale wlc 0 0 0 17 ... NaN NaN NaN NaN 0 0 0 -2.88 14.59 17

4 rows × 28 columns

Several studies use multiple instruments and metrics within instruments

In [39]:
eot_subset.groupby(['RefID', 'instrument'])['subtype'].value_counts()
Out[39]:
RefID  instrument                                                  
103    Eyberg Child Behaviour Inventory          Intensity Subscale    2
                                                 Problem Subscale      2
371    Eyberg Child Behaviour Inventory          Intensity Subscale    2
                                                 Problem Subscale      2
441    Eyberg Child Behaviour Inventory          Intensity Subscale    2
                                                 Problem Subscale      2
475    Eyberg Child Behaviour Inventory          Intensity Subscale    2
539    Child Behavior Checklist                  Aggression            2
                                                 Externalizing         2
564    Eyberg Child Behaviour Inventory          Intensity Subscale    2
                                                 Problem Subscale      2
836    Strengths and Difficulties Questionnaire  Total Score           2
899    Child Behavior Checklist                  Externalizing         3
       Eyberg Child Behaviour Inventory          Intensity Subscale    3
                                                 Problem Subscale      3
...
7723   Child Behavior Checklist          Externalizing                       2
7848   Child Behavior Checklist          Aggression                          2
                                         Social problems                     2
                                         Delinquent Subscale                 2
                                         Externalizing                       2
       Childrens Global Assessment Scal  hildrens Global Assessment Scale    2
8192   Eyberg Child Behaviour Inventory  Intensity Subscale                  2
8231   Eyberg Child Behaviour Inventory  Intensity Subscale                  4
                                         Problem Subscale                    4
8289   Child Behavior Checklist          Delinquent Subscale                 2
       Eyberg Child Behaviour Inventory  Intensity Subscale                  2
                                         Problem Subscale                    2
8292   Child Behavior Checklist          Externalizing                       2
8401   Eyberg Child Behaviour Inventory  Intensity Subscale                  2
                                         Problem Subscale                    2
Length: 73, dtype: int64
In [40]:
pd.crosstab(eot_subset.instrument, eot_subset.subtype)
Out[40]:
subtype Aggression Conduct Problems Conduct Problems Scale Delinquent Subscale Emotional Symptoms Scale Externalizing Hyperactivity Scale Impact Score Intensity Subscale Problem Subscale Social problems Total Score hildrens Global Assessment Scale
instrument
Child Behavior Checklist 4 6 0 4 0 32 0 0 0 0 2 0 0
Childrens Global Assessment Scal 0 0 0 0 0 0 0 0 0 0 0 0 2
Eyberg Child Behaviour Inventory 0 0 0 0 0 0 0 0 68 48 0 0 0
Strengths and Difficulties Questionnaire 0 0 6 0 4 0 2 2 0 0 0 8 0
In [41]:
# Subtype breakdown for the Eyberg Child Behaviour Inventory alone
x = eot_subset[eot_subset.instrument == 'Eyberg Child Behaviour Inventory']
pd.crosstab(x.instrument, x.subtype)
Out[41]:
subtype Intensity Subscale Problem Subscale
instrument
Eyberg Child Behaviour Inventory 68 48
In [42]:
# Subtype breakdown for the Child Behavior Checklist alone
x = eot_subset[eot_subset.instrument == 'Child Behavior Checklist']
pd.crosstab(x.instrument, x.subtype)
Out[42]:
subtype Aggression Conduct Problems Delinquent Subscale Externalizing Social problems
instrument
Child Behavior Checklist 4 6 4 32 2
In [43]:
# Subtype breakdown for the Strengths and Difficulties Questionnaire alone
x = eot_subset[eot_subset.instrument == 'Strengths and Difficulties Questionnaire']
pd.crosstab(x.instrument, x.subtype)
Out[43]:
subtype Conduct Problems Scale Emotional Symptoms Scale Hyperactivity Scale Impact Score Total Score
instrument
Strengths and Difficulties Questionnaire 6 4 2 2 8

Merge study variables and outcomes

In [44]:
# Join study-level covariates onto outcome rows via RefID (inner join:
# outcome rows without study characteristics are dropped)
merged_vars = study_vars.merge(eot_subset, left_index=True, right_on='RefID')
merged_vars.shape
Out[44]:
(186, 36)

For now, restrict to the three most prevalent metrics.

In [45]:
merged_vars.measure_instrument.value_counts()
Out[45]:
Eyberg Child Behaviour Inventory, Intensity Subscale            64
Eyberg Child Behaviour Inventory, Problem Subscale              48
Child Behavior Checklist, Externalizing (T Score)               23
Strengths and Difficulties Questionnaire, Total Score            8
Child Behavior Checklist, Externalizing                          7
Strengths and Difficulties Questionnaire- Conduct Problems Scale     6
Child Behavior Checklist, Conduct Problems (T Score)             6
Strengths and Difficulties Questionnaire- Emotional Symptoms Scale     4
Eyberg Child Behaviour Inventory, Intensity Subscale (T Score)     4
Child Behavior Checklist, Delinquent Subscale                    4
Child Behavior Checklist, Aggression                             4
Strengths and Difficulties Questionnaire- Hyperactivity Scale     2
Strengths and Difficulties Questionnaire- Impact Score           2
Child Behavior Checklist, Social problems                        2
Childrens Global Assessment Scale                                2
dtype: int64

Take only the top 3 measures

In [46]:
top_measures = 3
In [47]:
merged_vars.RefID.unique()
Out[47]:
array([ 103,  371,  441,  475,  539,  564,  836,  899,  993, 1236, 1245,
       1511, 1585, 1875, 1951, 2092, 2117, 2219, 2239, 2347, 3211, 3225,
       3399, 3495, 3687, 3716, 3766, 3915, 3960, 7109, 7723, 7848, 8192,
       8231, 8289, 8292, 8401])
In [46]:
merged_vars.RefID.unique()
Out[46]:
array([ 103,  371,  441,  475,  539,  564,  836,  899,  993, 1236, 1245,
       1511, 1585, 1875, 1951, 2092, 2117, 2219, 2239, 2347, 3211, 3225,
       3399, 3495, 3687, 3716, 3766, 3915, 3960, 7109, 7723, 8192, 8231,
       8289, 8292, 8401])
In [48]:
# Restrict to rows measured with the `top_measures` most common instruments
top_instruments = merged_vars.measure_instrument.value_counts().index[:top_measures]
analysis_subset = merged_vars[merged_vars.measure_instrument.isin(top_instruments)]
# Largest baseline mean per retained instrument (scale sanity check)
analysis_subset.groupby('measure_instrument')['baseline_mean'].max()
Out[48]:
measure_instrument
Child Behavior Checklist, Externalizing (T Score)        77.10
Eyberg Child Behaviour Inventory, Intensity Subscale    186.44
Eyberg Child Behaviour Inventory, Problem Subscale       28.62
Name: baseline_mean, dtype: float64
In [49]:
analysis_subset.groupby('measure_instrument').baseline_mean.hist()
Out[49]:
measure_instrument
Child Behavior Checklist, Externalizing (T Score)       Axes(0.125,0.125;0.775x0.775)
Eyberg Child Behaviour Inventory, Intensity Subscale    Axes(0.125,0.125;0.775x0.775)
Eyberg Child Behaviour Inventory, Problem Subscale      Axes(0.125,0.125;0.775x0.775)
Name: baseline_mean, dtype: object
In [50]:
analysis_subset['baseline_mean'].hist(by=analysis_subset['measure_instrument'],sharex=True)
Out[50]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x112a8bf98>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x112baceb8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x112bf95c0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x112c34320>]], dtype=object)
In [51]:
analysis_subset['eot_diff_mean'].hist(by=analysis_subset['measure_instrument'],sharex=True)
Out[51]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x112d4cc88>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11290b198>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1127c3550>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x107925ba8>]], dtype=object)
In [52]:
# Overlay baseline vs end-of-treatment densities for each retained measure
for measure in analysis_subset.measure_instrument.unique():
    plt.figure()
    measure_rows = analysis_subset[analysis_subset.measure_instrument == measure]
    measure_rows.baseline_mean.plot(kind='kde', alpha=0.4, grid=False)
    measure_rows.end_treat_mean.plot(kind='kde', alpha=0.4, grid=False)
    plt.gca().set_xlabel(measure)

Number of TAU/Control arms in subset

In [53]:
analysis_subset.shape[0] - analysis_subset.multi_component.sum() - analysis_subset.child_only.sum() - analysis_subset.parent_only.sum()
Out[53]:
53
In [54]:
set(analysis_subset[analysis_subset.multi_component==1].RefID.values)
Out[54]:
{539,
 899,
 1236,
 1511,
 2092,
 3399,
 3687,
 3766,
 3915,
 7848,
 8192,
 8231,
 8289,
 8292}
In [55]:
set(analysis_subset[analysis_subset.child_only==1].RefID.values)
Out[55]:
{1875, 3766, 8289}
In [56]:
set(analysis_subset[analysis_subset.parent_only==1].RefID.values)
Out[56]:
{103,
 371,
 441,
 564,
 1236,
 1245,
 1585,
 2117,
 2219,
 3211,
 3225,
 3495,
 3716,
 3766,
 3960,
 7109,
 7723,
 8401}
In [57]:
# Studies contributing at least one control (no-component) arm
set(analysis_subset[(analysis_subset.parent_only==0) &
               (analysis_subset.child_only==0) &
               (analysis_subset.multi_component==0)].RefID.values)
Out[57]:
{371,
 441,
 539,
 564,
 899,
 1236,
 1511,
 1585,
 1875,
 2092,
 2117,
 2219,
 3211,
 3225,
 3399,
 3495,
 3687,
 3716,
 3766,
 3915,
 7109,
 7723,
 7848,
 8192,
 8231,
 8292,
 8401}

Total arm sample sizes

In [58]:
analysis_subset.loc[analysis_subset.child_only==1, 'baseline_n'].sum()
Out[58]:
190.0
In [59]:
analysis_subset.loc[analysis_subset.parent_only==1, 'baseline_n'].sum()
Out[59]:
1754.0
In [60]:
analysis_subset.loc[analysis_subset.multi_component==1, 'baseline_n'].sum()
Out[60]:
993.0
In [61]:
analysis_subset.loc[(analysis_subset.multi_component==0) & (analysis_subset.parent_only==0) & (analysis_subset.child_only==0), 'baseline_n'].sum()
Out[61]:
1687.0

Meta-analysis

Number of studies in analysis subset

In [62]:
# Studies entering the meta-analysis (list order is used for study indexing below)
unique_studies = analysis_subset.RefID.unique().tolist()
len(unique_studies)
Out[62]:
31
In [64]:
analysis_subset.RefID.unique()
Out[64]:
array([ 103,  371,  441,  539,  564,  899, 1236, 1245, 1511, 1585, 1875,
       2092, 2117, 2219, 3211, 3225, 3399, 3495, 3687, 3716, 3766, 3915,
       3960, 7109, 7723, 7848, 8192, 8231, 8289, 8292, 8401])
In [65]:
analysis_subset.groupby(['RefID', 'measure_instrument'])['measure_instrument'].count().unstack().fillna(0)
Out[65]:
measure_instrument Child Behavior Checklist, Externalizing (T Score) Eyberg Child Behaviour Inventory, Intensity Subscale Eyberg Child Behaviour Inventory, Problem Subscale
RefID
103 0 2 2
371 0 2 2
441 0 2 2
539 2 0 0
564 0 2 2
899 3 3 3
1236 0 6 6
1245 2 0 0
1511 3 0 0
1585 0 2 2
1875 3 0 0
2092 0 3 0
2117 2 0 0
2219 0 3 3
3211 0 2 2
3225 0 2 2
3399 2 0 0
3495 0 4 0
3687 0 6 6
3716 0 2 2
3766 0 9 0
3915 0 2 2
3960 0 0 2
7109 0 2 2
7723 2 0 0
7848 2 0 0
8192 0 2 0
8231 0 4 4
8289 0 2 2
8292 2 0 0
8401 0 2 2

Study counts by intervention component

In [66]:
# Per-study counts of studies with more than one arm of each component type
data_by_refid = analysis_subset.groupby('RefID')
data_by_refid.apply(lambda x: (x[['child_component', 'parent_component', 'multi_component']].sum()>1)).sum()
Out[66]:
child_component      9
parent_component    22
multi_component      8
dtype: int64
In [67]:
data_by_refid.apply(lambda x: (x[['child_component', 'parent_component', 'multi_component']].sum(1)==0).sum()>0).astype(int)
Out[67]:
RefID
103      0
371      1
441      1
539      1
564      1
899      1
1236     1
1245     0
1511     1
1585     1
1875     1
2092     1
2117     1
2219     1
3211     1
3225     1
3399     1
3495     1
3687     1
3716     1
3766     1
3915     1
3960     0
7109     1
7723     1
7848     1
8192     1
8231     1
8289     0
8292     1
8401     1
dtype: int64
In [68]:
data_by_refid.apply(lambda x: x.age_cat.unique()[0]).value_counts()
Out[68]:
1    17
0    12
2     2
dtype: int64

We are restricting the analysis to the 3 most prevalent measure instruments in the database.

In [69]:
# Retained measure instruments; k is the dimension of the multivariate response
unique_measures = analysis_subset.measure_instrument.unique().tolist()
k = len(unique_measures)
unique_measures
Out[69]:
['Eyberg Child Behaviour Inventory, Intensity Subscale',
 'Eyberg Child Behaviour Inventory, Problem Subscale',
 'Child Behavior Checklist, Externalizing (T Score)']

Three intervention components were coded:

  • child_component
  • parent_component
  • multi_component
In [70]:
# Unpack covariates, component indicators, and EOT change statistics into
# NumPy arrays for the PyMC model
p_male, age_cat, intvn = analysis_subset[['p_male', 'age_cat', 'intvn']].values.T
child_only, parent_only, multi_component = analysis_subset[['child_only', 'parent_only', 
                                                                       'multi_component']].values.T

# Note: these names say "change" but hold the EOT difference statistics
change_n, change_mean, change_sd = analysis_subset[['eot_diff_n', 'eot_diff_mean', 'eot_diff_sd']].values.T
In [71]:
# 0/1 indicator vectors per age category (age_cat codes: 0=school, 1=pre-K, 2=teen).
age_values = analysis_subset.age_cat.values
school, pre_k, teen = ((age_values == code).astype(int) for code in (0, 1, 2))

The response variable is a multivariate normal of dimension k=3, for each of the measure instruments:

  1. Eyberg Child Behaviour Inventory, Intensity Subscale
  2. Eyberg Child Behaviour Inventory, Problem Subscale
  3. Child Behavior Checklist, Externalizing (T Score)

$$\left({ \begin{array}{c} {m_1}\\ {m_2}\\ {m_3} \end{array} }\right)_i \sim\text{MVN}(\mathbf{\mu},\Sigma)$$

Means for each study are a draw from a multivariate normal.

In [72]:
wishart = False

mu = pm.Normal('mu', 0, 0.001, value=[0]*k)

if wishart:
    T = pm.Wishart('T', k+1, np.eye(k), value=np.eye(k))
    
    m = [pm.MvNormal('m_{}'.format(i), mu, T, value=[0]*k) for i in range(len(unique_studies))]
else:
    sigmas = pm.Uniform('sigmas', 0, 100, value=[10]*k)
    rhos = pm.Uniform('rhos', -1, 1, value=[0]*int(comb(k, 2)))

    Sigma = pm.Lambda('Sigma', lambda s=sigmas, r=rhos: np.array([[s[0]**2, s[0]*s[1]*r[0], s[0]*s[2]*r[1]],
                                                                 [s[0]*s[1]*r[0], s[1]**2, s[1]*s[2]*r[2]],
                                                                 [s[0]*s[2]*r[1], s[1]*s[2]*r[2], s[2]**2]]))
    
    m = [pm.MvNormalCov('m_{}'.format(i), mu, Sigma, value=[0]*k) for i in range(len(unique_studies))]

Unique intervention labels for each component; we will use these for component random effects.

In [73]:
# Distinct intervention labels within each component type; these are the
# levels for the component random effects.
def _component_levels(mask):
    # mask is a 0/1 row vector; select the interventions flagged with it
    return np.unique(intvn[mask.astype(bool)]).tolist()

unique_child_intvn = _component_levels(child_only)
unique_parent_intvn = _component_levels(parent_only)
unique_multi_intvn = _component_levels(multi_component)
In [74]:
# Indices to random effect labels
child_component_index = [unique_child_intvn.index(x) for x in intvn[child_only.astype(bool)]]
parent_component_index = [unique_parent_intvn.index(x) for x in intvn[parent_only.astype(bool)]]
multi_component_index = [unique_multi_intvn.index(x) for x in intvn[multi_component.astype(bool)]]

Treatment component random effects

$$X_i = \left[{ \begin{array}{c} {x_c}\\ {x_p}\\ {x_m}\\ \end{array} }\right]_i$$

$$\begin{aligned} \beta_j^{(c)} &\sim N(\mu_{\beta}^{(c)},\tau_{\beta}^{(c)}) \\ \beta_j^{(p)} &\sim N(\mu_{\beta}^{(p)},\tau_{\beta}^{(p)}) \\ \beta_j^{(m)} &\sim N(\mu_{\beta}^{(m)},\tau_{\beta}^{(m)}) \end{aligned}$$

In [75]:
# Hyperpriors for the component effects: mu_beta holds the mean standardized
# effect of the child (0), parent (1) and multi-component (2) interventions;
# the individual intervention effects share a common precision tau_beta.
mu_beta = pm.Normal('mu_beta', 0, 0.001, value=[0]*3)
# sig_beta = pm.Uniform('sig_beta', 0, 100, value=1)
# tau_beta = sig_beta ** -2
tau_beta = pm.Gamma('tau_beta', 1, 0.1, value=1)

# One random effect per distinct intervention within each component type.
beta_c = pm.Normal('beta_c', mu_beta[0], tau_beta, value=[0]*len(unique_child_intvn))
beta_p = pm.Normal('beta_p', mu_beta[1], tau_beta, value=[0]*len(unique_parent_intvn))
beta_m = pm.Normal('beta_m', mu_beta[2], tau_beta, value=[0]*len(unique_multi_intvn))

# Expand the random effects to row-level vectors: the matching intervention's
# effect where the row carries that component, zero elsewhere.
b_c = pm.Lambda('b_c', lambda b=beta_c: 
             np.array([b[unique_child_intvn.index(x)] if child_only[i] else 0 for i,x in enumerate(intvn)]))
b_p = pm.Lambda('b_p', lambda b=beta_p: 
             np.array([b[unique_parent_intvn.index(x)] if parent_only[i] else 0 for i,x in enumerate(intvn)]))
b_m = pm.Lambda('b_m', lambda b=beta_m: 
             np.array([b[unique_multi_intvn.index(x)] if multi_component[i] else 0 for i,x in enumerate(intvn)]))

Calculate the probability of being the best intervention.

In [76]:
best = pm.Lambda('best', lambda b=mu_beta:  (b==b.min()).astype(int))

Interaction of parent and multi-component with pre-k children.

In [77]:
# Optional interaction terms: extra effect of parent-only and multi-component
# interventions specifically for pre-K children. Disabled by default.
interaction = False

if interaction:
    beta_pk_p = pm.Normal('beta_pk_p', 0, 1e-5, value=0)
    beta_pk_m = pm.Normal('beta_pk_m', 0, 1e-5, value=0)
    # Row-level interaction vectors: nonzero only for pre-K rows carrying the
    # matching component.
    b_pk_p = pm.Lambda('b_pk_p', lambda b=beta_pk_p: b * parent_only * pre_k)
    b_pk_m = pm.Lambda('b_pk_m', lambda b=beta_pk_m: b * multi_component * pre_k)
In [78]:
# Total component effect per row; the *_only flags appear mutually exclusive,
# so at most one of b_c/b_p/b_m is nonzero per row and the sum selects it.
betas = b_c + b_p + b_m 

if interaction:
    # Add the pre-K x parent / pre-K x multi interaction terms when enabled.
    betas = betas + b_pk_p + b_pk_m

Covariate effects of age and percent female.

$$\alpha \sim N(0, 1e5)$$

In [79]:
alpha_age = pm.Normal('alpha_age', 0, 1e-5, value=[1,2])

Unique study ID (RefID) and measure ID (measure_instrument) values.

In [80]:
# Integer lookup indices: each row's study -> position in unique_studies,
# each row's instrument -> position in unique_measures.
study_id = [unique_studies.index(ref) for ref in analysis_subset.RefID]
measure_id = [unique_measures.index(instr) for instr in analysis_subset.measure_instrument]

Calculate expected response (treatment difference) as a function of treatment and covariates.

$$\theta_i = m_{j[i]k} + X_i \beta + \alpha x_{age}$$

In [81]:
baseline_sd = analysis_subset.baseline_sd.values

@pm.deterministic
def theta(m=m, betas=betas, alpha_age=alpha_age):  
    # Expected treatment difference for each data row.

    # Study-level mean for the row's study i and measure instrument j.
    mi = [m[i][j] for i,j in zip(study_id, measure_id)]
    
    # age_cat codes: 0=school (reference, no term), 1=pre-K -> alpha_age[0],
    # 2=teen -> alpha_age[1].
    age_effect = np.array([alpha_age[a-1] if a else 0 for a in age_cat])
    
    # Scale the standardized effects to the instrument's units via the row's
    # baseline SD.
    return(mi + baseline_sd*(betas + age_effect))

Expected treatment effect for pre-K undergoing multi-component intervention, measured by Eyberg Child Behaviour Inventory, Intensity Subscale

In [82]:
# Standardized baseline (control/TAU) response per instrument, drawn from the
# same between-study distribution as the study means.
if wishart:
    baseline = pm.MvNormal('baseline', mu, T, value=[0]*k)
else:
    baseline = pm.MvNormalCov('baseline', mu, Sigma, value=[0]*k)

Calculation of expected differences from baseline for each instrument.

In [83]:
# Back-transform the standardized component effects to each instrument's
# natural scale using the mean baseline SD of the rows measured with that
# instrument, then form predicted scores per age group. Each *_pred is a
# 3-vector over mu_beta (child, parent, multi); school-age is the reference
# (no alpha_age term).

# ECBI intensity subscale
ecbi_intensity_sd = baseline_sd[np.array(measure_id)==0].mean()

prek_intensity_pred = pm.Lambda('prek_intensity_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*(b + a[0]) )
school_intensity_pred = pm.Lambda('school_intensity_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*b )
teen_intensity_pred = pm.Lambda('teen_intensity_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*(b + a[1]) )

# Stack [pre-K, school, teen] and subtract the instrument's baseline.
intensity_diff = pm.Lambda('intensity_diff', 
                           lambda base=baseline, prek=prek_intensity_pred, school=school_intensity_pred, 
                           teen=teen_intensity_pred: np.array([prek, school, teen])-base[0])
                            

# ECBI problem subscale
ecbi_problem_sd = baseline_sd[np.array(measure_id)==1].mean()

prek_problem_pred = pm.Lambda('prek_problem_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*(b + a[0]) )
school_problem_pred = pm.Lambda('school_problem_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*b )
teen_problem_pred = pm.Lambda('teen_problem_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*(b + a[1]) )

problem_diff = pm.Lambda('problem_diff', 
                           lambda base=baseline, prek=prek_problem_pred, school=school_problem_pred, 
                           teen=teen_problem_pred: np.array([prek, school, teen])-base[1])

# CBC T-score
cbct_sd = baseline_sd[np.array(measure_id)==2].mean()

prek_tscore_pred = pm.Lambda('prek_tscore_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[2] + cbct_sd*(b + a[0]) )
# NOTE(review): unlike its siblings this lambda omits the a=alpha_age default;
# harmless, since the school prediction carries no age effect anyway.
school_tscore_pred = pm.Lambda('school_tscore_pred', 
                            lambda mu=baseline, b=mu_beta: mu[2] + cbct_sd*b )
teen_tscore_pred = pm.Lambda('teen_tscore_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[2] + cbct_sd*(b + a[1]) )

tscore_diff = pm.Lambda('tscore_diff', 
                           lambda base=baseline, prek=prek_tscore_pred, school=school_tscore_pred, 
                           teen=teen_tscore_pred: np.array([prek, school, teen])-base[2])

Finally, the likelihood is just a normal distribution, with the observed standard error of the treatment effect as the standard deviation of the estimates.

$$d_i \sim N(\theta_i, \hat{\sigma}^2)$$

In [84]:
change_se = change_sd/np.sqrt(change_n)
In [85]:
d = pm.Normal('d', theta, change_se**-2, observed=True, value=change_mean)

Posterior predictive samples, if desired.

In [86]:
# Optional posterior-predictive replicates for goodness-of-fit checks.
include_gof_samples = False

if include_gof_samples:
    d_sim = pm.Normal('d_sim', theta, change_se**-2, size=len(change_mean))

Instantiate MCMC model, and use Adaptive Metropolis for some vector-valued variables.

In [87]:
# Build the sampler from everything in the local namespace (pymc2 notebook
# idiom; convenient but fragile — stray variables become model nodes). Use
# adaptive Metropolis on the correlated vector-valued nodes to improve mixing.
M = pm.MCMC(locals())
M.use_step_method(pm.AdaptiveMetropolis, m)
M.use_step_method(pm.AdaptiveMetropolis, mu_beta)
M.use_step_method(pm.AdaptiveMetropolis, [beta_c, beta_p, beta_m])

Run two chains to allow for convergence evaluation.

In [88]:
M.sample(200000, 190000)
 [-----------------100%-----------------] 200000 of 200000 complete in 707.7 sec
In [89]:
M.sample(200000, 190000)
 [-----------------100%-----------------] 200000 of 200000 complete in 652.6 sec

Summary of estimates of intervention components

In [90]:
if wishart:
    # Only relevant when the Wishart precision prior was used.
    pm.Matplot.plot(T)
In [91]:
pm.Matplot.plot(mu_beta)
Plotting mu_beta_0
Plotting mu_beta_1
Plotting mu_beta_2
In [92]:
pm.Matplot.summary_plot?
In [93]:
pm.Matplot.summary_plot([mu_beta], custom_labels=['Child component', 'Parent component', 
                                                              'Multi-component'], chain=0, xlab="Effect size (standard deviations)")
In [94]:
pm.Matplot.summary_plot([mu_beta], custom_labels=['Child component', 'Parent component', 
                                                              'Multi-component'])
In [95]:
mu_beta.summary()
mu_beta:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-0.978           0.315            0.014            [-1.581 -0.355]
	-1.217           0.174            0.008            [-1.563 -0.892]
	-1.246           0.194            0.009            [-1.64  -0.869]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-1.58            -1.187          -0.973         -0.774        -0.35
	-1.552           -1.334          -1.219         -1.101        -0.874
	-1.63            -1.374          -1.244         -1.119        -0.856
	
In [96]:
mu_trace = M.mu_beta.trace()
In [97]:
# Pairwise posterior contrasts between components
# (trace columns: 0=child, 1=parent, 2=multi).
child, parent, multi = mu_trace[:, 0], mu_trace[:, 1], mu_trace[:, 2]
multi_child = multi - child
multi_parent = multi - parent
parent_child = parent - child
In [98]:
# Posterior mean and 95% highest-density interval of the multi-vs-child contrast.
'Multi-component vs child-only: mean={0:.2f}, 95% HPD=({1:.2}, {2:.2})'.format(multi_child.mean(), 
                                                                               *pm.utils.hpd(multi_child, 0.05))
Out[98]:
'Multi-component vs child-only: mean=-0.28, 95% HPD=(-1.0, 0.37)'
In [99]:
'Multi-component vs parent-only: mean={0:.2f}, 95% HPD=({1:.2}, {2:.2})'.format(multi_parent.mean(), 
                                                                               *pm.utils.hpd(multi_parent, 0.05))
Out[99]:
'Multi-component vs parent-only: mean=-0.05, 95% HPD=(-0.54, 0.46)'
In [100]:
'Parent vs child-only: mean={0:.2f}, 95% HPD=({1:.2}, {2:.2})'.format(parent_child.mean(), 
                                                                               *pm.utils.hpd(parent_child, 0.05))
Out[100]:
'Parent vs child-only: mean=-0.23, 95% HPD=(-0.9, 0.39)'
In [101]:
pm.Matplot.plot(mu_beta)
Plotting mu_beta_0
Plotting mu_beta_1
Plotting mu_beta_2
In [102]:
if interaction:
    pm.Matplot.summary_plot([beta_pk_m, beta_pk_p])

Difference means by measure instrument.

In [103]:
# Posterior summaries of the instrument-level grand means.
plt.figure(figsize=(24,4))
pm.Matplot.summary_plot([mu], custom_labels=unique_measures)
In [104]:
pm.Matplot.plot(mu)
Plotting mu_0
Plotting mu_1
Plotting mu_2
In [105]:
if not wishart:
    # Between-study standard deviations per instrument (standardized scale).
    plt.figure(figsize=(24,4))
    pm.Matplot.summary_plot([sigmas], custom_labels=unique_measures)
In [106]:
sigmas.summary()
sigmas:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.039            0.014            0.001            [ 0.017  0.063]
	0.187            0.026            0.002            [ 0.141  0.241]
	0.063            0.016            0.001            [ 0.032  0.092]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	0.019            0.026           0.04           0.05          0.065
	0.143            0.169           0.184          0.201         0.246
	0.035            0.053           0.063          0.072         0.096
	
In [107]:
pm.Matplot.plot(sigmas)
Plotting sigmas_0
Plotting sigmas_1
Plotting sigmas_2
In [108]:
if not wishart:
    # Pairwise correlations between instruments (separation-strategy prior).
    pm.Matplot.summary_plot([rhos], custom_labels=['rho12', 'rho13', 'rho23'])

Age effects for pre-k (top) and teen (bottom) groups, relative to school-age children (the reference category).

In [109]:
pm.Matplot.summary_plot([alpha_age], custom_labels=['pre-K', 'teen'])
In [110]:
alpha_age.summary()
alpha_age:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-0.312           0.067            0.003            [-0.448 -0.187]
	-0.085           0.232            0.013            [-0.529  0.366]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-0.446           -0.356          -0.309         -0.266        -0.184
	-0.539           -0.241          -0.091         0.074         0.358
	
In [111]:
pm.Matplot.plot(alpha_age)
Plotting alpha_age_0
Plotting alpha_age_1

Outcome Plots

In [112]:
# Prediction traces grouped by instrument (intensity, problem, t-score);
# each inner list is ordered [school, pre-K, teen].
traces = [[school_intensity_pred, prek_intensity_pred, teen_intensity_pred],
          [school_problem_pred, prek_problem_pred, teen_problem_pred],
          [school_tscore_pred, prek_tscore_pred, teen_tscore_pred]]
In [113]:
sb.set(style="white", palette="hot")
sb.despine(left=True)

colors = '#7fc97f','#beaed4','#fdc086','#386cb0'
#colors = '#a6611a','#dfc27d','#80cdc1','#018571'
# colors = ('#fef0d9','#fdcc8a','#fc8d59','#d7301f')[::-1]
# colors = sb.cubehelix_palette(4, start=2, rot=0, dark=.25, light=.75, reverse=False)

titles = ['Pre-K Children', 'School Children', 'Teenage Children']

# Each traces[i] entry is ordered [school, pre-K, teen]; reorder the panels to
# [pre-K, school, teen] so they line up with `titles`.
panel_order = [1, 0, 2]

for i,measure in enumerate(unique_measures):
    
    f, axes = plt.subplots(1, 3, figsize=(16, 6), sharex=True, sharey=True)
    measure_traces = traces[i]
    
    for j, trace in enumerate(np.array(measure_traces)[panel_order]):
        
        # Bootstrap baseline means for this instrument to place the
        # standardized effects back on the instrument's scale.
        x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
        
        # Columns of each prediction trace: child-only, parent-only, multi-component.
        c1, p1, m1 = trace.trace().T
        
        g = sb.distplot(x + c1, color=colors[0], ax=axes[j])
        sb.distplot(x + p1, color=colors[1], ax=axes[j])
        sb.distplot(x + m1, color=colors[2], ax=axes[j])

        # BUGFIX: the control/TAU curve must use the age effect of the panel's
        # own age group. The original indexed alpha_age by the *reordered*
        # panel position j, which swapped the pre-K and school control curves.
        age_idx = panel_order[j]  # original age code: 0=school (ref), 1=pre-K, 2=teen
        if age_idx:
            age_effect = alpha_age.trace()[:, age_idx-1]
        else:
            age_effect = 0
        sb.distplot(x + baseline.trace()[:, i] + age_effect, color=colors[3], ax=axes[j]);
        g.set_title(titles[j])

        if not j:
            g.set_ylabel('Frequency')
            g.legend(g.lines, ['Child-only', 'Parent-only', 'Multi-component', 'Control/TAU'], loc='upper left')
        if j==1:
            g.set_xlabel(measure)
<matplotlib.figure.Figure at 0x11b171c18>

Generate threshold proportions

In [114]:
# Posterior probability that a treated (or control) child remains above the
# instrument's clinical cutoff, by age group and component. Baseline means are
# bootstrapped (np.random.choice without a fixed per-cell seed, so re-running
# shifts the proportions slightly in the second decimal).
thresholds = [127, # ECBI, intensity
              11,  # ECBI, problem
              60]  # CBC t-score

age_labels = ['school', 'pre-k', 'teen']

estimates = []

for i,measure in enumerate(unique_measures):
    
    # Traces are iterated in their original [school, pre-K, teen] order here,
    # matching age_labels and the j-based alpha_age indexing below.
    measure_traces = traces[i]
    cutoff = thresholds[i]
    
    for j, trace in enumerate(measure_traces):
        
        x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
        
        # Columns: child-only, parent-only, multi-component predictions.
        c1, p1, m1 = trace.trace().T
        
        _child_only = ((x + c1) > cutoff).mean().round(2)
        _parent_only = ((x + p1) > cutoff).mean().round(2)
        _multi_component = ((x + m1) > cutoff).mean().round(2)
        # j=0 school (reference), j=1 pre-K -> alpha_age[0], j=2 teen -> alpha_age[1]
        if j:
            age_effect = alpha_age.trace()[:, j-1]
        else:
            age_effect = 0
        _control = ((x + baseline.trace()[:, i] + age_effect) > cutoff).mean().round(2)
        
        print('\n{0}, {1}'.format(measure, age_labels[j]))
        print('Child-only: {0}\nParent-only: {1}\nMulti-component: {2}\nControl/TAU: {3}'.format(_child_only, 
                                                                                                 _parent_only,
                                                                                                 _multi_component,
                                                                                                 _control))
        
        estimates.append((measure, age_labels[j], _child_only, _parent_only, _multi_component, _control))
        
Eyberg Child Behaviour Inventory, Intensity Subscale, school
Child-only: 0.64
Parent-only: 0.47
Multi-component: 0.44
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Intensity Subscale, pre-k
Child-only: 0.41
Parent-only: 0.24
Multi-component: 0.22
Control/TAU: 0.93

Eyberg Child Behaviour Inventory, Intensity Subscale, teen
Child-only: 0.58
Parent-only: 0.42
Multi-component: 0.39
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Problem Subscale, school
Child-only: 0.8
Parent-only: 0.75
Multi-component: 0.73
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, pre-k
Child-only: 0.68
Parent-only: 0.56
Multi-component: 0.5
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, teen
Child-only: 0.76
Parent-only: 0.69
Multi-component: 0.66
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), school
Child-only: 0.63
Parent-only: 0.44
Multi-component: 0.4
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), pre-k
Child-only: 0.39
Parent-only: 0.23
Multi-component: 0.21
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), teen
Child-only: 0.57
Parent-only: 0.39
Multi-component: 0.36
Control/TAU: 1.0
In [115]:
# NOTE(review): this cell is an exact duplicate of the previous one. Because
# np.random.choice draws fresh bootstrap samples each run, the printed
# proportions differ slightly, and `estimates` is rebuilt/overwritten here.
# One of the two copies should be deleted (or the logic moved into a seeded
# function called once).
thresholds = [127, # ECBI, intensity
              11,  # ECBI, problem
              60]  # CBC t-score

age_labels = ['school', 'pre-k', 'teen']

estimates = []

for i,measure in enumerate(unique_measures):
    
    measure_traces = traces[i]
    cutoff = thresholds[i]
    
    for j, trace in enumerate(measure_traces):
        
        x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
        
        c1, p1, m1 = trace.trace().T
        
        _child_only = ((x + c1) > cutoff).mean().round(2)
        _parent_only = ((x + p1) > cutoff).mean().round(2)
        _multi_component = ((x + m1) > cutoff).mean().round(2)
        if j:
            age_effect = alpha_age.trace()[:, j-1]
        else:
            age_effect = 0
        _control = ((x + baseline.trace()[:, i] + age_effect) > cutoff).mean().round(2)
        
        print('\n{0}, {1}'.format(measure, age_labels[j]))
        print('Child-only: {0}\nParent-only: {1}\nMulti-component: {2}\nControl/TAU: {3}'.format(_child_only, 
                                                                                                 _parent_only,
                                                                                                 _multi_component,
                                                                                                 _control))
        
        estimates.append((measure, age_labels[j], _child_only, _parent_only, _multi_component, _control))
        
Eyberg Child Behaviour Inventory, Intensity Subscale, school
Child-only: 0.63
Parent-only: 0.47
Multi-component: 0.43
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Intensity Subscale, pre-k
Child-only: 0.41
Parent-only: 0.24
Multi-component: 0.21
Control/TAU: 0.93

Eyberg Child Behaviour Inventory, Intensity Subscale, teen
Child-only: 0.58
Parent-only: 0.42
Multi-component: 0.38
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Problem Subscale, school
Child-only: 0.8
Parent-only: 0.75
Multi-component: 0.73
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, pre-k
Child-only: 0.66
Parent-only: 0.54
Multi-component: 0.49
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, teen
Child-only: 0.76
Parent-only: 0.69
Multi-component: 0.66
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), school
Child-only: 0.64
Parent-only: 0.45
Multi-component: 0.41
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), pre-k
Child-only: 0.39
Parent-only: 0.23
Multi-component: 0.21
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), teen
Child-only: 0.58
Parent-only: 0.39
Multi-component: 0.36
Control/TAU: 1.0
In [116]:
estimates
Out[116]:
[('Eyberg Child Behaviour Inventory, Intensity Subscale',
  'school',
  0.63,
  0.46999999999999997,
  0.42999999999999999,
  0.93999999999999995),
 ('Eyberg Child Behaviour Inventory, Intensity Subscale',
  'pre-k',
  0.40999999999999998,
  0.23999999999999999,
  0.20999999999999999,
  0.93000000000000005),
 ('Eyberg Child Behaviour Inventory, Intensity Subscale',
  'teen',
  0.57999999999999996,
  0.41999999999999998,
  0.38,
  0.93999999999999995),
 ('Eyberg Child Behaviour Inventory, Problem Subscale',
  'school',
  0.80000000000000004,
  0.75,
  0.72999999999999998,
  1.0),
 ('Eyberg Child Behaviour Inventory, Problem Subscale',
  'pre-k',
  0.66000000000000003,
  0.54000000000000004,
  0.48999999999999999,
  1.0),
 ('Eyberg Child Behaviour Inventory, Problem Subscale',
  'teen',
  0.76000000000000001,
  0.68999999999999995,
  0.66000000000000003,
  1.0),
 ('Child Behavior Checklist, Externalizing (T Score)',
  'school',
  0.64000000000000001,
  0.45000000000000001,
  0.40999999999999998,
  1.0),
 ('Child Behavior Checklist, Externalizing (T Score)',
  'pre-k',
  0.39000000000000001,
  0.23000000000000001,
  0.20999999999999999,
  1.0),
 ('Child Behavior Checklist, Externalizing (T Score)',
  'teen',
  0.57999999999999996,
  0.39000000000000001,
  0.35999999999999999,
  1.0)]
In [117]:
# The '_pcit' suffix flags the sensitivity run in which PCIT is *not* coded as
# a multi-component intervention (pcit_as_multicomponent is set upstream).
# BUGFIX: `bool(~flag)` applies bitwise NOT to a Python bool (~True == -2,
# ~False == -1, both truthy), so the suffix was appended unconditionally.
# Use logical `not` instead. (If the flag were a numpy bool_, `~` would have
# behaved logically — TODO confirm which type is set upstream.)
estimates_filename = 'estimates%s.csv' % ((not pcit_as_multicomponent)*'_pcit')
np.savetxt(estimates_filename, np.array([e[2:] for e in estimates]), delimiter='\t', fmt='%1.2f')
In [118]:
# Persist the threshold-proportion table for reporting.
outcome_columns = ['Instrument', 'Age group', 'Child-only',
                   'Parent-only', 'Multi-component', 'TAU/Control']
pd.DataFrame(estimates, columns=outcome_columns).to_csv('outcome_proportions.csv')

Summary statistics for TAU/control

In [119]:
baseline.summary()
baseline:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-0.01            0.044            0.001            [-0.095  0.086]
	-0.01            0.194            0.003            [-0.401  0.369]
	0.043            0.066            0.001            [-0.088  0.178]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-0.091           -0.037          -0.013         0.014         0.091
	-0.399           -0.137          -0.011         0.118         0.372
	-0.09            0.002           0.043          0.084         0.177
	

Summary statistics for intensity subscale

In [120]:
# Predicted ECBI Intensity outcomes by age group; within each summary the rows
# are child-only, parent-only, multi-component.
prek_intensity_pred.summary()
school_intensity_pred.summary()
teen_intensity_pred.summary()
prek_intensity_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-34.207          8.497            0.373          [-50.594 -17.548]
	-40.569          4.537            0.165          [-49.404 -31.751]
	-41.32           5.19             0.228          [-50.869 -30.328]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-50.526          -39.968         -34.091        -28.701       -17.448
	-49.463          -43.583         -40.563        -37.497       -31.784
	-51.447          -44.778         -41.361        -37.975       -30.765
	

school_intensity_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-25.943          8.347            0.366          [-41.982  -9.397]
	-32.304          4.609            0.206          [-41.494 -23.627]
	-33.055          5.141            0.239          [-43.494 -23.046]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-41.96           -31.497         -25.85         -20.542       -9.248
	-41.168          -35.383         -32.339        -29.214       -23.185
	-43.254          -36.478         -33.004        -29.693       -22.688
	

teen_intensity_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-28.193          9.578            0.36           [-48.166 -10.285]
	-34.554          7.559            0.37           [-49.605 -20.256]
	-35.305          6.951            0.306          [-48.732 -21.821]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-47.34           -34.56          -28.04         -21.808       -9.135
	-49.261          -39.792         -34.551        -29.36        -19.785
	-48.819          -39.931         -35.33         -30.635       -21.901
	

Summary statistics for problem subscale

In [121]:
# Predicted ECBI Problem outcomes by age group; within each summary the rows
# are child-only, parent-only, multi-component.
prek_problem_pred.summary()
school_problem_pred.summary()
teen_problem_pred.summary()
prek_problem_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-7.977           1.989            0.087          [-11.791  -4.087]
	-9.459           1.076            0.039          [-11.584  -7.383]
	-9.634           1.22             0.052          [-12.04   -7.195]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-11.781          -9.309          -7.964         -6.669        -4.051
	-11.579          -10.184         -9.451         -8.727        -7.373
	-12.001          -10.452         -9.646         -8.838        -7.151
	

school_problem_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-6.052           1.955            0.085            [-9.993 -2.322]
	-7.534           1.095            0.048            [-9.717 -5.452]
	-7.709           1.21             0.055          [-10.18   -5.352]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-9.829           -7.355          -6.033         -4.776        -2.135
	-9.658           -8.269          -7.529         -6.794        -5.379
	-10.11           -8.517          -7.693         -6.919        -5.274
	

teen_problem_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-6.576           2.242            0.084          [-11.062  -2.193]
	-8.058           1.776            0.086          [-11.555  -4.628]
	-8.233           1.631            0.071          [-11.333  -4.978]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-11.028          -8.067          -6.545         -5.085        -2.137
	-11.527          -9.281          -8.056         -6.839        -4.588
	-11.429          -9.316          -8.23          -7.14         -5.063
	

Summary statistics for t scores

In [122]:
# Predicted CBC Externalizing T-score outcomes by age group; within each
# summary the rows are child-only, parent-only, multi-component.
prek_tscore_pred.summary()
school_tscore_pred.summary()
teen_tscore_pred.summary()
prek_tscore_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-10.526          2.626            0.115          [-15.646  -5.413]
	-12.492          1.403            0.051          [-15.222  -9.782]
	-12.724          1.606            0.07           [-16.017  -9.65 ]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-15.588          -12.303         -10.491        -8.831        -5.329
	-15.231          -13.426         -12.494        -11.551       -9.783
	-15.862          -13.798         -12.739        -11.679       -9.472
	

school_tscore_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-7.972           2.58             0.113          [-12.962  -2.861]
	-9.938           1.425            0.063          [-12.699  -7.188]
	-10.17           1.59             0.074          [-13.334  -7.001]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-12.941          -9.698          -7.953         -6.307        -2.818
	-12.674          -10.892         -9.939         -8.982        -7.144
	-13.323          -11.231         -10.149        -9.134        -6.969
	

teen_tscore_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-8.667           2.96             0.111          [-14.89   -3.144]
	-10.633          2.336            0.114          [-15.209  -6.154]
	-10.866          2.149            0.095          [-14.942  -6.648]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-14.595          -10.633         -8.625         -6.697        -2.794
	-15.176          -12.247         -10.635        -9.028        -6.086
	-15.042          -12.3           -10.865        -9.419        -6.733
	

Summary statistics for differences.

Each summary corresponds to the differences between baseline and age × component categories for each outcome. Rows are grouped by age first (pre-k, school, teen), and within each age group ordered by component. The order is:

  • pre-k: child-only
  • pre-k: parent-only
  • pre-k: multi-component
  • school: child-only
  • school: parent-only
  • school: multi-component
  • teen: child-only
  • teen: parent-only
  • teen: multi-component
In [123]:
# Differences from baseline for each instrument: 9 rows grouped by age
# (pre-K, school, teen), each with child-only, parent-only, multi-component.
intensity_diff.summary()
problem_diff.summary()
tscore_diff.summary()
intensity_diff:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-34.198          8.495            0.372          [-50.501 -17.463]
	-40.559          4.536            0.165          [-49.221 -31.581]
	-41.31           5.19             0.228          [-51.552 -31.001]
	-25.934          8.345            0.365          [-41.946  -9.407]
	-32.295          4.609            0.206          [-41.47  -23.668]
	-33.046          5.14             0.239          [-43.501 -23.061]
	-28.183          9.576            0.359          [-48.216 -10.33 ]
	-34.544          7.559            0.371          [-49.375 -20.067]
	-35.296          6.951            0.306          [-48.659 -21.764]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-50.5            -39.961         -34.082        -28.693       -17.454
	-49.465          -43.575         -40.556        -37.49        -31.768
	-51.484          -44.771         -41.347        -37.97        -30.754
	-41.926          -31.483         -25.817        -20.526       -9.272
	-41.161          -35.38          -32.327        -29.201       -23.188
	-43.235          -36.463         -32.997        -29.681       -22.703
	-47.344          -34.558         -28.02         -21.794       -9.129
	-49.266          -39.793         -34.529        -29.358       -19.78
	-48.812          -39.926         -35.324        -30.642       -21.872
	

problem_diff:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-7.967           1.979            0.087          [-11.766  -4.068]
	-9.449           1.057            0.038          [-11.468  -7.358]
	-9.624           1.209            0.053          [-12.011  -7.223]
	-6.042           1.944            0.085            [-9.773 -2.192]
	-7.524           1.074            0.048            [-9.662 -5.514]
	-7.699           1.198            0.056          [-10.135  -5.373]
	-6.566           2.231            0.084          [-11.233  -2.407]
	-8.048           1.761            0.086          [-11.503  -4.675]
	-8.223           1.619            0.071          [-11.337  -5.071]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-11.766          -9.31           -7.94          -6.685        -4.066
	-11.524          -10.152         -9.449         -8.734        -7.401
	-11.995          -10.431         -9.633         -8.846        -7.165
	-9.768           -7.335          -6.015         -4.782        -2.16
	-9.59            -8.243          -7.532         -6.803        -5.402
	-10.073          -8.495          -7.688         -6.915        -5.289
	-11.03           -8.051          -6.528         -5.078        -2.127
	-11.478          -9.271          -8.045         -6.84         -4.608
	-11.372          -9.302          -8.23          -7.139        -5.096
	

tscore_diff:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-10.569          2.626            0.115          [-15.608  -5.397]
	-12.535          1.402            0.051          [-15.212  -9.76 ]
	-12.767          1.604            0.07           [-15.932  -9.581]
	-8.015           2.579            0.113          [-12.964  -2.907]
	-9.981           1.424            0.064          [-12.817  -7.315]
	-10.213          1.589            0.074          [-13.444  -7.127]
	-8.71            2.96             0.111          [-14.902  -3.193]
	-10.676          2.336            0.115          [-15.26   -6.202]
	-10.908          2.148            0.095          [-15.038  -6.726]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-15.608          -12.35          -10.533        -8.868        -5.394
	-15.288          -13.467         -12.534        -11.587       -9.818
	-15.912          -13.837         -12.779        -11.735       -9.505
	-12.958          -9.73           -7.979         -6.344        -2.866
	-12.721          -10.935         -9.991         -9.025        -7.166
	-13.362          -11.269         -10.198        -9.173        -7.017
	-14.632          -10.681         -8.66          -6.736        -2.821
	-15.226          -12.298         -10.671        -9.073        -6.113
	-15.086          -12.34          -10.917        -9.47         -6.76