In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sb
import numpy as np
import pandas as pd
import pymc as pm
from scipy.misc import comb
In [2]:
# Fix the NumPy RNG seed so the MCMC sampling below is reproducible
np.random.seed(42)

Data import and cleaning

In [3]:
# Load study-level characteristics (indexed by study RefID) and per-arm
# outcomes from the meta-analysis workbook. '-', 'NR' (not reported) and
# 'ND' (no data) are coded as missing in the spreadsheet.
study_char = pd.read_excel("DBD Data for Meta Analyses.xlsx", "Study Characteristics", 
                           index_col='RefID', na_values=['-', 'NR'])
outcomes = pd.read_excel("DBD Data for Meta Analyses.xlsx", "Outcomes", 
                           na_values=['ND', 'NR'])
#demographics = pd.read_excel("DBD Data for Meta Analyses.xlsx", "Pt Demographics", na_values=['-', 'NR'])
In [4]:
# Quick look at the last few study records to verify the import
study_char.tail()
Out[4]:
Pubmed ID Rel Incl1 Rel Incl2 Rel Incl3 Rel Excl Family NCT Title Year Author ... Parent Component Screened (N) Randomized (N) Analyzed (N) Proportion Male (%) Direction of Effect DOE_Comments Other comments Drugs(s) -
RefID
8292 na NaN NaN NaN NaN NaN NaN Reciprocal skills training in the treatment of... 2000 Barrett P, C. Turner, S. Rombouts and A. Duffy ... Both NaN 57 57 79 NaN NaN RST (hospital), RST (clinic), WLC NaN NaN
8320 na 1867 NaN NaN NaN Z-C NaN Differential responses of children with varyin... 2008 Wolff JC, R. W. Greene and T. H. Ollendick ... Both NaN 50 47 68 NaN NaN NaN NaN NaN
8365 na NaN NaN NaN NaN NaN NaN Treatment of depressed mothers with disruptive... 2000 Sanders MR and M. McFarland ... Both 167 47 47 74 NaN NaN NaN NaN NaN
8385 na NaN NaN NaN NaN NaN NaN Reducing Adolescent Oppositional and Conduct D... 2011 Sells SP, K. W. Early and T. E. Smith ... Both NaN 38 NaN 57 NaN NaN NaN NaN NaN
8401 22820873 NaN NaN NaN NaN NaN NaN "Tuning into Kids": reducing young children's ... 2013 Havighurst SS, K. R. Wilson, A. E. Harley, C. ... ... Both 92 63 54 78 NaN NaN NaN NaN NaN

5 rows × 45 columns

Data cleaning

In [5]:
# Ensure the last-followup summary columns are numeric floats
fu_cols = ['Last FU Mean', 'Last FU SD']
outcomes[fu_cols] = outcomes[fu_cols].astype(float)
In [6]:
# Recode age category as integers: SCHOOL=0 (reference), PRE-K=1, TEEN=2
age_codes = {'PRE-K': 1, 'SCHOOL': 0, 'TEEN': 2}
study_char['age_cat'] = study_char.AgeCat.replace(age_codes)
In [7]:
# Fix a capitalization typo in one measure label and normalize the Units column
label_fixes = {'Eyberg Child Behaviour Inventory, problem Subscale':
               'Eyberg Child Behaviour Inventory, Problem Subscale'}
outcomes['Measure Instrument'] = outcomes['Measure Instrument'].replace(label_fixes)
outcomes.Units = outcomes.Units.replace({'scale': 'Scale'})
In [8]:
# Parse followup times and convert everything to months.
# Entries look like "<value> <unit> ..." (e.g. "12 weeks"); take the first
# two whitespace-separated tokens as (value, unit).
has_fut = outcomes['Last FU Time'].notnull()
split_fut = outcomes.loc[has_fut, 'Last FU Time'].apply(lambda x: str(x).split(' ')[:2])
# BUG FIX: the previous expression divided weeks by 52, which converts weeks
# to *years*, not months (and its `or` fallback misfired for 0 weeks).
# A week is 12/52 of a month; units other than 'weeks' are assumed to
# already be months, as before.
fut_months = [float(time) * 12 / 52. if unit == 'weeks' else float(time)
              for time, unit in split_fut]
outcomes.loc[has_fut, 'Last FU Time'] = fut_months

We are assuming all CBC Externalizing values over 50 are T-scores, and those under 50 are raw scores. This recodes those observations.

In [9]:
# Split CBCL Externalizing outcomes into T-score vs raw-score variants,
# assuming baseline means of 50 or more are T scores (see note above).
is_cbce = outcomes['Measure Instrument'].str.startswith('Child Behavior Checklist, Externalizing')
is_raw = outcomes['BL Mean'] < 50
outcomes.loc[is_cbce & (is_raw == False), 'Measure Instrument'] = 'Child Behavior Checklist, Externalizing (T Score)'
outcomes.loc[is_cbce & is_raw, 'Measure Instrument'] = 'Child Behavior Checklist, Externalizing'

Recode measure instrument variables

In [10]:
def parse_measure(label):
    """Split a 'Measure Instrument' label into (instrument, subtype, units).

    Labels have the form "Instrument, Subtype (Units)" or "Instrument- Subtype".
    When no parenthesized unit is present, units default to 'Scale' for
    labels ending in 'scale' and 'Score' otherwise.
    """
    # Instrument and subtype are separated by ", " or, failing that, "- "
    sep = label.find(',')
    if sep == -1:
        sep = label.find('-')
    instrument = label[:sep]
    rest = label[sep + 2:]  # skip the separator and the following space
    paren = rest.find('(')
    if paren > -1:
        # "Subtype (Units)": drop the space before '(' and the parentheses
        return instrument, rest[:paren - 1], rest[paren + 1:-1]
    units = 'Scale' if rest.endswith('scale') else 'Score'
    return instrument, rest, units

# Parse every outcome's label (vectorized over the column; the old row-wise
# iterrows loop computed the same thing)
parsed = [parse_measure(label) for label in outcomes['Measure Instrument']]
new_cols = pd.DataFrame(parsed, columns=['instrument', 'subtype', 'units'],
                        index=outcomes.index)
In [11]:
# Frequency of each measure instrument after the recoding above
outcomes['Measure Instrument'].value_counts()
Out[11]:
Eyberg Child Behaviour Inventory, Intensity Subscale            79
Eyberg Child Behaviour Inventory, Problem Subscale              57
Child Behavior Checklist, Externalizing (T Score)               44
Child Behavior Checklist, Externalizing                         11
Eyberg Child Behaviour Inventory, Intensity Subscale (T Score)    10
Strengths and Difficulties Questionnaire- Conduct Problems Scale    10
Strengths and Difficulties Questionnaire, Total Score           10
Child Behavior Checklist, Conduct Problems (T Score)             6
Strengths and Difficulties Questionnaire- Emotional Symptoms Scale     4
Eyberg Child Behaviour Inventory, Problem Subscale (T Score)     4
Child Behavior Checklist, Aggression                             4
Child Behavior Checklist, Delinquent Subscale                    2
Child Behavior Checklist, Rulebreaking                           2
DSM IV                                                           2
Strengths and Difficulties Questionnaire- Impact Score           2
Strengths and Difficulties Questionnaire, Percent Below Clinical Cutoff     2
Child Behavior Checklist, Conduct Problems                       2
Strengths and Difficulties Questionnaire- Hyperactivity Scale     2
dtype: int64
In [12]:
# Spot-check the parsed instrument/subtype/units columns
new_cols.head()
Out[12]:
instrument subtype units
0 Eyberg Child Behaviour Inventory Intensity Subscale T Score
1 Eyberg Child Behaviour Inventory Intensity Subscale T Score
2 Eyberg Child Behaviour Inventory Problem Subscale T Score
3 Eyberg Child Behaviour Inventory Problem Subscale T Score
4 Child Behavior Checklist Externalizing Score
In [13]:
# Append new columns
# (index-aligned join of the parsed instrument/subtype/units columns)
outcomes = outcomes.join(new_cols)
In [14]:
# Frequency of each intervention code (note mixed upper/lower-case labels)
outcomes.intvn.value_counts()
Out[14]:
wlc              47
tau              39
iypt             32
pcit             16
iyptndiyct        8
pppsd             7
WLC               7
TAU               5
mst               5
PCIT              5
it                4
pppo              4
iyct              4
modularndn        4
ppcp              4
pppe              3
CT                3
pcitc             3
FBT               3
pmto              3
PLL               3
spokes            3
snap              3
pmtndp            3
setpc             2
hncte             2
RST               2
projndsupport     2
hncstd            2
pmtpa             2
TIK               2
pmtnds            2
pcitabb           2
pmtsd             2
cpp               1
modularndclin     1
sst               1
pstnds            1
pppstd            1
hitkashrut        1
scip              1
modularndcomm     1
mcfi              1
iyptadv           1
kitkashrut        1
hnc               1
itpt              1
coaching          1
cbt               1
dtype: int64

Data summaries

Cross-tabulation of the outcome counts by measure instrument

In [15]:
# Outcome counts cross-tabulated by measure instrument
pd.crosstab(outcomes['instrument'], outcomes['Outcome'])
Out[15]:
Outcome 01 Behavior, disruptive 02 Behavior, aggression 06 Behavior, fighting, destruction, violation 08 Behavior, other
instrument
Child Behavior Checklist 59 4 4 4
DSM I 2 0 0 0
Eyberg Child Behaviour Inventory 148 0 0 2
Strengths and Difficulties Questionnaire 10 0 0 20

Distribution of age categories

In [16]:
# Distribution of studies across age categories
study_char.AgeCat.value_counts()
Out[16]:
SCHOOL    44
PRE-K     31
TEEN      18
dtype: int64

Frequencies of various intervention types

In [17]:
# Frequencies of intervention types at the study level
study_char['Intervention Type'].value_counts()
Out[17]:
PHARM                                                           20
IY-PT                                                           10
MST                                                              9
PCIT                                                             7
IY-PT + IY-CT                                                    4
Triple P (enhanced)                                              3
BSFT                                                             3
PMTO                                                             3
OTH: Intensive treatment                                         2
PCIT-ABB                                                         2
IY-PT (nurse led)                                                2
CPS                                                              2
OTH: Modular treatment                                           2
Triple-P (self-directed)                                         2
UCPP                                                             2
PT                                                               2
CBT                                                              2
OTH: MTP                                                         1
IY-PT + IY-CT + IY-TT                                            1
MF-PEP + TAU                                                     1
IY-PT (brief)                                                    1
SCIP (Social cognitive (Dodge's))                                1
OTH: Parents Plus Children's Program                             1
PMT (perceptive)                                                 1
OTH: Project support                                             1
PCIT (modified)                                                  1
IY                                                               1
OTH: Instrumental, emotional support & child management skills     1
OTH: Modular treatment (community)                               1
OTH: Child only treatment                                        1
TIK                                                              1
SNAP Under 12 \nOutReach Project(ORP)                            1
Parenting Group (SPOKES)                                         1
HNC (technology enhanced)                                        1
Triple-P (enhanced)                                              1
PLL                                                              1
OTH: Community Parent Education Program                          1
IY-PT + ADVANCE                                                  1
PHARM1 + PHARM2                                                  1
Family Therapy                                                   1
OTH: Booster                                                     1
SET-PC                                                           1
CBFI                                                             1
IY-CT + IY-PT                                                    1
PONI                                                             1
HNC                                                              1
OTH: Modular (nurse administered)                                1
SNAP Under 12 OutReach Project                                   1
OTH: FFT                                                         1
PMT (practitioner assisted)                                      1
RST                                                              1
Coping Power (cultural adaptation)                               1
SNAP Under 12 OutReach Project (enhanced)                        1
Triple-P (online)                                                1
OTH: Family therapy                                              1
Length: 55, dtype: int64

Extract variables of interest and merge tables

In [18]:
# Restrict to studies addressing Key Question 1
KQ1 = study_char.loc[study_char.KQ == 'KQ1']
In [19]:
# Study-level covariates of interest, renamed to friendlier identifiers.
# NB: 'Age mean (years) ' carries a trailing space in the spreadsheet header.
study_renames = {
    'Geographic setting': 'country',
    'Age mean (years) ': 'age_mean',
    'Age SD (years)': 'age_sd',
    'Age min (years)': 'age_min',
    'Age max (years)': 'age_max',
    'Proportion Male (%)': 'p_male',
}
study_varnames = ['Year', 'age_cat', 'Geographic setting', 'Age mean (years) ',
                  'Age SD (years)', 'Age min (years)', 'Age max (years)',
                  'Proportion Male (%)']
study_vars = KQ1[study_varnames].rename(columns=study_renames)
In [20]:
# Spot-check the renamed study-level covariates
study_vars.head()
Out[20]:
Year age_cat country age_mean age_sd age_min age_max p_male
RefID
23 2013 1 USA 2.80 0.61 2 4 62
100 2013 2 USA 14.60 1.30 11 18 83
103 2013 1 USA 5.67 1.72 3 8 53
141 2013 0 USA 9.90 1.30 8 11 73
156 2013 2 Netherlands 16.00 1.31 12 18 73
In [21]:
# Distribution of percent-male across studies
study_vars.p_male.hist()
Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x10dfb4dd8>

Proportion missing

In [22]:
# Proportion of missing values per covariate
study_vars.isnull().mean(0).round(2)
Out[22]:
Year        0.00
age_cat     0.00
country     0.00
age_mean    0.10
age_sd      0.19
age_min     0.06
age_max     0.07
p_male      0.02
dtype: float64

Will assume the mean age for those which are missing is simply the midpoint between minimum and maximum values

In [23]:
# Impute missing mean ages as the midpoint of the reported age range.
# Vectorized; the previous row-wise apply computed the same quantity
# (min + (max - min)/2) one row at a time.
missing_mean = study_vars.age_mean.isnull()
est_means = study_vars.age_min + (study_vars.age_max - study_vars.age_min) / 2
study_vars.loc[missing_mean, 'age_mean'] = est_means[missing_mean]

# Studies still missing a mean age (no age range reported either)
study_vars.age_mean.isnull().sum()
Out[23]:
2
In [24]:
# Outcome-level variables to carry into the analysis. Several spreadsheet
# headers contain literal newlines (e.g. 'EOT \nN'); keep them verbatim.
outcomes_varnames = ['Ref ID', 'Measure Instrument', 'instrument', 'subtype', 'units', 
                     'intvn', 'cc', 'pc', 'fc',
                     'BL N', 'BL Mean', 'BL SD', 
                     'EOT \nN', 'EOT Mean', 'EOT \nSD', 'Last FU Time', 'Last FU N', 
                     'Last FU Mean', 'Last FU SD', 'CS Group N', 'CS Mean', 'CS SD']
In [25]:
# Friendlier column names for the outcome variables ('oc' is mapped too,
# though it is not among the selected columns)
outcome_renames = {
    'Ref ID': 'RefID',
    'Measure Instrument': 'measure_instrument',
    'cc': 'child_component',
    'pc': 'parent_component',
    'fc': 'family_component',
    'oc': 'other_component',
    'BL N': 'baseline_n',
    'BL Mean': 'baseline_mean',
    'BL SD': 'baseline_sd',
    'EOT \nN': 'end_treat_n',
    'EOT Mean': 'end_treat_mean',
    'EOT \nSD': 'end_treat_sd',
    'Last FU Time': 'followup_time',
    'Last FU N': 'followup_n',
    'Last FU Mean': 'followup_mean',
    'Last FU SD': 'followup_sd',
    'CS Group N': 'change_n',
    'CS Mean': 'change_mean',
    'CS SD': 'change_sd',
}
outcomes_vars = outcomes[outcomes_varnames].rename(columns=outcome_renames)

Recode intervention classification

In [26]:
# Missing-value counts per outcome variable
outcomes_vars.isnull().sum()
Out[26]:
RefID                   0
measure_instrument      0
instrument              0
subtype                 0
units                   0
intvn                   0
child_component         0
parent_component        0
family_component        0
baseline_n              4
baseline_mean          17
baseline_sd            25
end_treat_n            74
end_treat_mean         67
end_treat_sd           74
followup_time          32
followup_n            103
followup_mean         118
followup_sd           126
change_n              241
change_mean           248
change_sd             253
dtype: int64
In [27]:
# Classify arms from the 0/1 component flags. The original `x ^ True`
# int-flipping trick is replaced with explicit comparisons; `~` would be
# wrong here because the columns are ints, not booleans.
# A control arm has no child, parent, or family component.
control = ((outcomes_vars.child_component == 0) & 
           (outcomes_vars.parent_component == 0) & 
           (outcomes_vars.family_component == 0)).astype(int)
child_only = ((outcomes_vars.child_component == 1) & 
              (outcomes_vars.parent_component == 0) & 
              (outcomes_vars.family_component == 0)).astype(int)
parent_only = ((outcomes_vars.child_component == 0) & 
               (outcomes_vars.parent_component == 1) & 
               (outcomes_vars.family_component == 0)).astype(int)
# BUG FIX: .ix was deprecated and removed (pandas 1.0); use .loc with a
# boolean mask to display the child-only arms.
outcomes_vars.loc[child_only.astype(bool), ['child_component', 'parent_component', 'family_component']]
Out[27]:
child_component parent_component family_component
125 1 0 0
126 1 0 0
128 1 0 0
129 1 0 0
162 1 0 0
188 1 0 0
194 1 0 0
195 1 0 0
234 1 0 0
236 1 0 0
238 1 0 0
In [28]:
# Anything that is not control, child-only, or parent-only has multiple
# components (the indicator series hold 0/1 ints, so compare against 0
# rather than xor-ing with True)
multi_component = ((parent_only == 0) & (child_only == 0) & (control == 0)).astype(int)

outcomes_vars['child_only'] = child_only
outcomes_vars['parent_only'] = parent_only
outcomes_vars['multi_component'] = multi_component

If requested, change PCIT to parent-only.

In [29]:
# Analysis switch: treat PCIT as a multi-component intervention (default)
# or reclassify it as parent-only.
pcit_as_multicomponent = True

if not pcit_as_multicomponent:
    # BUG FIX: 'parenr_only' was a typo that silently created a brand-new
    # column instead of updating the existing 'parent_only' indicator.
    outcomes_vars.loc[outcomes_vars.intvn=='pcit', 'parent_only'] = 1
    outcomes_vars.loc[outcomes_vars.intvn=='pcit', 'multi_component'] = 0

Obtain subset with non-missing EOT data

In [30]:
# Keep only arms reporting both an end-of-treatment mean and SD
eot_subset = outcomes_vars[outcomes_vars.end_treat_mean.notnull() & outcomes_vars.end_treat_sd.notnull()].copy()

Calculate EOT difference

In [31]:
# Change score: end-of-treatment mean minus baseline mean
eot_subset['eot_diff_mean'] = eot_subset.end_treat_mean - eot_subset.baseline_mean
In [32]:
# NOTE(review): the SD of the change score is approximated by the *sum* of
# the baseline and EOT SDs — a conservative upper bound (the exact value
# would need the baseline/EOT correlation). Confirm this is intentional.
eot_subset['eot_diff_sd'] = eot_subset.baseline_sd + eot_subset.end_treat_sd
In [33]:
# Effective N for the change score: the smaller of baseline and EOT N
eot_subset['eot_diff_n'] = eot_subset[['baseline_n', 'end_treat_n']].min(1)

Distribution of baseline means among outcome metrics

In [34]:
# Histogram baseline means per instrument, panelled by subtype.
# Note: `by` receives the full-length subtype series; pandas aligns it to
# the subset by index in the underlying groupby.
for instrument in ('Eyberg Child Behaviour Inventory', 
                   'Child Behavior Checklist', 
                   'Strengths and Difficulties Questionnaire'):
    eot_subset[eot_subset.instrument==instrument]['baseline_mean'].hist(by=eot_subset['subtype'], 
                                                                              sharex=True)
    plt.suptitle(instrument);
In [35]:
# Arm counts per instrument in the EOT subset
eot_subset.instrument.value_counts()
Out[35]:
Eyberg Child Behaviour Inventory            116
Child Behavior Checklist                     40
Strengths and Difficulties Questionnaire     22
dtype: int64
In [36]:
# Example study (RefID 441): two arms x two ECBI subscales
eot_subset[eot_subset.RefID==441]
Out[36]:
RefID measure_instrument instrument subtype units intvn child_component parent_component family_component baseline_n ... followup_sd change_n change_mean change_sd child_only parent_only multi_component eot_diff_mean eot_diff_sd eot_diff_n
14 441 Eyberg Child Behaviour Inventory, Intensity Su... Eyberg Child Behaviour Inventory Intensity Subscale Scale iypt 0 1 0 32 ... NaN NaN NaN NaN 0 1 0 -31.40 46.80 32
15 441 Eyberg Child Behaviour Inventory, Intensity Su... Eyberg Child Behaviour Inventory Intensity Subscale Scale wlc 0 0 0 20 ... NaN NaN NaN NaN 0 0 0 -5.80 49.60 20
16 441 Eyberg Child Behaviour Inventory, Problem Subs... Eyberg Child Behaviour Inventory Problem Subscale Scale iypt 0 1 0 24 ... NaN NaN NaN NaN 0 1 0 -9.70 12.02 24
17 441 Eyberg Child Behaviour Inventory, Problem Subs... Eyberg Child Behaviour Inventory Problem Subscale Scale wlc 0 0 0 17 ... NaN NaN NaN NaN 0 0 0 -2.88 14.59 17

4 rows × 28 columns

Several studies use multiple instruments and metrics within instruments

In [37]:
# How many arms each study contributes per instrument/subtype combination
eot_subset.groupby(['RefID', 'instrument'])['subtype'].value_counts()
Out[37]:
RefID  instrument                                                  
103    Eyberg Child Behaviour Inventory          Problem Subscale      2
                                                 Intensity Subscale    2
371    Eyberg Child Behaviour Inventory          Problem Subscale      2
                                                 Intensity Subscale    2
441    Eyberg Child Behaviour Inventory          Problem Subscale      2
                                                 Intensity Subscale    2
475    Eyberg Child Behaviour Inventory          Intensity Subscale    2
539    Child Behavior Checklist                  Externalizing         2
                                                 Aggression            2
564    Eyberg Child Behaviour Inventory          Problem Subscale      2
                                                 Intensity Subscale    2
836    Strengths and Difficulties Questionnaire  Total Score           2
899    Child Behavior Checklist                  Externalizing         3
       Eyberg Child Behaviour Inventory          Problem Subscale      3
                                                 Intensity Subscale    3
...
3960   Eyberg Child Behaviour Inventory          Problem Subscale            2
7109   Eyberg Child Behaviour Inventory          Problem Subscale            2
                                                 Intensity Subscale          2
       Strengths and Difficulties Questionnaire  Emotional Symptoms Scale    2
                                                 Conduct Problems Scale      2
7723   Child Behavior Checklist                  Externalizing               2
8192   Eyberg Child Behaviour Inventory          Intensity Subscale          2
8231   Eyberg Child Behaviour Inventory          Problem Subscale            4
                                                 Intensity Subscale          4
8289   Child Behavior Checklist                  Delinquent Subscale         2
       Eyberg Child Behaviour Inventory          Problem Subscale            2
                                                 Intensity Subscale          2
8292   Child Behavior Checklist                  Externalizing               2
8401   Eyberg Child Behaviour Inventory          Problem Subscale            2
                                                 Intensity Subscale          2
Length: 68, dtype: int64
In [38]:
# Arm counts by instrument and subtype
pd.crosstab(eot_subset.instrument, eot_subset.subtype)
Out[38]:
subtype Aggression Conduct Problems Conduct Problems Scale Delinquent Subscale Emotional Symptoms Scale Externalizing Hyperactivity Scale Impact Score Intensity Subscale Problem Subscale Total Score
instrument
Child Behavior Checklist 2 6 0 2 0 30 0 0 0 0 0
Eyberg Child Behaviour Inventory 0 0 0 0 0 0 0 0 68 48 0
Strengths and Difficulties Questionnaire 0 0 6 0 4 0 2 2 0 0 8
In [39]:
# ECBI outcomes broken down by subtype
x = eot_subset.loc[eot_subset.instrument == 'Eyberg Child Behaviour Inventory']
pd.crosstab(x.instrument, x.subtype)
Out[39]:
subtype Intensity Subscale Problem Subscale
instrument
Eyberg Child Behaviour Inventory 68 48
In [40]:
# CBCL outcomes broken down by subtype
x = eot_subset.loc[eot_subset.instrument == 'Child Behavior Checklist']
pd.crosstab(x.instrument, x.subtype)
Out[40]:
subtype Aggression Conduct Problems Delinquent Subscale Externalizing
instrument
Child Behavior Checklist 2 6 2 30
In [41]:
# SDQ outcomes broken down by subtype
x = eot_subset.loc[eot_subset.instrument == 'Strengths and Difficulties Questionnaire']
pd.crosstab(x.instrument, x.subtype)
Out[41]:
subtype Conduct Problems Scale Emotional Symptoms Scale Hyperactivity Scale Impact Score Total Score
instrument
Strengths and Difficulties Questionnaire 6 4 2 2 8

Merge study variables and outcomes

In [42]:
# Join study-level covariates (indexed by RefID) onto the per-arm EOT
# outcomes; inner merge drops arms from studies not in the KQ1 subset.
merged_vars = study_vars.merge(eot_subset, left_index=True, right_on='RefID')
merged_vars.shape
Out[42]:
(176, 36)

For now, restrict to the three most prevalent metrics.

In [43]:
# Arm counts per measure instrument after the merge
merged_vars.measure_instrument.value_counts()
Out[43]:
Eyberg Child Behaviour Inventory, Intensity Subscale            64
Eyberg Child Behaviour Inventory, Problem Subscale              48
Child Behavior Checklist, Externalizing (T Score)               21
Strengths and Difficulties Questionnaire, Total Score            8
Child Behavior Checklist, Externalizing                          7
Strengths and Difficulties Questionnaire- Conduct Problems Scale     6
Child Behavior Checklist, Conduct Problems (T Score)             6
Eyberg Child Behaviour Inventory, Intensity Subscale (T Score)     4
Strengths and Difficulties Questionnaire- Emotional Symptoms Scale     4
Strengths and Difficulties Questionnaire- Impact Score           2
Child Behavior Checklist, Delinquent Subscale                    2
Strengths and Difficulties Questionnaire- Hyperactivity Scale     2
Child Behavior Checklist, Aggression                             2
dtype: int64

Take only the top 3 measures

In [44]:
# Number of most-prevalent measure instruments to retain in the analysis
top_measures = 3
In [45]:
# Studies represented in the merged data
merged_vars.RefID.unique()
Out[45]:
array([ 103,  371,  441,  475,  539,  564,  836,  899,  993, 1236, 1245,
       1511, 1585, 1875, 1951, 2092, 2117, 2219, 2239, 2347, 3211, 3225,
       3399, 3495, 3687, 3716, 3766, 3915, 3960, 7109, 7723, 8192, 8231,
       8289, 8292, 8401])
In [46]:
# Keep only outcomes measured with the `top_measures` most common instruments
top_instruments = merged_vars.measure_instrument.value_counts().index[:top_measures]
analysis_subset = merged_vars[merged_vars.measure_instrument.isin(top_instruments)]
analysis_subset.groupby('measure_instrument')['baseline_mean'].max()
Out[46]:
measure_instrument
Child Behavior Checklist, Externalizing (T Score)        77.10
Eyberg Child Behaviour Inventory, Intensity Subscale    186.44
Eyberg Child Behaviour Inventory, Problem Subscale       28.62
Name: baseline_mean, dtype: float64
In [47]:
# Overlaid baseline-mean histograms, one group per instrument
analysis_subset.groupby('measure_instrument').baseline_mean.hist()
Out[47]:
measure_instrument
Child Behavior Checklist, Externalizing (T Score)       Axes(0.125,0.125;0.775x0.775)
Eyberg Child Behaviour Inventory, Intensity Subscale    Axes(0.125,0.125;0.775x0.775)
Eyberg Child Behaviour Inventory, Problem Subscale      Axes(0.125,0.125;0.775x0.775)
Name: baseline_mean, dtype: object
In [48]:
# Panelled baseline-mean histograms with a shared x-axis
analysis_subset['baseline_mean'].hist(by=analysis_subset['measure_instrument'],sharex=True)
Out[48]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x10ea27ac8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10eb54e10>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x10eba2518>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10ebd9208>]], dtype=object)
In [49]:
# Panelled histograms of the end-of-treatment change scores
analysis_subset['eot_diff_mean'].hist(by=analysis_subset['measure_instrument'],sharex=True)
Out[49]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x10e5feb38>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10e62d278>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x10e025c88>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10e222e10>]], dtype=object)
In [50]:
# Overlay baseline vs end-of-treatment density estimates per instrument
for measure in analysis_subset.measure_instrument.unique():
    plt.figure()
    measure_rows = analysis_subset[analysis_subset.measure_instrument == measure]
    measure_rows.baseline_mean.plot(kind='kde', alpha=0.4, grid=False)
    measure_rows.end_treat_mean.plot(kind='kde', alpha=0.4, grid=False)
    plt.gca().set_xlabel(measure)

Number of TAU/Control arms in subset

In [51]:
# Count of control/TAU arms: total arms minus the three treatment-arm types
analysis_subset.shape[0] - analysis_subset.multi_component.sum() - analysis_subset.child_only.sum() - analysis_subset.parent_only.sum()
Out[51]:
52
In [52]:
# Studies contributing at least one multi-component arm
set(analysis_subset[analysis_subset.multi_component==1].RefID.values)
Out[52]:
{539, 899, 1236, 1511, 2092, 3399, 3687, 3766, 3915, 8192, 8231, 8289, 8292}
In [53]:
# Studies contributing at least one child-only arm
set(analysis_subset[analysis_subset.child_only==1].RefID.values)
Out[53]:
{1875, 3766, 8289}
In [54]:
# Studies contributing at least one parent-only arm
set(analysis_subset[analysis_subset.parent_only==1].RefID.values)
Out[54]:
{103,
 371,
 441,
 564,
 1236,
 1245,
 1585,
 2117,
 2219,
 3211,
 3225,
 3495,
 3716,
 3766,
 3960,
 7109,
 7723,
 8401}
In [55]:
# Studies contributing at least one control/TAU arm (no component at all)
set(analysis_subset[(analysis_subset.parent_only==0) &
               (analysis_subset.child_only==0) &
               (analysis_subset.multi_component==0)].RefID.values)
Out[55]:
{371,
 441,
 539,
 564,
 899,
 1236,
 1511,
 1585,
 1875,
 2092,
 2117,
 2219,
 3211,
 3225,
 3399,
 3495,
 3687,
 3716,
 3766,
 3915,
 7109,
 7723,
 8192,
 8231,
 8292,
 8401}

Meta-analysis

Number of studies in analysis subset

In [56]:
# Ordered list of study IDs; positions index the per-study random effects
unique_studies = analysis_subset.RefID.unique().tolist()
len(unique_studies)
Out[56]:
30
In [57]:
# Studies retained in the analysis subset
analysis_subset.RefID.unique()
Out[57]:
array([ 103,  371,  441,  539,  564,  899, 1236, 1245, 1511, 1585, 1875,
       2092, 2117, 2219, 3211, 3225, 3399, 3495, 3687, 3716, 3766, 3915,
       3960, 7109, 7723, 8192, 8231, 8289, 8292, 8401])
In [58]:
# Arm counts per study x measure instrument (0 = instrument not used)
analysis_subset.groupby(['RefID', 'measure_instrument'])['measure_instrument'].count().unstack().fillna(0)
Out[58]:
measure_instrument Child Behavior Checklist, Externalizing (T Score) Eyberg Child Behaviour Inventory, Intensity Subscale Eyberg Child Behaviour Inventory, Problem Subscale
RefID
103 0 2 2
371 0 2 2
441 0 2 2
539 2 0 0
564 0 2 2
899 3 3 3
1236 0 6 6
1245 2 0 0
1511 3 0 0
1585 0 2 2
1875 3 0 0
2092 0 3 0
2117 2 0 0
2219 0 3 3
3211 0 2 2
3225 0 2 2
3399 2 0 0
3495 0 4 0
3687 0 6 6
3716 0 2 2
3766 0 9 0
3915 0 2 2
3960 0 0 2
7109 0 2 2
7723 2 0 0
8192 0 2 0
8231 0 4 4
8289 0 2 2
8292 2 0 0
8401 0 2 2

Study counts by treatment component and age category

In [59]:
# Number of studies with more than one arm of each component type
data_by_refid = analysis_subset.groupby('RefID')
data_by_refid.apply(lambda x: (x[['child_component', 'parent_component', 'multi_component']].sum()>1)).sum()
Out[59]:
child_component      9
parent_component    22
multi_component      8
dtype: int64
In [60]:
# 1 if the study includes at least one arm with no coded component (control)
data_by_refid.apply(lambda x: (x[['child_component', 'parent_component', 'multi_component']].sum(1)==0).sum()>0).astype(int)
Out[60]:
RefID
103      0
371      1
441      1
539      1
564      1
899      1
1236     1
1245     0
1511     1
1585     1
1875     1
2092     1
2117     1
2219     1
3211     1
3225     1
3399     1
3495     1
3687     1
3716     1
3766     1
3915     1
3960     0
7109     1
7723     1
8192     1
8231     1
8289     0
8292     1
8401     1
dtype: int64
In [61]:
# Age-category distribution across studies (one category per study)
data_by_refid.apply(lambda x: x.age_cat.unique()[0]).value_counts()
Out[61]:
1    17
0    11
2     2
dtype: int64

We are restricting the analysis to the 3 most prevalent measure instruments in the database.

In [62]:
# Ordered list of retained instruments; k is the response dimension of the
# multivariate-normal model below
unique_measures = analysis_subset.measure_instrument.unique().tolist()
k = len(unique_measures)
unique_measures
Out[62]:
['Eyberg Child Behaviour Inventory, Intensity Subscale',
 'Eyberg Child Behaviour Inventory, Problem Subscale',
 'Child Behavior Checklist, Externalizing (T Score)']

Three intervention components were coded:

  • child_component
  • parent_component
  • multi_component
In [63]:
# Unpack the analysis columns into plain NumPy arrays for the PyMC model
p_male, age_cat, intvn = analysis_subset[['p_male', 'age_cat', 'intvn']].values.T
child_only, parent_only, multi_component = analysis_subset[['child_only', 'parent_only', 
                                                                       'multi_component']].values.T

# Change-score summaries (note: named change_* but taken from the EOT diffs)
change_n, change_mean, change_sd = analysis_subset[['eot_diff_n', 'eot_diff_mean', 'eot_diff_sd']].values.T
In [64]:
# 0/1 indicator vectors for each age category (SCHOOL=0, PRE-K=1, TEEN=2)
age_values = analysis_subset.age_cat.values
school = (age_values == 0).astype(int)
pre_k = (age_values == 1).astype(int)
teen = (age_values == 2).astype(int)

The response variable is a multivariate normal of dimension k=3, for each of the measure instruments:

  1. Eyberg Child Behaviour Inventory, Intensity Subscale
  2. Eyberg Child Behaviour Inventory, Problem Subscale
  3. Child Behavior Checklist, Externalizing (T Score)

$$\left({ \begin{array}{c} {m_1}\\ {m_2}\\ {m_3} \end{array} }\right)_i \sim\text{MVN}(\mathbf{\mu},\Sigma)$$

Means for each study are a draw from a multivariate normal.

In [65]:
# Model switch: Wishart prior on the precision matrix vs. separate
# scale/correlation priors on the covariance (the default).
wishart = False

# Grand means for the k measure instruments (diffuse prior; PyMC2
# parameterizes the Normal by precision, so 0.001 = variance 1000)
mu = pm.Normal('mu', 0, 0.001, value=[0]*k)

if wishart:
    # Conjugate Wishart prior on the k x k precision matrix
    T = pm.Wishart('T', k+1, np.eye(k), value=np.eye(k))
    
    # One k-vector of study means per study, drawn from MVN(mu, T^-1)
    m = [pm.MvNormal('m_{}'.format(i), mu, T, value=[0]*k) for i in range(len(unique_studies))]
else:
    # Separate priors: uniform scales and pairwise correlations
    # (comb(k, 2) = 3 correlations for k = 3)
    sigmas = pm.Uniform('sigmas', 0, 100, value=[10]*k)
    rhos = pm.Uniform('rhos', -1, 1, value=[0]*int(comb(k, 2)))

    # Assemble the 3x3 covariance matrix from scales and correlations;
    # hard-coded for k = 3 (r[0]=12, r[1]=13, r[2]=23 pairings)
    Sigma = pm.Lambda('Sigma', lambda s=sigmas, r=rhos: np.array([[s[0]**2, s[0]*s[1]*r[0], s[0]*s[2]*r[1]],
                                                                 [s[0]*s[1]*r[0], s[1]**2, s[1]*s[2]*r[2]],
                                                                 [s[0]*s[2]*r[1], s[1]*s[2]*r[2], s[2]**2]]))
    
    # One k-vector of study means per study, drawn from MVN(mu, Sigma)
    m = [pm.MvNormalCov('m_{}'.format(i), mu, Sigma, value=[0]*k) for i in range(len(unique_studies))]

Unique intervention labels for each component; we will use these for component random effects.

In [66]:
# Distinct intervention labels within each arm type; these index the
# component-specific random effects below
unique_child_intvn = np.unique(intvn[child_only.astype(bool)]).tolist()
unique_parent_intvn = np.unique(intvn[parent_only.astype(bool)]).tolist()
unique_multi_intvn = np.unique(intvn[multi_component.astype(bool)]).tolist()
In [67]:
# Indices to random effect labels
child_component_index = [unique_child_intvn.index(x) for x in intvn[child_only.astype(bool)]]
parent_component_index = [unique_parent_intvn.index(x) for x in intvn[parent_only.astype(bool)]]
multi_component_index = [unique_multi_intvn.index(x) for x in intvn[multi_component.astype(bool)]]

Treatment component random effects

$$X_i = \left[{ \begin{array}{c} {x_c}\\ {x_p}\\ {x_m}\\ \end{array} }\right]_i$$

$$\begin{aligned} \beta_j^{(c)} &\sim N(\mu_{\beta}^{(c)},\tau_{\beta}^{(c)}) \\ \beta_j^{(p)} &\sim N(\mu_{\beta}^{(p)},\tau_{\beta}^{(p)}) \\ \beta_j^{(m)} &\sim N(\mu_{\beta}^{(m)},\tau_{\beta}^{(m)}) \end{aligned}$$

In [68]:
# Component-level mean effects (child, parent, multi), diffuse prior
mu_beta = pm.Normal('mu_beta', 0, 0.001, value=[0]*3)
# sig_beta = pm.Uniform('sig_beta', 0, 100, value=1)
# tau_beta = sig_beta ** -2
# Shared precision across the three sets of intervention random effects
tau_beta = pm.Gamma('tau_beta', 1, 0.1, value=1)

# Per-intervention random effects around each component mean
beta_c = pm.Normal('beta_c', mu_beta[0], tau_beta, value=[0]*len(unique_child_intvn))
beta_p = pm.Normal('beta_p', mu_beta[1], tau_beta, value=[0]*len(unique_parent_intvn))
beta_m = pm.Normal('beta_m', mu_beta[2], tau_beta, value=[0]*len(unique_multi_intvn))

# Expand the random effects to one value per arm (0 for arms not of that
# component type), so they can simply be summed into a linear predictor
b_c = pm.Lambda('b_c', lambda b=beta_c: 
             np.array([b[unique_child_intvn.index(x)] if child_only[i] else 0 for i,x in enumerate(intvn)]))
b_p = pm.Lambda('b_p', lambda b=beta_p: 
             np.array([b[unique_parent_intvn.index(x)] if parent_only[i] else 0 for i,x in enumerate(intvn)]))
b_m = pm.Lambda('b_m', lambda b=beta_m: 
             np.array([b[unique_multi_intvn.index(x)] if multi_component[i] else 0 for i,x in enumerate(intvn)]))

Calculate the probability of being the best intervention.

In [69]:
# Indicator for the component with the minimum (most negative, i.e. largest
# symptom reduction) mean effect at each MCMC iteration; its posterior mean
# is the probability of being the best intervention
best = pm.Lambda('best', lambda b=mu_beta:  (b==b.min()).astype(int))

Interaction of parent and multi-component with pre-k children.

In [70]:
# Optional interaction of parent-only and multi-component effects with the
# pre-K age group (off by default)
interaction = False

if interaction:
    beta_pk_p = pm.Normal('beta_pk_p', 0, 1e-5, value=0)
    beta_pk_m = pm.Normal('beta_pk_m', 0, 1e-5, value=0)
    # Per-arm interaction terms: nonzero only for pre-K x component arms
    b_pk_p = pm.Lambda('b_pk_p', lambda b=beta_pk_p: b * parent_only * pre_k)
    b_pk_m = pm.Lambda('b_pk_m', lambda b=beta_pk_m: b * multi_component * pre_k)
In [71]:
# Total treatment effect per arm: at most one of b_c/b_p/b_m is nonzero
betas = b_c + b_p + b_m 

if interaction:
    betas = betas + b_pk_p + b_pk_m

Covariate effects of age and percent female.

$$\alpha \sim N(0, 1e5)$$

In [72]:
# Age-category effects (pre-K, teen) relative to school-age; diffuse prior
# (PyMC2 Normal takes precision, so 1e-5 = variance 1e5)
alpha_age = pm.Normal('alpha_age', 0, 1e-5, value=[1,2])

Unique study ID (RefID) and measure ID (measure_instrument) values.

In [73]:
# Integer indices mapping each arm to its study and measure random effects
study_id = [unique_studies.index(x) for x in analysis_subset.RefID]
measure_id = [unique_measures.index(x) for x in analysis_subset.measure_instrument]

Calculate expected response (treatment difference) as a function of treatment and covariates.

$$\theta_i = m_{j[i]k} + X_i \beta + \alpha x_{age}$$

In [74]:
# Baseline SDs used to put the standardized effects back on each
# instrument's raw scale
baseline_sd = analysis_subset.baseline_sd.values

@pm.deterministic
def theta(m=m, betas=betas, alpha_age=alpha_age):  
    # Expected change score for each arm.

    # Study-by-measure random mean for each observation
    mi = [m[i][j] for i,j in zip(study_id, measure_id)]
    
    # age_cat: 0=school (reference, no effect), 1=pre-K -> alpha_age[0],
    # 2=teen -> alpha_age[1]
    age_effect = np.array([alpha_age[a-1] if a else 0 for a in age_cat])
    
    # Treatment and age effects are in baseline-SD units; rescale per arm
    return(mi + baseline_sd*(betas + age_effect))

Expected treatment effect for pre-K children undergoing a multi-component intervention, measured by the Eyberg Child Behaviour Inventory, Intensity Subscale

In [75]:
# Baseline (control/TAU) effects across measures: multivariate normal, using
# either the Wishart precision (T) or the covariance matrix (Sigma) depending
# on the `wishart` flag chosen earlier.
if wishart:
    baseline = pm.MvNormal('baseline', mu, T, value=[0]*k)
else:
    baseline = pm.MvNormalCov('baseline', mu, Sigma, value=[0]*k)

Calculation of expected differences from baseline for each instrument.

In [76]:
# ECBI intensity subscale
# Mean baseline SD for this instrument: converts standardized effects
# (mu_beta, alpha_age) back to the instrument's natural scale.
ecbi_intensity_sd = baseline_sd[np.array(measure_id)==0].mean()

# Predicted outcomes (vector over child/parent/multi components) by age group.
# School age is the reference; pre-K and teen add their alpha_age offset.
# (The unused `a=alpha_age` default was dropped from the school predictors so
# all three instruments declare identical dependencies.)
prek_intensity_pred = pm.Lambda('prek_intensity_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*(b + a[0]) )
school_intensity_pred = pm.Lambda('school_intensity_pred', 
                            lambda mu=baseline, b=mu_beta: mu[0] + ecbi_intensity_sd*b )
teen_intensity_pred = pm.Lambda('teen_intensity_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*(b + a[1]) )

# Differences from baseline, stacked as (pre-K, school, teen).
intensity_diff = pm.Lambda('intensity_diff', 
                           lambda base=baseline, prek=prek_intensity_pred, school=school_intensity_pred, 
                           teen=teen_intensity_pred: np.array([prek, school, teen])-base[0])


# ECBI problem subscale
ecbi_problem_sd = baseline_sd[np.array(measure_id)==1].mean()

prek_problem_pred = pm.Lambda('prek_problem_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*(b + a[0]) )
school_problem_pred = pm.Lambda('school_problem_pred', 
                            lambda mu=baseline, b=mu_beta: mu[1] + ecbi_problem_sd*b )
teen_problem_pred = pm.Lambda('teen_problem_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*(b + a[1]) )

problem_diff = pm.Lambda('problem_diff', 
                           lambda base=baseline, prek=prek_problem_pred, school=school_problem_pred, 
                           teen=teen_problem_pred: np.array([prek, school, teen])-base[1])

# CBC T-score
cbct_sd = baseline_sd[np.array(measure_id)==2].mean()

prek_tscore_pred = pm.Lambda('prek_tscore_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[2] + cbct_sd*(b + a[0]) )
school_tscore_pred = pm.Lambda('school_tscore_pred', 
                            lambda mu=baseline, b=mu_beta: mu[2] + cbct_sd*b )
teen_tscore_pred = pm.Lambda('teen_tscore_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[2] + cbct_sd*(b + a[1]) )

tscore_diff = pm.Lambda('tscore_diff', 
                           lambda base=baseline, prek=prek_tscore_pred, school=school_tscore_pred, 
                           teen=teen_tscore_pred: np.array([prek, school, teen])-base[2])

Finally, the likelihood is just a normal distribution, with the observed standard error of the treatment effect as the standard deviation of the estimates.

$$d_i \sim N(\theta_i, \hat{\sigma}_i^2)$$

In [77]:
change_se = change_sd/np.sqrt(change_n)
In [78]:
d = pm.Normal('d', theta, change_se**-2, observed=True, value=change_mean)

Posterior predictive samples, if desired.

In [79]:
# Optionally simulate replicate data for posterior predictive (goodness of
# fit) checks; disabled by default.
include_gof_samples = False

if include_gof_samples:
    d_sim = pm.Normal('d_sim', theta, change_se**-2, size=len(change_mean))

Instantiate MCMC model, and use Adaptive Metropolis for some vector-valued variables.

In [80]:
# Build the MCMC sampler from all module-level nodes, and use adaptive
# Metropolis proposals for the correlated vector-valued variables.
M = pm.MCMC(locals())
M.use_step_method(pm.AdaptiveMetropolis, m)
M.use_step_method(pm.AdaptiveMetropolis, mu_beta)
M.use_step_method(pm.AdaptiveMetropolis, [beta_c, beta_p, beta_m])

Run two chains to allow for convergence evaluation.

In [81]:
M.sample(200000, 190000)
 [-----------------100%-----------------] 200000 of 200000 complete in 865.3 sec
In [82]:
M.sample(200000, 190000)
 [-----------------100%-----------------] 200000 of 200000 complete in 854.2 sec

Summary of estimates of intervention components

In [83]:
# Diagnostics for the Wishart precision matrix, when that prior was used.
if wishart:
    pm.Matplot.plot(T)
In [84]:
pm.Matplot.plot(mu_beta)
Plotting mu_beta_0
Plotting mu_beta_1
Plotting mu_beta_2
In [113]:
pm.Matplot.summary_plot?
In [114]:
# Forest plot of component means for chain 0, with a descriptive x-label.
pm.Matplot.summary_plot([mu_beta], custom_labels=['Child component', 'Parent component', 
                                                              'Multi-component'], chain=0, xlab="Effect size (standard deviations)")
In [85]:
# Forest plot of component means pooled over all chains.
pm.Matplot.summary_plot([mu_beta], custom_labels=['Child component', 'Parent component', 
                                                              'Multi-component'])
In [86]:
mu_beta.summary()
mu_beta:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-0.97            0.311            0.014            [-1.567 -0.341]
	-1.175           0.177            0.008            [-1.53  -0.835]
	-1.351           0.201            0.009            [-1.745 -0.952]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-1.599           -1.168          -0.966         -0.768        -0.356
	-1.532           -1.282          -1.17          -1.064        -0.835
	-1.756           -1.482          -1.348         -1.221        -0.961
	
In [126]:
mu_trace = M.mu_beta.trace()
In [133]:
# Pairwise contrasts between component-level mean effects, computed per MCMC
# draw. Columns of mu_trace are (child, parent, multi); negative contrast
# values favor the first-named component.
child_eff, parent_eff, multi_eff = mu_trace.T

multi_child = multi_eff - child_eff
multi_parent = multi_eff - parent_eff
parent_child = parent_eff - child_eff
In [142]:
# Posterior mean and 95% HPD interval for the multi-vs-child contrast.
'Multi-component vs child-only: mean={0:.2f}, 95% HPD=({1:.2}, {2:.2})'.format(multi_child.mean(), 
                                                                               *pm.utils.hpd(multi_child, 0.05))
Out[142]:
'Multi-component vs child-only: mean=-0.38, 95% HPD=(-1.1, 0.28)'
In [143]:
# Posterior mean and 95% HPD interval for the multi-vs-parent contrast.
'Multi-component vs parent-only: mean={0:.2f}, 95% HPD=({1:.2}, {2:.2})'.format(multi_parent.mean(), 
                                                                               *pm.utils.hpd(multi_parent, 0.05))
Out[143]:
'Multi-component vs parent-only: mean=-0.18, 95% HPD=(-0.67, 0.32)'
In [144]:
# Posterior mean and 95% HPD interval for the parent-vs-child contrast.
'Parent vs child-only: mean={0:.2f}, 95% HPD=({1:.2}, {2:.2})'.format(parent_child.mean(), 
                                                                               *pm.utils.hpd(parent_child, 0.05))
Out[144]:
'Parent vs child-only: mean=-0.20, 95% HPD=(-0.92, 0.44)'
In [88]:
pm.Matplot.plot(mu_beta)
Plotting mu_beta_0
Plotting mu_beta_1
Plotting mu_beta_2
In [89]:
# Forest plot for the pre-K interaction terms, when they were included.
if interaction:
    pm.Matplot.summary_plot([beta_pk_m, beta_pk_p])

Difference means by measure instrument.

In [90]:
# Wide forest plot of the measure-specific baseline means.
plt.figure(figsize=(24,4))
pm.Matplot.summary_plot([mu], custom_labels=unique_measures)
In [91]:
pm.Matplot.plot(mu)
Plotting mu_0
Plotting mu_1
Plotting mu_2
In [92]:
# Measure-specific standard deviations (only defined under the non-Wishart
# covariance prior).
if not wishart:
    plt.figure(figsize=(24,4))
    pm.Matplot.summary_plot([sigmas], custom_labels=unique_measures)
In [93]:
sigmas.summary()
sigmas:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.042            0.009            0.001            [ 0.027  0.06 ]
	0.383            0.057            0.005            [ 0.273  0.489]
	0.119            0.019            0.002            [ 0.084  0.154]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	0.028            0.036           0.041          0.046         0.063
	0.287            0.341           0.382          0.418         0.517
	0.087            0.105           0.118          0.132         0.159
	
In [94]:
pm.Matplot.plot(sigmas)
Plotting sigmas_0
Plotting sigmas_1
Plotting sigmas_2
In [95]:
# Pairwise correlations between measures (non-Wishart prior only).
if not wishart:
    pm.Matplot.summary_plot([rhos], custom_labels=['rho12', 'rho13', 'rho23'])

Age effects for pre-k (top) and teen (bottom) groups, relative to pre-teen.

In [96]:
pm.Matplot.summary_plot([alpha_age], custom_labels=['pre-K', 'teen'])
In [97]:
alpha_age.summary()
alpha_age:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-0.319           0.068            0.003            [-0.453 -0.192]
	-0.035           0.223            0.013            [-0.447  0.432]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-0.451           -0.366          -0.318         -0.273        -0.186
	-0.475           -0.181          -0.029         0.11          0.413
	
In [98]:
pm.Matplot.plot(alpha_age)
Plotting alpha_age_0
Plotting alpha_age_1

Outcome Plots

In [99]:
# Prediction nodes grouped by instrument (rows), each row ordered
# (school, pre-K, teen), for the outcome plots and threshold loops below.
traces = [[school_intensity_pred, prek_intensity_pred, teen_intensity_pred],
          [school_problem_pred, prek_problem_pred, teen_problem_pred],
          [school_tscore_pred, prek_tscore_pred, teen_tscore_pred]]
In [100]:
sb.set(style="white", palette="hot")
sb.despine(left=True)

# Colors: child-only, parent-only, multi-component, control/TAU.
colors = '#7fc97f','#beaed4','#fdc086','#386cb0'
#colors = '#a6611a','#dfc27d','#80cdc1','#018571'
# colors = ('#fef0d9','#fdcc8a','#fc8d59','#d7301f')[::-1]
# colors = sb.cubehelix_palette(4, start=2, rot=0, dark=.25, light=.75, reverse=False)

titles = ['Pre-K Children', 'School Children', 'Teenage Children']

# Panels are displayed as (pre-K, school, teen); `traces` rows are stored as
# (school, pre-K, teen), so they are reordered with this index list.
panel_order = [1, 0, 2]

# One figure per instrument; one panel per age group.
for i,measure in enumerate(unique_measures):
    
    f, axes = plt.subplots(1, 3, figsize=(16, 6), sharex=True, sharey=True)
    measure_traces = traces[i]
    
    for j, trace in enumerate(np.array(measure_traces)[panel_order]):
        
        # Resample observed baseline means to fold baseline variability into
        # the predicted outcome distributions.
        x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
        
        # Columns of the prediction trace: child-only, parent-only, multi.
        c1, p1, m1 = trace.trace().T
        
        g = sb.distplot(x + c1, color=colors[0], ax=axes[j])
        sb.distplot(x + p1, color=colors[1], ax=axes[j])
        sb.distplot(x + m1, color=colors[2], ax=axes[j])
        # The control curve needs the age offset that matches THIS panel.
        # (Previously the unpermuted loop index j was used, so the pre-K and
        # school panels swapped their control age effects.)
        age_idx = panel_order[j]  # 0 = school (reference), 1 = pre-K, 2 = teen
        if age_idx:
            age_effect = alpha_age.trace()[:, age_idx-1]
        else:
            age_effect = 0
        sb.distplot(x + baseline.trace()[:, i] + age_effect, color=colors[3], ax=axes[j]);
        g.set_title(titles[j])

        if not j:
            g.set_ylabel('Frequency')
            g.legend(g.lines, ['Child-only', 'Parent-only', 'Multi-component', 'Control/TAU'], loc='upper left')
        if j==1:
            g.set_xlabel(measure)
<matplotlib.figure.Figure at 0x10e499ef0>

Generate threshold proportions

In [101]:
# Clinical cutoffs for each instrument; the proportions below estimate the
# probability that a child remains above the cutoff after treatment.
thresholds = [127, # ECBI, intensity
              11,  # ECBI, problem
              60]  # CBC t-score

# Order matches the rows of `traces` (school, pre-k, teen).
age_labels = ['school', 'pre-k', 'teen']

estimates = []

for i,measure in enumerate(unique_measures):
    
    measure_traces = traces[i]
    cutoff = thresholds[i]
    
    for j, trace in enumerate(measure_traces):
        
        # Resample observed baseline means to add baseline variability.
        x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
        
        # Trace columns: child-only, parent-only, multi-component.
        c1, p1, m1 = trace.trace().T
        
        # Proportion of posterior/baseline samples above the clinical cutoff.
        _child_only = ((x + c1) > cutoff).mean().round(2)
        _parent_only = ((x + p1) > cutoff).mean().round(2)
        _multi_component = ((x + m1) > cutoff).mean().round(2)
        # j follows (school, pre-k, teen); alpha_age columns are (pre-K, teen),
        # with school as the zero reference.
        if j:
            age_effect = alpha_age.trace()[:, j-1]
        else:
            age_effect = 0
        _control = ((x + baseline.trace()[:, i] + age_effect) > cutoff).mean().round(2)
        
        print('\n{0}, {1}'.format(measure, age_labels[j]))
        print('Child-only: {0}\nParent-only: {1}\nMulti-component: {2}\nControl/TAU: {3}'.format(_child_only, 
                                                                                                 _parent_only,
                                                                                                 _multi_component,
                                                                                                 _control))
        
        estimates.append((measure, age_labels[j], _child_only, _parent_only, _multi_component, _control))
        
        
Eyberg Child Behaviour Inventory, Intensity Subscale, school
Child-only: 0.65
Parent-only: 0.52
Multi-component: 0.37
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Intensity Subscale, pre-k
Child-only: 0.42
Parent-only: 0.26
Multi-component: 0.17
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Intensity Subscale, teen
Child-only: 0.61
Parent-only: 0.48
Multi-component: 0.35
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Problem Subscale, school
Child-only: 0.8
Parent-only: 0.76
Multi-component: 0.69
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, pre-k
Child-only: 0.68
Parent-only: 0.6
Multi-component: 0.42
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, teen
Child-only: 0.78
Parent-only: 0.72
Multi-component: 0.63
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), school
Child-only: 0.61
Parent-only: 0.44
Multi-component: 0.32
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), pre-k
Child-only: 0.38
Parent-only: 0.24
Multi-component: 0.17
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), teen
Child-only: 0.59
Parent-only: 0.43
Multi-component: 0.31
Control/TAU: 1.0
In [102]:
# NOTE(review): this cell is code-identical to the previous threshold cell;
# the small differences in the printed proportions come only from the random
# resampling of baseline means. Presumably a re-run to gauge Monte Carlo
# variability — consider removing one copy or fixing the RNG seed.
thresholds = [127, # ECBI, intensity
              11,  # ECBI, problem
              60]  # CBC t-score

age_labels = ['school', 'pre-k', 'teen']

estimates = []

for i,measure in enumerate(unique_measures):
    
    measure_traces = traces[i]
    cutoff = thresholds[i]
    
    for j, trace in enumerate(measure_traces):
        
        # Resample observed baseline means to add baseline variability.
        x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
        
        # Trace columns: child-only, parent-only, multi-component.
        c1, p1, m1 = trace.trace().T
        
        _child_only = ((x + c1) > cutoff).mean().round(2)
        _parent_only = ((x + p1) > cutoff).mean().round(2)
        _multi_component = ((x + m1) > cutoff).mean().round(2)
        # School is the zero-reference age group.
        if j:
            age_effect = alpha_age.trace()[:, j-1]
        else:
            age_effect = 0
        _control = ((x + baseline.trace()[:, i] + age_effect) > cutoff).mean().round(2)
        
        print('\n{0}, {1}'.format(measure, age_labels[j]))
        print('Child-only: {0}\nParent-only: {1}\nMulti-component: {2}\nControl/TAU: {3}'.format(_child_only, 
                                                                                                 _parent_only,
                                                                                                 _multi_component,
                                                                                                 _control))
        
        estimates.append((measure, age_labels[j], _child_only, _parent_only, _multi_component, _control))
        
        
Eyberg Child Behaviour Inventory, Intensity Subscale, school
Child-only: 0.65
Parent-only: 0.52
Multi-component: 0.37
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Intensity Subscale, pre-k
Child-only: 0.42
Parent-only: 0.27
Multi-component: 0.17
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Intensity Subscale, teen
Child-only: 0.61
Parent-only: 0.48
Multi-component: 0.35
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Problem Subscale, school
Child-only: 0.8
Parent-only: 0.76
Multi-component: 0.7
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, pre-k
Child-only: 0.67
Parent-only: 0.6
Multi-component: 0.41
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, teen
Child-only: 0.78
Parent-only: 0.72
Multi-component: 0.64
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), school
Child-only: 0.62
Parent-only: 0.43
Multi-component: 0.32
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), pre-k
Child-only: 0.37
Parent-only: 0.24
Multi-component: 0.17
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), teen
Child-only: 0.58
Parent-only: 0.42
Multi-component: 0.3
Control/TAU: 1.0
In [103]:
estimates
Out[103]:
[('Eyberg Child Behaviour Inventory, Intensity Subscale',
  'school',
  0.65000000000000002,
  0.52000000000000002,
  0.37,
  0.93999999999999995),
 ('Eyberg Child Behaviour Inventory, Intensity Subscale',
  'pre-k',
  0.41999999999999998,
  0.27000000000000002,
  0.17000000000000001,
  0.93999999999999995),
 ('Eyberg Child Behaviour Inventory, Intensity Subscale',
  'teen',
  0.60999999999999999,
  0.47999999999999998,
  0.34999999999999998,
  0.93999999999999995),
 ('Eyberg Child Behaviour Inventory, Problem Subscale',
  'school',
  0.80000000000000004,
  0.76000000000000001,
  0.69999999999999996,
  1.0),
 ('Eyberg Child Behaviour Inventory, Problem Subscale',
  'pre-k',
  0.67000000000000004,
  0.59999999999999998,
  0.40999999999999998,
  1.0),
 ('Eyberg Child Behaviour Inventory, Problem Subscale',
  'teen',
  0.78000000000000003,
  0.71999999999999997,
  0.64000000000000001,
  1.0),
 ('Child Behavior Checklist, Externalizing (T Score)',
  'school',
  0.62,
  0.42999999999999999,
  0.32000000000000001,
  1.0),
 ('Child Behavior Checklist, Externalizing (T Score)',
  'pre-k',
  0.37,
  0.23999999999999999,
  0.17000000000000001,
  1.0),
 ('Child Behavior Checklist, Externalizing (T Score)',
  'teen',
  0.57999999999999996,
  0.41999999999999998,
  0.29999999999999999,
  1.0)]
In [104]:
# Suffix the output filename with '_pcit' for the sensitivity analysis in
# which PCIT is NOT treated as a multi-component intervention.
# (The previous `bool(~pcit_as_multicomponent)` is wrong for Python bools:
# ~True == -2 and ~False == -1 are both truthy, so the suffix was always
# added. The conditional below is correct for Python and numpy booleans.)
estimates_filename = 'estimates%s.csv' % ('' if pcit_as_multicomponent else '_pcit')
np.savetxt(estimates_filename, np.array([e[2:] for e in estimates]), delimiter='\t', fmt='%1.2f')
In [105]:
# Save the labeled proportions table for reporting.
pd.DataFrame(estimates, columns=['Instrument', 'Age group', 'Child-only', 
                                 'Parent-only', 'Multi-component', 'TAU/Control']).to_csv('outcome_proportions.csv')

Summary statistics for TAU/control

In [106]:
baseline.summary()
baseline:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-0.019           0.049            0.002            [-0.115  0.078]
	0.059            0.392            0.006            [-0.73   0.826]
	-0.042           0.125            0.003            [-0.293  0.199]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-0.117           -0.052          -0.018         0.013         0.076
	-0.724           -0.2            0.059          0.314         0.838
	-0.295           -0.123          -0.039         0.042         0.198
	

Summary statistics for intensity subscale

In [107]:
# Posterior summaries of predicted intensity-subscale outcomes by age group.
prek_intensity_pred.summary()
school_intensity_pred.summary()
teen_intensity_pred.summary()
prek_intensity_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-34.209          8.425            0.378          [-51.547 -17.752]
	-39.653          4.522            0.17           [-48.697 -30.677]
	-44.327          5.336            0.217          [-55.642 -34.383]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-51.141          -39.79          -34.084        -28.825       -17.233
	-48.788          -42.415         -39.653        -36.813       -30.735
	-55.089          -47.691         -44.311        -40.843       -33.694
	

school_intensity_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-25.745          8.245            0.368          [-41.605  -9.007]
	-31.189          4.691            0.219          [-40.666 -22.196]
	-35.863          5.32             0.233          [-46.322 -25.263]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-42.35           -30.995         -25.621        -20.389       -9.488
	-40.69           -34.043         -31.059        -28.239       -22.206
	-46.598          -39.328         -35.798        -32.392       -25.512
	

teen_intensity_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-26.681          9.259            0.379          [-45.921  -9.456]
	-32.126          7.459            0.402          [-47.037 -17.968]
	-36.799          6.933            0.29           [-49.991 -22.909]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-45.329          -32.785         -26.378        -20.429       -8.685
	-46.998          -37.089         -31.957        -27.115       -17.782
	-51.062          -41.391         -36.69         -32.039       -23.568
	

Summary statistics for problem subscale

In [108]:
# Posterior summaries of predicted problem-subscale outcomes by age group.
prek_problem_pred.summary()
school_problem_pred.summary()
teen_problem_pred.summary()
prek_problem_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-7.906           2.005            0.088          [-12.062  -4.045]
	-9.175           1.122            0.04             [-11.37  -6.93]
	-10.264          1.305            0.052          [-12.854  -7.699]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-11.905          -9.234          -7.891         -6.608        -3.858
	-11.418          -9.891          -9.171         -8.441        -6.968
	-12.837          -11.115         -10.268        -9.408        -7.679
	

school_problem_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-5.934           1.963            0.086            [-9.768 -1.969]
	-7.203           1.158            0.051            [-9.447 -4.904]
	-8.292           1.301            0.055          [-10.85   -5.733]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-9.867           -7.207          -5.909         -4.66         -2.05
	-9.534           -7.93           -7.191         -6.447        -4.972
	-10.909          -9.13           -8.264         -7.423        -5.784
	

teen_problem_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-6.153           2.192            0.088          [-10.509  -1.872]
	-7.421           1.776            0.093          [-10.838  -3.886]
	-8.51            1.659            0.068          [-11.751  -5.239]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-10.544          -7.593          -6.122         -4.675        -1.89
	-10.983          -8.56           -7.381         -6.235        -4.014
	-11.91           -9.603          -8.483         -7.382        -5.341
	

Summary statistics for t scores

In [109]:
# Posterior summaries of predicted CBC T-score outcomes by age group.
prek_tscore_pred.summary()
school_tscore_pred.summary()
teen_tscore_pred.summary()
prek_tscore_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-10.714          2.631            0.118          [-16.182  -5.616]
	-12.414          1.417            0.053          [-15.389  -9.748]
	-13.872          1.669            0.067          [-17.418 -10.761]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-16.025          -12.451         -10.679        -9.032        -5.422
	-15.294          -13.277         -12.417        -11.516       -9.616
	-17.228          -14.922         -13.87         -12.782       -10.548
	

school_tscore_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-8.072           2.576            0.115          [-13.04   -2.825]
	-9.771           1.47             0.068          [-12.629  -6.836]
	-11.23           1.664            0.072          [-14.514  -7.927]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-13.22           -9.724          -8.016         -6.399        -2.976
	-12.787          -10.671         -9.731         -8.845        -6.946
	-14.594          -12.321         -11.209        -10.134       -7.985
	

teen_tscore_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-8.364           2.892            0.119            [-14.16  -2.75]
	-10.064          2.332            0.126          [-14.784  -5.688]
	-11.523          2.167            0.091          [-15.786  -7.307]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-14.188          -10.282         -8.281         -6.419        -2.756
	-14.724          -11.611         -10.004        -8.501        -5.618
	-15.985          -12.966         -11.483        -10.044       -7.398
	

Summary statistics for differences.

Each summary corresponds to the differences between baseline and each age × component category for each outcome. The order is:

  • child-only, pre-k
  • child-only, school
  • child-only, teen
  • parent-only, pre-k
  • parent-only, school
  • parent-only, teen
  • multi, pre-k
  • multi, school
  • multi, teen
In [110]:
# Posterior summaries of the differences from baseline; rows are ordered by
# component within age group, as listed in the markdown above.
intensity_diff.summary()
problem_diff.summary()
tscore_diff.summary()
intensity_diff:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-34.189          8.424            0.378          [-51.546 -17.803]
	-39.634          4.522            0.17           [-48.904 -30.894]
	-44.308          5.335            0.216          [-55.528 -34.283]
	-25.725          8.246            0.368          [-41.584  -9.037]
	-31.17           4.692            0.219          [-40.579 -22.152]
	-35.843          5.32             0.233          [-46.304 -25.268]
	-26.662          9.258            0.379          [-45.942  -9.505]
	-32.106          7.458            0.402          [-46.984 -17.922]
	-36.78           6.932            0.29           [-49.98  -22.908]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-51.106          -39.765         -34.051        -28.823       -17.234
	-48.776          -42.4           -39.643        -36.789       -30.732
	-55.092          -47.683         -44.283        -40.816       -33.669
	-42.414          -30.975         -25.619        -20.376       -9.454
	-40.653          -34.006         -31.036        -28.237       -22.162
	-46.587          -39.307         -35.765        -32.379       -25.499
	-45.335          -32.751         -26.363        -20.415       -8.654
	-46.962          -37.053         -31.935        -27.085       -17.754
	-51.027          -41.381         -36.679        -32.015       -23.538
	

problem_diff:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-7.965           1.963            0.088          [-12.009  -4.148]
	-9.234           1.053            0.04           [-11.394  -7.198]
	-10.323          1.243            0.05           [-12.937  -7.987]
	-5.993           1.921            0.086            [-9.688 -2.105]
	-7.262           1.093            0.051            [-9.454 -5.161]
	-8.351           1.239            0.054          [-10.788  -5.887]
	-6.212           2.157            0.088          [-10.704  -2.214]
	-7.48            1.738            0.094          [-10.946  -4.176]
	-8.569           1.615            0.068          [-11.644  -5.337]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-11.907          -9.264          -7.933         -6.715        -4.015
	-11.364          -9.878          -9.236         -8.571        -7.16
	-12.835          -11.109         -10.317        -9.509        -7.844
	-9.882           -7.217          -5.969         -4.747        -2.203
	-9.471           -7.923          -7.231         -6.579        -5.163
	-10.854          -9.158          -8.333         -7.544        -5.941
	-10.562          -7.63           -6.142         -4.756        -2.016
	-10.941          -8.633          -7.44          -6.31         -4.136
	-11.888          -9.641          -8.545         -7.459        -5.484
	

tscore_diff:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-10.672          2.63             0.118          [-16.09   -5.557]
	-12.371          1.411            0.053          [-15.265  -9.643]
	-13.83           1.665            0.068          [-17.333 -10.701]
	-8.03            2.574            0.115          [-12.98   -2.821]
	-9.729           1.465            0.068          [-12.666  -6.915]
	-11.188          1.661            0.073          [-14.453  -7.887]
	-8.322           2.89             0.118          [-14.341  -2.967]
	-10.022          2.328            0.125          [-14.666  -5.594]
	-11.481          2.164            0.091          [-15.601  -7.151]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-15.952          -12.412         -10.629        -8.997        -5.38
	-15.225          -13.235         -12.374        -11.483       -9.593
	-17.197          -14.884         -13.822        -12.74        -10.509
	-13.239          -9.669          -7.997         -6.36         -2.951
	-12.69           -10.615         -9.688         -8.814        -6.918
	-14.542          -12.269         -11.164        -10.107       -7.959
	-14.151          -10.223         -8.229         -6.372        -2.701
	-14.659          -11.566         -9.968         -8.454        -5.542
	-15.928          -12.917         -11.449        -9.993        -7.347