In [1]:

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sb
import numpy as np
import pandas as pd
import pymc as pm
from scipy.misc import comb

In [2]:

np.random.seed(42)

Data import and cleaning¶

In [3]:

excel_sucks = True

if excel_sucks:

    study_char = pd.read_csv("study_characteristics.csv", 
                             index_col='RefID', na_values=['-', 'NR'])
    outcomes = pd.read_csv("outcomes.csv", na_values=['ND', 'NR'])
    
else:
    
    data_file = 'DBD Data for Meta Analyses_4-7-15_withNRCTs_RAE.xlsx'

    study_char = pd.read_excel(data_file, "Study Characteristics", 
                               index_col='RefID', na_values=['-', 'NR'])
    outcomes = pd.read_excel(data_file, "Outcomes", 
                               na_values=['ND', 'NR'])

In [4]:

study_char.tail()

Out[4]:

	Pubmed ID	Rel Incl1	Rel Incl2	Rel Incl3	Rel Excl	Family	NCT	Title	Year	Author	...	Screened (N)	Randomized (N)	Analyzed (N)	Proportion Male (%)	Direction of Effect	DOE_Comments	Other comments	Drugs(s)	-	Unnamed: 46
RefID
8385	na	NaN	NaN	NaN	NaN	NaN	NaN	Reducing Adolescent Oppositional and Conduct D...	2011	Sells SP, K. W. Early and T. E. Smith	...	NaN	38	NaN	57	NaN	NaN	NaN	NaN	NaN	NaN
8401	22820873	NaN	NaN	NaN	NaN	NaN	NaN	"Tuning into Kids": reducing young children's ...	2013	Havighurst SS, K. R. Wilson, A. E. Harley, C. ...	...	92	63	54	78	NaN	NaN	NaN	NaN	NaN	NaN
NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

5 rows × 46 columns

Data cleaning

In [5]:

# Cast outcomes variables to floats
for col in ('Last FU Mean', 'Last FU SD',):
    outcomes[col] = outcomes[col].astype(float)

In [6]:

# Recode age category
study_char['age_cat'] = study_char.AgeCat.replace({'PRE-K':1, 'SCHOOL':0, 'TEEN':2})

In [7]:

# Fix data label typo
outcomes['Measure Instrument'] = outcomes['Measure Instrument'].replace({'Eyberg Child Behaviour Inventory, problem Subscale': 
                                        'Eyberg Child Behaviour Inventory, Problem Subscale'})
outcomes.Units = outcomes.Units.replace({'scale': 'Scale'})

In [8]:

# Parse followup times and convert to months
split_fut = outcomes.loc[outcomes['Last FU Time'].notnull(), 'Last FU Time'].apply(lambda x: str(x).split(' ')[:2])
fut_months = [float(time)/52.*(unit=='weeks') or float(time) for time, unit in split_fut]
outcomes.loc[outcomes['Last FU Time'].notnull(), 'Last FU Time'] = fut_months

We are assumung all CBC Externalizing values over 50 are T-scores, and those under 50 are raw scores. This recodes those observations.

In [9]:

cbce_ind = outcomes['Measure Instrument'].apply(lambda x: x.startswith('Child Behavior Checklist, Externalizing'))
under_50 = outcomes['BL Mean']<50
outcomes.loc[cbce_ind & (under_50^True), 'Measure Instrument'] = 'Child Behavior Checklist, Externalizing (T Score)'
outcomes.loc[cbce_ind & under_50, 'Measure Instrument'] = 'Child Behavior Checklist, Externalizing'

Recode measure instrument variables

In [10]:

instrument = []
subtype = []
units = []

for i,row in outcomes.iterrows():
    separator = row['Measure Instrument'].find(',')
    if separator == -1:
        separator = row['Measure Instrument'].find('-')
    instrument.append(row['Measure Instrument'][:separator])
    s = row['Measure Instrument'][separator+2:]
    paren = s.find('(')
    if paren > -1:
        subtype.append(s[:paren-1])
        units.append(s[paren+1:-1])
    else:
        subtype.append(s)
        if s.endswith('scale'):
            units.append('Scale')
        else:
            units.append('Score')
            
new_cols = pd.DataFrame({'instrument': instrument, 'subtype': subtype, 
                         'units': units}, index=outcomes.index)

In [11]:

outcomes['Measure Instrument'].value_counts()

Out[11]:

Eyberg Child Behaviour Inventory, Intensity Subscale            79
Eyberg Child Behaviour Inventory, Problem Subscale              57
Child Behavior Checklist, Externalizing (T Score)               46
Child Behavior Checklist, Externalizing                         11
Strengths and Difficulties Questionnaire, Total Score           10
Strengths and Difficulties Questionnaire- Conduct Problems Scale    10
Eyberg Child Behaviour Inventory, Intensity Subscale (T Score)    10
Child Behavior Checklist, Conduct Problems (T Score)             6
Child Behavior Checklist, Aggression                             6
Strengths and Difficulties Questionnaire- Emotional Symptoms Scale     4
Eyberg Child Behaviour Inventory, Problem Subscale (T Score)     4
Child Behavior Checklist, Delinquent Subscale                    4
Child Behavior Checklist, Conduct Problems                       2
Child Behavior Checklist, Social problems                        2
DSM IV                                                           2
Strengths and Difficulties Questionnaire- Hyperactivity Scale     2
Strengths and Difficulties Questionnaire- Impact Score           2
Child Behavior Checklist, Rulebreaking                           2
Strengths and Difficulties Questionnaire, Percent Below Clinical Cutoff     2
Childrens Global Assessment Scale                                2
dtype: int64

In [12]:

new_cols.head()

Out[12]:

	instrument	subtype	units
0	Eyberg Child Behaviour Inventory	Intensity Subscale	T Score
1	Eyberg Child Behaviour Inventory	Intensity Subscale	T Score
2	Eyberg Child Behaviour Inventory	Problem Subscale	T Score
3	Eyberg Child Behaviour Inventory	Problem Subscale	T Score
4	Child Behavior Checklist	Externalizing	Score

In [13]:

# Append new columns
outcomes = outcomes.join(new_cols)

In [14]:

outcomes.intvn.value_counts()

Out[14]:

wlc              47
tau              39
iypt             32
pcit             16
TAU              10
iyptndiyct        8
pppsd             7
WLC               7
PCIT              5
mst               5
MTP               5
modularndn        4
it                4
iyct              4
pppo              4
ppcp              4
pmtndp            3
FBT               3
snap              3
pcitc             3
pppe              3
spokes            3
PLL               3
CT                3
pmto              3
TIK               2
pcitabb           2
pmtsd             2
RST               2
projndsupport     2
setpc             2
hncte             2
pmtnds            2
hncstd            2
pmtpa             2
itpt              1
scip              1
hnc               1
coaching          1
cbt               1
modularndcomm     1
hitkashrut        1
modularndclin     1
iyptadv           1
pstnds            1
kitkashrut        1
sst               1
cpp               1
pppstd            1
mcfi              1
dtype: int64

In [15]:

outcomes['Ref ID'].unique()

Out[15]:

array([  23,  100,  103,  371,  441,  475,  539,  564,  757,  836,  870,
        899,  993,  995, 1018, 1096, 1201, 1215, 1236, 1245, 1292, 1386,
       1511, 1521, 1585, 1875, 1951, 2092, 2117, 2219, 2239, 2347, 3211,
       3225, 3397, 3399, 3495, 3687, 3716, 3766, 3915, 3960, 7109, 7723,
       8192, 8231, 8289, 8292, 8385, 8401, 7848])

Data summaries¶

Cross-tabulation of the outcome counts by measure instrument

In [16]:

pd.crosstab(outcomes['instrument'], outcomes['Outcome'])

Out[16]:

Outcome	01 Behavior, disruptive	02 Behavior, aggression	06 Behavior, fighting, destruction, violation	08 Behavior, other	11 Function, interpersonal and social
instrument
Child Behavior Checklist	65	4	4	4	2
Childrens Global Assessment Scal	0	0	0	0	2
DSM I	2	0	0	0	0
Eyberg Child Behaviour Inventory	148	0	0	2	0
Strengths and Difficulties Questionnaire	10	0	0	20	0

Distribution of age categories

In [17]:

study_char.AgeCat.value_counts()

Out[17]:

SCHOOL    44
PRE-K     31
TEEN      18
dtype: int64

Frequencies of various intervention types

In [19]:

study_char['Intervention Type'].value_counts()

Out[19]:

PHARM                                                           20
IY-PT                                                           10
MST                                                              9
PCIT                                                             7
IY-PT + IY-CT                                                    4
PMTO                                                             3
Triple P (enhanced)                                              3
BSFT                                                             3
CBT                                                              2
Triple-P (self-directed)                                         2
PCIT-ABB                                                         2
PT                                                               2
UCPP                                                             2
IY-PT (nurse led)                                                2
CPS                                                              2
OTH: Intensive treatment                                         2
OTH: Modular treatment                                           2
PMT (practitioner assisted)                                      1
PONI                                                             1
MF-PEP + TAU                                                     1
Family Therapy                                                   1
OTH: Booster                                                     1
OTH: FFT                                                         1
PLL                                                              1
IY-PT + ADVANCE                                                  1
OTH: Child only treatment                                        1
IY-CT + IY-PT                                                    1
OTH: Community Parent Education Program                          1
IY                                                               1
SNAP Under 12 OutReach Project (enhanced)                        1
RST                                                              1
OTH: Instrumental, emotional support & child management skills     1
SET-PC                                                           1
TIK                                                              1
IY-PT (brief)                                                    1
OTH: Modular (nurse administered)                                1
OTH: MTP                                                         1
Triple-P (online)                                                1
HNC                                                              1
PMT (perceptive)                                                 1
SNAP Under 12 OutReach Project                                   1
CBFI                                                             1
HNC (technology enhanced)                                        1
OTH: Project support                                             1
Coping Power (cultural adaptation)                               1
Triple-P (enhanced)                                              1
SCIP (Social cognitive (Dodge's))                                1
OTH: Modular treatment (community)                               1
IY-PT + IY-CT + IY-TT                                            1
PHARM1 + PHARM2                                                  1
OTH: Parents Plus Children's Program                             1
Parenting Group (SPOKES)                                         1
SNAP Under 12 \nOutReach Project(ORP)                            1
PCIT (modified)                                                  1
OTH: Family therapy                                              1
Length: 55, dtype: int64

Extract variables of interest and merge tables¶

In [20]:

KQ1 = study_char[study_char.KQ=='KQ1']

In [21]:

study_varnames = ['Year', 'age_cat', 'Geographic setting', 'Age mean (years) ', 'Age SD (years)', 
              'Age min (years)', 'Age max (years)', 'Proportion Male (%)', 'Design']

study_vars = KQ1[study_varnames].rename(columns={'Geographic setting': 'country', 
                           'Age mean (years) ': 'age_mean', 
                           'Age SD (years)': 'age_sd', 
                           'Age min (years)': 'age_min', 
                           'Age max (years)': 'age_max', 
                           'Proportion Male (%)': 'p_male'})

In [22]:

study_vars.head()

Out[22]:

	Year	age_cat	country	age_mean	age_sd	age_min	age_max	p_male	Design
RefID
23	2013	1	USA	2.80	0.61	2	4	62	RCT
100	2013	2	USA	14.60	1.30	11	18	83	RCT
103	2013	1	USA	5.67	1.72	3	8	53	RCT
141	2013	0	USA	9.90	1.30	8	11	73	RCT
156	2013	2	Netherlands	16.00	1.31	12	18	73	RCT

In [23]:

study_vars.p_male.hist()

Out[23]:

<matplotlib.axes._subplots.AxesSubplot at 0x10cc2d908>

Proportion missing

In [24]:

study_vars.isnull().mean(0).round(2)

Out[24]:

Year        0.00
age_cat     0.00
country     0.00
age_mean    0.10
age_sd      0.19
age_min     0.06
age_max     0.07
p_male      0.02
Design      0.00
dtype: float64

Will assume the mean age for those which are missing is simply the midpoint between minimum and maximum values

In [25]:

est_means = study_vars.apply(lambda x: x.age_min + (x.age_max - x.age_min) / 2, axis=1)[study_vars.age_mean.isnull()]
study_vars.loc[study_vars.age_mean.isnull(), 'age_mean'] = est_means

study_vars.age_mean.isnull().sum()

Out[25]:

In [26]:

outcomes_varnames = ['Ref ID', 'Measure Instrument', 'instrument', 'subtype', 'units', 
                     'intvn', 'cc', 'pc', 'fc',
                     'BL N', 'BL Mean', 'BL SD', 
                     'EOT \nN', 'EOT Mean', 'EOT \nSD', 'Last FU Time', 'Last FU N', 
                     'Last FU Mean', 'Last FU SD', 'CS Group N', 'CS Mean', 'CS SD']

In [27]:

outcomes_vars = outcomes[outcomes_varnames].rename(columns={'Ref ID': 'RefID', 
                                                                       'Measure Instrument': 'measure_instrument',
                                                                       'cc': 'child_component',
                                                                       'pc': 'parent_component',
                                                                       'fc': 'family_component',
                                                                       'oc': 'other_component',
                                                                       'BL N': 'baseline_n',
                                                                       'BL Mean': 'baseline_mean',
                                                                       'BL SD': 'baseline_sd', 
                                                                       'EOT \nN': 'end_treat_n', 
                                                                       'EOT Mean': 'end_treat_mean', 
                                                                       'EOT \nSD': 'end_treat_sd', 
                                                                       'Last FU Time': 'followup_time', 
                                                                       'Last FU N': 'followup_n',
                                                                       'Last FU Mean': 'followup_mean', 
                                                                       'Last FU SD': 'followup_sd', 
                                                                       'CS Group N': 'change_n',
                                                                       'CS Mean': 'change_mean',
                                                                       'CS SD': 'change_sd'})

Recode intervention clasification

In [28]:

outcomes_vars.isnull().sum()

Out[28]:

RefID                   0
measure_instrument      0
instrument              0
subtype                 0
units                   0
intvn                   0
child_component         0
parent_component        0
family_component        0
baseline_n              4
baseline_mean          17
baseline_sd            25
end_treat_n            74
end_treat_mean         67
end_treat_sd           74
followup_time          32
followup_n            103
followup_mean         118
followup_sd           126
change_n              251
change_mean           258
change_sd             263
dtype: int64

In [29]:

control = ((outcomes_vars.child_component^True) & 
           (outcomes_vars.parent_component^True) & 
           (outcomes_vars.family_component^True)).astype(int)
child_only = ((outcomes_vars.child_component) & 
              (outcomes_vars.parent_component^True) & 
              (outcomes_vars.family_component^True)).astype(int)
parent_only = ((outcomes_vars.child_component^True) & 
               (outcomes_vars.parent_component) & 
               (outcomes_vars.family_component^True)).astype(int)
outcomes_vars.ix[child_only.astype(bool), ['child_component', 'parent_component', 'family_component']]

Out[29]:

	child_component	parent_component	family_component
125	1	0	0
126	1	0	0
128	1	0	0
129	1	0	0
162	1	0	0
188	1	0	0
194	1	0	0
195	1	0	0
234	1	0	0
236	1	0	0
238	1	0	0

In [30]:

multi_component = ((parent_only^True) & (child_only^True) & (control^True)).astype(int)

outcomes_vars['child_only'] = child_only
outcomes_vars['parent_only'] = parent_only
outcomes_vars['multi_component'] = multi_component

If requested, change PCIT to parent-only.

In [31]:

pcit_as_multicomponent = True

if not pcit_as_multicomponent:
    outcomes_vars.loc[outcomes_vars.intvn=='pcit', 'parenr_only'] = 1
    outcomes_vars.loc[outcomes_vars.intvn=='pcit', 'multi_component'] = 0

Obtain subset with non-missing EOT data

In [32]:

eot_subset = outcomes_vars[outcomes_vars.end_treat_mean.notnull() & outcomes_vars.end_treat_sd.notnull()].copy()

Calculate EOT difference

In [33]:

eot_subset['eot_diff_mean'] = eot_subset.end_treat_mean - eot_subset.baseline_mean

In [34]:

eot_subset['eot_diff_sd'] = eot_subset.baseline_sd + eot_subset.end_treat_sd

In [35]:

eot_subset['eot_diff_n'] = eot_subset[['baseline_n', 'end_treat_n']].min(1)

Distribution of baseline means among outcome metrics

In [36]:

for instrument in ('Eyberg Child Behaviour Inventory', 
                   'Child Behavior Checklist', 
                   'Strengths and Difficulties Questionnaire'):
    eot_subset[eot_subset.instrument==instrument]['baseline_mean'].hist(by=eot_subset['subtype'], 
                                                                              sharex=True)
    plt.suptitle(instrument);

In [37]:

eot_subset.instrument.value_counts()

Out[37]:

Eyberg Child Behaviour Inventory            116
Child Behavior Checklist                     48
Strengths and Difficulties Questionnaire     22
Childrens Global Assessment Scal              2
dtype: int64

In [38]:

eot_subset.instrument.value_counts()

Out[38]:

Eyberg Child Behaviour Inventory            116
Child Behavior Checklist                     48
Strengths and Difficulties Questionnaire     22
Childrens Global Assessment Scal              2
dtype: int64

In [39]:

eot_subset[eot_subset.RefID==441]

Out[39]:

	RefID	measure_instrument	instrument	subtype	units	intvn	parent_component	baseline_n	...	followup_sd	change_n	change_mean	change_sd	parent_only	eot_diff_mean	eot_diff_sd	eot_diff_n
14	441	Eyberg Child Behaviour Inventory, Intensity Su...	Eyberg Child Behaviour Inventory	Intensity Subscale	Scale	iypt	1	32	...	NaN	NaN	NaN	NaN	1	-31.40	46.80	32
15	441	Eyberg Child Behaviour Inventory, Intensity Su...	Eyberg Child Behaviour Inventory	Intensity Subscale	Scale	wlc	0	20	...	NaN	NaN	NaN	NaN	0	-5.80	49.60	20
16	441	Eyberg Child Behaviour Inventory, Problem Subs...	Eyberg Child Behaviour Inventory	Problem Subscale	Scale	iypt	1	24	...	NaN	NaN	NaN	NaN	1	-9.70	12.02	24
17	441	Eyberg Child Behaviour Inventory, Problem Subs...	Eyberg Child Behaviour Inventory	Problem Subscale	Scale	wlc	0	17	...	NaN	NaN	NaN	NaN	0	-2.88	14.59	17

4 rows × 28 columns

Several studies use multiple instruments and metrics within instruments

In [40]:

eot_subset.groupby(['RefID', 'instrument'])['subtype'].value_counts()

Out[40]:

RefID  instrument                                                  
103    Eyberg Child Behaviour Inventory          Intensity Subscale    2
                                                 Problem Subscale      2
371    Eyberg Child Behaviour Inventory          Intensity Subscale    2
                                                 Problem Subscale      2
441    Eyberg Child Behaviour Inventory          Intensity Subscale    2
                                                 Problem Subscale      2
475    Eyberg Child Behaviour Inventory          Intensity Subscale    2
539    Child Behavior Checklist                  Aggression            2
                                                 Externalizing         2
564    Eyberg Child Behaviour Inventory          Intensity Subscale    2
                                                 Problem Subscale      2
836    Strengths and Difficulties Questionnaire  Total Score           2
899    Child Behavior Checklist                  Externalizing         3
       Eyberg Child Behaviour Inventory          Intensity Subscale    3
                                                 Problem Subscale      3
...
7723   Child Behavior Checklist          Externalizing                       2
7848   Child Behavior Checklist          Social problems                     2
                                         Delinquent Subscale                 2
                                         Aggression                          2
                                         Externalizing                       2
       Childrens Global Assessment Scal  hildrens Global Assessment Scale    2
8192   Eyberg Child Behaviour Inventory  Intensity Subscale                  2
8231   Eyberg Child Behaviour Inventory  Intensity Subscale                  4
                                         Problem Subscale                    4
8289   Child Behavior Checklist          Delinquent Subscale                 2
       Eyberg Child Behaviour Inventory  Intensity Subscale                  2
                                         Problem Subscale                    2
8292   Child Behavior Checklist          Externalizing                       2
8401   Eyberg Child Behaviour Inventory  Intensity Subscale                  2
                                         Problem Subscale                    2
Length: 73, dtype: int64

In [41]:

pd.crosstab(eot_subset.instrument, eot_subset.subtype)

Out[41]:

subtype	Aggression	Conduct Problems	Conduct Problems Scale	Delinquent Subscale	Emotional Symptoms Scale	Externalizing	Hyperactivity Scale	Impact Score	Intensity Subscale	Problem Subscale	Social problems	Total Score	hildrens Global Assessment Scale
instrument
Child Behavior Checklist	4	6	0	4	0	32	0	0	0	0	2	0	0
Childrens Global Assessment Scal	0	0	0	0	0	0	0	0	0	0	0	0	2
Eyberg Child Behaviour Inventory	0	0	0	0	0	0	0	0	68	48	0	0	0
Strengths and Difficulties Questionnaire	0	0	6	0	4	0	2	2	0	0	0	8	0

In [42]:

x = eot_subset[eot_subset.instrument=='Eyberg Child Behaviour Inventory']
pd.crosstab(x.instrument, x.subtype)

Out[42]:

subtype	Intensity Subscale	Problem Subscale
instrument
Eyberg Child Behaviour Inventory	68	48

In [43]:

x = eot_subset[eot_subset.instrument=='Child Behavior Checklist']
pd.crosstab(x.instrument, x.subtype)

Out[43]:

subtype	Aggression	Conduct Problems	Delinquent Subscale	Externalizing	Social problems
instrument
Child Behavior Checklist	4	6	4	32	2

In [44]:

x = eot_subset[eot_subset.instrument=='Strengths and Difficulties Questionnaire']
pd.crosstab(x.instrument, x.subtype)

Out[44]:

subtype	Conduct Problems Scale	Emotional Symptoms Scale	Hyperactivity Scale	Impact Score	Total Score
instrument
Strengths and Difficulties Questionnaire	6	4	2	2	8

Filter out non-RCT studies, if requested.

In [47]:

rct_only = True

rct_study_vars = study_vars[study_vars.Design=='RCT']

Merge study variables and outcomes

In [48]:

merged_vars = rct_study_vars.merge(eot_subset, left_index=True, right_on='RefID')
merged_vars.shape

Out[48]:

(156, 37)

For now, restrict to the three most prevalent metrics.

In [49]:

merged_vars.measure_instrument.value_counts()

Out[49]:

Eyberg Child Behaviour Inventory, Intensity Subscale            62
Eyberg Child Behaviour Inventory, Problem Subscale              46
Child Behavior Checklist, Externalizing (T Score)               19
Child Behavior Checklist, Externalizing                          7
Child Behavior Checklist, Conduct Problems (T Score)             6
Eyberg Child Behaviour Inventory, Intensity Subscale (T Score)     4
Strengths and Difficulties Questionnaire, Total Score            4
Child Behavior Checklist, Aggression                             2
Strengths and Difficulties Questionnaire- Conduct Problems Scale     2
Child Behavior Checklist, Delinquent Subscale                    2
Strengths and Difficulties Questionnaire- Emotional Symptoms Scale     2
dtype: int64

Take only the top 3 measures

In [50]:

top_measures = 3

In [51]:

merged_vars.RefID.unique()

Out[51]:

array([ 103,  371,  441,  475,  539,  836,  899, 1236, 1245, 1511, 1585,
       1875, 1951, 2092, 2219, 2239, 3211, 3225, 3399, 3495, 3687, 3716,
       3766, 3915, 3960, 7109, 7723, 8192, 8231, 8289, 8292, 8401])

In [53]:

analysis_subset = merged_vars[merged_vars.measure_instrument.isin(merged_vars.measure_instrument.value_counts().index[:top_measures])]
analysis_subset.groupby('measure_instrument')['baseline_mean'].max()

Out[53]:

measure_instrument
Child Behavior Checklist, Externalizing (T Score)        77.10
Eyberg Child Behaviour Inventory, Intensity Subscale    186.44
Eyberg Child Behaviour Inventory, Problem Subscale       28.62
Name: baseline_mean, dtype: float64

In [54]:

analysis_subset.groupby('measure_instrument').baseline_mean.hist()

Out[54]:

measure_instrument
Child Behavior Checklist, Externalizing (T Score)       Axes(0.125,0.125;0.775x0.775)
Eyberg Child Behaviour Inventory, Intensity Subscale    Axes(0.125,0.125;0.775x0.775)
Eyberg Child Behaviour Inventory, Problem Subscale      Axes(0.125,0.125;0.775x0.775)
Name: baseline_mean, dtype: object

In [55]:

analysis_subset['baseline_mean'].hist(by=analysis_subset['measure_instrument'],sharex=True)

Out[55]:

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x115a4ce48>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x115baf390>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x115bf5a58>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x115c2e780>]], dtype=object)

In [56]:

analysis_subset['eot_diff_mean'].hist(by=analysis_subset['measure_instrument'],sharex=True)

Out[56]:

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x115cce320>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x115637fd0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1157accc0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11567d240>]], dtype=object)

In [57]:

for x in analysis_subset.measure_instrument.unique():
    plt.figure()
    analysis_subset[analysis_subset.measure_instrument==x].baseline_mean.plot(kind='kde', alpha=0.4, grid=False)
    analysis_subset[analysis_subset.measure_instrument==x].end_treat_mean.plot(kind='kde', alpha=0.4, grid=False)
    plt.gca().set_xlabel(x)

Number of TAU/Control arms in subset

In [58]:

analysis_subset.shape[0] - analysis_subset.multi_component.sum() - analysis_subset.child_only.sum() - analysis_subset.parent_only.sum()

Out[58]:

In [59]:

set(analysis_subset[analysis_subset.multi_component==1].RefID.values)

Out[59]:

{539, 899, 1236, 1511, 2092, 3399, 3687, 3766, 3915, 8192, 8231, 8289, 8292}

In [60]:

set(analysis_subset[analysis_subset.child_only==1].RefID.values)

Out[60]:

{1875, 3766, 8289}

In [61]:

set(analysis_subset[analysis_subset.parent_only==1].RefID.values)

Out[61]:

In [62]:

set(analysis_subset[(analysis_subset.parent_only==0) &
               (analysis_subset.child_only==0) &
               (analysis_subset.multi_component==0)].RefID.values)

Out[62]:

Total arm sample sizes

In [63]:

analysis_subset.loc[analysis_subset.child_only==1, 'baseline_n'].sum()

Out[63]:

190.0

In [64]:

analysis_subset.loc[analysis_subset.parent_only==1, 'baseline_n'].sum()

Out[64]:

1540.0

In [65]:

analysis_subset.loc[analysis_subset.multi_component==1, 'baseline_n'].sum()

Out[65]:

929.0

In [66]:

analysis_subset.loc[(analysis_subset.multi_component==0) & (analysis_subset.parent_only==0) & (analysis_subset.child_only==0), 'baseline_n'].sum()

Out[66]:

1348.0

Meta-analysis¶

Number of studies in analysis subset

In [67]:

unique_studies = analysis_subset.RefID.unique().tolist()
len(unique_studies)

Out[67]:

In [68]:

analysis_subset.RefID.unique()

Out[68]:

array([ 103,  371,  441,  539,  899, 1236, 1245, 1511, 1585, 1875, 2092,
       2219, 3211, 3225, 3399, 3495, 3687, 3716, 3766, 3915, 3960, 7109,
       7723, 8192, 8231, 8289, 8292, 8401])

In [69]:

analysis_subset.groupby(['RefID', 'measure_instrument'])['measure_instrument'].count().unstack().fillna(0)

Out[69]:

measure_instrument	Child Behavior Checklist, Externalizing (T Score)	Eyberg Child Behaviour Inventory, Intensity Subscale	Eyberg Child Behaviour Inventory, Problem Subscale
RefID
103	0	2	2
371	0	2	2
441	0	2	2
539	2	0	0
899	3	3	3
1236	0	6	6
1245	2	0	0
1511	3	0	0
1585	0	2	2
1875	3	0	0
2092	0	3	0
2219	0	3	3
3211	0	2	2
3225	0	2	2
3399	2	0	0
3495	0	4	0
3687	0	6	6
3716	0	2	2
3766	0	9	0
3915	0	2	2
3960	0	0	2
7109	0	2	2
7723	2	0	0
8192	0	2	0
8231	0	4	4
8289	0	2	2
8292	2	0	0
8401	0	2	2

Study counts by

In [70]:

data_by_refid = analysis_subset.groupby('RefID')
data_by_refid.apply(lambda x: (x[['child_component', 'parent_component', 'multi_component']].sum()>1)).sum()

Out[70]:

child_component      9
parent_component    21
multi_component      8
dtype: int64

In [71]:

data_by_refid.apply(lambda x: (x[['child_component', 'parent_component', 'multi_component']].sum(1)==0).sum()>0).astype(int)

Out[71]:

RefID
103      0
371      1
441      1
539      1
899      1
1236     1
1245     0
1511     1
1585     1
1875     1
2092     1
2219     1
3211     1
3225     1
3399     1
3495     1
3687     1
3716     1
3766     1
3915     1
3960     0
7109     1
7723     1
8192     1
8231     1
8289     0
8292     1
8401     1
dtype: int64

In [72]:

data_by_refid.apply(lambda x: x.age_cat.unique()[0]).value_counts()

Out[72]:

1    16
0    10
2     2
dtype: int64

We are restricting the analysis to the 4 most prevalent measure instruments in the database.

In [73]:

unique_measures = analysis_subset.measure_instrument.unique().tolist()
k = len(unique_measures)
unique_measures

Out[73]:

['Eyberg Child Behaviour Inventory, Intensity Subscale',
 'Eyberg Child Behaviour Inventory, Problem Subscale',
 'Child Behavior Checklist, Externalizing (T Score)']

Three intervention components were coded:

child_component
parent_component
multi_component

In [74]:

p_male, age_cat, intvn = analysis_subset[['p_male', 'age_cat', 'intvn']].values.T
child_only, parent_only, multi_component = analysis_subset[['child_only', 'parent_only', 
                                                                       'multi_component']].values.T

change_n, change_mean, change_sd = analysis_subset[['eot_diff_n', 'eot_diff_mean', 'eot_diff_sd']].values.T

In [75]:

school = (analysis_subset.age_cat.values==0).astype(int)
pre_k = (analysis_subset.age_cat.values==1).astype(int)
teen = (analysis_subset.age_cat.values==2).astype(int)

The response variable is a multivariate normal of dimension k=3, for each of the measure instruments:

Eyberg Child Behaviour Inventory, Intensity Subscale
Eyberg Child Behaviour Inventory, Problem Subscale
Child Behavior Checklist, Externalizing (T Score)

$$\left({ \begin{array}{c} {m_1}\\ {m_2}\\ {m_3} \end{array} }\right)_i \sim\text{MVN}(\mathbf{\mu},\Sigma)$$

Means for each study are a draw from a multivariate normal.

In [76]:

wishart = False

mu = pm.Normal('mu', 0, 0.001, value=[0]*k)

if wishart:
    T = pm.Wishart('T', k+1, np.eye(k), value=np.eye(k))
    
    m = [pm.MvNormal('m_{}'.format(i), mu, T, value=[0]*k) for i in range(len(unique_studies))]
else:
    sigmas = pm.Uniform('sigmas', 0, 100, value=[10]*k)
    rhos = pm.Uniform('rhos', -1, 1, value=[0]*int(comb(k, 2)))

    Sigma = pm.Lambda('Sigma', lambda s=sigmas, r=rhos: np.array([[s[0]**2, s[0]*s[1]*r[0], s[0]*s[2]*r[1]],
                                                                 [s[0]*s[1]*r[0], s[1]**2, s[1]*s[2]*r[2]],
                                                                 [s[0]*s[2]*r[1], s[1]*s[2]*r[2], s[2]**2]]))
    
    m = [pm.MvNormalCov('m_{}'.format(i), mu, Sigma, value=[0]*k) for i in range(len(unique_studies))]

Unique intervention labels for each component; we will use these for component random effects.

In [77]:

unique_child_intvn = np.unique(intvn[child_only.astype(bool)]).tolist()
unique_parent_intvn = np.unique(intvn[parent_only.astype(bool)]).tolist()
unique_multi_intvn = np.unique(intvn[multi_component.astype(bool)]).tolist()

In [78]:

# Indices to random effect labels
child_component_index = [unique_child_intvn.index(x) for x in intvn[child_only.astype(bool)]]
parent_component_index = [unique_parent_intvn.index(x) for x in intvn[parent_only.astype(bool)]]
multi_component_index = [unique_multi_intvn.index(x) for x in intvn[multi_component.astype(bool)]]

Treatment component random effects

$$X_i = \left[{ \begin{array}{c} {x_c}\\ {x_p}\\ {x_m}\\ \end{array} }\right]_i$$$$\begin{aligned} \beta_j^{(c)} &\sim N(\mu_{\beta}^{(c)},\tau_{\beta}^{(c)}) \\ \beta_j^{(p)} &\sim N(\mu_{\beta}^{(p)},\tau_{\beta}^{(p)}) \\ \beta_j^{(m)} &\sim N(\mu_{\beta}^{(m)},\tau_{\beta}^{(m)}) \end{aligned}$$

In [79]:

mu_beta = pm.Normal('mu_beta', 0, 0.001, value=[0]*3)
# sig_beta = pm.Uniform('sig_beta', 0, 100, value=1)
# tau_beta = sig_beta ** -2
tau_beta = pm.Gamma('tau_beta', 1, 0.1, value=1)

beta_c = pm.Normal('beta_c', mu_beta[0], tau_beta, value=[0]*len(unique_child_intvn))
beta_p = pm.Normal('beta_p', mu_beta[1], tau_beta, value=[0]*len(unique_parent_intvn))
beta_m = pm.Normal('beta_m', mu_beta[2], tau_beta, value=[0]*len(unique_multi_intvn))

b_c = pm.Lambda('b_c', lambda b=beta_c: 
             np.array([b[unique_child_intvn.index(x)] if child_only[i] else 0 for i,x in enumerate(intvn)]))
b_p = pm.Lambda('b_p', lambda b=beta_p: 
             np.array([b[unique_parent_intvn.index(x)] if parent_only[i] else 0 for i,x in enumerate(intvn)]))
b_m = pm.Lambda('b_m', lambda b=beta_m: 
             np.array([b[unique_multi_intvn.index(x)] if multi_component[i] else 0 for i,x in enumerate(intvn)]))

Calculate the probability of being the best intervention.

In [80]:

best = pm.Lambda('best', lambda b=mu_beta:  (b==b.min()).astype(int))

Interaction of parent and multi-component with pre-k children.

In [81]:

interaction = False

if interaction:
    beta_pk_p = pm.Normal('beta_pk_p', 0, 1e-5, value=0)
    beta_pk_m = pm.Normal('beta_pk_m', 0, 1e-5, value=0)
    b_pk_p = pm.Lambda('b_pk_p', lambda b=beta_pk_p: b * parent_only * pre_k)
    b_pk_m = pm.Lambda('b_pk_m', lambda b=beta_pk_m: b * multi_component * pre_k)

In [82]:

betas = b_c + b_p + b_m 

if interaction:
    betas = betas + b_pk_p + b_pk_m

Covariate effects of age and percent female.

$$\alpha \sim N(0, 1e5)$$

In [83]:

alpha_age = pm.Normal('alpha_age', 0, 1e-5, value=[1,2])

Unique study ID (RefID) and measure ID (measure_instrument) values.

In [84]:

study_id = [unique_studies.index(x) for x in analysis_subset.RefID]
measure_id = [unique_measures.index(x) for x in analysis_subset.measure_instrument]

Calculate expected response (treatment difference) as a function of treatment and covariates.

$$\theta_i = m_{j[i]k} + X_i \beta + \alpha x_{age}$$

In [85]:

baseline_sd = analysis_subset.baseline_sd.values

@pm.deterministic
def theta(m=m, betas=betas, alpha_age=alpha_age):  

    mi = [m[i][j] for i,j in zip(study_id, measure_id)]
    
    age_effect = np.array([alpha_age[a-1] if a else 0 for a in age_cat])
    
    return(mi + baseline_sd*(betas + age_effect))

Expected treatment effect for pre-K undergoing multi-component intervention, measused by Eyberg Child Behaviour Inventory, Intensity Subscale

In [86]:

if wishart:
    baseline = pm.MvNormal('baseline', mu, T, value=[0]*k)
else:
    baseline = pm.MvNormalCov('baseline', mu, Sigma, value=[0]*k)

Calculation of expected differences from baseline for each instrument.

In [87]:

# ECBI intensity subscale
ecbi_intensity_sd = baseline_sd[np.array(measure_id)==0].mean()

prek_intensity_pred = pm.Lambda('prek_intensity_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*(b + a[0]) )
school_intensity_pred = pm.Lambda('school_intensity_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*b )
teen_intensity_pred = pm.Lambda('teen_intensity_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*(b + a[1]) )

intensity_diff = pm.Lambda('intensity_diff', 
                           lambda base=baseline, prek=prek_intensity_pred, school=school_intensity_pred, 
                           teen=teen_intensity_pred: np.array([prek, school, teen])-base[0])
                            

# ECBI problem subscale
ecbi_problem_sd = baseline_sd[np.array(measure_id)==1].mean()

prek_problem_pred = pm.Lambda('prek_problem_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*(b + a[0]) )
school_problem_pred = pm.Lambda('school_problem_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*b )
teen_problem_pred = pm.Lambda('teen_problem_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*(b + a[1]) )

problem_diff = pm.Lambda('problem_diff', 
                           lambda base=baseline, prek=prek_problem_pred, school=school_problem_pred, 
                           teen=teen_problem_pred: np.array([prek, school, teen])-base[1])

# CBC T-score
cbct_sd = baseline_sd[np.array(measure_id)==2].mean()

prek_tscore_pred = pm.Lambda('prek_tscore_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[2] + cbct_sd*(b + a[0]) )
school_tscore_pred = pm.Lambda('school_tscore_pred', 
                            lambda mu=baseline, b=mu_beta: mu[2] + cbct_sd*b )
teen_tscore_pred = pm.Lambda('teen_tscore_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[2] + cbct_sd*(b + a[1]) )

tscore_diff = pm.Lambda('tscore_diff', 
                           lambda base=baseline, prek=prek_tscore_pred, school=school_tscore_pred, 
                           teen=teen_tscore_pred: np.array([prek, school, teen])-base[2])

Finally, the likelihood is just a normal distribution, with the observed standard error of the treatment effect as the standard deviation of the estimates.

$$d_i \sim N(\theta_i, \hat{\sigma}^2)$$

In [88]:

change_se = change_sd/np.sqrt(change_n)

In [89]:

d = pm.Normal('d', theta, change_se**-2, observed=True, value=change_mean)

Posterior predictive samples, if desired.

In [90]:

include_gof_samples = False

if include_gof_samples:
    d_sim = pm.Normal('d_sim', theta, change_se**-2, size=len(change_mean))

Instantiate MCMC model, and use Adaptive Metropolis for some vector-valued variables.

In [91]:

M = pm.MCMC(locals())
M.use_step_method(pm.AdaptiveMetropolis, m)
M.use_step_method(pm.AdaptiveMetropolis, mu_beta)
M.use_step_method(pm.AdaptiveMetropolis, [beta_c, beta_p, beta_m])

Run two chains to allow for convergence evaluation.

In [92]:

M.sample(200000, 190000)

 [-----------------100%-----------------] 200000 of 200000 complete in 715.5 sec

In [93]:

M.sample(200000, 190000)

 [-----------------100%-----------------] 200000 of 200000 complete in 763.4 sec

Summary of estimates of intervention components

In [94]:

if wishart:
    pm.Matplot.plot(T)

In [95]:

pm.Matplot.plot(mu_beta)

Plotting mu_beta_0
Plotting mu_beta_1
Plotting mu_beta_2

In [97]:

pm.Matplot.summary_plot([mu_beta], custom_labels=['Child component', 'Parent component', 
                                                              'Multi-component'], chain=0, xlab="Effect size (standard deviations)")

In [98]:

pm.Matplot.summary_plot([mu_beta], custom_labels=['Child component', 'Parent component', 
                                                              'Multi-component'])

In [99]:

mu_beta.summary()

mu_beta:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-1.008           0.281            0.014            [-1.562 -0.452]
	-1.219           0.163            0.007            [-1.535 -0.909]
	-1.278           0.193            0.009            [-1.663 -0.911]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-1.553           -1.192          -1.013         -0.83         -0.439
	-1.546           -1.328          -1.219         -1.108        -0.914
	-1.635           -1.417          -1.278         -1.155        -0.876

In [100]:

mu_trace = M.mu_beta.trace()

In [101]:

multi_child = mu_trace[:, 2] - mu_trace[:, 0]
multi_parent = mu_trace[:, 2] - mu_trace[:, 1]
parent_child = mu_trace[:, 1] - mu_trace[:, 0]

In [102]:

'Multi-component vs child-only: mean={0:.2f}, 95% HPD=({1:.2}, {2:.2})'.format(multi_child.mean(), 
                                                                               *pm.utils.hpd(multi_child, 0.05))

Out[102]:

'Multi-component vs child-only: mean=-0.29, 95% HPD=(-0.87, 0.42)'

In [103]:

'Multi-component vs parent-only: mean={0:.2f}, 95% HPD=({1:.2}, {2:.2})'.format(multi_parent.mean(), 
                                                                               *pm.utils.hpd(multi_parent, 0.05))

Out[103]:

'Multi-component vs parent-only: mean=-0.06, 95% HPD=(-0.56, 0.41)'

In [104]:

'Parent vs child-only: mean={0:.2f}, 95% HPD=({1:.2}, {2:.2})'.format(parent_child.mean(), 
                                                                               *pm.utils.hpd(parent_child, 0.05))

Out[104]:

'Parent vs child-only: mean=-0.23, 95% HPD=(-0.82, 0.39)'

In [105]:

pm.Matplot.plot(mu_beta)

Plotting mu_beta_0
Plotting mu_beta_1
Plotting mu_beta_2

In [106]:

if interaction:
    pm.Matplot.summary_plot([beta_pk_m, beta_pk_p])

Difference means by measure instrument.

In [107]:

plt.figure(figsize=(24,4))
pm.Matplot.summary_plot([mu], custom_labels=unique_measures)

In [108]:

pm.Matplot.plot(mu)

Plotting mu_0
Plotting mu_1
Plotting mu_2

In [109]:

if not wishart:
    plt.figure(figsize=(24,4))
    pm.Matplot.summary_plot([sigmas], custom_labels=unique_measures)

In [110]:

sigmas.summary()

sigmas:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.074            0.013            0.001            [ 0.048  0.101]
	0.141            0.035            0.003            [ 0.086  0.203]
	0.033            0.023            0.002            [ 0.008  0.07 ]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	0.049            0.065           0.073          0.081         0.102
	0.092            0.112           0.136          0.169         0.213
	0.009            0.011           0.029          0.052         0.076

In [111]:

pm.Matplot.plot(sigmas)

Plotting sigmas_0
Plotting sigmas_1
Plotting sigmas_2

In [112]:

if not wishart:
    pm.Matplot.summary_plot([rhos], custom_labels=['rho12', 'rho13', 'rho23'])

Age effects for pre-k (top) and teen (bottom) groups, relative to pre-teen.

In [113]:

pm.Matplot.summary_plot([alpha_age], custom_labels=['pre-K', 'teen'])

In [114]:

alpha_age.summary()

alpha_age:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-0.459           0.071            0.003            [-0.591 -0.319]
	-0.019           0.222            0.012            [-0.45   0.407]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-0.598           -0.508          -0.46          -0.409        -0.323
	-0.473           -0.167          -0.011         0.136         0.389

In [115]:

pm.Matplot.plot(alpha_age)

Plotting alpha_age_0
Plotting alpha_age_1

Outcome Plots¶

In [116]:

traces = [[school_intensity_pred, prek_intensity_pred, teen_intensity_pred],
          [school_problem_pred, prek_problem_pred, teen_problem_pred],
          [school_tscore_pred, prek_tscore_pred, teen_tscore_pred]]

In [117]:

sb.set(style="white", palette="hot")
sb.despine(left=True)

colors = '#7fc97f','#beaed4','#fdc086','#386cb0'
#colors = '#a6611a','#dfc27d','#80cdc1','#018571'
# colors = ('#fef0d9','#fdcc8a','#fc8d59','#d7301f')[::-1]
# colors = sb.cubehelix_palette(4, start=2, rot=0, dark=.25, light=.75, reverse=False)

titles = ['Pre-K Children', 'School Children', 'Teenage Children']

for i,measure in enumerate(unique_measures):
    
    f, axes = plt.subplots(1, 3, figsize=(16, 6), sharex=True, sharey=True)
    measure_traces = traces[i]
    
    for j, trace in enumerate(np.array(measure_traces)[[1,0,2]]):
        
        x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
        
        c1, p1, m1 = trace.trace().T
        
        g = sb.distplot(x + c1, color=colors[0], ax=axes[j])
        sb.distplot(x + p1, color=colors[1], ax=axes[j])
        sb.distplot(x + m1, color=colors[2], ax=axes[j])
        if j:
            age_effect = alpha_age.trace()[:, j-1]
        else:
            age_effect = 0
        sb.distplot(x + baseline.trace()[:, i] + age_effect, color=colors[3], ax=axes[j]);
        g.set_title(titles[j])

        

        if not j:
            g.set_ylabel('Frequency')
            g.legend(g.lines, ['Child-only', 'Parent-only', 'Multi-component', 'Control/TAU'], loc='upper left')
        if j==1:
            g.set_xlabel(measure)

<matplotlib.figure.Figure at 0x11d8d1ba8>

Generate threshold proportions

In [118]:

thresholds = [127, # ECBI, intensity
              11,  # ECBI, problem
              60]  # CBC t-score

age_labels = ['school', 'pre-k', 'teen']

estimates = []

for i,measure in enumerate(unique_measures):
    
    measure_traces = traces[i]
    cutoff = thresholds[i]
    
    for j, trace in enumerate(measure_traces):
        
        x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
        
        c1, p1, m1 = trace.trace().T
        
        _child_only = ((x + c1) > cutoff).mean().round(2)
        _parent_only = ((x + p1) > cutoff).mean().round(2)
        _multi_component = ((x + m1) > cutoff).mean().round(2)
        if j:
            age_effect = alpha_age.trace()[:, j-1]
        else:
            age_effect = 0
        _control = ((x + baseline.trace()[:, i] + age_effect) > cutoff).mean().round(2)
        
        print('\n{0}, {1}'.format(measure, age_labels[j]))
        print('Child-only: {0}\nParent-only: {1}\nMulti-component: {2}\nControl/TAU: {3}'.format(_child_only, 
                                                                                                 _parent_only,
                                                                                                 _multi_component,
                                                                                                 _control))
        
        estimates.append((measure, age_labels[j], _child_only, _parent_only, _multi_component, _control))
        

Eyberg Child Behaviour Inventory, Intensity Subscale, school
Child-only: 0.65
Parent-only: 0.47
Multi-component: 0.42
Control/TAU: 0.95

Eyberg Child Behaviour Inventory, Intensity Subscale, pre-k
Child-only: 0.32
Parent-only: 0.16
Multi-component: 0.14
Control/TAU: 0.95

Eyberg Child Behaviour Inventory, Intensity Subscale, teen
Child-only: 0.64
Parent-only: 0.47
Multi-component: 0.43
Control/TAU: 0.95

Eyberg Child Behaviour Inventory, Problem Subscale, school
Child-only: 0.82
Parent-only: 0.79
Multi-component: 0.75
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, pre-k
Child-only: 0.63
Parent-only: 0.43
Multi-component: 0.37
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, teen
Child-only: 0.82
Parent-only: 0.74
Multi-component: 0.73
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), school
Child-only: 0.55
Parent-only: 0.35
Multi-component: 0.33
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), pre-k
Child-only: 0.27
Parent-only: 0.19
Multi-component: 0.16
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), teen
Child-only: 0.56
Parent-only: 0.39
Multi-component: 0.34
Control/TAU: 1.0

In [119]:

thresholds = [127, # ECBI, intensity
              11,  # ECBI, problem
              60]  # CBC t-score

age_labels = ['school', 'pre-k', 'teen']

estimates = []

for i,measure in enumerate(unique_measures):
    
    measure_traces = traces[i]
    cutoff = thresholds[i]
    
    for j, trace in enumerate(measure_traces):
        
        x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
        
        c1, p1, m1 = trace.trace().T
        
        _child_only = ((x + c1) > cutoff).mean().round(2)
        _parent_only = ((x + p1) > cutoff).mean().round(2)
        _multi_component = ((x + m1) > cutoff).mean().round(2)
        if j:
            age_effect = alpha_age.trace()[:, j-1]
        else:
            age_effect = 0
        _control = ((x + baseline.trace()[:, i] + age_effect) > cutoff).mean().round(2)
        
        print('\n{0}, {1}'.format(measure, age_labels[j]))
        print('Child-only: {0}\nParent-only: {1}\nMulti-component: {2}\nControl/TAU: {3}'.format(_child_only, 
                                                                                                 _parent_only,
                                                                                                 _multi_component,
                                                                                                 _control))
        
        estimates.append((measure, age_labels[j], _child_only, _parent_only, _multi_component, _control))
        

Eyberg Child Behaviour Inventory, Intensity Subscale, school
Child-only: 0.65
Parent-only: 0.47
Multi-component: 0.43
Control/TAU: 0.95

Eyberg Child Behaviour Inventory, Intensity Subscale, pre-k
Child-only: 0.32
Parent-only: 0.16
Multi-component: 0.14
Control/TAU: 0.95

Eyberg Child Behaviour Inventory, Intensity Subscale, teen
Child-only: 0.64
Parent-only: 0.47
Multi-component: 0.42
Control/TAU: 0.95

Eyberg Child Behaviour Inventory, Problem Subscale, school
Child-only: 0.83
Parent-only: 0.79
Multi-component: 0.76
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, pre-k
Child-only: 0.63
Parent-only: 0.43
Multi-component: 0.38
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, teen
Child-only: 0.82
Parent-only: 0.75
Multi-component: 0.73
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), school
Child-only: 0.56
Parent-only: 0.36
Multi-component: 0.33
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), pre-k
Child-only: 0.27
Parent-only: 0.18
Multi-component: 0.16
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), teen
Child-only: 0.56
Parent-only: 0.38
Multi-component: 0.34
Control/TAU: 1.0

In [120]:

estimates

Out[120]:

[('Eyberg Child Behaviour Inventory, Intensity Subscale',
  'school',
  0.65000000000000002,
  0.46999999999999997,
  0.42999999999999999,
  0.94999999999999996),
 ('Eyberg Child Behaviour Inventory, Intensity Subscale',
  'pre-k',
  0.32000000000000001,
  0.16,
  0.14000000000000001,
  0.94999999999999996),
 ('Eyberg Child Behaviour Inventory, Intensity Subscale',
  'teen',
  0.64000000000000001,
  0.46999999999999997,
  0.41999999999999998,
  0.94999999999999996),
 ('Eyberg Child Behaviour Inventory, Problem Subscale',
  'school',
  0.82999999999999996,
  0.79000000000000004,
  0.76000000000000001,
  1.0),
 ('Eyberg Child Behaviour Inventory, Problem Subscale',
  'pre-k',
  0.63,
  0.42999999999999999,
  0.38,
  1.0),
 ('Eyberg Child Behaviour Inventory, Problem Subscale',
  'teen',
  0.81999999999999995,
  0.75,
  0.72999999999999998,
  1.0),
 ('Child Behavior Checklist, Externalizing (T Score)',
  'school',
  0.56000000000000005,
  0.35999999999999999,
  0.33000000000000002,
  1.0),
 ('Child Behavior Checklist, Externalizing (T Score)',
  'pre-k',
  0.27000000000000002,
  0.17999999999999999,
  0.16,
  1.0),
 ('Child Behavior Checklist, Externalizing (T Score)',
  'teen',
  0.56000000000000005,
  0.38,
  0.34000000000000002,
  1.0)]

In [121]:

estimates_filename = 'estimates%s.csv' % (bool(~pcit_as_multicomponent)*'_pcit')
np.savetxt(estimates_filename, np.array([e[2:] for e in estimates]), delimiter='\t', fmt='%1.2f')

In [122]:

pd.DataFrame(estimates, columns=['Instrument', 'Age group', 'Child-only', 
                                 'Parent-only', 'Multi-component', 'TAU/Control']).to_csv('outcome_proportions.csv')

Summary statistics for TAU/control

In [123]:

baseline.summary()

baseline:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.106            0.078            0.002            [-0.042  0.266]
	0.065            0.149            0.003            [-0.235  0.363]
	-0.101           0.045            0.002            [-0.194  0.008]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-0.049           0.056           0.106          0.156         0.26
	-0.233           -0.028          0.066          0.158         0.365
	-0.198           -0.119          -0.106         -0.088        0.005

Summary statistics for intensity subscale

In [124]:

prek_intensity_pred.summary()
school_intensity_pred.summary()
teen_intensity_pred.summary()

prek_intensity_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-38.798          7.654            0.361          [-53.423 -23.57 ]
	-44.408          4.226            0.153          [-52.811 -36.223]
	-45.972          5.153            0.233          [-55.762 -35.684]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-53.404          -43.773         -38.972        -33.768       -23.516
	-52.845          -47.118         -44.368        -41.564       -36.24
	-55.535          -49.562         -46.14         -42.636       -35.276
	

school_intensity_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-26.622          7.46             0.358          [-41.301 -11.872]
	-32.232          4.316            0.195          [-40.641 -23.955]
	-33.796          5.125            0.243          [-44.096 -24.034]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-41.045          -31.509         -26.788        -21.873       -11.468
	-40.873          -35.115         -32.206        -29.289       -24.124
	-43.265          -37.468         -33.792        -30.525       -23.072
	

teen_intensity_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-27.139          8.64             0.374          [-45.08  -11.171]
	-32.749          7.31             0.372          [-47.361 -18.752]
	-34.313          6.714            0.31           [-47.575 -21.585]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-44.859          -32.913         -26.766        -21.232       -10.858
	-47.449          -37.69          -32.592        -27.607       -18.809
	-47.567          -38.791         -34.237        -29.818       -21.574

Summary statistics for problem subscale

In [125]:

prek_problem_pred.summary()
school_problem_pred.summary()
teen_problem_pred.summary()

prek_problem_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-8.918           1.773            0.083          [-12.294  -5.364]
	-10.213          0.989            0.035          [-12.219  -8.329]
	-10.574          1.195            0.054          [-12.787  -8.105]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-12.314          -10.077         -8.952         -7.761        -5.38
	-12.193          -10.844         -10.202        -9.551        -8.292
	-12.784          -11.409         -10.616        -9.801        -8.097
	

school_problem_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-6.106           1.728            0.082            [-9.419 -2.592]
	-7.402           1.008            0.045            [-9.397 -5.478]
	-7.763           1.188            0.056            [-9.951 -5.296]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-9.428           -7.239          -6.149         -4.999        -2.595
	-9.417           -8.06           -7.393         -6.71         -5.496
	-9.972           -8.609          -7.776         -6.993        -5.31
	

teen_problem_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-6.226           2.001            0.086          [-10.335  -2.464]
	-7.521           1.697            0.086          [-10.779  -4.142]
	-7.882           1.556            0.072          [-10.992  -4.964]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-10.347          -7.562          -6.152         -4.864        -2.47
	-10.946          -8.665          -7.49          -6.325        -4.279
	-10.968          -8.909          -7.856         -6.828        -4.93

Summary statistics for t scores

In [126]:

prek_tscore_pred.summary()
school_tscore_pred.summary()
teen_tscore_pred.summary()

prek_tscore_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-12.034          2.347            0.111          [-16.58   -7.423]
	-13.755          1.299            0.047          [-16.319 -11.239]
	-14.235          1.581            0.071          [-17.27  -11.095]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-16.512          -13.552         -12.086        -10.499       -7.343
	-16.346          -14.589         -13.745        -12.876       -11.256
	-17.165          -15.336         -14.288        -13.213       -10.97
	

school_tscore_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-8.299           2.288            0.11           [-12.828  -3.793]
	-10.02           1.326            0.06           [-12.634  -7.51 ]
	-10.5            1.572            0.074          [-13.661  -7.488]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-12.709          -9.797          -8.354         -6.847        -3.663
	-12.658          -10.905         -10.014        -9.114        -7.521
	-13.408          -11.635         -10.502        -9.495        -7.22
	

teen_tscore_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-8.458           2.649            0.114          [-13.921  -3.54 ]
	-10.179          2.243            0.114          [-14.44   -5.657]
	-10.659          2.058            0.095          [-14.742  -6.796]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-13.894          -10.229         -8.343         -6.654        -3.498
	-14.692          -11.692         -10.134        -8.597        -5.887
	-14.715          -12.04          -10.636        -9.275        -6.738

Summary statistics for differences.¶

Each summary corresponds to teh differences between baseline and age x component categories for each outcome. The order is:

child-only, pre-k
child-only, school
child-only, teen
parent-only, pre-k
parent-only, school
parent-only, teen
multi, pre-k
multi, school
multi, teen

In [127]:

intensity_diff.summary()
problem_diff.summary()
tscore_diff.summary()

intensity_diff:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-38.904          7.653            0.361          [-53.491 -23.645]
	-44.514          4.228            0.153          [-52.948 -36.379]
	-46.078          5.152            0.233          [-55.871 -35.808]
	-26.728          7.459            0.358          [-41.425 -11.998]
	-32.338          4.317            0.195          [-40.723 -24.11 ]
	-33.902          5.123            0.243          [-44.127 -24.158]
	-27.245          8.639            0.373          [-45.269 -11.412]
	-32.855          7.31             0.372          [-47.463 -18.871]
	-34.419          6.712            0.31           [-47.746 -21.821]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-53.471          -43.851         -39.075        -33.891       -23.623
	-52.947          -47.222         -44.477        -41.663       -36.374
	-55.632          -49.652         -46.243        -42.755       -35.423
	-41.193          -31.617         -26.883        -22.009       -11.651
	-41.008          -35.237         -32.328        -29.394       -24.251
	-43.362          -37.581         -33.897        -30.634       -23.235
	-44.983          -33.003         -26.874        -21.35        -10.999
	-47.582          -37.792         -32.694        -27.721       -18.911
	-47.679          -38.914         -34.335        -29.922       -21.638
	

problem_diff:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-8.983           1.767            0.083          [-12.351  -5.46 ]
	-10.278          0.976            0.035          [-12.226  -8.4  ]
	-10.64           1.19             0.054          [-12.901  -8.268]
	-6.172           1.722            0.083            [-9.565 -2.77 ]
	-7.467           0.997            0.045            [-9.403 -5.567]
	-7.828           1.183            0.056          [-10.189  -5.578]
	-6.291           1.995            0.086          [-10.453  -2.635]
	-7.586           1.688            0.086          [-10.959  -4.357]
	-7.947           1.55             0.072          [-11.025  -5.039]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-12.346          -10.125         -9.023         -7.826        -5.455
	-12.226          -10.904         -10.27         -9.62         -8.399
	-12.846          -11.465         -10.678        -9.872        -8.179
	-9.512           -7.3            -6.207         -5.082        -2.69
	-9.469           -8.136          -7.465         -6.787        -5.6
	-10.012          -8.678          -7.827         -7.074        -5.365
	-10.387          -7.62           -6.205         -4.93         -2.54
	-10.987          -8.726          -7.549         -6.401        -4.367
	-11.009          -8.985          -7.928         -6.909        -4.996
	

tscore_diff:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-11.933          2.347            0.111          [-16.407  -7.253]
	-13.654          1.297            0.047          [-16.241 -11.159]
	-14.134          1.58             0.071          [-17.137 -10.984]
	-8.198           2.288            0.11           [-12.706  -3.68 ]
	-9.919           1.324            0.06           [-12.491  -7.395]
	-10.399          1.572            0.075          [-13.535  -7.41 ]
	-8.357           2.65             0.115          [-13.886  -3.5  ]
	-10.078          2.242            0.114          [-14.558  -5.788]
	-10.557          2.059            0.095          [-14.645  -6.693]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-16.401          -13.451         -11.986        -10.396       -7.246
	-16.241          -14.485         -13.643        -12.78        -11.157
	-17.064          -15.23          -14.184        -13.114       -10.866
	-12.635          -9.698          -8.246         -6.751        -3.574
	-12.579          -10.808         -9.916         -9.016        -7.439
	-13.301          -11.527         -10.397        -9.397        -7.127
	-13.798          -10.123         -8.243         -6.549        -3.374
	-14.595          -11.592         -10.028        -8.503        -5.801
	-14.625          -11.936         -10.532        -9.178        -6.637

	child_component	parent_component	family_component
125	1	0	0
126	1	0	0
128	1	0	0
129	1	0	0
162	1	0	0
188	1	0	0
194	1	0	0
195	1	0	0
234	1	0	0
236	1	0	0
238	1	0	0

	child_component	parent_component	family_component
125	1	0	0
126	1	0	0
128	1	0	0
129	1	0	0
162	1	0	0
188	1	0	0
194	1	0	0
195	1	0	0
234	1	0	0
236	1	0	0
238	1	0	0

	child_component	parent_component	family_component
125	1	0	0
126	1	0	0
128	1	0	0
129	1	0	0
162	1	0	0
188	1	0	0
194	1	0	0
195	1	0	0
234	1	0	0
236	1	0	0
238	1	0	0