In [1]:

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sb
import numpy as np
import pandas as pd
import pymc as pm
from scipy.misc import comb

In [2]:

np.random.seed(42)

Data import and cleaning¶

In [3]:

study_char = pd.read_excel("DBD Data for Meta Analyses.xlsx", "Study Characteristics", 
                           index_col='RefID', na_values=['-', 'NR'])
outcomes = pd.read_excel("DBD Data for Meta Analyses.xlsx", "Outcomes", 
                           na_values=['ND', 'NR'])
#demographics = pd.read_excel("DBD Data for Meta Analyses.xlsx", "Pt Demographics", na_values=['-', 'NR'])

In [4]:

study_char.tail()

Out[4]:

	Pubmed ID	Rel Incl1	Rel Incl2	Rel Incl3	Rel Excl	Family	NCT	Title	Year	Author	...	Parent Component	Screened (N)	Randomized (N)	Analyzed (N)	Proportion Male (%)	Direction of Effect	DOE_Comments	Other comments	Drugs(s)	-
RefID
8292	na	NaN	NaN	NaN	NaN	NaN	NaN	Reciprocal skills training in the treatment of...	2000	Barrett P, C. Turner, S. Rombouts and A. Duffy	...	Both	NaN	57	57	79	NaN	NaN	RST (hospital), RST (clinic), WLC	NaN	NaN
8320	na	1867	NaN	NaN	NaN	Z-C	NaN	Differential responses of children with varyin...	2008	Wolff JC, R. W. Greene and T. H. Ollendick	...	Both	NaN	50	47	68	NaN	NaN	NaN	NaN	NaN
8365	na	NaN	NaN	NaN	NaN	NaN	NaN	Treatment of depressed mothers with disruptive...	2000	Sanders MR and M. McFarland	...	Both	167	47	47	74	NaN	NaN	NaN	NaN	NaN
8385	na	NaN	NaN	NaN	NaN	NaN	NaN	Reducing Adolescent Oppositional and Conduct D...	2011	Sells SP, K. W. Early and T. E. Smith	...	Both	NaN	38	NaN	57	NaN	NaN	NaN	NaN	NaN
8401	22820873	NaN	NaN	NaN	NaN	NaN	NaN	"Tuning into Kids": reducing young children's ...	2013	Havighurst SS, K. R. Wilson, A. E. Harley, C. ...	...	Both	92	63	54	78	NaN	NaN	NaN	NaN	NaN

5 rows × 45 columns

Data cleaning

In [5]:

# Cast outcomes variables to floats
for col in ('Last FU Mean', 'Last FU SD',):
    outcomes[col] = outcomes[col].astype(float)

In [6]:

# Recode age category
study_char['age_cat'] = study_char.AgeCat.replace({'PRE-K':1, 'SCHOOL':0, 'TEEN':2})

In [7]:

# Fix data label typo
outcomes['Measure Instrument'] = outcomes['Measure Instrument'].replace({'Eyberg Child Behaviour Inventory, problem Subscale': 
                                        'Eyberg Child Behaviour Inventory, Problem Subscale'})
outcomes.Units = outcomes.Units.replace({'scale': 'Scale'})

In [8]:

# Parse followup times and convert to months
split_fut = outcomes.loc[outcomes['Last FU Time'].notnull(), 'Last FU Time'].apply(lambda x: str(x).split(' ')[:2])
fut_months = [float(time)/52.*(unit=='weeks') or float(time) for time, unit in split_fut]
outcomes.loc[outcomes['Last FU Time'].notnull(), 'Last FU Time'] = fut_months

We are assumung all CBC Externalizing values over 50 are T-scores, and those under 50 are raw scores. This recodes those observations.

In [9]:

cbce_ind = outcomes['Measure Instrument'].apply(lambda x: x.startswith('Child Behavior Checklist, Externalizing'))
under_50 = outcomes['BL Mean']<50
outcomes.loc[cbce_ind & (under_50^True), 'Measure Instrument'] = 'Child Behavior Checklist, Externalizing (T Score)'
outcomes.loc[cbce_ind & under_50, 'Measure Instrument'] = 'Child Behavior Checklist, Externalizing'

Recode measure instrument variables

In [10]:

instrument = []
subtype = []
units = []

for i,row in outcomes.iterrows():
    separator = row['Measure Instrument'].find(',')
    if separator == -1:
        separator = row['Measure Instrument'].find('-')
    instrument.append(row['Measure Instrument'][:separator])
    s = row['Measure Instrument'][separator+2:]
    paren = s.find('(')
    if paren > -1:
        subtype.append(s[:paren-1])
        units.append(s[paren+1:-1])
    else:
        subtype.append(s)
        if s.endswith('scale'):
            units.append('Scale')
        else:
            units.append('Score')
            
new_cols = pd.DataFrame({'instrument': instrument, 'subtype': subtype, 
                         'units': units}, index=outcomes.index)

In [11]:

outcomes['Measure Instrument'].value_counts()

Out[11]:

Eyberg Child Behaviour Inventory, Intensity Subscale            79
Eyberg Child Behaviour Inventory, Problem Subscale              57
Child Behavior Checklist, Externalizing (T Score)               44
Child Behavior Checklist, Externalizing                         11
Strengths and Difficulties Questionnaire, Total Score           10
Strengths and Difficulties Questionnaire- Conduct Problems Scale    10
Eyberg Child Behaviour Inventory, Intensity Subscale (T Score)    10
Child Behavior Checklist, Conduct Problems (T Score)             6
Child Behavior Checklist, Aggression                             4
Eyberg Child Behaviour Inventory, Problem Subscale (T Score)     4
Strengths and Difficulties Questionnaire- Emotional Symptoms Scale     4
DSM IV                                                           2
Child Behavior Checklist, Rulebreaking                           2
Strengths and Difficulties Questionnaire- Impact Score           2
Child Behavior Checklist, Conduct Problems                       2
Strengths and Difficulties Questionnaire- Hyperactivity Scale     2
Child Behavior Checklist, Delinquent Subscale                    2
Strengths and Difficulties Questionnaire, Percent Below Clinical Cutoff     2
dtype: int64

In [12]:

new_cols.head()

Out[12]:

	instrument	subtype	units
0	Eyberg Child Behaviour Inventory	Intensity Subscale	T Score
1	Eyberg Child Behaviour Inventory	Intensity Subscale	T Score
2	Eyberg Child Behaviour Inventory	Problem Subscale	T Score
3	Eyberg Child Behaviour Inventory	Problem Subscale	T Score
4	Child Behavior Checklist	Externalizing	Score

In [13]:

# Append new columns
outcomes = outcomes.join(new_cols)

In [14]:

outcomes.intvn.value_counts()

Out[14]:

wlc              47
tau              39
iypt             32
pcit             16
iyptndiyct        8
WLC               7
pppsd             7
PCIT              5
TAU               5
mst               5
modularndn        4
ppcp              4
iyct              4
it                4
pppo              4
pppe              3
spokes            3
pmto              3
pmtndp            3
snap              3
pcitc             3
CT                3
PLL               3
FBT               3
pmtsd             2
pmtnds            2
RST               2
hncte             2
pmtpa             2
pcitabb           2
TIK               2
projndsupport     2
hncstd            2
setpc             2
scip              1
iyptadv           1
cbt               1
pppstd            1
hnc               1
mcfi              1
pstnds            1
modularndcomm     1
sst               1
modularndclin     1
itpt              1
hitkashrut        1
coaching          1
kitkashrut        1
cpp               1
dtype: int64

Data summaries¶

Cross-tabulation of the outcome counts by measure instrument

In [15]:

pd.crosstab(outcomes['instrument'], outcomes['Outcome'])

Out[15]:

Outcome	01 Behavior, disruptive	02 Behavior, aggression	06 Behavior, fighting, destruction, violation	08 Behavior, other
instrument
Child Behavior Checklist	59	4	4	4
DSM I	2	0	0	0
Eyberg Child Behaviour Inventory	148	0	0	2
Strengths and Difficulties Questionnaire	10	0	0	20

Distribution of age categories

In [16]:

study_char.AgeCat.value_counts()

Out[16]:

SCHOOL    44
PRE-K     31
TEEN      18
dtype: int64

Frequencies of various intervention types

In [17]:

study_char['Intervention Type'].value_counts()

Out[17]:

PHARM                                                           20
IY-PT                                                           10
MST                                                              9
PCIT                                                             7
IY-PT + IY-CT                                                    4
BSFT                                                             3
Triple P (enhanced)                                              3
PMTO                                                             3
Triple-P (self-directed)                                         2
PCIT-ABB                                                         2
CPS                                                              2
OTH: Intensive treatment                                         2
UCPP                                                             2
OTH: Modular treatment                                           2
IY-PT (nurse led)                                                2
PT                                                               2
CBT                                                              2
OTH: Family therapy                                              1
IY-CT + IY-PT                                                    1
PMT (perceptive)                                                 1
Triple-P (enhanced)                                              1
IY-PT + ADVANCE                                                  1
PMT (practitioner assisted)                                      1
TIK                                                              1
OTH: Modular treatment (community)                               1
OTH: Project support                                             1
PLL                                                              1
CBFI                                                             1
HNC                                                              1
OTH: Instrumental, emotional support & child management skills     1
IY                                                               1
PONI                                                             1
SNAP Under 12 OutReach Project (enhanced)                        1
Coping Power (cultural adaptation)                               1
Triple-P (online)                                                1
PHARM1 + PHARM2                                                  1
OTH: MTP                                                         1
IY-PT (brief)                                                    1
SET-PC                                                           1
OTH: Booster                                                     1
SCIP (Social cognitive (Dodge's))                                1
SNAP Under 12 \nOutReach Project(ORP)                            1
Parenting Group (SPOKES)                                         1
OTH: Child only treatment                                        1
MF-PEP + TAU                                                     1
IY-PT + IY-CT + IY-TT                                            1
HNC (technology enhanced)                                        1
Family Therapy                                                   1
OTH: FFT                                                         1
SNAP Under 12 OutReach Project                                   1
PCIT (modified)                                                  1
OTH: Modular (nurse administered)                                1
OTH: Parents Plus Children's Program                             1
OTH: Community Parent Education Program                          1
RST                                                              1
Length: 55, dtype: int64

Extract variables of interest and merge tables¶

In [18]:

KQ1 = study_char[study_char.KQ=='KQ1']

In [19]:

study_varnames = ['Year', 'age_cat', 'Geographic setting', 'Age mean (years) ', 'Age SD (years)', 
              'Age min (years)', 'Age max (years)', 'Proportion Male (%)']

study_vars = KQ1[study_varnames].rename(columns={'Geographic setting': 'country', 
                           'Age mean (years) ': 'age_mean', 
                           'Age SD (years)': 'age_sd', 
                           'Age min (years)': 'age_min', 
                           'Age max (years)': 'age_max', 
                           'Proportion Male (%)': 'p_male'})

In [20]:

study_vars.head()

Out[20]:

	Year	age_cat	country	age_mean	age_sd	age_min	age_max	p_male
RefID
23	2013	1	USA	2.80	0.61	2	4	62
100	2013	2	USA	14.60	1.30	11	18	83
103	2013	1	USA	5.67	1.72	3	8	53
141	2013	0	USA	9.90	1.30	8	11	73
156	2013	2	Netherlands	16.00	1.31	12	18	73

In [21]:

study_vars.p_male.hist()

Out[21]:

<matplotlib.axes._subplots.AxesSubplot at 0x1166bbc50>

Proportion missing

In [22]:

study_vars.isnull().mean(0).round(2)

Out[22]:

Year        0.00
age_cat     0.00
country     0.00
age_mean    0.10
age_sd      0.19
age_min     0.06
age_max     0.07
p_male      0.02
dtype: float64

Will assume the mean age for those which are missing is simply the midpoint between minimum and maximum values

In [23]:

est_means = study_vars.apply(lambda x: x.age_min + (x.age_max - x.age_min) / 2, axis=1)[study_vars.age_mean.isnull()]
study_vars.loc[study_vars.age_mean.isnull(), 'age_mean'] = est_means

study_vars.age_mean.isnull().sum()

Out[23]:

In [24]:

outcomes_varnames = ['Ref ID', 'Measure Instrument', 'instrument', 'subtype', 'units', 
                     'intvn', 'cc', 'pc', 'fc',
                     'BL N', 'BL Mean', 'BL SD', 
                     'EOT \nN', 'EOT Mean', 'EOT \nSD', 'Last FU Time', 'Last FU N', 
                     'Last FU Mean', 'Last FU SD', 'CS Group N', 'CS Mean', 'CS SD']

In [25]:

outcomes_vars = outcomes[outcomes_varnames].rename(columns={'Ref ID': 'RefID', 
                                                                       'Measure Instrument': 'measure_instrument',
                                                                       'cc': 'child_component',
                                                                       'pc': 'parent_component',
                                                                       'fc': 'family_component',
                                                                       'oc': 'other_component',
                                                                       'BL N': 'baseline_n',
                                                                       'BL Mean': 'baseline_mean',
                                                                       'BL SD': 'baseline_sd', 
                                                                       'EOT \nN': 'end_treat_n', 
                                                                       'EOT Mean': 'end_treat_mean', 
                                                                       'EOT \nSD': 'end_treat_sd', 
                                                                       'Last FU Time': 'followup_time', 
                                                                       'Last FU N': 'followup_n',
                                                                       'Last FU Mean': 'followup_mean', 
                                                                       'Last FU SD': 'followup_sd', 
                                                                       'CS Group N': 'change_n',
                                                                       'CS Mean': 'change_mean',
                                                                       'CS SD': 'change_sd'})

Recode intervention clasification

In [26]:

outcomes_vars.isnull().sum()

Out[26]:

RefID                   0
measure_instrument      0
instrument              0
subtype                 0
units                   0
intvn                   0
child_component         0
parent_component        0
family_component        0
baseline_n              4
baseline_mean          17
baseline_sd            25
end_treat_n            74
end_treat_mean         67
end_treat_sd           74
followup_time          32
followup_n            103
followup_mean         118
followup_sd           126
change_n              241
change_mean           248
change_sd             253
dtype: int64

In [27]:

control = ((outcomes_vars.child_component^True) & 
           (outcomes_vars.parent_component^True) & 
           (outcomes_vars.family_component^True)).astype(int)
child_only = ((outcomes_vars.child_component) & 
              (outcomes_vars.parent_component^True) & 
              (outcomes_vars.family_component^True)).astype(int)
parent_only = ((outcomes_vars.child_component^True) & 
               (outcomes_vars.parent_component) & 
               (outcomes_vars.family_component^True)).astype(int)
outcomes_vars.ix[child_only.astype(bool), ['child_component', 'parent_component', 'family_component']]

Out[27]:

	child_component	parent_component	family_component
125	1	0	0
126	1	0	0
128	1	0	0
129	1	0	0
162	1	0	0
188	1	0	0
194	1	0	0
195	1	0	0
234	1	0	0
236	1	0	0
238	1	0	0

In [28]:

multi_component = ((parent_only^True) & (child_only^True) & (control^True)).astype(int)

outcomes_vars['child_only'] = child_only
outcomes_vars['parent_only'] = parent_only
outcomes_vars['multi_component'] = multi_component

If requested, change PCIT to parent-only.

In [137]:

pcit_as_multicomponent = True

if not pcit_as_multicomponent:
    outcomes_vars.loc[outcomes_vars.intvn=='pcit', 'parenr_only'] = 1
    outcomes_vars.loc[outcomes_vars.intvn=='pcit', 'multi_component'] = 0

Out[137]:

	RefID	measure_instrument	instrument	subtype	units	intvn	child_component	parent_component	family_component	baseline_n	...	followup_time	followup_n	followup_mean	followup_sd	change_n	change_mean	change_sd	multi_component
40	870	Child Behavior Checklist, Externalizing (T Score)	Child Behavior Checklist	Externalizing	T Score	pcit	1	1	1	11	...	4	11	47.90	6.10	NaN	NaN	NaN	1
42	870	Eyberg Child Behaviour Inventory, Intensity Su...	Eyberg Child Behaviour Inventory	Intensity Subscale	T Score	pcit	1	1	1	11	...	4	11	43.00	4.30	NaN	NaN	NaN	1
44	870	Eyberg Child Behaviour Inventory, Problem Subs...	Eyberg Child Behaviour Inventory	Problem Subscale	T Score	pcit	1	1	1	11	...	4	11	45.60	5.50	NaN	NaN	NaN	1
46	899	Child Behavior Checklist, Externalizing (T Score)	Child Behavior Checklist	Externalizing	T Score	pcit	1	1	1	19	...	24	15	53.33	13.47	NaN	NaN	NaN	1
49	899	Eyberg Child Behaviour Inventory, Intensity Su...	Eyberg Child Behaviour Inventory	Intensity Subscale	Scale	pcit	1	1	1	19	...	24	15	100.93	45.33	NaN	NaN	NaN	1
52	899	Eyberg Child Behaviour Inventory, Problem Subs...	Eyberg Child Behaviour Inventory	Problem Subscale	Scale	pcit	1	1	1	19	...	24	15	14.47	19.10	NaN	NaN	NaN	1
131	2092	Child Behavior Checklist, Externalizing	Child Behavior Checklist	Externalizing	Score	pcit	1	1	1	17	...	6	17	15.24	7.77	NaN	NaN	NaN	1
134	2092	Eyberg Child Behaviour Inventory, Intensity Su...	Eyberg Child Behaviour Inventory	Intensity Subscale	Scale	pcit	1	1	1	17	...	6	17	117.47	31.69	NaN	NaN	NaN	1
170	3687	Eyberg Child Behaviour Inventory, Intensity Su...	Eyberg Child Behaviour Inventory	Intensity Subscale	Scale	pcit	1	1	1	37	...	12	19	126.30	42.10	NaN	NaN	NaN	1
171	3687	Eyberg Child Behaviour Inventory, Intensity Su...	Eyberg Child Behaviour Inventory	Intensity Subscale	Scale	pcit	1	1	1	37	...	12	19	NaN	NaN	NaN	NaN	NaN	1
172	3687	Eyberg Child Behaviour Inventory, Intensity Su...	Eyberg Child Behaviour Inventory	Intensity Subscale	Scale	pcit	1	1	1	37	...	12	19	NaN	NaN	NaN	NaN	NaN	1
176	3687	Eyberg Child Behaviour Inventory, Problem Subs...	Eyberg Child Behaviour Inventory	Problem Subscale	Scale	pcit	1	1	1	37	...	12	19	11.50	9.70	NaN	NaN	NaN	1
177	3687	Eyberg Child Behaviour Inventory, Problem Subs...	Eyberg Child Behaviour Inventory	Problem Subscale	Scale	pcit	1	1	1	37	...	12	19	NaN	NaN	NaN	NaN	NaN	1
178	3687	Eyberg Child Behaviour Inventory, Problem Subs...	Eyberg Child Behaviour Inventory	Problem Subscale	Scale	pcit	1	1	1	37	...	12	19	NaN	NaN	NaN	NaN	NaN	1
201	3915	Eyberg Child Behaviour Inventory, Intensity Su...	Eyberg Child Behaviour Inventory	Intensity Subscale	Scale	pcit	1	1	1	19	...	12	NaN	NaN	NaN	NaN	NaN	NaN	1
203	3915	Eyberg Child Behaviour Inventory, Problem Subs...	Eyberg Child Behaviour Inventory	Problem Subscale	Scale	pcit	1	1	1	19	...	12	NaN	NaN	NaN	NaN	NaN	NaN	1

16 rows × 25 columns

Obtain subset with non-missing EOT data

In [29]:

eot_subset = outcomes_vars[outcomes_vars.end_treat_mean.notnull() & outcomes_vars.end_treat_sd.notnull()].copy()

Calculate EOT difference

In [30]:

eot_subset['eot_diff_mean'] = eot_subset.end_treat_mean - eot_subset.baseline_mean

In [31]:

eot_subset['eot_diff_sd'] = eot_subset.baseline_sd + eot_subset.end_treat_sd

In [32]:

eot_subset['eot_diff_n'] = eot_subset[['baseline_n', 'end_treat_n']].min(1)

Distribution of baseline means among outcome metrics

In [33]:

for instrument in ('Eyberg Child Behaviour Inventory', 
                   'Child Behavior Checklist', 
                   'Strengths and Difficulties Questionnaire'):
    eot_subset[eot_subset.instrument==instrument]['baseline_mean'].hist(by=eot_subset['subtype'], 
                                                                              sharex=True)
    plt.suptitle(instrument);

In [34]:

eot_subset.instrument.value_counts()

Out[34]:

Eyberg Child Behaviour Inventory            116
Child Behavior Checklist                     40
Strengths and Difficulties Questionnaire     22
dtype: int64

In [35]:

eot_subset[eot_subset.RefID==441]

Out[35]:

	RefID	measure_instrument	instrument	subtype	units	intvn	parent_component	baseline_n	...	followup_sd	change_n	change_mean	change_sd	parent_only	eot_diff_mean	eot_diff_sd	eot_diff_n
14	441	Eyberg Child Behaviour Inventory, Intensity Su...	Eyberg Child Behaviour Inventory	Intensity Subscale	Scale	iypt	1	32	...	NaN	NaN	NaN	NaN	1	-31.40	46.80	32
15	441	Eyberg Child Behaviour Inventory, Intensity Su...	Eyberg Child Behaviour Inventory	Intensity Subscale	Scale	wlc	0	20	...	NaN	NaN	NaN	NaN	0	-5.80	49.60	20
16	441	Eyberg Child Behaviour Inventory, Problem Subs...	Eyberg Child Behaviour Inventory	Problem Subscale	Scale	iypt	1	24	...	NaN	NaN	NaN	NaN	1	-9.70	12.02	24
17	441	Eyberg Child Behaviour Inventory, Problem Subs...	Eyberg Child Behaviour Inventory	Problem Subscale	Scale	wlc	0	17	...	NaN	NaN	NaN	NaN	0	-2.88	14.59	17

4 rows × 28 columns

Several studies use multiple instruments and metrics within instruments

In [36]:

eot_subset.groupby(['RefID', 'instrument'])['subtype'].value_counts()

Out[36]:

RefID  instrument                                                  
103    Eyberg Child Behaviour Inventory          Problem Subscale      2
                                                 Intensity Subscale    2
371    Eyberg Child Behaviour Inventory          Problem Subscale      2
                                                 Intensity Subscale    2
441    Eyberg Child Behaviour Inventory          Problem Subscale      2
                                                 Intensity Subscale    2
475    Eyberg Child Behaviour Inventory          Intensity Subscale    2
539    Child Behavior Checklist                  Externalizing         2
                                                 Aggression            2
564    Eyberg Child Behaviour Inventory          Problem Subscale      2
                                                 Intensity Subscale    2
836    Strengths and Difficulties Questionnaire  Total Score           2
899    Child Behavior Checklist                  Externalizing         3
       Eyberg Child Behaviour Inventory          Problem Subscale      3
                                                 Intensity Subscale    3
...
3960   Eyberg Child Behaviour Inventory          Problem Subscale            2
7109   Eyberg Child Behaviour Inventory          Problem Subscale            2
                                                 Intensity Subscale          2
       Strengths and Difficulties Questionnaire  Conduct Problems Scale      2
                                                 Emotional Symptoms Scale    2
7723   Child Behavior Checklist                  Externalizing               2
8192   Eyberg Child Behaviour Inventory          Intensity Subscale          2
8231   Eyberg Child Behaviour Inventory          Problem Subscale            4
                                                 Intensity Subscale          4
8289   Child Behavior Checklist                  Delinquent Subscale         2
       Eyberg Child Behaviour Inventory          Problem Subscale            2
                                                 Intensity Subscale          2
8292   Child Behavior Checklist                  Externalizing               2
8401   Eyberg Child Behaviour Inventory          Problem Subscale            2
                                                 Intensity Subscale          2
Length: 68, dtype: int64

In [37]:

pd.crosstab(eot_subset.instrument, eot_subset.subtype)

Out[37]:

subtype	Aggression	Conduct Problems	Conduct Problems Scale	Delinquent Subscale	Emotional Symptoms Scale	Externalizing	Hyperactivity Scale	Impact Score	Intensity Subscale	Problem Subscale	Total Score
instrument
Child Behavior Checklist	2	6	0	2	0	30	0	0	0	0	0
Eyberg Child Behaviour Inventory	0	0	0	0	0	0	0	0	68	48	0
Strengths and Difficulties Questionnaire	0	0	6	0	4	0	2	2	0	0	8

In [38]:

x = eot_subset[eot_subset.instrument=='Eyberg Child Behaviour Inventory']
pd.crosstab(x.instrument, x.subtype)

Out[38]:

subtype	Intensity Subscale	Problem Subscale
instrument
Eyberg Child Behaviour Inventory	68	48

In [39]:

x = eot_subset[eot_subset.instrument=='Child Behavior Checklist']
pd.crosstab(x.instrument, x.subtype)

Out[39]:

subtype	Aggression	Conduct Problems	Delinquent Subscale	Externalizing
instrument
Child Behavior Checklist	2	6	2	30

In [40]:

x = eot_subset[eot_subset.instrument=='Strengths and Difficulties Questionnaire']
pd.crosstab(x.instrument, x.subtype)

Out[40]:

subtype	Conduct Problems Scale	Emotional Symptoms Scale	Hyperactivity Scale	Impact Score	Total Score
instrument
Strengths and Difficulties Questionnaire	6	4	2	2	8

Merge study variables and outcomes

In [41]:

merged_vars = study_vars.merge(eot_subset, left_index=True, right_on='RefID')
merged_vars.shape

Out[41]:

(176, 36)

For now, restrict to the three most prevalent metrics.

In [42]:

merged_vars.measure_instrument.value_counts()

Out[42]:

Eyberg Child Behaviour Inventory, Intensity Subscale            64
Eyberg Child Behaviour Inventory, Problem Subscale              48
Child Behavior Checklist, Externalizing (T Score)               21
Strengths and Difficulties Questionnaire, Total Score            8
Child Behavior Checklist, Externalizing                          7
Child Behavior Checklist, Conduct Problems (T Score)             6
Strengths and Difficulties Questionnaire- Conduct Problems Scale     6
Eyberg Child Behaviour Inventory, Intensity Subscale (T Score)     4
Strengths and Difficulties Questionnaire- Emotional Symptoms Scale     4
Strengths and Difficulties Questionnaire- Hyperactivity Scale     2
Strengths and Difficulties Questionnaire- Impact Score           2
Child Behavior Checklist, Delinquent Subscale                    2
Child Behavior Checklist, Aggression                             2
dtype: int64

Take only the top 3 measures

In [43]:

top_measures = 3

In [44]:

merged_vars.RefID.unique()

Out[44]:

array([ 103,  371,  441,  475,  539,  564,  836,  899,  993, 1236, 1245,
       1511, 1585, 1875, 1951, 2092, 2117, 2219, 2239, 2347, 3211, 3225,
       3399, 3495, 3687, 3716, 3766, 3915, 3960, 7109, 7723, 8192, 8231,
       8289, 8292, 8401])

In [45]:

analysis_subset = merged_vars[merged_vars.measure_instrument.isin(merged_vars.measure_instrument.value_counts().index[:top_measures])]
analysis_subset.groupby('measure_instrument')['baseline_mean'].max()

Out[45]:

measure_instrument
Child Behavior Checklist, Externalizing (T Score)        77.10
Eyberg Child Behaviour Inventory, Intensity Subscale    186.44
Eyberg Child Behaviour Inventory, Problem Subscale       28.62
Name: baseline_mean, dtype: float64

In [46]:

analysis_subset.groupby('measure_instrument').baseline_mean.hist()

Out[46]:

measure_instrument
Child Behavior Checklist, Externalizing (T Score)       Axes(0.125,0.125;0.775x0.775)
Eyberg Child Behaviour Inventory, Intensity Subscale    Axes(0.125,0.125;0.775x0.775)
Eyberg Child Behaviour Inventory, Problem Subscale      Axes(0.125,0.125;0.775x0.775)
Name: baseline_mean, dtype: object

In [47]:

analysis_subset['baseline_mean'].hist(by=analysis_subset['measure_instrument'],sharex=True)

Out[47]:

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x116fe2550>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x117105e80>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1171536a0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1171892e8>]], dtype=object)

In [48]:

analysis_subset['eot_diff_mean'].hist(by=analysis_subset['measure_instrument'],sharex=True)

Out[48]:

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x116a82860>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x116e75940>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1165d53c8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1169e3588>]], dtype=object)

In [49]:

for x in analysis_subset.measure_instrument.unique():
    plt.figure()
    analysis_subset[analysis_subset.measure_instrument==x].baseline_mean.plot(kind='kde', alpha=0.4, grid=False)
    analysis_subset[analysis_subset.measure_instrument==x].end_treat_mean.plot(kind='kde', alpha=0.4, grid=False)
    plt.gca().set_xlabel(x)

Meta-analysis¶

Number of studies in analysis subset

In [50]:

unique_studies = analysis_subset.RefID.unique().tolist()
len(unique_studies)

Out[50]:

In [51]:

analysis_subset.RefID.unique()

Out[51]:

array([ 103,  371,  441,  539,  564,  899, 1236, 1245, 1511, 1585, 1875,
       2092, 2117, 2219, 3211, 3225, 3399, 3495, 3687, 3716, 3766, 3915,
       3960, 7109, 7723, 8192, 8231, 8289, 8292, 8401])

In [52]:

analysis_subset.groupby(['RefID', 'measure_instrument'])['measure_instrument'].count().unstack().fillna(0)

Out[52]:

measure_instrument	Child Behavior Checklist, Externalizing (T Score)	Eyberg Child Behaviour Inventory, Intensity Subscale	Eyberg Child Behaviour Inventory, Problem Subscale
RefID
103	0	2	2
371	0	2	2
441	0	2	2
539	2	0	0
564	0	2	2
899	3	3	3
1236	0	6	6
1245	2	0	0
1511	3	0	0
1585	0	2	2
1875	3	0	0
2092	0	3	0
2117	2	0	0
2219	0	3	3
3211	0	2	2
3225	0	2	2
3399	2	0	0
3495	0	4	0
3687	0	6	6
3716	0	2	2
3766	0	9	0
3915	0	2	2
3960	0	0	2
7109	0	2	2
7723	2	0	0
8192	0	2	0
8231	0	4	4
8289	0	2	2
8292	2	0	0
8401	0	2	2

Study counts by

In [54]:

data_by_refid = analysis_subset.groupby('RefID')
data_by_refid.apply(lambda x: (x[['child_component', 'parent_component', 'multi_component']].sum()>1)).sum()

Out[54]:

child_component      9
parent_component    22
multi_component      8
dtype: int64

In [55]:

data_by_refid.apply(lambda x: (x[['child_component', 'parent_component', 'multi_component']].sum(1)==0).sum()>0).astype(int)

Out[55]:

RefID
103      0
371      1
441      1
539      1
564      1
899      1
1236     1
1245     0
1511     1
1585     1
1875     1
2092     1
2117     1
2219     1
3211     1
3225     1
3399     1
3495     1
3687     1
3716     1
3766     1
3915     1
3960     0
7109     1
7723     1
8192     1
8231     1
8289     0
8292     1
8401     1
dtype: int64

In [56]:

data_by_refid.apply(lambda x: x.age_cat.unique()[0]).value_counts()

Out[56]:

1    17
0    11
2     2
dtype: int64

We are restricting the analysis to the 4 most prevalent measure instruments in the database.

In [57]:

unique_measures = analysis_subset.measure_instrument.unique().tolist()
k = len(unique_measures)
unique_measures

Out[57]:

['Eyberg Child Behaviour Inventory, Intensity Subscale',
 'Eyberg Child Behaviour Inventory, Problem Subscale',
 'Child Behavior Checklist, Externalizing (T Score)']

Three intervention components were coded:

child_component
parent_component
multi_component

In [58]:

p_male, age_cat, intvn = analysis_subset[['p_male', 'age_cat', 'intvn']].values.T
child_only, parent_only, multi_component = analysis_subset[['child_only', 'parent_only', 
                                                                       'multi_component']].values.T

change_n, change_mean, change_sd = analysis_subset[['eot_diff_n', 'eot_diff_mean', 'eot_diff_sd']].values.T

In [59]:

school = (analysis_subset.age_cat.values==0).astype(int)
pre_k = (analysis_subset.age_cat.values==1).astype(int)
teen = (analysis_subset.age_cat.values==2).astype(int)

The response variable is a multivariate normal of dimension k=3, for each of the measure instruments:

Eyberg Child Behaviour Inventory, Intensity Subscale
Eyberg Child Behaviour Inventory, Problem Subscale
Child Behavior Checklist, Externalizing (T Score)

$$\left({ \begin{array}{c} {m_1}\\ {m_2}\\ {m_3} \end{array} }\right)_i \sim\text{MVN}(\mathbf{\mu},\Sigma)$$

Means for each study are a draw from a multivariate normal.

In [60]:

wishart = False

mu = pm.Normal('mu', 0, 0.001, value=[0]*k)

if wishart:
    T = pm.Wishart('T', k+1, np.eye(k), value=np.eye(k))
    
    m = [pm.MvNormal('m_{}'.format(i), mu, T, value=[0]*k) for i in range(len(unique_studies))]
else:
    sigmas = pm.Uniform('sigmas', 0, 100, value=[10]*k)
    rhos = pm.Uniform('rhos', -1, 1, value=[0]*int(comb(k, 2)))

    Sigma = pm.Lambda('Sigma', lambda s=sigmas, r=rhos: np.array([[s[0]**2, s[0]*s[1]*r[0], s[0]*s[2]*r[1]],
                                                                 [s[0]*s[1]*r[0], s[1]**2, s[1]*s[2]*r[2]],
                                                                 [s[0]*s[2]*r[1], s[1]*s[2]*r[2], s[2]**2]]))
    
    m = [pm.MvNormalCov('m_{}'.format(i), mu, Sigma, value=[0]*k) for i in range(len(unique_studies))]

Unique intervention labels for each component; we will use these for component random effects.

In [61]:

unique_child_intvn = np.unique(intvn[child_only.astype(bool)]).tolist()
unique_parent_intvn = np.unique(intvn[parent_only.astype(bool)]).tolist()
unique_multi_intvn = np.unique(intvn[multi_component.astype(bool)]).tolist()

In [62]:

# Indices to random effect labels
child_component_index = [unique_child_intvn.index(x) for x in intvn[child_only.astype(bool)]]
parent_component_index = [unique_parent_intvn.index(x) for x in intvn[parent_only.astype(bool)]]
multi_component_index = [unique_multi_intvn.index(x) for x in intvn[multi_component.astype(bool)]]

Treatment component random effects

$$X_i = \left[{ \begin{array}{c} {x_c}\\ {x_p}\\ {x_m}\\ \end{array} }\right]_i$$$$\begin{aligned} \beta_j^{(c)} &\sim N(\mu_{\beta}^{(c)},\tau_{\beta}^{(c)}) \\ \beta_j^{(p)} &\sim N(\mu_{\beta}^{(p)},\tau_{\beta}^{(p)}) \\ \beta_j^{(m)} &\sim N(\mu_{\beta}^{(m)},\tau_{\beta}^{(m)}) \end{aligned}$$

In [63]:

mu_beta = pm.Normal('mu_beta', 0, 0.001, value=[0]*3)
# sig_beta = pm.Uniform('sig_beta', 0, 100, value=1)
# tau_beta = sig_beta ** -2
tau_beta = pm.Gamma('tau_beta', 1, 0.1, value=1)

beta_c = pm.Normal('beta_c', mu_beta[0], tau_beta, value=[0]*len(unique_child_intvn))
beta_p = pm.Normal('beta_p', mu_beta[1], tau_beta, value=[0]*len(unique_parent_intvn))
beta_m = pm.Normal('beta_m', mu_beta[2], tau_beta, value=[0]*len(unique_multi_intvn))

b_c = pm.Lambda('b_c', lambda b=beta_c: 
             np.array([b[unique_child_intvn.index(x)] if child_only[i] else 0 for i,x in enumerate(intvn)]))
b_p = pm.Lambda('b_p', lambda b=beta_p: 
             np.array([b[unique_parent_intvn.index(x)] if parent_only[i] else 0 for i,x in enumerate(intvn)]))
b_m = pm.Lambda('b_m', lambda b=beta_m: 
             np.array([b[unique_multi_intvn.index(x)] if multi_component[i] else 0 for i,x in enumerate(intvn)]))

Calculate the probability of being the best intervention.

In [64]:

best = pm.Lambda('best', lambda b=mu_beta:  (b==b.min()).astype(int))

Interaction of parent and multi-component with pre-k children.

In [65]:

interaction = False

if interaction:
    beta_pk_p = pm.Normal('beta_pk_p', 0, 1e-5, value=0)
    beta_pk_m = pm.Normal('beta_pk_m', 0, 1e-5, value=0)
    b_pk_p = pm.Lambda('b_pk_p', lambda b=beta_pk_p: b * parent_only * pre_k)
    b_pk_m = pm.Lambda('b_pk_m', lambda b=beta_pk_m: b * multi_component * pre_k)

In [66]:

betas = b_c + b_p + b_m 

if interaction:
    betas = betas + b_pk_p + b_pk_m

Covariate effects of age and percent female.

$$\alpha \sim N(0, 1e5)$$

In [67]:

alpha_age = pm.Normal('alpha_age', 0, 1e-5, value=[1,2])

Unique study ID (RefID) and measure ID (measure_instrument) values.

In [68]:

study_id = [unique_studies.index(x) for x in analysis_subset.RefID]
measure_id = [unique_measures.index(x) for x in analysis_subset.measure_instrument]

Calculate expected response (treatment difference) as a function of treatment and covariates.

$$\theta_i = m_{j[i]k} + X_i \beta + \alpha x_{age}$$

In [69]:

baseline_sd = analysis_subset.baseline_sd.values

@pm.deterministic
def theta(m=m, betas=betas, alpha_age=alpha_age):  

    mi = [m[i][j] for i,j in zip(study_id, measure_id)]
    
    age_effect = np.array([alpha_age[a-1] if a else 0 for a in age_cat])
    
    return(mi + baseline_sd*(betas + age_effect))

Expected treatment effect for pre-K undergoing multi-component intervention, measused by Eyberg Child Behaviour Inventory, Intensity Subscale

In [70]:

if wishart:
    baseline = pm.MvNormal('baseline', mu, T, value=[0]*k)
else:
    baseline = pm.MvNormalCov('baseline', mu, Sigma, value=[0]*k)

Calculation of expected differences from baseline for each instrument.

In [71]:

# ECBI intensity subscale
ecbi_intensity_sd = baseline_sd[np.array(measure_id)==0].mean()

prek_intensity_pred = pm.Lambda('prek_intensity_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*(b + a[0]) )
school_intensity_pred = pm.Lambda('school_intensity_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*b )
teen_intensity_pred = pm.Lambda('teen_intensity_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*(b + a[1]) )

intensity_diff = pm.Lambda('intensity_diff', 
                           lambda base=baseline, prek=prek_intensity_pred, school=school_intensity_pred, 
                           teen=teen_intensity_pred: np.array([prek, school, teen])-base[0])
                            

# ECBI problem subscale
ecbi_problem_sd = baseline_sd[np.array(measure_id)==1].mean()

prek_problem_pred = pm.Lambda('prek_problem_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*(b + a[0]) )
school_problem_pred = pm.Lambda('school_problem_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*b )
teen_problem_pred = pm.Lambda('teen_problem_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*(b + a[1]) )

problem_diff = pm.Lambda('problem_diff', 
                           lambda base=baseline, prek=prek_problem_pred, school=school_problem_pred, 
                           teen=teen_problem_pred: np.array([prek, school, teen])-base[1])

# CBC T-score
cbct_sd = baseline_sd[np.array(measure_id)==2].mean()

prek_tscore_pred = pm.Lambda('prek_tscore_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[2] + cbct_sd*(b + a[0]) )
school_tscore_pred = pm.Lambda('school_tscore_pred', 
                            lambda mu=baseline, b=mu_beta: mu[2] + cbct_sd*b )
teen_tscore_pred = pm.Lambda('teen_tscore_pred', 
                            lambda mu=baseline, a=alpha_age, b=mu_beta: mu[2] + cbct_sd*(b + a[1]) )

tscore_diff = pm.Lambda('tscore_diff', 
                           lambda base=baseline, prek=prek_tscore_pred, school=school_tscore_pred, 
                           teen=teen_tscore_pred: np.array([prek, school, teen])-base[2])

Finally, the likelihood is just a normal distribution, with the observed standard error of the treatment effect as the standard deviation of the estimates.

$$d_i \sim N(\theta_i, \hat{\sigma}^2)$$

In [72]:

change_se = change_sd/np.sqrt(change_n)

In [73]:

d = pm.Normal('d', theta, change_se**-2, observed=True, value=change_mean)

Posterior predictive samples, if desired.

In [74]:

include_gof_samples = False

if include_gof_samples:
    d_sim = pm.Normal('d_sim', theta, change_se**-2, size=len(change_mean))

Instantiate MCMC model, and use Adaptive Metropolis for some vector-valued variables.

In [75]:

M = pm.MCMC(locals())
M.use_step_method(pm.AdaptiveMetropolis, m)
M.use_step_method(pm.AdaptiveMetropolis, mu_beta)
M.use_step_method(pm.AdaptiveMetropolis, [beta_c, beta_p, beta_m])

Run two chains to allow for convergence evaluation.

In [76]:

M.sample(100000, 90000)

 [-----------------100%-----------------] 100000 of 100000 complete in 823.3 sec

In [107]:

M.sample(100000, 90000)

 [-----------------100%-----------------] 100000 of 100000 complete in 511.4 sec

Summary of estimates of intervention components

In [108]:

if wishart:
    pm.Matplot.plot(T)

In [109]:

pm.Matplot.plot(mu_beta)

Plotting mu_beta_0
Plotting mu_beta_1
Plotting mu_beta_2

In [110]:

pm.Matplot.summary_plot([mu_beta], custom_labels=['Child component', 'Parent component', 
                                                              'Multi-component'])

In [111]:

mu_beta.summary()

mu_beta:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-1.02            0.303            0.013            [-1.594 -0.434]
	-1.208           0.178            0.008            [-1.54  -0.859]
	-1.335           0.204            0.01             [-1.709 -0.929]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-1.599           -1.225          -1.019         -0.821        -0.435
	-1.56            -1.327          -1.202         -1.092        -0.873
	-1.708           -1.473          -1.339         -1.2          -0.926

In [112]:

best.summary()

best:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.139            0.346            0.011                  [ 0.  1.]
	0.249            0.433            0.015                  [ 0.  1.]
	0.612            0.487            0.02                   [ 0.  1.]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	0.0              0.0             0.0            0.0           1.0
	0.0              0.0             0.0            0.0           1.0
	0.0              0.0             1.0            1.0           1.0

In [113]:

pm.Matplot.plot(mu_beta)

Plotting mu_beta_0
Plotting mu_beta_1
Plotting mu_beta_2

In [114]:

if interaction:
    pm.Matplot.summary_plot([beta_pk_m, beta_pk_p])

Difference means by measure instrument.

In [115]:

plt.figure(figsize=(24,4))
pm.Matplot.summary_plot([mu], custom_labels=unique_measures)

In [116]:

pm.Matplot.plot(mu)

Plotting mu_0
Plotting mu_1
Plotting mu_2

In [117]:

if not wishart:
    plt.figure(figsize=(24,4))
    pm.Matplot.summary_plot([sigmas], custom_labels=unique_measures)

In [118]:

sigmas.summary()

sigmas:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.283            0.043            0.003            [ 0.205  0.371]
	0.086            0.022            0.002            [ 0.05   0.132]
	0.072            0.029            0.003              [ 0.03  0.13]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	0.21             0.253           0.279          0.31          0.38
	0.052            0.07            0.083          0.1           0.137
	0.033            0.05            0.065          0.093         0.138

In [119]:

pm.Matplot.plot(sigmas)

Plotting sigmas_0
Plotting sigmas_1
Plotting sigmas_2

In [120]:

if not wishart:
    pm.Matplot.summary_plot([rhos], custom_labels=['rho12', 'rho13', 'rho23'])

Age effects for pre-k (top) and teen (bottom) groups, relative to pre-teen.

In [121]:

pm.Matplot.summary_plot([alpha_age], custom_labels=['pre-K', 'teen'])

In [122]:

alpha_age.summary()

alpha_age:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-0.312           0.069            0.003            [-0.445 -0.181]
	-0.052           0.236            0.013            [-0.504  0.415]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-0.445           -0.359          -0.31          -0.264        -0.18
	-0.52            -0.21           -0.04          0.107         0.406

In [123]:

pm.Matplot.plot(alpha_age)

Plotting alpha_age_0
Plotting alpha_age_1

Outcome Plots¶

In [124]:

traces = [[school_intensity_pred, prek_intensity_pred, teen_intensity_pred],
          [school_problem_pred, prek_problem_pred, teen_problem_pred],
          [school_tscore_pred, prek_tscore_pred, teen_tscore_pred]]

In [125]:

sb.set(style="white", palette="hot")
sb.despine(left=True)

colors = '#7fc97f','#beaed4','#fdc086','#386cb0'
#colors = '#a6611a','#dfc27d','#80cdc1','#018571'
# colors = ('#fef0d9','#fdcc8a','#fc8d59','#d7301f')[::-1]
# colors = sb.cubehelix_palette(4, start=2, rot=0, dark=.25, light=.75, reverse=False)

titles = ['Pre-K Children', 'School Children', 'Teenage Children']

for i,measure in enumerate(unique_measures):
    
    f, axes = plt.subplots(1, 3, figsize=(16, 6), sharex=True, sharey=True)
    measure_traces = traces[i]
    
    for j, trace in enumerate(np.array(measure_traces)[[1,0,2]]):
        
        x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
        
        c1, p1, m1 = trace.trace().T
        
        g = sb.distplot(x + c1, color=colors[0], ax=axes[j])
        sb.distplot(x + p1, color=colors[1], ax=axes[j])
        sb.distplot(x + m1, color=colors[2], ax=axes[j])
        if j:
            age_effect = alpha_age.trace()[:, j-1]
        else:
            age_effect = 0
        sb.distplot(x + baseline.trace()[:, i] + age_effect, color=colors[3], ax=axes[j]);
        g.set_title(titles[j])

        

        if not j:
            g.set_ylabel('Frequency')
            g.legend(g.lines, ['Child-only', 'Parent-only', 'Multi-component', 'Control/TAU'], loc='upper left')
        if j==1:
            g.set_xlabel(measure)

<matplotlib.figure.Figure at 0x11d4991d0>

Generate threshold proportions

In [126]:

thresholds = [127, # ECBI, intensity
              11,  # ECBI, problem
              60]  # CBC t-score

age_labels = ['school', 'pre-k', 'teen']

estimates = []

for i,measure in enumerate(unique_measures):
    
    measure_traces = traces[i]
    cutoff = thresholds[i]
    
    for j, trace in enumerate(measure_traces):
        
        x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
        
        c1, p1, m1 = trace.trace().T
        
        _child_only = ((x + c1) > cutoff).mean().round(2)
        _parent_only = ((x + p1) > cutoff).mean().round(2)
        _multi_component = ((x + m1) > cutoff).mean().round(2)
        if j:
            age_effect = alpha_age.trace()[:, j-1]
        else:
            age_effect = 0
        _control = ((x + baseline.trace()[:, i] + age_effect) > cutoff).mean().round(2)
        
        print('\n{0}, {1}'.format(measure, age_labels[j]))
        print('Child-only: {0}\nParent-only: {1}\nMulti-component: {2}\nControl/TAU: {3}'.format(_child_only, 
                                                                                                 _parent_only,
                                                                                                 _multi_component,
                                                                                                 _control))
        
        estimates.append((measure, age_labels[j], _child_only, _parent_only, _multi_component, _control))
        

Eyberg Child Behaviour Inventory, Intensity Subscale, school
Child-only: 0.6
Parent-only: 0.48
Multi-component: 0.39
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Intensity Subscale, pre-k
Child-only: 0.38
Parent-only: 0.24
Multi-component: 0.19
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Intensity Subscale, teen
Child-only: 0.56
Parent-only: 0.44
Multi-component: 0.36
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Problem Subscale, school
Child-only: 0.78
Parent-only: 0.75
Multi-component: 0.7
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, pre-k
Child-only: 0.64
Parent-only: 0.56
Multi-component: 0.44
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, teen
Child-only: 0.75
Parent-only: 0.69
Multi-component: 0.64
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), school
Child-only: 0.57
Parent-only: 0.42
Multi-component: 0.35
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), pre-k
Child-only: 0.35
Parent-only: 0.24
Multi-component: 0.19
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), teen
Child-only: 0.53
Parent-only: 0.39
Multi-component: 0.33
Control/TAU: 1.0

In [127]:

thresholds = [127, # ECBI, intensity
              11,  # ECBI, problem
              60]  # CBC t-score

age_labels = ['school', 'pre-k', 'teen']

estimates = []

for i,measure in enumerate(unique_measures):
    
    measure_traces = traces[i]
    cutoff = thresholds[i]
    
    for j, trace in enumerate(measure_traces):
        
        x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
        
        c1, p1, m1 = trace.trace().T
        
        _child_only = ((x + c1) > cutoff).mean().round(2)
        _parent_only = ((x + p1) > cutoff).mean().round(2)
        _multi_component = ((x + m1) > cutoff).mean().round(2)
        if j:
            age_effect = alpha_age.trace()[:, j-1]
        else:
            age_effect = 0
        _control = ((x + baseline.trace()[:, i] + age_effect) > cutoff).mean().round(2)
        
        print('\n{0}, {1}'.format(measure, age_labels[j]))
        print('Child-only: {0}\nParent-only: {1}\nMulti-component: {2}\nControl/TAU: {3}'.format(_child_only, 
                                                                                                 _parent_only,
                                                                                                 _multi_component,
                                                                                                 _control))
        
        estimates.append((measure, age_labels[j], _child_only, _parent_only, _multi_component, _control))
        

Eyberg Child Behaviour Inventory, Intensity Subscale, school
Child-only: 0.59
Parent-only: 0.48
Multi-component: 0.39
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Intensity Subscale, pre-k
Child-only: 0.39
Parent-only: 0.25
Multi-component: 0.2
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Intensity Subscale, teen
Child-only: 0.56
Parent-only: 0.45
Multi-component: 0.36
Control/TAU: 0.94

Eyberg Child Behaviour Inventory, Problem Subscale, school
Child-only: 0.78
Parent-only: 0.75
Multi-component: 0.7
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, pre-k
Child-only: 0.63
Parent-only: 0.56
Multi-component: 0.42
Control/TAU: 1.0

Eyberg Child Behaviour Inventory, Problem Subscale, teen
Child-only: 0.76
Parent-only: 0.7
Multi-component: 0.65
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), school
Child-only: 0.57
Parent-only: 0.42
Multi-component: 0.35
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), pre-k
Child-only: 0.35
Parent-only: 0.22
Multi-component: 0.19
Control/TAU: 1.0

Child Behavior Checklist, Externalizing (T Score), teen
Child-only: 0.53
Parent-only: 0.4
Multi-component: 0.33
Control/TAU: 1.0

In [128]:

estimates

Out[128]:

[('Eyberg Child Behaviour Inventory, Intensity Subscale',
  'school',
  0.58999999999999997,
  0.47999999999999998,
  0.39000000000000001,
  0.93999999999999995),
 ('Eyberg Child Behaviour Inventory, Intensity Subscale',
  'pre-k',
  0.39000000000000001,
  0.25,
  0.20000000000000001,
  0.93999999999999995),
 ('Eyberg Child Behaviour Inventory, Intensity Subscale',
  'teen',
  0.56000000000000005,
  0.45000000000000001,
  0.35999999999999999,
  0.93999999999999995),
 ('Eyberg Child Behaviour Inventory, Problem Subscale',
  'school',
  0.78000000000000003,
  0.75,
  0.69999999999999996,
  1.0),
 ('Eyberg Child Behaviour Inventory, Problem Subscale',
  'pre-k',
  0.63,
  0.56000000000000005,
  0.41999999999999998,
  1.0),
 ('Eyberg Child Behaviour Inventory, Problem Subscale',
  'teen',
  0.76000000000000001,
  0.69999999999999996,
  0.65000000000000002,
  1.0),
 ('Child Behavior Checklist, Externalizing (T Score)',
  'school',
  0.56999999999999995,
  0.41999999999999998,
  0.34999999999999998,
  1.0),
 ('Child Behavior Checklist, Externalizing (T Score)',
  'pre-k',
  0.34999999999999998,
  0.22,
  0.19,
  1.0),
 ('Child Behavior Checklist, Externalizing (T Score)',
  'teen',
  0.53000000000000003,
  0.40000000000000002,
  0.33000000000000002,
  1.0)]

In [129]:

np.savetxt('estimates.csv', np.array([e[2:] for e in estimates]), delimiter='\t', fmt='%1.2f')

In [130]:

pd.DataFrame(estimates, columns=['Instrument', 'Age group', 'Child-only', 
                                 'Parent-only', 'Multi-component', 'TAU/Control']).to_csv('outcome_proportions.csv')

Summary statistics for TAU/control

In [131]:

baseline.summary()

baseline:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.031            0.291            0.004            [-0.519  0.632]
	-0.02            0.094            0.003            [-0.211  0.165]
	0.026            0.08             0.002            [-0.137  0.193]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-0.545           -0.159          0.03           0.222         0.607
	-0.219           -0.077          -0.017         0.04          0.159
	-0.134           -0.02           0.024          0.07          0.199

Summary statistics for intensity subscale

In [132]:

prek_intensity_pred.summary()
school_intensity_pred.summary()
teen_intensity_pred.summary()

prek_intensity_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-35.29           8.205            0.34           [-50.713 -19.181]
	-40.285          4.587            0.171          [-49.584 -31.483]
	-43.643          5.454            0.247          [-53.763 -32.583]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-50.772          -40.985         -35.293        -29.796       -19.219
	-49.591          -43.33          -40.121        -37.254       -31.486
	-53.702          -47.416         -43.756        -40.263       -32.468
	

school_intensity_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-27.021          8.041            0.339          [-42.202 -11.295]
	-32.016          4.722            0.205          [-40.865 -22.625]
	-35.374          5.423            0.256          [-45.526 -24.697]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-42.418          -32.456         -26.945        -21.717       -11.465
	-41.426          -35.201         -31.867        -28.924       -23.111
	-45.369          -39.059         -35.509        -31.776       -24.496
	

teen_intensity_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-28.407          9.275            0.392          [-46.792 -10.405]
	-33.403          7.802            0.405          [-49.639 -19.162]
	-36.76           7.263            0.327          [-51.158 -22.201]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-47.492          -34.376         -28.167        -22.047       -10.896
	-49.563          -38.588         -33.079        -27.927       -19.065
	-51.943          -41.429         -36.474        -31.982       -22.863

Summary statistics for problem subscale

In [133]:

prek_problem_pred.summary()
school_problem_pred.summary()
teen_problem_pred.summary()

prek_problem_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-8.249           1.912            0.079          [-11.863  -4.519]
	-9.413           1.07             0.04           [-11.5    -7.282]
	-10.195          1.273            0.058          [-12.542  -7.608]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-11.855          -9.572          -8.249         -6.977        -4.51
	-11.57           -10.13          -9.377         -8.705        -7.338
	-12.545          -11.073         -10.224        -9.409        -7.61
	

school_problem_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-6.323           1.873            0.079            [-9.855 -2.683]
	-7.486           1.101            0.048            [-9.599 -5.353]
	-8.269           1.265            0.06           [-10.584  -5.689]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-9.899           -7.587          -6.307         -5.088        -2.709
	-9.682           -8.221          -7.449         -6.759        -5.414
	-10.623          -9.125          -8.305         -7.431        -5.714
	

teen_problem_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-6.646           2.16             0.091          [-10.971  -2.454]
	-7.809           1.817            0.094          [-11.621  -4.519]
	-8.592           1.692            0.076          [-12.091  -5.347]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-11.132          -8.032          -6.586         -5.164        -2.558
	-11.59           -9.019          -7.737         -6.529        -4.463
	-12.108          -9.682          -8.522         -7.475        -5.349

Summary statistics for t scores

In [134]:

prek_tscore_pred.summary()
school_tscore_pred.summary()
teen_tscore_pred.summary()

prek_tscore_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-10.999          2.56             0.106          [-15.917  -6.081]
	-12.558          1.431            0.053          [-15.472  -9.819]
	-13.606          1.7              0.077          [-16.771 -10.167]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-15.847          -12.78          -11.0          -9.295        -5.992
	-15.466          -13.508         -12.509        -11.615       -9.809
	-16.747          -14.781         -13.637        -12.55        -10.128
	

school_tscore_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-8.418           2.509            0.106          [-13.219  -3.612]
	-9.977           1.474            0.064          [-12.739  -7.048]
	-11.025          1.69             0.08           [-14.198  -7.716]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-13.208          -10.12          -8.398         -6.763        -3.591
	-12.919          -10.964         -9.933         -9.009        -7.185
	-14.135          -12.175         -11.067        -9.904        -7.647
	

teen_tscore_pred:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-8.851           2.894            0.122          [-14.691  -3.316]
	-10.41           2.435            0.127          [-15.431  -5.927]
	-11.458          2.265            0.102          [-15.877  -6.845]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-14.816          -10.707         -8.771         -6.863        -3.387
	-15.465          -12.031         -10.307        -8.704        -5.949
	-16.197          -12.911         -11.365        -9.965        -7.145

Summary statistics for differences.¶

Each summary corresponds to teh differences between baseline and age x component categories for each outcome. The order is:

child-only, pre-k
child-only, school
child-only, teen
parent-only, pre-k
parent-only, school
parent-only, teen
multi, pre-k
multi, school
multi, teen

In [135]:

intensity_diff.summary()
problem_diff.summary()
tscore_diff.summary()

intensity_diff:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-35.321          8.202            0.34           [-51.054 -19.576]
	-40.316          4.577            0.17           [-49.029 -30.961]
	-43.674          5.444            0.247          [-54.092 -33.006]
	-27.052          8.039            0.34           [-42.292 -11.508]
	-32.048          4.713            0.205          [-40.843 -22.784]
	-35.405          5.413            0.257          [-45.328 -24.656]
	-28.438          9.274            0.392          [-46.906 -10.498]
	-33.434          7.797            0.405          [-49.647 -19.242]
	-36.791          7.256            0.327          [-50.733 -21.894]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-50.762          -41.004         -35.33         -29.856       -19.205
	-49.612          -43.372         -40.151        -37.292       -31.516
	-53.751          -47.449         -43.761        -40.3         -32.577
	-42.412          -32.493         -27.028        -21.79        -11.538
	-41.393          -35.195         -31.895        -28.957       -23.15
	-45.312          -39.073         -35.529        -31.827       -24.573
	-47.61           -34.378         -28.168        -22.033       -10.89
	-49.645          -38.6           -33.105        -27.968       -19.234
	-51.945          -41.446         -36.494        -32.013       -22.883
	

problem_diff:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-8.229           1.911            0.079          [-11.895  -4.561]
	-9.393           1.066            0.04           [-11.423  -7.213]
	-10.175          1.268            0.058          [-12.602  -7.69 ]
	-6.303           1.873            0.079            [-9.853 -2.681]
	-7.466           1.098            0.048            [-9.516 -5.308]
	-8.249           1.261            0.06           [-10.561  -5.744]
	-6.626           2.161            0.091          [-10.928  -2.446]
	-7.789           1.817            0.094          [-11.567  -4.483]
	-8.572           1.691            0.076          [-11.82   -5.101]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-11.827          -9.553          -8.231         -6.956        -4.474
	-11.559          -10.105         -9.354         -8.688        -7.343
	-12.523          -11.055         -10.195        -9.389        -7.59
	-9.881           -7.57           -6.297         -5.077        -2.688
	-9.644           -8.2            -7.431         -6.746        -5.393
	-10.557          -9.103          -8.277         -7.415        -5.725
	-11.092          -8.009          -6.563         -5.133        -2.537
	-11.566          -8.993          -7.713         -6.516        -4.481
	-12.102          -9.656          -8.502         -7.459        -5.331
	

tscore_diff:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	-11.025          2.56             0.106          [-15.936  -6.111]
	-12.584          1.429            0.053          [-15.304  -9.664]
	-13.632          1.699            0.077          [-16.885 -10.303]
	-8.444           2.509            0.106          [-13.201  -3.592]
	-10.003          1.471            0.064          [-12.749  -7.112]
	-11.051          1.69             0.08           [-14.149  -7.696]
	-8.877           2.895            0.122          [-14.641  -3.277]
	-10.436          2.434            0.126          [-15.497  -6.006]
	-11.484          2.265            0.102          [-15.836  -6.834]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	-15.845          -12.799         -11.028        -9.319        -5.995
	-15.486          -13.538         -12.533        -11.64        -9.838
	-16.778          -14.811         -13.66         -12.579       -10.169
	-13.239          -10.142         -8.437         -6.802        -3.602
	-12.92           -10.986         -9.956         -9.039        -7.226
	-14.144          -12.196         -11.09         -9.935        -7.67
	-14.861          -10.731         -8.792         -6.878        -3.399
	-15.496          -12.049         -10.334        -8.73         -6.004
	-16.214          -12.937         -11.391        -9.993        -7.143

	child_component	parent_component	family_component
125	1	0	0
126	1	0	0
128	1	0	0
129	1	0	0
162	1	0	0
188	1	0	0
194	1	0	0
195	1	0	0
234	1	0	0
236	1	0	0
238	1	0	0

	child_component	parent_component	family_component
125	1	0	0
126	1	0	0
128	1	0	0
129	1	0	0
162	1	0	0
188	1	0	0
194	1	0	0
195	1	0	0
234	1	0	0
236	1	0	0
238	1	0	0

	child_component	parent_component	family_component
125	1	0	0
126	1	0	0
128	1	0	0
129	1	0	0
162	1	0	0
188	1	0	0
194	1	0	0
195	1	0	0
234	1	0	0
236	1	0	0
238	1	0	0