%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sb
import numpy as np
import pandas as pd
import pymc as pm
from scipy.misc import comb
np.random.seed(42)
study_char = pd.read_excel("DBD Data for Meta Analyses.xlsx", "Study Characteristics",
index_col='RefID', na_values=['-', 'NR'])
outcomes = pd.read_excel("DBD Data for Meta Analyses.xlsx", "Outcomes",
na_values=['ND', 'NR'])
#demographics = pd.read_excel("DBD Data for Meta Analyses.xlsx", "Pt Demographics", na_values=['-', 'NR'])
study_char.tail()
Pubmed ID | Rel Incl1 | Rel Incl2 | Rel Incl3 | Rel Excl | Family | NCT | Title | Year | Author | ... | Parent Component | Screened (N) | Randomized (N) | Analyzed (N) | Proportion Male (%) | Direction of Effect | DOE_Comments | Other comments | Drugs(s) | - | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
RefID | |||||||||||||||||||||
8292 | na | NaN | NaN | NaN | NaN | NaN | NaN | Reciprocal skills training in the treatment of... | 2000 | Barrett P, C. Turner, S. Rombouts and A. Duffy | ... | Both | NaN | 57 | 57 | 79 | NaN | NaN | RST (hospital), RST (clinic), WLC | NaN | NaN |
8320 | na | 1867 | NaN | NaN | NaN | Z-C | NaN | Differential responses of children with varyin... | 2008 | Wolff JC, R. W. Greene and T. H. Ollendick | ... | Both | NaN | 50 | 47 | 68 | NaN | NaN | NaN | NaN | NaN |
8365 | na | NaN | NaN | NaN | NaN | NaN | NaN | Treatment of depressed mothers with disruptive... | 2000 | Sanders MR and M. McFarland | ... | Both | 167 | 47 | 47 | 74 | NaN | NaN | NaN | NaN | NaN |
8385 | na | NaN | NaN | NaN | NaN | NaN | NaN | Reducing Adolescent Oppositional and Conduct D... | 2011 | Sells SP, K. W. Early and T. E. Smith | ... | Both | NaN | 38 | NaN | 57 | NaN | NaN | NaN | NaN | NaN |
8401 | 22820873 | NaN | NaN | NaN | NaN | NaN | NaN | "Tuning into Kids": reducing young children's ... | 2013 | Havighurst SS, K. R. Wilson, A. E. Harley, C. ... | ... | Both | 92 | 63 | 54 | 78 | NaN | NaN | NaN | NaN | NaN |
5 rows × 45 columns
Data cleaning
# Cast outcomes variables to floats
for col in ('Last FU Mean', 'Last FU SD',):
outcomes[col] = outcomes[col].astype(float)
# Recode age category
study_char['age_cat'] = study_char.AgeCat.replace({'PRE-K':1, 'SCHOOL':0, 'TEEN':2})
# Fix data label typo
outcomes['Measure Instrument'] = outcomes['Measure Instrument'].replace({'Eyberg Child Behaviour Inventory, problem Subscale':
'Eyberg Child Behaviour Inventory, Problem Subscale'})
outcomes.Units = outcomes.Units.replace({'scale': 'Scale'})
# Parse followup times and convert to months
split_fut = outcomes.loc[outcomes['Last FU Time'].notnull(), 'Last FU Time'].apply(lambda x: str(x).split(' ')[:2])
fut_months = [float(time)/52.*(unit=='weeks') or float(time) for time, unit in split_fut]
outcomes.loc[outcomes['Last FU Time'].notnull(), 'Last FU Time'] = fut_months
We are assumung all CBC Externalizing values over 50 are T-scores, and those under 50 are raw scores. This recodes those observations.
cbce_ind = outcomes['Measure Instrument'].apply(lambda x: x.startswith('Child Behavior Checklist, Externalizing'))
under_50 = outcomes['BL Mean']<50
outcomes.loc[cbce_ind & (under_50^True), 'Measure Instrument'] = 'Child Behavior Checklist, Externalizing (T Score)'
outcomes.loc[cbce_ind & under_50, 'Measure Instrument'] = 'Child Behavior Checklist, Externalizing'
Recode measure instrument variables
instrument = []
subtype = []
units = []
for i,row in outcomes.iterrows():
separator = row['Measure Instrument'].find(',')
if separator == -1:
separator = row['Measure Instrument'].find('-')
instrument.append(row['Measure Instrument'][:separator])
s = row['Measure Instrument'][separator+2:]
paren = s.find('(')
if paren > -1:
subtype.append(s[:paren-1])
units.append(s[paren+1:-1])
else:
subtype.append(s)
if s.endswith('scale'):
units.append('Scale')
else:
units.append('Score')
new_cols = pd.DataFrame({'instrument': instrument, 'subtype': subtype,
'units': units}, index=outcomes.index)
outcomes['Measure Instrument'].value_counts()
Eyberg Child Behaviour Inventory, Intensity Subscale 79 Eyberg Child Behaviour Inventory, Problem Subscale 57 Child Behavior Checklist, Externalizing (T Score) 44 Child Behavior Checklist, Externalizing 11 Strengths and Difficulties Questionnaire, Total Score 10 Strengths and Difficulties Questionnaire- Conduct Problems Scale 10 Eyberg Child Behaviour Inventory, Intensity Subscale (T Score) 10 Child Behavior Checklist, Conduct Problems (T Score) 6 Child Behavior Checklist, Aggression 4 Eyberg Child Behaviour Inventory, Problem Subscale (T Score) 4 Strengths and Difficulties Questionnaire- Emotional Symptoms Scale 4 DSM IV 2 Child Behavior Checklist, Rulebreaking 2 Strengths and Difficulties Questionnaire- Impact Score 2 Child Behavior Checklist, Conduct Problems 2 Strengths and Difficulties Questionnaire- Hyperactivity Scale 2 Child Behavior Checklist, Delinquent Subscale 2 Strengths and Difficulties Questionnaire, Percent Below Clinical Cutoff 2 dtype: int64
new_cols.head()
instrument | subtype | units | |
---|---|---|---|
0 | Eyberg Child Behaviour Inventory | Intensity Subscale | T Score |
1 | Eyberg Child Behaviour Inventory | Intensity Subscale | T Score |
2 | Eyberg Child Behaviour Inventory | Problem Subscale | T Score |
3 | Eyberg Child Behaviour Inventory | Problem Subscale | T Score |
4 | Child Behavior Checklist | Externalizing | Score |
# Append new columns
outcomes = outcomes.join(new_cols)
outcomes.intvn.value_counts()
wlc 47 tau 39 iypt 32 pcit 16 iyptndiyct 8 WLC 7 pppsd 7 PCIT 5 TAU 5 mst 5 modularndn 4 ppcp 4 iyct 4 it 4 pppo 4 pppe 3 spokes 3 pmto 3 pmtndp 3 snap 3 pcitc 3 CT 3 PLL 3 FBT 3 pmtsd 2 pmtnds 2 RST 2 hncte 2 pmtpa 2 pcitabb 2 TIK 2 projndsupport 2 hncstd 2 setpc 2 scip 1 iyptadv 1 cbt 1 pppstd 1 hnc 1 mcfi 1 pstnds 1 modularndcomm 1 sst 1 modularndclin 1 itpt 1 hitkashrut 1 coaching 1 kitkashrut 1 cpp 1 dtype: int64
Cross-tabulation of the outcome counts by measure instrument
pd.crosstab(outcomes['instrument'], outcomes['Outcome'])
Outcome | 01 Behavior, disruptive | 02 Behavior, aggression | 06 Behavior, fighting, destruction, violation | 08 Behavior, other |
---|---|---|---|---|
instrument | ||||
Child Behavior Checklist | 59 | 4 | 4 | 4 |
DSM I | 2 | 0 | 0 | 0 |
Eyberg Child Behaviour Inventory | 148 | 0 | 0 | 2 |
Strengths and Difficulties Questionnaire | 10 | 0 | 0 | 20 |
Distribution of age categories
study_char.AgeCat.value_counts()
SCHOOL 44 PRE-K 31 TEEN 18 dtype: int64
Frequencies of various intervention types
study_char['Intervention Type'].value_counts()
PHARM 20 IY-PT 10 MST 9 PCIT 7 IY-PT + IY-CT 4 BSFT 3 Triple P (enhanced) 3 PMTO 3 Triple-P (self-directed) 2 PCIT-ABB 2 CPS 2 OTH: Intensive treatment 2 UCPP 2 OTH: Modular treatment 2 IY-PT (nurse led) 2 PT 2 CBT 2 OTH: Family therapy 1 IY-CT + IY-PT 1 PMT (perceptive) 1 Triple-P (enhanced) 1 IY-PT + ADVANCE 1 PMT (practitioner assisted) 1 TIK 1 OTH: Modular treatment (community) 1 OTH: Project support 1 PLL 1 CBFI 1 HNC 1 OTH: Instrumental, emotional support & child management skills 1 IY 1 PONI 1 SNAP Under 12 OutReach Project (enhanced) 1 Coping Power (cultural adaptation) 1 Triple-P (online) 1 PHARM1 + PHARM2 1 OTH: MTP 1 IY-PT (brief) 1 SET-PC 1 OTH: Booster 1 SCIP (Social cognitive (Dodge's)) 1 SNAP Under 12 \nOutReach Project(ORP) 1 Parenting Group (SPOKES) 1 OTH: Child only treatment 1 MF-PEP + TAU 1 IY-PT + IY-CT + IY-TT 1 HNC (technology enhanced) 1 Family Therapy 1 OTH: FFT 1 SNAP Under 12 OutReach Project 1 PCIT (modified) 1 OTH: Modular (nurse administered) 1 OTH: Parents Plus Children's Program 1 OTH: Community Parent Education Program 1 RST 1 Length: 55, dtype: int64
KQ1 = study_char[study_char.KQ=='KQ1']
study_varnames = ['Year', 'age_cat', 'Geographic setting', 'Age mean (years) ', 'Age SD (years)',
'Age min (years)', 'Age max (years)', 'Proportion Male (%)']
study_vars = KQ1[study_varnames].rename(columns={'Geographic setting': 'country',
'Age mean (years) ': 'age_mean',
'Age SD (years)': 'age_sd',
'Age min (years)': 'age_min',
'Age max (years)': 'age_max',
'Proportion Male (%)': 'p_male'})
study_vars.head()
Year | age_cat | country | age_mean | age_sd | age_min | age_max | p_male | |
---|---|---|---|---|---|---|---|---|
RefID | ||||||||
23 | 2013 | 1 | USA | 2.80 | 0.61 | 2 | 4 | 62 |
100 | 2013 | 2 | USA | 14.60 | 1.30 | 11 | 18 | 83 |
103 | 2013 | 1 | USA | 5.67 | 1.72 | 3 | 8 | 53 |
141 | 2013 | 0 | USA | 9.90 | 1.30 | 8 | 11 | 73 |
156 | 2013 | 2 | Netherlands | 16.00 | 1.31 | 12 | 18 | 73 |
study_vars.p_male.hist()
<matplotlib.axes._subplots.AxesSubplot at 0x1166bbc50>
Proportion missing
study_vars.isnull().mean(0).round(2)
Year 0.00 age_cat 0.00 country 0.00 age_mean 0.10 age_sd 0.19 age_min 0.06 age_max 0.07 p_male 0.02 dtype: float64
Will assume the mean age for those which are missing is simply the midpoint between minimum and maximum values
est_means = study_vars.apply(lambda x: x.age_min + (x.age_max - x.age_min) / 2, axis=1)[study_vars.age_mean.isnull()]
study_vars.loc[study_vars.age_mean.isnull(), 'age_mean'] = est_means
study_vars.age_mean.isnull().sum()
2
outcomes_varnames = ['Ref ID', 'Measure Instrument', 'instrument', 'subtype', 'units',
'intvn', 'cc', 'pc', 'fc',
'BL N', 'BL Mean', 'BL SD',
'EOT \nN', 'EOT Mean', 'EOT \nSD', 'Last FU Time', 'Last FU N',
'Last FU Mean', 'Last FU SD', 'CS Group N', 'CS Mean', 'CS SD']
outcomes_vars = outcomes[outcomes_varnames].rename(columns={'Ref ID': 'RefID',
'Measure Instrument': 'measure_instrument',
'cc': 'child_component',
'pc': 'parent_component',
'fc': 'family_component',
'oc': 'other_component',
'BL N': 'baseline_n',
'BL Mean': 'baseline_mean',
'BL SD': 'baseline_sd',
'EOT \nN': 'end_treat_n',
'EOT Mean': 'end_treat_mean',
'EOT \nSD': 'end_treat_sd',
'Last FU Time': 'followup_time',
'Last FU N': 'followup_n',
'Last FU Mean': 'followup_mean',
'Last FU SD': 'followup_sd',
'CS Group N': 'change_n',
'CS Mean': 'change_mean',
'CS SD': 'change_sd'})
Recode intervention clasification
outcomes_vars.isnull().sum()
RefID 0 measure_instrument 0 instrument 0 subtype 0 units 0 intvn 0 child_component 0 parent_component 0 family_component 0 baseline_n 4 baseline_mean 17 baseline_sd 25 end_treat_n 74 end_treat_mean 67 end_treat_sd 74 followup_time 32 followup_n 103 followup_mean 118 followup_sd 126 change_n 241 change_mean 248 change_sd 253 dtype: int64
control = ((outcomes_vars.child_component^True) &
(outcomes_vars.parent_component^True) &
(outcomes_vars.family_component^True)).astype(int)
child_only = ((outcomes_vars.child_component) &
(outcomes_vars.parent_component^True) &
(outcomes_vars.family_component^True)).astype(int)
parent_only = ((outcomes_vars.child_component^True) &
(outcomes_vars.parent_component) &
(outcomes_vars.family_component^True)).astype(int)
outcomes_vars.ix[child_only.astype(bool), ['child_component', 'parent_component', 'family_component']]
child_component | parent_component | family_component | |
---|---|---|---|
125 | 1 | 0 | 0 |
126 | 1 | 0 | 0 |
128 | 1 | 0 | 0 |
129 | 1 | 0 | 0 |
162 | 1 | 0 | 0 |
188 | 1 | 0 | 0 |
194 | 1 | 0 | 0 |
195 | 1 | 0 | 0 |
234 | 1 | 0 | 0 |
236 | 1 | 0 | 0 |
238 | 1 | 0 | 0 |
multi_component = ((parent_only^True) & (child_only^True) & (control^True)).astype(int)
outcomes_vars['child_only'] = child_only
outcomes_vars['parent_only'] = parent_only
outcomes_vars['multi_component'] = multi_component
If requested, change PCIT to parent-only.
pcit_as_multicomponent = True
if not pcit_as_multicomponent:
outcomes_vars.loc[outcomes_vars.intvn=='pcit', 'parenr_only'] = 1
outcomes_vars.loc[outcomes_vars.intvn=='pcit', 'multi_component'] = 0
RefID | measure_instrument | instrument | subtype | units | intvn | child_component | parent_component | family_component | baseline_n | ... | followup_time | followup_n | followup_mean | followup_sd | change_n | change_mean | change_sd | child_only | parent_only | multi_component | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
40 | 870 | Child Behavior Checklist, Externalizing (T Score) | Child Behavior Checklist | Externalizing | T Score | pcit | 1 | 1 | 1 | 11 | ... | 4 | 11 | 47.90 | 6.10 | NaN | NaN | NaN | 0 | 0 | 1 |
42 | 870 | Eyberg Child Behaviour Inventory, Intensity Su... | Eyberg Child Behaviour Inventory | Intensity Subscale | T Score | pcit | 1 | 1 | 1 | 11 | ... | 4 | 11 | 43.00 | 4.30 | NaN | NaN | NaN | 0 | 0 | 1 |
44 | 870 | Eyberg Child Behaviour Inventory, Problem Subs... | Eyberg Child Behaviour Inventory | Problem Subscale | T Score | pcit | 1 | 1 | 1 | 11 | ... | 4 | 11 | 45.60 | 5.50 | NaN | NaN | NaN | 0 | 0 | 1 |
46 | 899 | Child Behavior Checklist, Externalizing (T Score) | Child Behavior Checklist | Externalizing | T Score | pcit | 1 | 1 | 1 | 19 | ... | 24 | 15 | 53.33 | 13.47 | NaN | NaN | NaN | 0 | 0 | 1 |
49 | 899 | Eyberg Child Behaviour Inventory, Intensity Su... | Eyberg Child Behaviour Inventory | Intensity Subscale | Scale | pcit | 1 | 1 | 1 | 19 | ... | 24 | 15 | 100.93 | 45.33 | NaN | NaN | NaN | 0 | 0 | 1 |
52 | 899 | Eyberg Child Behaviour Inventory, Problem Subs... | Eyberg Child Behaviour Inventory | Problem Subscale | Scale | pcit | 1 | 1 | 1 | 19 | ... | 24 | 15 | 14.47 | 19.10 | NaN | NaN | NaN | 0 | 0 | 1 |
131 | 2092 | Child Behavior Checklist, Externalizing | Child Behavior Checklist | Externalizing | Score | pcit | 1 | 1 | 1 | 17 | ... | 6 | 17 | 15.24 | 7.77 | NaN | NaN | NaN | 0 | 0 | 1 |
134 | 2092 | Eyberg Child Behaviour Inventory, Intensity Su... | Eyberg Child Behaviour Inventory | Intensity Subscale | Scale | pcit | 1 | 1 | 1 | 17 | ... | 6 | 17 | 117.47 | 31.69 | NaN | NaN | NaN | 0 | 0 | 1 |
170 | 3687 | Eyberg Child Behaviour Inventory, Intensity Su... | Eyberg Child Behaviour Inventory | Intensity Subscale | Scale | pcit | 1 | 1 | 1 | 37 | ... | 12 | 19 | 126.30 | 42.10 | NaN | NaN | NaN | 0 | 0 | 1 |
171 | 3687 | Eyberg Child Behaviour Inventory, Intensity Su... | Eyberg Child Behaviour Inventory | Intensity Subscale | Scale | pcit | 1 | 1 | 1 | 37 | ... | 12 | 19 | NaN | NaN | NaN | NaN | NaN | 0 | 0 | 1 |
172 | 3687 | Eyberg Child Behaviour Inventory, Intensity Su... | Eyberg Child Behaviour Inventory | Intensity Subscale | Scale | pcit | 1 | 1 | 1 | 37 | ... | 12 | 19 | NaN | NaN | NaN | NaN | NaN | 0 | 0 | 1 |
176 | 3687 | Eyberg Child Behaviour Inventory, Problem Subs... | Eyberg Child Behaviour Inventory | Problem Subscale | Scale | pcit | 1 | 1 | 1 | 37 | ... | 12 | 19 | 11.50 | 9.70 | NaN | NaN | NaN | 0 | 0 | 1 |
177 | 3687 | Eyberg Child Behaviour Inventory, Problem Subs... | Eyberg Child Behaviour Inventory | Problem Subscale | Scale | pcit | 1 | 1 | 1 | 37 | ... | 12 | 19 | NaN | NaN | NaN | NaN | NaN | 0 | 0 | 1 |
178 | 3687 | Eyberg Child Behaviour Inventory, Problem Subs... | Eyberg Child Behaviour Inventory | Problem Subscale | Scale | pcit | 1 | 1 | 1 | 37 | ... | 12 | 19 | NaN | NaN | NaN | NaN | NaN | 0 | 0 | 1 |
201 | 3915 | Eyberg Child Behaviour Inventory, Intensity Su... | Eyberg Child Behaviour Inventory | Intensity Subscale | Scale | pcit | 1 | 1 | 1 | 19 | ... | 12 | NaN | NaN | NaN | NaN | NaN | NaN | 0 | 0 | 1 |
203 | 3915 | Eyberg Child Behaviour Inventory, Problem Subs... | Eyberg Child Behaviour Inventory | Problem Subscale | Scale | pcit | 1 | 1 | 1 | 19 | ... | 12 | NaN | NaN | NaN | NaN | NaN | NaN | 0 | 0 | 1 |
16 rows × 25 columns
Obtain subset with non-missing EOT data
eot_subset = outcomes_vars[outcomes_vars.end_treat_mean.notnull() & outcomes_vars.end_treat_sd.notnull()].copy()
Calculate EOT difference
eot_subset['eot_diff_mean'] = eot_subset.end_treat_mean - eot_subset.baseline_mean
eot_subset['eot_diff_sd'] = eot_subset.baseline_sd + eot_subset.end_treat_sd
eot_subset['eot_diff_n'] = eot_subset[['baseline_n', 'end_treat_n']].min(1)
Distribution of baseline means among outcome metrics
for instrument in ('Eyberg Child Behaviour Inventory',
'Child Behavior Checklist',
'Strengths and Difficulties Questionnaire'):
eot_subset[eot_subset.instrument==instrument]['baseline_mean'].hist(by=eot_subset['subtype'],
sharex=True)
plt.suptitle(instrument);
eot_subset.instrument.value_counts()
Eyberg Child Behaviour Inventory 116 Child Behavior Checklist 40 Strengths and Difficulties Questionnaire 22 dtype: int64
eot_subset[eot_subset.RefID==441]
RefID | measure_instrument | instrument | subtype | units | intvn | child_component | parent_component | family_component | baseline_n | ... | followup_sd | change_n | change_mean | change_sd | child_only | parent_only | multi_component | eot_diff_mean | eot_diff_sd | eot_diff_n | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
14 | 441 | Eyberg Child Behaviour Inventory, Intensity Su... | Eyberg Child Behaviour Inventory | Intensity Subscale | Scale | iypt | 0 | 1 | 0 | 32 | ... | NaN | NaN | NaN | NaN | 0 | 1 | 0 | -31.40 | 46.80 | 32 |
15 | 441 | Eyberg Child Behaviour Inventory, Intensity Su... | Eyberg Child Behaviour Inventory | Intensity Subscale | Scale | wlc | 0 | 0 | 0 | 20 | ... | NaN | NaN | NaN | NaN | 0 | 0 | 0 | -5.80 | 49.60 | 20 |
16 | 441 | Eyberg Child Behaviour Inventory, Problem Subs... | Eyberg Child Behaviour Inventory | Problem Subscale | Scale | iypt | 0 | 1 | 0 | 24 | ... | NaN | NaN | NaN | NaN | 0 | 1 | 0 | -9.70 | 12.02 | 24 |
17 | 441 | Eyberg Child Behaviour Inventory, Problem Subs... | Eyberg Child Behaviour Inventory | Problem Subscale | Scale | wlc | 0 | 0 | 0 | 17 | ... | NaN | NaN | NaN | NaN | 0 | 0 | 0 | -2.88 | 14.59 | 17 |
4 rows × 28 columns
Several studies use multiple instruments and metrics within instruments
eot_subset.groupby(['RefID', 'instrument'])['subtype'].value_counts()
RefID instrument 103 Eyberg Child Behaviour Inventory Problem Subscale 2 Intensity Subscale 2 371 Eyberg Child Behaviour Inventory Problem Subscale 2 Intensity Subscale 2 441 Eyberg Child Behaviour Inventory Problem Subscale 2 Intensity Subscale 2 475 Eyberg Child Behaviour Inventory Intensity Subscale 2 539 Child Behavior Checklist Externalizing 2 Aggression 2 564 Eyberg Child Behaviour Inventory Problem Subscale 2 Intensity Subscale 2 836 Strengths and Difficulties Questionnaire Total Score 2 899 Child Behavior Checklist Externalizing 3 Eyberg Child Behaviour Inventory Problem Subscale 3 Intensity Subscale 3 ... 3960 Eyberg Child Behaviour Inventory Problem Subscale 2 7109 Eyberg Child Behaviour Inventory Problem Subscale 2 Intensity Subscale 2 Strengths and Difficulties Questionnaire Conduct Problems Scale 2 Emotional Symptoms Scale 2 7723 Child Behavior Checklist Externalizing 2 8192 Eyberg Child Behaviour Inventory Intensity Subscale 2 8231 Eyberg Child Behaviour Inventory Problem Subscale 4 Intensity Subscale 4 8289 Child Behavior Checklist Delinquent Subscale 2 Eyberg Child Behaviour Inventory Problem Subscale 2 Intensity Subscale 2 8292 Child Behavior Checklist Externalizing 2 8401 Eyberg Child Behaviour Inventory Problem Subscale 2 Intensity Subscale 2 Length: 68, dtype: int64
pd.crosstab(eot_subset.instrument, eot_subset.subtype)
subtype | Aggression | Conduct Problems | Conduct Problems Scale | Delinquent Subscale | Emotional Symptoms Scale | Externalizing | Hyperactivity Scale | Impact Score | Intensity Subscale | Problem Subscale | Total Score |
---|---|---|---|---|---|---|---|---|---|---|---|
instrument | |||||||||||
Child Behavior Checklist | 2 | 6 | 0 | 2 | 0 | 30 | 0 | 0 | 0 | 0 | 0 |
Eyberg Child Behaviour Inventory | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 68 | 48 | 0 |
Strengths and Difficulties Questionnaire | 0 | 0 | 6 | 0 | 4 | 0 | 2 | 2 | 0 | 0 | 8 |
x = eot_subset[eot_subset.instrument=='Eyberg Child Behaviour Inventory']
pd.crosstab(x.instrument, x.subtype)
subtype | Intensity Subscale | Problem Subscale |
---|---|---|
instrument | ||
Eyberg Child Behaviour Inventory | 68 | 48 |
x = eot_subset[eot_subset.instrument=='Child Behavior Checklist']
pd.crosstab(x.instrument, x.subtype)
subtype | Aggression | Conduct Problems | Delinquent Subscale | Externalizing |
---|---|---|---|---|
instrument | ||||
Child Behavior Checklist | 2 | 6 | 2 | 30 |
x = eot_subset[eot_subset.instrument=='Strengths and Difficulties Questionnaire']
pd.crosstab(x.instrument, x.subtype)
subtype | Conduct Problems Scale | Emotional Symptoms Scale | Hyperactivity Scale | Impact Score | Total Score |
---|---|---|---|---|---|
instrument | |||||
Strengths and Difficulties Questionnaire | 6 | 4 | 2 | 2 | 8 |
Merge study variables and outcomes
merged_vars = study_vars.merge(eot_subset, left_index=True, right_on='RefID')
merged_vars.shape
(176, 36)
For now, restrict to the three most prevalent metrics.
merged_vars.measure_instrument.value_counts()
Eyberg Child Behaviour Inventory, Intensity Subscale 64 Eyberg Child Behaviour Inventory, Problem Subscale 48 Child Behavior Checklist, Externalizing (T Score) 21 Strengths and Difficulties Questionnaire, Total Score 8 Child Behavior Checklist, Externalizing 7 Child Behavior Checklist, Conduct Problems (T Score) 6 Strengths and Difficulties Questionnaire- Conduct Problems Scale 6 Eyberg Child Behaviour Inventory, Intensity Subscale (T Score) 4 Strengths and Difficulties Questionnaire- Emotional Symptoms Scale 4 Strengths and Difficulties Questionnaire- Hyperactivity Scale 2 Strengths and Difficulties Questionnaire- Impact Score 2 Child Behavior Checklist, Delinquent Subscale 2 Child Behavior Checklist, Aggression 2 dtype: int64
Take only the top 3 measures
top_measures = 3
merged_vars.RefID.unique()
array([ 103, 371, 441, 475, 539, 564, 836, 899, 993, 1236, 1245, 1511, 1585, 1875, 1951, 2092, 2117, 2219, 2239, 2347, 3211, 3225, 3399, 3495, 3687, 3716, 3766, 3915, 3960, 7109, 7723, 8192, 8231, 8289, 8292, 8401])
analysis_subset = merged_vars[merged_vars.measure_instrument.isin(merged_vars.measure_instrument.value_counts().index[:top_measures])]
analysis_subset.groupby('measure_instrument')['baseline_mean'].max()
measure_instrument Child Behavior Checklist, Externalizing (T Score) 77.10 Eyberg Child Behaviour Inventory, Intensity Subscale 186.44 Eyberg Child Behaviour Inventory, Problem Subscale 28.62 Name: baseline_mean, dtype: float64
analysis_subset.groupby('measure_instrument').baseline_mean.hist()
measure_instrument Child Behavior Checklist, Externalizing (T Score) Axes(0.125,0.125;0.775x0.775) Eyberg Child Behaviour Inventory, Intensity Subscale Axes(0.125,0.125;0.775x0.775) Eyberg Child Behaviour Inventory, Problem Subscale Axes(0.125,0.125;0.775x0.775) Name: baseline_mean, dtype: object
analysis_subset['baseline_mean'].hist(by=analysis_subset['measure_instrument'],sharex=True)
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x116fe2550>, <matplotlib.axes._subplots.AxesSubplot object at 0x117105e80>], [<matplotlib.axes._subplots.AxesSubplot object at 0x1171536a0>, <matplotlib.axes._subplots.AxesSubplot object at 0x1171892e8>]], dtype=object)
analysis_subset['eot_diff_mean'].hist(by=analysis_subset['measure_instrument'],sharex=True)
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x116a82860>, <matplotlib.axes._subplots.AxesSubplot object at 0x116e75940>], [<matplotlib.axes._subplots.AxesSubplot object at 0x1165d53c8>, <matplotlib.axes._subplots.AxesSubplot object at 0x1169e3588>]], dtype=object)
for x in analysis_subset.measure_instrument.unique():
plt.figure()
analysis_subset[analysis_subset.measure_instrument==x].baseline_mean.plot(kind='kde', alpha=0.4, grid=False)
analysis_subset[analysis_subset.measure_instrument==x].end_treat_mean.plot(kind='kde', alpha=0.4, grid=False)
plt.gca().set_xlabel(x)
Number of studies in analysis subset
unique_studies = analysis_subset.RefID.unique().tolist()
len(unique_studies)
30
analysis_subset.RefID.unique()
array([ 103, 371, 441, 539, 564, 899, 1236, 1245, 1511, 1585, 1875, 2092, 2117, 2219, 3211, 3225, 3399, 3495, 3687, 3716, 3766, 3915, 3960, 7109, 7723, 8192, 8231, 8289, 8292, 8401])
analysis_subset.groupby(['RefID', 'measure_instrument'])['measure_instrument'].count().unstack().fillna(0)
measure_instrument | Child Behavior Checklist, Externalizing (T Score) | Eyberg Child Behaviour Inventory, Intensity Subscale | Eyberg Child Behaviour Inventory, Problem Subscale |
---|---|---|---|
RefID | |||
103 | 0 | 2 | 2 |
371 | 0 | 2 | 2 |
441 | 0 | 2 | 2 |
539 | 2 | 0 | 0 |
564 | 0 | 2 | 2 |
899 | 3 | 3 | 3 |
1236 | 0 | 6 | 6 |
1245 | 2 | 0 | 0 |
1511 | 3 | 0 | 0 |
1585 | 0 | 2 | 2 |
1875 | 3 | 0 | 0 |
2092 | 0 | 3 | 0 |
2117 | 2 | 0 | 0 |
2219 | 0 | 3 | 3 |
3211 | 0 | 2 | 2 |
3225 | 0 | 2 | 2 |
3399 | 2 | 0 | 0 |
3495 | 0 | 4 | 0 |
3687 | 0 | 6 | 6 |
3716 | 0 | 2 | 2 |
3766 | 0 | 9 | 0 |
3915 | 0 | 2 | 2 |
3960 | 0 | 0 | 2 |
7109 | 0 | 2 | 2 |
7723 | 2 | 0 | 0 |
8192 | 0 | 2 | 0 |
8231 | 0 | 4 | 4 |
8289 | 0 | 2 | 2 |
8292 | 2 | 0 | 0 |
8401 | 0 | 2 | 2 |
Study counts by
data_by_refid = analysis_subset.groupby('RefID')
data_by_refid.apply(lambda x: (x[['child_component', 'parent_component', 'multi_component']].sum()>1)).sum()
child_component 9 parent_component 22 multi_component 8 dtype: int64
data_by_refid.apply(lambda x: (x[['child_component', 'parent_component', 'multi_component']].sum(1)==0).sum()>0).astype(int)
RefID 103 0 371 1 441 1 539 1 564 1 899 1 1236 1 1245 0 1511 1 1585 1 1875 1 2092 1 2117 1 2219 1 3211 1 3225 1 3399 1 3495 1 3687 1 3716 1 3766 1 3915 1 3960 0 7109 1 7723 1 8192 1 8231 1 8289 0 8292 1 8401 1 dtype: int64
data_by_refid.apply(lambda x: x.age_cat.unique()[0]).value_counts()
1 17 0 11 2 2 dtype: int64
We are restricting the analysis to the 4 most prevalent measure instruments in the database.
unique_measures = analysis_subset.measure_instrument.unique().tolist()
k = len(unique_measures)
unique_measures
['Eyberg Child Behaviour Inventory, Intensity Subscale', 'Eyberg Child Behaviour Inventory, Problem Subscale', 'Child Behavior Checklist, Externalizing (T Score)']
Three intervention components were coded:
child_component
parent_component
multi_component
p_male, age_cat, intvn = analysis_subset[['p_male', 'age_cat', 'intvn']].values.T
child_only, parent_only, multi_component = analysis_subset[['child_only', 'parent_only',
'multi_component']].values.T
change_n, change_mean, change_sd = analysis_subset[['eot_diff_n', 'eot_diff_mean', 'eot_diff_sd']].values.T
school = (analysis_subset.age_cat.values==0).astype(int)
pre_k = (analysis_subset.age_cat.values==1).astype(int)
teen = (analysis_subset.age_cat.values==2).astype(int)
The response variable is a multivariate normal of dimension k=3, for each of the measure instruments:
Means for each study are a draw from a multivariate normal.
wishart = False
mu = pm.Normal('mu', 0, 0.001, value=[0]*k)
if wishart:
T = pm.Wishart('T', k+1, np.eye(k), value=np.eye(k))
m = [pm.MvNormal('m_{}'.format(i), mu, T, value=[0]*k) for i in range(len(unique_studies))]
else:
sigmas = pm.Uniform('sigmas', 0, 100, value=[10]*k)
rhos = pm.Uniform('rhos', -1, 1, value=[0]*int(comb(k, 2)))
Sigma = pm.Lambda('Sigma', lambda s=sigmas, r=rhos: np.array([[s[0]**2, s[0]*s[1]*r[0], s[0]*s[2]*r[1]],
[s[0]*s[1]*r[0], s[1]**2, s[1]*s[2]*r[2]],
[s[0]*s[2]*r[1], s[1]*s[2]*r[2], s[2]**2]]))
m = [pm.MvNormalCov('m_{}'.format(i), mu, Sigma, value=[0]*k) for i in range(len(unique_studies))]
Unique intervention labels for each component; we will use these for component random effects.
unique_child_intvn = np.unique(intvn[child_only.astype(bool)]).tolist()
unique_parent_intvn = np.unique(intvn[parent_only.astype(bool)]).tolist()
unique_multi_intvn = np.unique(intvn[multi_component.astype(bool)]).tolist()
# Indices to random effect labels
child_component_index = [unique_child_intvn.index(x) for x in intvn[child_only.astype(bool)]]
parent_component_index = [unique_parent_intvn.index(x) for x in intvn[parent_only.astype(bool)]]
multi_component_index = [unique_multi_intvn.index(x) for x in intvn[multi_component.astype(bool)]]
Treatment component random effects
$$X_i = \left[{ \begin{array}{c} {x_c}\\ {x_p}\\ {x_m}\\ \end{array} }\right]_i$$$$\begin{aligned} \beta_j^{(c)} &\sim N(\mu_{\beta}^{(c)},\tau_{\beta}^{(c)}) \\ \beta_j^{(p)} &\sim N(\mu_{\beta}^{(p)},\tau_{\beta}^{(p)}) \\ \beta_j^{(m)} &\sim N(\mu_{\beta}^{(m)},\tau_{\beta}^{(m)}) \end{aligned}$$mu_beta = pm.Normal('mu_beta', 0, 0.001, value=[0]*3)
# sig_beta = pm.Uniform('sig_beta', 0, 100, value=1)
# tau_beta = sig_beta ** -2
tau_beta = pm.Gamma('tau_beta', 1, 0.1, value=1)
beta_c = pm.Normal('beta_c', mu_beta[0], tau_beta, value=[0]*len(unique_child_intvn))
beta_p = pm.Normal('beta_p', mu_beta[1], tau_beta, value=[0]*len(unique_parent_intvn))
beta_m = pm.Normal('beta_m', mu_beta[2], tau_beta, value=[0]*len(unique_multi_intvn))
b_c = pm.Lambda('b_c', lambda b=beta_c:
np.array([b[unique_child_intvn.index(x)] if child_only[i] else 0 for i,x in enumerate(intvn)]))
b_p = pm.Lambda('b_p', lambda b=beta_p:
np.array([b[unique_parent_intvn.index(x)] if parent_only[i] else 0 for i,x in enumerate(intvn)]))
b_m = pm.Lambda('b_m', lambda b=beta_m:
np.array([b[unique_multi_intvn.index(x)] if multi_component[i] else 0 for i,x in enumerate(intvn)]))
Calculate the probability of being the best intervention.
best = pm.Lambda('best', lambda b=mu_beta: (b==b.min()).astype(int))
Interaction of parent and multi-component with pre-k children.
interaction = False
if interaction:
beta_pk_p = pm.Normal('beta_pk_p', 0, 1e-5, value=0)
beta_pk_m = pm.Normal('beta_pk_m', 0, 1e-5, value=0)
b_pk_p = pm.Lambda('b_pk_p', lambda b=beta_pk_p: b * parent_only * pre_k)
b_pk_m = pm.Lambda('b_pk_m', lambda b=beta_pk_m: b * multi_component * pre_k)
betas = b_c + b_p + b_m
if interaction:
betas = betas + b_pk_p + b_pk_m
Covariate effects of age and percent female.
$$\alpha \sim N(0, 1e5)$$alpha_age = pm.Normal('alpha_age', 0, 1e-5, value=[1,2])
Unique study ID (RefID
) and measure ID (measure_instrument
) values.
study_id = [unique_studies.index(x) for x in analysis_subset.RefID]
measure_id = [unique_measures.index(x) for x in analysis_subset.measure_instrument]
Calculate expected response (treatment difference) as a function of treatment and covariates.
$$\theta_i = m_{j[i]k} + X_i \beta + \alpha x_{age}$$baseline_sd = analysis_subset.baseline_sd.values
@pm.deterministic
def theta(m=m, betas=betas, alpha_age=alpha_age):
mi = [m[i][j] for i,j in zip(study_id, measure_id)]
age_effect = np.array([alpha_age[a-1] if a else 0 for a in age_cat])
return(mi + baseline_sd*(betas + age_effect))
Expected treatment effect for pre-K undergoing multi-component intervention, measused by Eyberg Child Behaviour Inventory, Intensity Subscale
if wishart:
baseline = pm.MvNormal('baseline', mu, T, value=[0]*k)
else:
baseline = pm.MvNormalCov('baseline', mu, Sigma, value=[0]*k)
Calculation of expected differences from baseline for each instrument.
# ECBI intensity subscale
ecbi_intensity_sd = baseline_sd[np.array(measure_id)==0].mean()
prek_intensity_pred = pm.Lambda('prek_intensity_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*(b + a[0]) )
school_intensity_pred = pm.Lambda('school_intensity_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*b )
teen_intensity_pred = pm.Lambda('teen_intensity_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*(b + a[1]) )
intensity_diff = pm.Lambda('intensity_diff',
lambda base=baseline, prek=prek_intensity_pred, school=school_intensity_pred,
teen=teen_intensity_pred: np.array([prek, school, teen])-base[0])
# ECBI problem subscale
ecbi_problem_sd = baseline_sd[np.array(measure_id)==1].mean()
prek_problem_pred = pm.Lambda('prek_problem_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*(b + a[0]) )
school_problem_pred = pm.Lambda('school_problem_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*b )
teen_problem_pred = pm.Lambda('teen_problem_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*(b + a[1]) )
problem_diff = pm.Lambda('problem_diff',
lambda base=baseline, prek=prek_problem_pred, school=school_problem_pred,
teen=teen_problem_pred: np.array([prek, school, teen])-base[1])
# CBC T-score
cbct_sd = baseline_sd[np.array(measure_id)==2].mean()
prek_tscore_pred = pm.Lambda('prek_tscore_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[2] + cbct_sd*(b + a[0]) )
school_tscore_pred = pm.Lambda('school_tscore_pred',
lambda mu=baseline, b=mu_beta: mu[2] + cbct_sd*b )
teen_tscore_pred = pm.Lambda('teen_tscore_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[2] + cbct_sd*(b + a[1]) )
tscore_diff = pm.Lambda('tscore_diff',
lambda base=baseline, prek=prek_tscore_pred, school=school_tscore_pred,
teen=teen_tscore_pred: np.array([prek, school, teen])-base[2])
Finally, the likelihood is just a normal distribution, with the observed standard error of the treatment effect as the standard deviation of the estimates.
$$d_i \sim N(\theta_i, \hat{\sigma}^2)$$change_se = change_sd/np.sqrt(change_n)
d = pm.Normal('d', theta, change_se**-2, observed=True, value=change_mean)
Posterior predictive samples, if desired.
include_gof_samples = False
if include_gof_samples:
d_sim = pm.Normal('d_sim', theta, change_se**-2, size=len(change_mean))
Instantiate MCMC model, and use Adaptive Metropolis for some vector-valued variables.
M = pm.MCMC(locals())
M.use_step_method(pm.AdaptiveMetropolis, m)
M.use_step_method(pm.AdaptiveMetropolis, mu_beta)
M.use_step_method(pm.AdaptiveMetropolis, [beta_c, beta_p, beta_m])
Run two chains to allow for convergence evaluation.
M.sample(100000, 90000)
[-----------------100%-----------------] 100000 of 100000 complete in 823.3 sec
M.sample(100000, 90000)
[-----------------100%-----------------] 100000 of 100000 complete in 511.4 sec
Summary of estimates of intervention components
if wishart:
pm.Matplot.plot(T)
pm.Matplot.plot(mu_beta)
Plotting mu_beta_0 Plotting mu_beta_1 Plotting mu_beta_2
pm.Matplot.summary_plot([mu_beta], custom_labels=['Child component', 'Parent component',
'Multi-component'])
mu_beta.summary()
mu_beta: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -1.02 0.303 0.013 [-1.594 -0.434] -1.208 0.178 0.008 [-1.54 -0.859] -1.335 0.204 0.01 [-1.709 -0.929] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -1.599 -1.225 -1.019 -0.821 -0.435 -1.56 -1.327 -1.202 -1.092 -0.873 -1.708 -1.473 -1.339 -1.2 -0.926
best.summary()
best: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.139 0.346 0.011 [ 0. 1.] 0.249 0.433 0.015 [ 0. 1.] 0.612 0.487 0.02 [ 0. 1.] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 1.0
pm.Matplot.plot(mu_beta)
Plotting mu_beta_0 Plotting mu_beta_1 Plotting mu_beta_2
if interaction:
pm.Matplot.summary_plot([beta_pk_m, beta_pk_p])
Difference means by measure instrument.
plt.figure(figsize=(24,4))
pm.Matplot.summary_plot([mu], custom_labels=unique_measures)
pm.Matplot.plot(mu)
Plotting mu_0 Plotting mu_1 Plotting mu_2
if not wishart:
plt.figure(figsize=(24,4))
pm.Matplot.summary_plot([sigmas], custom_labels=unique_measures)
sigmas.summary()
sigmas: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.283 0.043 0.003 [ 0.205 0.371] 0.086 0.022 0.002 [ 0.05 0.132] 0.072 0.029 0.003 [ 0.03 0.13] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.21 0.253 0.279 0.31 0.38 0.052 0.07 0.083 0.1 0.137 0.033 0.05 0.065 0.093 0.138
pm.Matplot.plot(sigmas)
Plotting sigmas_0 Plotting sigmas_1 Plotting sigmas_2
if not wishart:
pm.Matplot.summary_plot([rhos], custom_labels=['rho12', 'rho13', 'rho23'])
Age effects for pre-k (top) and teen (bottom) groups, relative to pre-teen.
pm.Matplot.summary_plot([alpha_age], custom_labels=['pre-K', 'teen'])
alpha_age.summary()
alpha_age: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -0.312 0.069 0.003 [-0.445 -0.181] -0.052 0.236 0.013 [-0.504 0.415] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -0.445 -0.359 -0.31 -0.264 -0.18 -0.52 -0.21 -0.04 0.107 0.406
pm.Matplot.plot(alpha_age)
Plotting alpha_age_0 Plotting alpha_age_1
traces = [[school_intensity_pred, prek_intensity_pred, teen_intensity_pred],
[school_problem_pred, prek_problem_pred, teen_problem_pred],
[school_tscore_pred, prek_tscore_pred, teen_tscore_pred]]
sb.set(style="white", palette="hot")
sb.despine(left=True)
colors = '#7fc97f','#beaed4','#fdc086','#386cb0'
#colors = '#a6611a','#dfc27d','#80cdc1','#018571'
# colors = ('#fef0d9','#fdcc8a','#fc8d59','#d7301f')[::-1]
# colors = sb.cubehelix_palette(4, start=2, rot=0, dark=.25, light=.75, reverse=False)
titles = ['Pre-K Children', 'School Children', 'Teenage Children']
for i,measure in enumerate(unique_measures):
f, axes = plt.subplots(1, 3, figsize=(16, 6), sharex=True, sharey=True)
measure_traces = traces[i]
for j, trace in enumerate(np.array(measure_traces)[[1,0,2]]):
x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
c1, p1, m1 = trace.trace().T
g = sb.distplot(x + c1, color=colors[0], ax=axes[j])
sb.distplot(x + p1, color=colors[1], ax=axes[j])
sb.distplot(x + m1, color=colors[2], ax=axes[j])
if j:
age_effect = alpha_age.trace()[:, j-1]
else:
age_effect = 0
sb.distplot(x + baseline.trace()[:, i] + age_effect, color=colors[3], ax=axes[j]);
g.set_title(titles[j])
if not j:
g.set_ylabel('Frequency')
g.legend(g.lines, ['Child-only', 'Parent-only', 'Multi-component', 'Control/TAU'], loc='upper left')
if j==1:
g.set_xlabel(measure)
<matplotlib.figure.Figure at 0x11d4991d0>
Generate threshold proportions
thresholds = [127, # ECBI, intensity
11, # ECBI, problem
60] # CBC t-score
age_labels = ['school', 'pre-k', 'teen']
estimates = []
for i,measure in enumerate(unique_measures):
measure_traces = traces[i]
cutoff = thresholds[i]
for j, trace in enumerate(measure_traces):
x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
c1, p1, m1 = trace.trace().T
_child_only = ((x + c1) > cutoff).mean().round(2)
_parent_only = ((x + p1) > cutoff).mean().round(2)
_multi_component = ((x + m1) > cutoff).mean().round(2)
if j:
age_effect = alpha_age.trace()[:, j-1]
else:
age_effect = 0
_control = ((x + baseline.trace()[:, i] + age_effect) > cutoff).mean().round(2)
print('\n{0}, {1}'.format(measure, age_labels[j]))
print('Child-only: {0}\nParent-only: {1}\nMulti-component: {2}\nControl/TAU: {3}'.format(_child_only,
_parent_only,
_multi_component,
_control))
estimates.append((measure, age_labels[j], _child_only, _parent_only, _multi_component, _control))
Eyberg Child Behaviour Inventory, Intensity Subscale, school Child-only: 0.6 Parent-only: 0.48 Multi-component: 0.39 Control/TAU: 0.94 Eyberg Child Behaviour Inventory, Intensity Subscale, pre-k Child-only: 0.38 Parent-only: 0.24 Multi-component: 0.19 Control/TAU: 0.94 Eyberg Child Behaviour Inventory, Intensity Subscale, teen Child-only: 0.56 Parent-only: 0.44 Multi-component: 0.36 Control/TAU: 0.94 Eyberg Child Behaviour Inventory, Problem Subscale, school Child-only: 0.78 Parent-only: 0.75 Multi-component: 0.7 Control/TAU: 1.0 Eyberg Child Behaviour Inventory, Problem Subscale, pre-k Child-only: 0.64 Parent-only: 0.56 Multi-component: 0.44 Control/TAU: 1.0 Eyberg Child Behaviour Inventory, Problem Subscale, teen Child-only: 0.75 Parent-only: 0.69 Multi-component: 0.64 Control/TAU: 1.0 Child Behavior Checklist, Externalizing (T Score), school Child-only: 0.57 Parent-only: 0.42 Multi-component: 0.35 Control/TAU: 1.0 Child Behavior Checklist, Externalizing (T Score), pre-k Child-only: 0.35 Parent-only: 0.24 Multi-component: 0.19 Control/TAU: 1.0 Child Behavior Checklist, Externalizing (T Score), teen Child-only: 0.53 Parent-only: 0.39 Multi-component: 0.33 Control/TAU: 1.0
thresholds = [127, # ECBI, intensity
11, # ECBI, problem
60] # CBC t-score
age_labels = ['school', 'pre-k', 'teen']
estimates = []
for i,measure in enumerate(unique_measures):
measure_traces = traces[i]
cutoff = thresholds[i]
for j, trace in enumerate(measure_traces):
x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
c1, p1, m1 = trace.trace().T
_child_only = ((x + c1) > cutoff).mean().round(2)
_parent_only = ((x + p1) > cutoff).mean().round(2)
_multi_component = ((x + m1) > cutoff).mean().round(2)
if j:
age_effect = alpha_age.trace()[:, j-1]
else:
age_effect = 0
_control = ((x + baseline.trace()[:, i] + age_effect) > cutoff).mean().round(2)
print('\n{0}, {1}'.format(measure, age_labels[j]))
print('Child-only: {0}\nParent-only: {1}\nMulti-component: {2}\nControl/TAU: {3}'.format(_child_only,
_parent_only,
_multi_component,
_control))
estimates.append((measure, age_labels[j], _child_only, _parent_only, _multi_component, _control))
Eyberg Child Behaviour Inventory, Intensity Subscale, school Child-only: 0.59 Parent-only: 0.48 Multi-component: 0.39 Control/TAU: 0.94 Eyberg Child Behaviour Inventory, Intensity Subscale, pre-k Child-only: 0.39 Parent-only: 0.25 Multi-component: 0.2 Control/TAU: 0.94 Eyberg Child Behaviour Inventory, Intensity Subscale, teen Child-only: 0.56 Parent-only: 0.45 Multi-component: 0.36 Control/TAU: 0.94 Eyberg Child Behaviour Inventory, Problem Subscale, school Child-only: 0.78 Parent-only: 0.75 Multi-component: 0.7 Control/TAU: 1.0 Eyberg Child Behaviour Inventory, Problem Subscale, pre-k Child-only: 0.63 Parent-only: 0.56 Multi-component: 0.42 Control/TAU: 1.0 Eyberg Child Behaviour Inventory, Problem Subscale, teen Child-only: 0.76 Parent-only: 0.7 Multi-component: 0.65 Control/TAU: 1.0 Child Behavior Checklist, Externalizing (T Score), school Child-only: 0.57 Parent-only: 0.42 Multi-component: 0.35 Control/TAU: 1.0 Child Behavior Checklist, Externalizing (T Score), pre-k Child-only: 0.35 Parent-only: 0.22 Multi-component: 0.19 Control/TAU: 1.0 Child Behavior Checklist, Externalizing (T Score), teen Child-only: 0.53 Parent-only: 0.4 Multi-component: 0.33 Control/TAU: 1.0
estimates
[('Eyberg Child Behaviour Inventory, Intensity Subscale', 'school', 0.58999999999999997, 0.47999999999999998, 0.39000000000000001, 0.93999999999999995), ('Eyberg Child Behaviour Inventory, Intensity Subscale', 'pre-k', 0.39000000000000001, 0.25, 0.20000000000000001, 0.93999999999999995), ('Eyberg Child Behaviour Inventory, Intensity Subscale', 'teen', 0.56000000000000005, 0.45000000000000001, 0.35999999999999999, 0.93999999999999995), ('Eyberg Child Behaviour Inventory, Problem Subscale', 'school', 0.78000000000000003, 0.75, 0.69999999999999996, 1.0), ('Eyberg Child Behaviour Inventory, Problem Subscale', 'pre-k', 0.63, 0.56000000000000005, 0.41999999999999998, 1.0), ('Eyberg Child Behaviour Inventory, Problem Subscale', 'teen', 0.76000000000000001, 0.69999999999999996, 0.65000000000000002, 1.0), ('Child Behavior Checklist, Externalizing (T Score)', 'school', 0.56999999999999995, 0.41999999999999998, 0.34999999999999998, 1.0), ('Child Behavior Checklist, Externalizing (T Score)', 'pre-k', 0.34999999999999998, 0.22, 0.19, 1.0), ('Child Behavior Checklist, Externalizing (T Score)', 'teen', 0.53000000000000003, 0.40000000000000002, 0.33000000000000002, 1.0)]
np.savetxt('estimates.csv', np.array([e[2:] for e in estimates]), delimiter='\t', fmt='%1.2f')
pd.DataFrame(estimates, columns=['Instrument', 'Age group', 'Child-only',
'Parent-only', 'Multi-component', 'TAU/Control']).to_csv('outcome_proportions.csv')
Summary statistics for TAU/control
baseline.summary()
baseline: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.031 0.291 0.004 [-0.519 0.632] -0.02 0.094 0.003 [-0.211 0.165] 0.026 0.08 0.002 [-0.137 0.193] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -0.545 -0.159 0.03 0.222 0.607 -0.219 -0.077 -0.017 0.04 0.159 -0.134 -0.02 0.024 0.07 0.199
Summary statistics for intensity subscale
prek_intensity_pred.summary()
school_intensity_pred.summary()
teen_intensity_pred.summary()
prek_intensity_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -35.29 8.205 0.34 [-50.713 -19.181] -40.285 4.587 0.171 [-49.584 -31.483] -43.643 5.454 0.247 [-53.763 -32.583] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -50.772 -40.985 -35.293 -29.796 -19.219 -49.591 -43.33 -40.121 -37.254 -31.486 -53.702 -47.416 -43.756 -40.263 -32.468 school_intensity_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -27.021 8.041 0.339 [-42.202 -11.295] -32.016 4.722 0.205 [-40.865 -22.625] -35.374 5.423 0.256 [-45.526 -24.697] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -42.418 -32.456 -26.945 -21.717 -11.465 -41.426 -35.201 -31.867 -28.924 -23.111 -45.369 -39.059 -35.509 -31.776 -24.496 teen_intensity_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -28.407 9.275 0.392 [-46.792 -10.405] -33.403 7.802 0.405 [-49.639 -19.162] -36.76 7.263 0.327 [-51.158 -22.201] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -47.492 -34.376 -28.167 -22.047 -10.896 -49.563 -38.588 -33.079 -27.927 -19.065 -51.943 -41.429 -36.474 -31.982 -22.863
Summary statistics for problem subscale
prek_problem_pred.summary()
school_problem_pred.summary()
teen_problem_pred.summary()
prek_problem_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -8.249 1.912 0.079 [-11.863 -4.519] -9.413 1.07 0.04 [-11.5 -7.282] -10.195 1.273 0.058 [-12.542 -7.608] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -11.855 -9.572 -8.249 -6.977 -4.51 -11.57 -10.13 -9.377 -8.705 -7.338 -12.545 -11.073 -10.224 -9.409 -7.61 school_problem_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -6.323 1.873 0.079 [-9.855 -2.683] -7.486 1.101 0.048 [-9.599 -5.353] -8.269 1.265 0.06 [-10.584 -5.689] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -9.899 -7.587 -6.307 -5.088 -2.709 -9.682 -8.221 -7.449 -6.759 -5.414 -10.623 -9.125 -8.305 -7.431 -5.714 teen_problem_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -6.646 2.16 0.091 [-10.971 -2.454] -7.809 1.817 0.094 [-11.621 -4.519] -8.592 1.692 0.076 [-12.091 -5.347] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -11.132 -8.032 -6.586 -5.164 -2.558 -11.59 -9.019 -7.737 -6.529 -4.463 -12.108 -9.682 -8.522 -7.475 -5.349
Summary statistics for t scores
prek_tscore_pred.summary()
school_tscore_pred.summary()
teen_tscore_pred.summary()
prek_tscore_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -10.999 2.56 0.106 [-15.917 -6.081] -12.558 1.431 0.053 [-15.472 -9.819] -13.606 1.7 0.077 [-16.771 -10.167] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -15.847 -12.78 -11.0 -9.295 -5.992 -15.466 -13.508 -12.509 -11.615 -9.809 -16.747 -14.781 -13.637 -12.55 -10.128 school_tscore_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -8.418 2.509 0.106 [-13.219 -3.612] -9.977 1.474 0.064 [-12.739 -7.048] -11.025 1.69 0.08 [-14.198 -7.716] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -13.208 -10.12 -8.398 -6.763 -3.591 -12.919 -10.964 -9.933 -9.009 -7.185 -14.135 -12.175 -11.067 -9.904 -7.647 teen_tscore_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -8.851 2.894 0.122 [-14.691 -3.316] -10.41 2.435 0.127 [-15.431 -5.927] -11.458 2.265 0.102 [-15.877 -6.845] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -14.816 -10.707 -8.771 -6.863 -3.387 -15.465 -12.031 -10.307 -8.704 -5.949 -16.197 -12.911 -11.365 -9.965 -7.145
Each summary corresponds to teh differences between baseline and age x component categories for each outcome. The order is:
intensity_diff.summary()
problem_diff.summary()
tscore_diff.summary()
intensity_diff: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -35.321 8.202 0.34 [-51.054 -19.576] -40.316 4.577 0.17 [-49.029 -30.961] -43.674 5.444 0.247 [-54.092 -33.006] -27.052 8.039 0.34 [-42.292 -11.508] -32.048 4.713 0.205 [-40.843 -22.784] -35.405 5.413 0.257 [-45.328 -24.656] -28.438 9.274 0.392 [-46.906 -10.498] -33.434 7.797 0.405 [-49.647 -19.242] -36.791 7.256 0.327 [-50.733 -21.894] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -50.762 -41.004 -35.33 -29.856 -19.205 -49.612 -43.372 -40.151 -37.292 -31.516 -53.751 -47.449 -43.761 -40.3 -32.577 -42.412 -32.493 -27.028 -21.79 -11.538 -41.393 -35.195 -31.895 -28.957 -23.15 -45.312 -39.073 -35.529 -31.827 -24.573 -47.61 -34.378 -28.168 -22.033 -10.89 -49.645 -38.6 -33.105 -27.968 -19.234 -51.945 -41.446 -36.494 -32.013 -22.883 problem_diff: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -8.229 1.911 0.079 [-11.895 -4.561] -9.393 1.066 0.04 [-11.423 -7.213] -10.175 1.268 0.058 [-12.602 -7.69 ] -6.303 1.873 0.079 [-9.853 -2.681] -7.466 1.098 0.048 [-9.516 -5.308] -8.249 1.261 0.06 [-10.561 -5.744] -6.626 2.161 0.091 [-10.928 -2.446] -7.789 1.817 0.094 [-11.567 -4.483] -8.572 1.691 0.076 [-11.82 -5.101] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -11.827 -9.553 -8.231 -6.956 -4.474 -11.559 -10.105 -9.354 -8.688 -7.343 -12.523 -11.055 -10.195 -9.389 -7.59 -9.881 -7.57 -6.297 -5.077 -2.688 -9.644 -8.2 -7.431 -6.746 -5.393 -10.557 -9.103 -8.277 -7.415 -5.725 -11.092 -8.009 -6.563 -5.133 -2.537 -11.566 -8.993 -7.713 -6.516 -4.481 -12.102 -9.656 -8.502 -7.459 -5.331 tscore_diff: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -11.025 2.56 0.106 [-15.936 -6.111] -12.584 1.429 0.053 [-15.304 -9.664] -13.632 1.699 0.077 [-16.885 -10.303] -8.444 2.509 0.106 [-13.201 -3.592] -10.003 1.471 0.064 [-12.749 -7.112] -11.051 1.69 0.08 [-14.149 -7.696] -8.877 2.895 0.122 [-14.641 -3.277] -10.436 2.434 0.126 [-15.497 -6.006] -11.484 2.265 0.102 [-15.836 -6.834] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -15.845 -12.799 -11.028 -9.319 -5.995 -15.486 -13.538 -12.533 -11.64 -9.838 -16.778 -14.811 -13.66 -12.579 -10.169 -13.239 -10.142 -8.437 -6.802 -3.602 -12.92 -10.986 -9.956 -9.039 -7.226 -14.144 -12.196 -11.09 -9.935 -7.67 -14.861 -10.731 -8.792 -6.878 -3.399 -15.496 -12.049 -10.334 -8.73 -6.004 -16.214 -12.937 -11.391 -9.993 -7.143