%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sb
import numpy as np
import pandas as pd
import pymc as pm
from scipy.misc import comb
np.random.seed(42)
excel_sucks = True
if excel_sucks:
study_char = pd.read_csv("study_characteristics.csv",
index_col='RefID', na_values=['-', 'NR'])
outcomes = pd.read_csv("outcomes.csv", na_values=['ND', 'NR'])
else:
data_file = 'DBD Data for Meta Analyses_4-7-15_withNRCTs_RAE.xlsx'
study_char = pd.read_excel(data_file, "Study Characteristics",
index_col='RefID', na_values=['-', 'NR'])
outcomes = pd.read_excel(data_file, "Outcomes",
na_values=['ND', 'NR'])
study_char.tail()
Pubmed ID | Rel Incl1 | Rel Incl2 | Rel Incl3 | Rel Excl | Family | NCT | Title | Year | Author | ... | Screened (N) | Randomized (N) | Analyzed (N) | Proportion Male (%) | Direction of Effect | DOE_Comments | Other comments | Drugs(s) | - | Unnamed: 46 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
RefID | |||||||||||||||||||||
8385 | na | NaN | NaN | NaN | NaN | NaN | NaN | Reducing Adolescent Oppositional and Conduct D... | 2011 | Sells SP, K. W. Early and T. E. Smith | ... | NaN | 38 | NaN | 57 | NaN | NaN | NaN | NaN | NaN | NaN |
8401 | 22820873 | NaN | NaN | NaN | NaN | NaN | NaN | "Tuning into Kids": reducing young children's ... | 2013 | Havighurst SS, K. R. Wilson, A. E. Harley, C. ... | ... | 92 | 63 | 54 | 78 | NaN | NaN | NaN | NaN | NaN | NaN |
NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 46 columns
Data cleaning
# Cast outcomes variables to floats
for col in ('Last FU Mean', 'Last FU SD',):
outcomes[col] = outcomes[col].astype(float)
# Recode age category
study_char['age_cat'] = study_char.AgeCat.replace({'PRE-K':1, 'SCHOOL':0, 'TEEN':2})
# Fix data label typo
outcomes['Measure Instrument'] = outcomes['Measure Instrument'].replace({'Eyberg Child Behaviour Inventory, problem Subscale':
'Eyberg Child Behaviour Inventory, Problem Subscale'})
outcomes.Units = outcomes.Units.replace({'scale': 'Scale'})
# Parse followup times and convert to months
split_fut = outcomes.loc[outcomes['Last FU Time'].notnull(), 'Last FU Time'].apply(lambda x: str(x).split(' ')[:2])
fut_months = [float(time)/52.*(unit=='weeks') or float(time) for time, unit in split_fut]
outcomes.loc[outcomes['Last FU Time'].notnull(), 'Last FU Time'] = fut_months
We are assumung all CBC Externalizing values over 50 are T-scores, and those under 50 are raw scores. This recodes those observations.
cbce_ind = outcomes['Measure Instrument'].apply(lambda x: x.startswith('Child Behavior Checklist, Externalizing'))
under_50 = outcomes['BL Mean']<50
outcomes.loc[cbce_ind & (under_50^True), 'Measure Instrument'] = 'Child Behavior Checklist, Externalizing (T Score)'
outcomes.loc[cbce_ind & under_50, 'Measure Instrument'] = 'Child Behavior Checklist, Externalizing'
Recode measure instrument variables
instrument = []
subtype = []
units = []
for i,row in outcomes.iterrows():
separator = row['Measure Instrument'].find(',')
if separator == -1:
separator = row['Measure Instrument'].find('-')
instrument.append(row['Measure Instrument'][:separator])
s = row['Measure Instrument'][separator+2:]
paren = s.find('(')
if paren > -1:
subtype.append(s[:paren-1])
units.append(s[paren+1:-1])
else:
subtype.append(s)
if s.endswith('scale'):
units.append('Scale')
else:
units.append('Score')
new_cols = pd.DataFrame({'instrument': instrument, 'subtype': subtype,
'units': units}, index=outcomes.index)
outcomes['Measure Instrument'].value_counts()
Eyberg Child Behaviour Inventory, Intensity Subscale 79 Eyberg Child Behaviour Inventory, Problem Subscale 57 Child Behavior Checklist, Externalizing (T Score) 46 Child Behavior Checklist, Externalizing 11 Strengths and Difficulties Questionnaire, Total Score 10 Strengths and Difficulties Questionnaire- Conduct Problems Scale 10 Eyberg Child Behaviour Inventory, Intensity Subscale (T Score) 10 Child Behavior Checklist, Conduct Problems (T Score) 6 Child Behavior Checklist, Aggression 6 Strengths and Difficulties Questionnaire- Emotional Symptoms Scale 4 Eyberg Child Behaviour Inventory, Problem Subscale (T Score) 4 Child Behavior Checklist, Delinquent Subscale 4 Child Behavior Checklist, Conduct Problems 2 Child Behavior Checklist, Social problems 2 DSM IV 2 Strengths and Difficulties Questionnaire- Hyperactivity Scale 2 Strengths and Difficulties Questionnaire- Impact Score 2 Child Behavior Checklist, Rulebreaking 2 Strengths and Difficulties Questionnaire, Percent Below Clinical Cutoff 2 Childrens Global Assessment Scale 2 dtype: int64
new_cols.head()
instrument | subtype | units | |
---|---|---|---|
0 | Eyberg Child Behaviour Inventory | Intensity Subscale | T Score |
1 | Eyberg Child Behaviour Inventory | Intensity Subscale | T Score |
2 | Eyberg Child Behaviour Inventory | Problem Subscale | T Score |
3 | Eyberg Child Behaviour Inventory | Problem Subscale | T Score |
4 | Child Behavior Checklist | Externalizing | Score |
# Append new columns
outcomes = outcomes.join(new_cols)
outcomes.intvn.value_counts()
wlc 47 tau 39 iypt 32 pcit 16 TAU 10 iyptndiyct 8 pppsd 7 WLC 7 PCIT 5 mst 5 MTP 5 modularndn 4 it 4 iyct 4 pppo 4 ppcp 4 pmtndp 3 FBT 3 snap 3 pcitc 3 pppe 3 spokes 3 PLL 3 CT 3 pmto 3 TIK 2 pcitabb 2 pmtsd 2 RST 2 projndsupport 2 setpc 2 hncte 2 pmtnds 2 hncstd 2 pmtpa 2 itpt 1 scip 1 hnc 1 coaching 1 cbt 1 modularndcomm 1 hitkashrut 1 modularndclin 1 iyptadv 1 pstnds 1 kitkashrut 1 sst 1 cpp 1 pppstd 1 mcfi 1 dtype: int64
outcomes['Ref ID'].unique()
array([ 23, 100, 103, 371, 441, 475, 539, 564, 757, 836, 870, 899, 993, 995, 1018, 1096, 1201, 1215, 1236, 1245, 1292, 1386, 1511, 1521, 1585, 1875, 1951, 2092, 2117, 2219, 2239, 2347, 3211, 3225, 3397, 3399, 3495, 3687, 3716, 3766, 3915, 3960, 7109, 7723, 8192, 8231, 8289, 8292, 8385, 8401, 7848])
Cross-tabulation of the outcome counts by measure instrument
pd.crosstab(outcomes['instrument'], outcomes['Outcome'])
Outcome | 01 Behavior, disruptive | 02 Behavior, aggression | 06 Behavior, fighting, destruction, violation | 08 Behavior, other | 11 Function, interpersonal and social |
---|---|---|---|---|---|
instrument | |||||
Child Behavior Checklist | 65 | 4 | 4 | 4 | 2 |
Childrens Global Assessment Scal | 0 | 0 | 0 | 0 | 2 |
DSM I | 2 | 0 | 0 | 0 | 0 |
Eyberg Child Behaviour Inventory | 148 | 0 | 0 | 2 | 0 |
Strengths and Difficulties Questionnaire | 10 | 0 | 0 | 20 | 0 |
Distribution of age categories
study_char.AgeCat.value_counts()
SCHOOL 44 PRE-K 31 TEEN 18 dtype: int64
Frequencies of various intervention types
study_char['Intervention Type'].value_counts()
PHARM 20 IY-PT 10 MST 9 PCIT 7 IY-PT + IY-CT 4 PMTO 3 Triple P (enhanced) 3 BSFT 3 CBT 2 Triple-P (self-directed) 2 PCIT-ABB 2 PT 2 UCPP 2 IY-PT (nurse led) 2 CPS 2 OTH: Intensive treatment 2 OTH: Modular treatment 2 PMT (practitioner assisted) 1 PONI 1 MF-PEP + TAU 1 Family Therapy 1 OTH: Booster 1 OTH: FFT 1 PLL 1 IY-PT + ADVANCE 1 OTH: Child only treatment 1 IY-CT + IY-PT 1 OTH: Community Parent Education Program 1 IY 1 SNAP Under 12 OutReach Project (enhanced) 1 RST 1 OTH: Instrumental, emotional support & child management skills 1 SET-PC 1 TIK 1 IY-PT (brief) 1 OTH: Modular (nurse administered) 1 OTH: MTP 1 Triple-P (online) 1 HNC 1 PMT (perceptive) 1 SNAP Under 12 OutReach Project 1 CBFI 1 HNC (technology enhanced) 1 OTH: Project support 1 Coping Power (cultural adaptation) 1 Triple-P (enhanced) 1 SCIP (Social cognitive (Dodge's)) 1 OTH: Modular treatment (community) 1 IY-PT + IY-CT + IY-TT 1 PHARM1 + PHARM2 1 OTH: Parents Plus Children's Program 1 Parenting Group (SPOKES) 1 SNAP Under 12 \nOutReach Project(ORP) 1 PCIT (modified) 1 OTH: Family therapy 1 Length: 55, dtype: int64
KQ1 = study_char[study_char.KQ=='KQ1']
study_varnames = ['Year', 'age_cat', 'Geographic setting', 'Age mean (years) ', 'Age SD (years)',
'Age min (years)', 'Age max (years)', 'Proportion Male (%)', 'Design']
study_vars = KQ1[study_varnames].rename(columns={'Geographic setting': 'country',
'Age mean (years) ': 'age_mean',
'Age SD (years)': 'age_sd',
'Age min (years)': 'age_min',
'Age max (years)': 'age_max',
'Proportion Male (%)': 'p_male'})
study_vars.head()
Year | age_cat | country | age_mean | age_sd | age_min | age_max | p_male | Design | |
---|---|---|---|---|---|---|---|---|---|
RefID | |||||||||
23 | 2013 | 1 | USA | 2.80 | 0.61 | 2 | 4 | 62 | RCT |
100 | 2013 | 2 | USA | 14.60 | 1.30 | 11 | 18 | 83 | RCT |
103 | 2013 | 1 | USA | 5.67 | 1.72 | 3 | 8 | 53 | RCT |
141 | 2013 | 0 | USA | 9.90 | 1.30 | 8 | 11 | 73 | RCT |
156 | 2013 | 2 | Netherlands | 16.00 | 1.31 | 12 | 18 | 73 | RCT |
study_vars.p_male.hist()
<matplotlib.axes._subplots.AxesSubplot at 0x10cc2d908>
Proportion missing
study_vars.isnull().mean(0).round(2)
Year 0.00 age_cat 0.00 country 0.00 age_mean 0.10 age_sd 0.19 age_min 0.06 age_max 0.07 p_male 0.02 Design 0.00 dtype: float64
Will assume the mean age for those which are missing is simply the midpoint between minimum and maximum values
est_means = study_vars.apply(lambda x: x.age_min + (x.age_max - x.age_min) / 2, axis=1)[study_vars.age_mean.isnull()]
study_vars.loc[study_vars.age_mean.isnull(), 'age_mean'] = est_means
study_vars.age_mean.isnull().sum()
2
outcomes_varnames = ['Ref ID', 'Measure Instrument', 'instrument', 'subtype', 'units',
'intvn', 'cc', 'pc', 'fc',
'BL N', 'BL Mean', 'BL SD',
'EOT \nN', 'EOT Mean', 'EOT \nSD', 'Last FU Time', 'Last FU N',
'Last FU Mean', 'Last FU SD', 'CS Group N', 'CS Mean', 'CS SD']
outcomes_vars = outcomes[outcomes_varnames].rename(columns={'Ref ID': 'RefID',
'Measure Instrument': 'measure_instrument',
'cc': 'child_component',
'pc': 'parent_component',
'fc': 'family_component',
'oc': 'other_component',
'BL N': 'baseline_n',
'BL Mean': 'baseline_mean',
'BL SD': 'baseline_sd',
'EOT \nN': 'end_treat_n',
'EOT Mean': 'end_treat_mean',
'EOT \nSD': 'end_treat_sd',
'Last FU Time': 'followup_time',
'Last FU N': 'followup_n',
'Last FU Mean': 'followup_mean',
'Last FU SD': 'followup_sd',
'CS Group N': 'change_n',
'CS Mean': 'change_mean',
'CS SD': 'change_sd'})
Recode intervention clasification
outcomes_vars.isnull().sum()
RefID 0 measure_instrument 0 instrument 0 subtype 0 units 0 intvn 0 child_component 0 parent_component 0 family_component 0 baseline_n 4 baseline_mean 17 baseline_sd 25 end_treat_n 74 end_treat_mean 67 end_treat_sd 74 followup_time 32 followup_n 103 followup_mean 118 followup_sd 126 change_n 251 change_mean 258 change_sd 263 dtype: int64
control = ((outcomes_vars.child_component^True) &
(outcomes_vars.parent_component^True) &
(outcomes_vars.family_component^True)).astype(int)
child_only = ((outcomes_vars.child_component) &
(outcomes_vars.parent_component^True) &
(outcomes_vars.family_component^True)).astype(int)
parent_only = ((outcomes_vars.child_component^True) &
(outcomes_vars.parent_component) &
(outcomes_vars.family_component^True)).astype(int)
outcomes_vars.ix[child_only.astype(bool), ['child_component', 'parent_component', 'family_component']]
child_component | parent_component | family_component | |
---|---|---|---|
125 | 1 | 0 | 0 |
126 | 1 | 0 | 0 |
128 | 1 | 0 | 0 |
129 | 1 | 0 | 0 |
162 | 1 | 0 | 0 |
188 | 1 | 0 | 0 |
194 | 1 | 0 | 0 |
195 | 1 | 0 | 0 |
234 | 1 | 0 | 0 |
236 | 1 | 0 | 0 |
238 | 1 | 0 | 0 |
multi_component = ((parent_only^True) & (child_only^True) & (control^True)).astype(int)
outcomes_vars['child_only'] = child_only
outcomes_vars['parent_only'] = parent_only
outcomes_vars['multi_component'] = multi_component
If requested, change PCIT to parent-only.
pcit_as_multicomponent = True
if not pcit_as_multicomponent:
outcomes_vars.loc[outcomes_vars.intvn=='pcit', 'parenr_only'] = 1
outcomes_vars.loc[outcomes_vars.intvn=='pcit', 'multi_component'] = 0
Obtain subset with non-missing EOT data
eot_subset = outcomes_vars[outcomes_vars.end_treat_mean.notnull() & outcomes_vars.end_treat_sd.notnull()].copy()
Calculate EOT difference
eot_subset['eot_diff_mean'] = eot_subset.end_treat_mean - eot_subset.baseline_mean
eot_subset['eot_diff_sd'] = eot_subset.baseline_sd + eot_subset.end_treat_sd
eot_subset['eot_diff_n'] = eot_subset[['baseline_n', 'end_treat_n']].min(1)
Distribution of baseline means among outcome metrics
for instrument in ('Eyberg Child Behaviour Inventory',
'Child Behavior Checklist',
'Strengths and Difficulties Questionnaire'):
eot_subset[eot_subset.instrument==instrument]['baseline_mean'].hist(by=eot_subset['subtype'],
sharex=True)
plt.suptitle(instrument);
eot_subset.instrument.value_counts()
Eyberg Child Behaviour Inventory 116 Child Behavior Checklist 48 Strengths and Difficulties Questionnaire 22 Childrens Global Assessment Scal 2 dtype: int64
eot_subset.instrument.value_counts()
Eyberg Child Behaviour Inventory 116 Child Behavior Checklist 48 Strengths and Difficulties Questionnaire 22 Childrens Global Assessment Scal 2 dtype: int64
eot_subset[eot_subset.RefID==441]
RefID | measure_instrument | instrument | subtype | units | intvn | child_component | parent_component | family_component | baseline_n | ... | followup_sd | change_n | change_mean | change_sd | child_only | parent_only | multi_component | eot_diff_mean | eot_diff_sd | eot_diff_n | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
14 | 441 | Eyberg Child Behaviour Inventory, Intensity Su... | Eyberg Child Behaviour Inventory | Intensity Subscale | Scale | iypt | 0 | 1 | 0 | 32 | ... | NaN | NaN | NaN | NaN | 0 | 1 | 0 | -31.40 | 46.80 | 32 |
15 | 441 | Eyberg Child Behaviour Inventory, Intensity Su... | Eyberg Child Behaviour Inventory | Intensity Subscale | Scale | wlc | 0 | 0 | 0 | 20 | ... | NaN | NaN | NaN | NaN | 0 | 0 | 0 | -5.80 | 49.60 | 20 |
16 | 441 | Eyberg Child Behaviour Inventory, Problem Subs... | Eyberg Child Behaviour Inventory | Problem Subscale | Scale | iypt | 0 | 1 | 0 | 24 | ... | NaN | NaN | NaN | NaN | 0 | 1 | 0 | -9.70 | 12.02 | 24 |
17 | 441 | Eyberg Child Behaviour Inventory, Problem Subs... | Eyberg Child Behaviour Inventory | Problem Subscale | Scale | wlc | 0 | 0 | 0 | 17 | ... | NaN | NaN | NaN | NaN | 0 | 0 | 0 | -2.88 | 14.59 | 17 |
4 rows × 28 columns
Several studies use multiple instruments and metrics within instruments
eot_subset.groupby(['RefID', 'instrument'])['subtype'].value_counts()
RefID instrument 103 Eyberg Child Behaviour Inventory Intensity Subscale 2 Problem Subscale 2 371 Eyberg Child Behaviour Inventory Intensity Subscale 2 Problem Subscale 2 441 Eyberg Child Behaviour Inventory Intensity Subscale 2 Problem Subscale 2 475 Eyberg Child Behaviour Inventory Intensity Subscale 2 539 Child Behavior Checklist Aggression 2 Externalizing 2 564 Eyberg Child Behaviour Inventory Intensity Subscale 2 Problem Subscale 2 836 Strengths and Difficulties Questionnaire Total Score 2 899 Child Behavior Checklist Externalizing 3 Eyberg Child Behaviour Inventory Intensity Subscale 3 Problem Subscale 3 ... 7723 Child Behavior Checklist Externalizing 2 7848 Child Behavior Checklist Social problems 2 Delinquent Subscale 2 Aggression 2 Externalizing 2 Childrens Global Assessment Scal hildrens Global Assessment Scale 2 8192 Eyberg Child Behaviour Inventory Intensity Subscale 2 8231 Eyberg Child Behaviour Inventory Intensity Subscale 4 Problem Subscale 4 8289 Child Behavior Checklist Delinquent Subscale 2 Eyberg Child Behaviour Inventory Intensity Subscale 2 Problem Subscale 2 8292 Child Behavior Checklist Externalizing 2 8401 Eyberg Child Behaviour Inventory Intensity Subscale 2 Problem Subscale 2 Length: 73, dtype: int64
pd.crosstab(eot_subset.instrument, eot_subset.subtype)
subtype | Aggression | Conduct Problems | Conduct Problems Scale | Delinquent Subscale | Emotional Symptoms Scale | Externalizing | Hyperactivity Scale | Impact Score | Intensity Subscale | Problem Subscale | Social problems | Total Score | hildrens Global Assessment Scale |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
instrument | |||||||||||||
Child Behavior Checklist | 4 | 6 | 0 | 4 | 0 | 32 | 0 | 0 | 0 | 0 | 2 | 0 | 0 |
Childrens Global Assessment Scal | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 |
Eyberg Child Behaviour Inventory | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 68 | 48 | 0 | 0 | 0 |
Strengths and Difficulties Questionnaire | 0 | 0 | 6 | 0 | 4 | 0 | 2 | 2 | 0 | 0 | 0 | 8 | 0 |
x = eot_subset[eot_subset.instrument=='Eyberg Child Behaviour Inventory']
pd.crosstab(x.instrument, x.subtype)
subtype | Intensity Subscale | Problem Subscale |
---|---|---|
instrument | ||
Eyberg Child Behaviour Inventory | 68 | 48 |
x = eot_subset[eot_subset.instrument=='Child Behavior Checklist']
pd.crosstab(x.instrument, x.subtype)
subtype | Aggression | Conduct Problems | Delinquent Subscale | Externalizing | Social problems |
---|---|---|---|---|---|
instrument | |||||
Child Behavior Checklist | 4 | 6 | 4 | 32 | 2 |
x = eot_subset[eot_subset.instrument=='Strengths and Difficulties Questionnaire']
pd.crosstab(x.instrument, x.subtype)
subtype | Conduct Problems Scale | Emotional Symptoms Scale | Hyperactivity Scale | Impact Score | Total Score |
---|---|---|---|---|---|
instrument | |||||
Strengths and Difficulties Questionnaire | 6 | 4 | 2 | 2 | 8 |
Filter out non-RCT studies, if requested.
rct_only = True
rct_study_vars = study_vars[study_vars.Design=='RCT']
Merge study variables and outcomes
merged_vars = rct_study_vars.merge(eot_subset, left_index=True, right_on='RefID')
merged_vars.shape
(156, 37)
For now, restrict to the three most prevalent metrics.
merged_vars.measure_instrument.value_counts()
Eyberg Child Behaviour Inventory, Intensity Subscale 62 Eyberg Child Behaviour Inventory, Problem Subscale 46 Child Behavior Checklist, Externalizing (T Score) 19 Child Behavior Checklist, Externalizing 7 Child Behavior Checklist, Conduct Problems (T Score) 6 Eyberg Child Behaviour Inventory, Intensity Subscale (T Score) 4 Strengths and Difficulties Questionnaire, Total Score 4 Child Behavior Checklist, Aggression 2 Strengths and Difficulties Questionnaire- Conduct Problems Scale 2 Child Behavior Checklist, Delinquent Subscale 2 Strengths and Difficulties Questionnaire- Emotional Symptoms Scale 2 dtype: int64
Take only the top 3 measures
top_measures = 3
merged_vars.RefID.unique()
array([ 103, 371, 441, 475, 539, 836, 899, 1236, 1245, 1511, 1585, 1875, 1951, 2092, 2219, 2239, 3211, 3225, 3399, 3495, 3687, 3716, 3766, 3915, 3960, 7109, 7723, 8192, 8231, 8289, 8292, 8401])
analysis_subset = merged_vars[merged_vars.measure_instrument.isin(merged_vars.measure_instrument.value_counts().index[:top_measures])]
analysis_subset.groupby('measure_instrument')['baseline_mean'].max()
measure_instrument Child Behavior Checklist, Externalizing (T Score) 77.10 Eyberg Child Behaviour Inventory, Intensity Subscale 186.44 Eyberg Child Behaviour Inventory, Problem Subscale 28.62 Name: baseline_mean, dtype: float64
analysis_subset.groupby('measure_instrument').baseline_mean.hist()
measure_instrument Child Behavior Checklist, Externalizing (T Score) Axes(0.125,0.125;0.775x0.775) Eyberg Child Behaviour Inventory, Intensity Subscale Axes(0.125,0.125;0.775x0.775) Eyberg Child Behaviour Inventory, Problem Subscale Axes(0.125,0.125;0.775x0.775) Name: baseline_mean, dtype: object
analysis_subset['baseline_mean'].hist(by=analysis_subset['measure_instrument'],sharex=True)
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x115a4ce48>, <matplotlib.axes._subplots.AxesSubplot object at 0x115baf390>], [<matplotlib.axes._subplots.AxesSubplot object at 0x115bf5a58>, <matplotlib.axes._subplots.AxesSubplot object at 0x115c2e780>]], dtype=object)
analysis_subset['eot_diff_mean'].hist(by=analysis_subset['measure_instrument'],sharex=True)
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x115cce320>, <matplotlib.axes._subplots.AxesSubplot object at 0x115637fd0>], [<matplotlib.axes._subplots.AxesSubplot object at 0x1157accc0>, <matplotlib.axes._subplots.AxesSubplot object at 0x11567d240>]], dtype=object)
for x in analysis_subset.measure_instrument.unique():
plt.figure()
analysis_subset[analysis_subset.measure_instrument==x].baseline_mean.plot(kind='kde', alpha=0.4, grid=False)
analysis_subset[analysis_subset.measure_instrument==x].end_treat_mean.plot(kind='kde', alpha=0.4, grid=False)
plt.gca().set_xlabel(x)
Number of TAU/Control arms in subset
analysis_subset.shape[0] - analysis_subset.multi_component.sum() - analysis_subset.child_only.sum() - analysis_subset.parent_only.sum()
49
set(analysis_subset[analysis_subset.multi_component==1].RefID.values)
{539, 899, 1236, 1511, 2092, 3399, 3687, 3766, 3915, 8192, 8231, 8289, 8292}
set(analysis_subset[analysis_subset.child_only==1].RefID.values)
{1875, 3766, 8289}
set(analysis_subset[analysis_subset.parent_only==1].RefID.values)
{103, 371, 441, 1236, 1245, 1585, 2219, 3211, 3225, 3495, 3716, 3766, 3960, 7109, 7723, 8401}
set(analysis_subset[(analysis_subset.parent_only==0) &
(analysis_subset.child_only==0) &
(analysis_subset.multi_component==0)].RefID.values)
{371, 441, 539, 899, 1236, 1511, 1585, 1875, 2092, 2219, 3211, 3225, 3399, 3495, 3687, 3716, 3766, 3915, 7109, 7723, 8192, 8231, 8292, 8401}
Total arm sample sizes
analysis_subset.loc[analysis_subset.child_only==1, 'baseline_n'].sum()
190.0
analysis_subset.loc[analysis_subset.parent_only==1, 'baseline_n'].sum()
1540.0
analysis_subset.loc[analysis_subset.multi_component==1, 'baseline_n'].sum()
929.0
analysis_subset.loc[(analysis_subset.multi_component==0) & (analysis_subset.parent_only==0) & (analysis_subset.child_only==0), 'baseline_n'].sum()
1348.0
Number of studies in analysis subset
unique_studies = analysis_subset.RefID.unique().tolist()
len(unique_studies)
28
analysis_subset.RefID.unique()
array([ 103, 371, 441, 539, 899, 1236, 1245, 1511, 1585, 1875, 2092, 2219, 3211, 3225, 3399, 3495, 3687, 3716, 3766, 3915, 3960, 7109, 7723, 8192, 8231, 8289, 8292, 8401])
analysis_subset.groupby(['RefID', 'measure_instrument'])['measure_instrument'].count().unstack().fillna(0)
measure_instrument | Child Behavior Checklist, Externalizing (T Score) | Eyberg Child Behaviour Inventory, Intensity Subscale | Eyberg Child Behaviour Inventory, Problem Subscale |
---|---|---|---|
RefID | |||
103 | 0 | 2 | 2 |
371 | 0 | 2 | 2 |
441 | 0 | 2 | 2 |
539 | 2 | 0 | 0 |
899 | 3 | 3 | 3 |
1236 | 0 | 6 | 6 |
1245 | 2 | 0 | 0 |
1511 | 3 | 0 | 0 |
1585 | 0 | 2 | 2 |
1875 | 3 | 0 | 0 |
2092 | 0 | 3 | 0 |
2219 | 0 | 3 | 3 |
3211 | 0 | 2 | 2 |
3225 | 0 | 2 | 2 |
3399 | 2 | 0 | 0 |
3495 | 0 | 4 | 0 |
3687 | 0 | 6 | 6 |
3716 | 0 | 2 | 2 |
3766 | 0 | 9 | 0 |
3915 | 0 | 2 | 2 |
3960 | 0 | 0 | 2 |
7109 | 0 | 2 | 2 |
7723 | 2 | 0 | 0 |
8192 | 0 | 2 | 0 |
8231 | 0 | 4 | 4 |
8289 | 0 | 2 | 2 |
8292 | 2 | 0 | 0 |
8401 | 0 | 2 | 2 |
Study counts by
data_by_refid = analysis_subset.groupby('RefID')
data_by_refid.apply(lambda x: (x[['child_component', 'parent_component', 'multi_component']].sum()>1)).sum()
child_component 9 parent_component 21 multi_component 8 dtype: int64
data_by_refid.apply(lambda x: (x[['child_component', 'parent_component', 'multi_component']].sum(1)==0).sum()>0).astype(int)
RefID 103 0 371 1 441 1 539 1 899 1 1236 1 1245 0 1511 1 1585 1 1875 1 2092 1 2219 1 3211 1 3225 1 3399 1 3495 1 3687 1 3716 1 3766 1 3915 1 3960 0 7109 1 7723 1 8192 1 8231 1 8289 0 8292 1 8401 1 dtype: int64
data_by_refid.apply(lambda x: x.age_cat.unique()[0]).value_counts()
1 16 0 10 2 2 dtype: int64
We are restricting the analysis to the 4 most prevalent measure instruments in the database.
unique_measures = analysis_subset.measure_instrument.unique().tolist()
k = len(unique_measures)
unique_measures
['Eyberg Child Behaviour Inventory, Intensity Subscale', 'Eyberg Child Behaviour Inventory, Problem Subscale', 'Child Behavior Checklist, Externalizing (T Score)']
Three intervention components were coded:
child_component
parent_component
multi_component
p_male, age_cat, intvn = analysis_subset[['p_male', 'age_cat', 'intvn']].values.T
child_only, parent_only, multi_component = analysis_subset[['child_only', 'parent_only',
'multi_component']].values.T
change_n, change_mean, change_sd = analysis_subset[['eot_diff_n', 'eot_diff_mean', 'eot_diff_sd']].values.T
school = (analysis_subset.age_cat.values==0).astype(int)
pre_k = (analysis_subset.age_cat.values==1).astype(int)
teen = (analysis_subset.age_cat.values==2).astype(int)
The response variable is a multivariate normal of dimension k=3, for each of the measure instruments:
Means for each study are a draw from a multivariate normal.
wishart = False
mu = pm.Normal('mu', 0, 0.001, value=[0]*k)
if wishart:
T = pm.Wishart('T', k+1, np.eye(k), value=np.eye(k))
m = [pm.MvNormal('m_{}'.format(i), mu, T, value=[0]*k) for i in range(len(unique_studies))]
else:
sigmas = pm.Uniform('sigmas', 0, 100, value=[10]*k)
rhos = pm.Uniform('rhos', -1, 1, value=[0]*int(comb(k, 2)))
Sigma = pm.Lambda('Sigma', lambda s=sigmas, r=rhos: np.array([[s[0]**2, s[0]*s[1]*r[0], s[0]*s[2]*r[1]],
[s[0]*s[1]*r[0], s[1]**2, s[1]*s[2]*r[2]],
[s[0]*s[2]*r[1], s[1]*s[2]*r[2], s[2]**2]]))
m = [pm.MvNormalCov('m_{}'.format(i), mu, Sigma, value=[0]*k) for i in range(len(unique_studies))]
Unique intervention labels for each component; we will use these for component random effects.
unique_child_intvn = np.unique(intvn[child_only.astype(bool)]).tolist()
unique_parent_intvn = np.unique(intvn[parent_only.astype(bool)]).tolist()
unique_multi_intvn = np.unique(intvn[multi_component.astype(bool)]).tolist()
# Indices to random effect labels
child_component_index = [unique_child_intvn.index(x) for x in intvn[child_only.astype(bool)]]
parent_component_index = [unique_parent_intvn.index(x) for x in intvn[parent_only.astype(bool)]]
multi_component_index = [unique_multi_intvn.index(x) for x in intvn[multi_component.astype(bool)]]
Treatment component random effects
$$X_i = \left[{ \begin{array}{c} {x_c}\\ {x_p}\\ {x_m}\\ \end{array} }\right]_i$$$$\begin{aligned} \beta_j^{(c)} &\sim N(\mu_{\beta}^{(c)},\tau_{\beta}^{(c)}) \\ \beta_j^{(p)} &\sim N(\mu_{\beta}^{(p)},\tau_{\beta}^{(p)}) \\ \beta_j^{(m)} &\sim N(\mu_{\beta}^{(m)},\tau_{\beta}^{(m)}) \end{aligned}$$mu_beta = pm.Normal('mu_beta', 0, 0.001, value=[0]*3)
# sig_beta = pm.Uniform('sig_beta', 0, 100, value=1)
# tau_beta = sig_beta ** -2
tau_beta = pm.Gamma('tau_beta', 1, 0.1, value=1)
beta_c = pm.Normal('beta_c', mu_beta[0], tau_beta, value=[0]*len(unique_child_intvn))
beta_p = pm.Normal('beta_p', mu_beta[1], tau_beta, value=[0]*len(unique_parent_intvn))
beta_m = pm.Normal('beta_m', mu_beta[2], tau_beta, value=[0]*len(unique_multi_intvn))
b_c = pm.Lambda('b_c', lambda b=beta_c:
np.array([b[unique_child_intvn.index(x)] if child_only[i] else 0 for i,x in enumerate(intvn)]))
b_p = pm.Lambda('b_p', lambda b=beta_p:
np.array([b[unique_parent_intvn.index(x)] if parent_only[i] else 0 for i,x in enumerate(intvn)]))
b_m = pm.Lambda('b_m', lambda b=beta_m:
np.array([b[unique_multi_intvn.index(x)] if multi_component[i] else 0 for i,x in enumerate(intvn)]))
Calculate the probability of being the best intervention.
best = pm.Lambda('best', lambda b=mu_beta: (b==b.min()).astype(int))
Interaction of parent and multi-component with pre-k children.
interaction = False
if interaction:
beta_pk_p = pm.Normal('beta_pk_p', 0, 1e-5, value=0)
beta_pk_m = pm.Normal('beta_pk_m', 0, 1e-5, value=0)
b_pk_p = pm.Lambda('b_pk_p', lambda b=beta_pk_p: b * parent_only * pre_k)
b_pk_m = pm.Lambda('b_pk_m', lambda b=beta_pk_m: b * multi_component * pre_k)
betas = b_c + b_p + b_m
if interaction:
betas = betas + b_pk_p + b_pk_m
Covariate effects of age and percent female.
$$\alpha \sim N(0, 1e5)$$alpha_age = pm.Normal('alpha_age', 0, 1e-5, value=[1,2])
Unique study ID (RefID
) and measure ID (measure_instrument
) values.
study_id = [unique_studies.index(x) for x in analysis_subset.RefID]
measure_id = [unique_measures.index(x) for x in analysis_subset.measure_instrument]
Calculate expected response (treatment difference) as a function of treatment and covariates.
$$\theta_i = m_{j[i]k} + X_i \beta + \alpha x_{age}$$baseline_sd = analysis_subset.baseline_sd.values
@pm.deterministic
def theta(m=m, betas=betas, alpha_age=alpha_age):
mi = [m[i][j] for i,j in zip(study_id, measure_id)]
age_effect = np.array([alpha_age[a-1] if a else 0 for a in age_cat])
return(mi + baseline_sd*(betas + age_effect))
Expected treatment effect for pre-K undergoing multi-component intervention, measused by Eyberg Child Behaviour Inventory, Intensity Subscale
if wishart:
baseline = pm.MvNormal('baseline', mu, T, value=[0]*k)
else:
baseline = pm.MvNormalCov('baseline', mu, Sigma, value=[0]*k)
Calculation of expected differences from baseline for each instrument.
# ECBI intensity subscale
ecbi_intensity_sd = baseline_sd[np.array(measure_id)==0].mean()
prek_intensity_pred = pm.Lambda('prek_intensity_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*(b + a[0]) )
school_intensity_pred = pm.Lambda('school_intensity_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*b )
teen_intensity_pred = pm.Lambda('teen_intensity_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[0] + ecbi_intensity_sd*(b + a[1]) )
intensity_diff = pm.Lambda('intensity_diff',
lambda base=baseline, prek=prek_intensity_pred, school=school_intensity_pred,
teen=teen_intensity_pred: np.array([prek, school, teen])-base[0])
# ECBI problem subscale
ecbi_problem_sd = baseline_sd[np.array(measure_id)==1].mean()
prek_problem_pred = pm.Lambda('prek_problem_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*(b + a[0]) )
school_problem_pred = pm.Lambda('school_problem_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*b )
teen_problem_pred = pm.Lambda('teen_problem_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[1] + ecbi_problem_sd*(b + a[1]) )
problem_diff = pm.Lambda('problem_diff',
lambda base=baseline, prek=prek_problem_pred, school=school_problem_pred,
teen=teen_problem_pred: np.array([prek, school, teen])-base[1])
# CBC T-score
cbct_sd = baseline_sd[np.array(measure_id)==2].mean()
prek_tscore_pred = pm.Lambda('prek_tscore_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[2] + cbct_sd*(b + a[0]) )
school_tscore_pred = pm.Lambda('school_tscore_pred',
lambda mu=baseline, b=mu_beta: mu[2] + cbct_sd*b )
teen_tscore_pred = pm.Lambda('teen_tscore_pred',
lambda mu=baseline, a=alpha_age, b=mu_beta: mu[2] + cbct_sd*(b + a[1]) )
tscore_diff = pm.Lambda('tscore_diff',
lambda base=baseline, prek=prek_tscore_pred, school=school_tscore_pred,
teen=teen_tscore_pred: np.array([prek, school, teen])-base[2])
Finally, the likelihood is just a normal distribution, with the observed standard error of the treatment effect as the standard deviation of the estimates.
$$d_i \sim N(\theta_i, \hat{\sigma}^2)$$change_se = change_sd/np.sqrt(change_n)
d = pm.Normal('d', theta, change_se**-2, observed=True, value=change_mean)
Posterior predictive samples, if desired.
include_gof_samples = False
if include_gof_samples:
d_sim = pm.Normal('d_sim', theta, change_se**-2, size=len(change_mean))
Instantiate MCMC model, and use Adaptive Metropolis for some vector-valued variables.
M = pm.MCMC(locals())
M.use_step_method(pm.AdaptiveMetropolis, m)
M.use_step_method(pm.AdaptiveMetropolis, mu_beta)
M.use_step_method(pm.AdaptiveMetropolis, [beta_c, beta_p, beta_m])
Run two chains to allow for convergence evaluation.
M.sample(200000, 190000)
[-----------------100%-----------------] 200000 of 200000 complete in 715.5 sec
M.sample(200000, 190000)
[-----------------100%-----------------] 200000 of 200000 complete in 763.4 sec
Summary of estimates of intervention components
if wishart:
pm.Matplot.plot(T)
pm.Matplot.plot(mu_beta)
Plotting mu_beta_0 Plotting mu_beta_1 Plotting mu_beta_2
pm.Matplot.summary_plot([mu_beta], custom_labels=['Child component', 'Parent component',
'Multi-component'], chain=0, xlab="Effect size (standard deviations)")
pm.Matplot.summary_plot([mu_beta], custom_labels=['Child component', 'Parent component',
'Multi-component'])
mu_beta.summary()
mu_beta: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -1.008 0.281 0.014 [-1.562 -0.452] -1.219 0.163 0.007 [-1.535 -0.909] -1.278 0.193 0.009 [-1.663 -0.911] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -1.553 -1.192 -1.013 -0.83 -0.439 -1.546 -1.328 -1.219 -1.108 -0.914 -1.635 -1.417 -1.278 -1.155 -0.876
mu_trace = M.mu_beta.trace()
multi_child = mu_trace[:, 2] - mu_trace[:, 0]
multi_parent = mu_trace[:, 2] - mu_trace[:, 1]
parent_child = mu_trace[:, 1] - mu_trace[:, 0]
'Multi-component vs child-only: mean={0:.2f}, 95% HPD=({1:.2}, {2:.2})'.format(multi_child.mean(),
*pm.utils.hpd(multi_child, 0.05))
'Multi-component vs child-only: mean=-0.29, 95% HPD=(-0.87, 0.42)'
'Multi-component vs parent-only: mean={0:.2f}, 95% HPD=({1:.2}, {2:.2})'.format(multi_parent.mean(),
*pm.utils.hpd(multi_parent, 0.05))
'Multi-component vs parent-only: mean=-0.06, 95% HPD=(-0.56, 0.41)'
'Parent vs child-only: mean={0:.2f}, 95% HPD=({1:.2}, {2:.2})'.format(parent_child.mean(),
*pm.utils.hpd(parent_child, 0.05))
'Parent vs child-only: mean=-0.23, 95% HPD=(-0.82, 0.39)'
pm.Matplot.plot(mu_beta)
Plotting mu_beta_0 Plotting mu_beta_1 Plotting mu_beta_2
if interaction:
pm.Matplot.summary_plot([beta_pk_m, beta_pk_p])
Difference means by measure instrument.
plt.figure(figsize=(24,4))
pm.Matplot.summary_plot([mu], custom_labels=unique_measures)
pm.Matplot.plot(mu)
Plotting mu_0 Plotting mu_1 Plotting mu_2
if not wishart:
plt.figure(figsize=(24,4))
pm.Matplot.summary_plot([sigmas], custom_labels=unique_measures)
sigmas.summary()
sigmas: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.074 0.013 0.001 [ 0.048 0.101] 0.141 0.035 0.003 [ 0.086 0.203] 0.033 0.023 0.002 [ 0.008 0.07 ] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.049 0.065 0.073 0.081 0.102 0.092 0.112 0.136 0.169 0.213 0.009 0.011 0.029 0.052 0.076
pm.Matplot.plot(sigmas)
Plotting sigmas_0 Plotting sigmas_1 Plotting sigmas_2
if not wishart:
pm.Matplot.summary_plot([rhos], custom_labels=['rho12', 'rho13', 'rho23'])
Age effects for pre-k (top) and teen (bottom) groups, relative to pre-teen.
pm.Matplot.summary_plot([alpha_age], custom_labels=['pre-K', 'teen'])
alpha_age.summary()
alpha_age: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -0.459 0.071 0.003 [-0.591 -0.319] -0.019 0.222 0.012 [-0.45 0.407] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -0.598 -0.508 -0.46 -0.409 -0.323 -0.473 -0.167 -0.011 0.136 0.389
pm.Matplot.plot(alpha_age)
Plotting alpha_age_0 Plotting alpha_age_1
traces = [[school_intensity_pred, prek_intensity_pred, teen_intensity_pred],
[school_problem_pred, prek_problem_pred, teen_problem_pred],
[school_tscore_pred, prek_tscore_pred, teen_tscore_pred]]
sb.set(style="white", palette="hot")
sb.despine(left=True)
colors = '#7fc97f','#beaed4','#fdc086','#386cb0'
#colors = '#a6611a','#dfc27d','#80cdc1','#018571'
# colors = ('#fef0d9','#fdcc8a','#fc8d59','#d7301f')[::-1]
# colors = sb.cubehelix_palette(4, start=2, rot=0, dark=.25, light=.75, reverse=False)
titles = ['Pre-K Children', 'School Children', 'Teenage Children']
for i,measure in enumerate(unique_measures):
f, axes = plt.subplots(1, 3, figsize=(16, 6), sharex=True, sharey=True)
measure_traces = traces[i]
for j, trace in enumerate(np.array(measure_traces)[[1,0,2]]):
x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
c1, p1, m1 = trace.trace().T
g = sb.distplot(x + c1, color=colors[0], ax=axes[j])
sb.distplot(x + p1, color=colors[1], ax=axes[j])
sb.distplot(x + m1, color=colors[2], ax=axes[j])
if j:
age_effect = alpha_age.trace()[:, j-1]
else:
age_effect = 0
sb.distplot(x + baseline.trace()[:, i] + age_effect, color=colors[3], ax=axes[j]);
g.set_title(titles[j])
if not j:
g.set_ylabel('Frequency')
g.legend(g.lines, ['Child-only', 'Parent-only', 'Multi-component', 'Control/TAU'], loc='upper left')
if j==1:
g.set_xlabel(measure)
<matplotlib.figure.Figure at 0x11d8d1ba8>
Generate threshold proportions
thresholds = [127, # ECBI, intensity
11, # ECBI, problem
60] # CBC t-score
age_labels = ['school', 'pre-k', 'teen']
estimates = []
for i,measure in enumerate(unique_measures):
measure_traces = traces[i]
cutoff = thresholds[i]
for j, trace in enumerate(measure_traces):
x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
c1, p1, m1 = trace.trace().T
_child_only = ((x + c1) > cutoff).mean().round(2)
_parent_only = ((x + p1) > cutoff).mean().round(2)
_multi_component = ((x + m1) > cutoff).mean().round(2)
if j:
age_effect = alpha_age.trace()[:, j-1]
else:
age_effect = 0
_control = ((x + baseline.trace()[:, i] + age_effect) > cutoff).mean().round(2)
print('\n{0}, {1}'.format(measure, age_labels[j]))
print('Child-only: {0}\nParent-only: {1}\nMulti-component: {2}\nControl/TAU: {3}'.format(_child_only,
_parent_only,
_multi_component,
_control))
estimates.append((measure, age_labels[j], _child_only, _parent_only, _multi_component, _control))
Eyberg Child Behaviour Inventory, Intensity Subscale, school Child-only: 0.65 Parent-only: 0.47 Multi-component: 0.42 Control/TAU: 0.95 Eyberg Child Behaviour Inventory, Intensity Subscale, pre-k Child-only: 0.32 Parent-only: 0.16 Multi-component: 0.14 Control/TAU: 0.95 Eyberg Child Behaviour Inventory, Intensity Subscale, teen Child-only: 0.64 Parent-only: 0.47 Multi-component: 0.43 Control/TAU: 0.95 Eyberg Child Behaviour Inventory, Problem Subscale, school Child-only: 0.82 Parent-only: 0.79 Multi-component: 0.75 Control/TAU: 1.0 Eyberg Child Behaviour Inventory, Problem Subscale, pre-k Child-only: 0.63 Parent-only: 0.43 Multi-component: 0.37 Control/TAU: 1.0 Eyberg Child Behaviour Inventory, Problem Subscale, teen Child-only: 0.82 Parent-only: 0.74 Multi-component: 0.73 Control/TAU: 1.0 Child Behavior Checklist, Externalizing (T Score), school Child-only: 0.55 Parent-only: 0.35 Multi-component: 0.33 Control/TAU: 1.0 Child Behavior Checklist, Externalizing (T Score), pre-k Child-only: 0.27 Parent-only: 0.19 Multi-component: 0.16 Control/TAU: 1.0 Child Behavior Checklist, Externalizing (T Score), teen Child-only: 0.56 Parent-only: 0.39 Multi-component: 0.34 Control/TAU: 1.0
thresholds = [127, # ECBI, intensity
11, # ECBI, problem
60] # CBC t-score
age_labels = ['school', 'pre-k', 'teen']
estimates = []
for i,measure in enumerate(unique_measures):
measure_traces = traces[i]
cutoff = thresholds[i]
for j, trace in enumerate(measure_traces):
x = np.random.choice(analysis_subset[analysis_subset.measure_instrument==measure].baseline_mean, 10000)
c1, p1, m1 = trace.trace().T
_child_only = ((x + c1) > cutoff).mean().round(2)
_parent_only = ((x + p1) > cutoff).mean().round(2)
_multi_component = ((x + m1) > cutoff).mean().round(2)
if j:
age_effect = alpha_age.trace()[:, j-1]
else:
age_effect = 0
_control = ((x + baseline.trace()[:, i] + age_effect) > cutoff).mean().round(2)
print('\n{0}, {1}'.format(measure, age_labels[j]))
print('Child-only: {0}\nParent-only: {1}\nMulti-component: {2}\nControl/TAU: {3}'.format(_child_only,
_parent_only,
_multi_component,
_control))
estimates.append((measure, age_labels[j], _child_only, _parent_only, _multi_component, _control))
Eyberg Child Behaviour Inventory, Intensity Subscale, school Child-only: 0.65 Parent-only: 0.47 Multi-component: 0.43 Control/TAU: 0.95 Eyberg Child Behaviour Inventory, Intensity Subscale, pre-k Child-only: 0.32 Parent-only: 0.16 Multi-component: 0.14 Control/TAU: 0.95 Eyberg Child Behaviour Inventory, Intensity Subscale, teen Child-only: 0.64 Parent-only: 0.47 Multi-component: 0.42 Control/TAU: 0.95 Eyberg Child Behaviour Inventory, Problem Subscale, school Child-only: 0.83 Parent-only: 0.79 Multi-component: 0.76 Control/TAU: 1.0 Eyberg Child Behaviour Inventory, Problem Subscale, pre-k Child-only: 0.63 Parent-only: 0.43 Multi-component: 0.38 Control/TAU: 1.0 Eyberg Child Behaviour Inventory, Problem Subscale, teen Child-only: 0.82 Parent-only: 0.75 Multi-component: 0.73 Control/TAU: 1.0 Child Behavior Checklist, Externalizing (T Score), school Child-only: 0.56 Parent-only: 0.36 Multi-component: 0.33 Control/TAU: 1.0 Child Behavior Checklist, Externalizing (T Score), pre-k Child-only: 0.27 Parent-only: 0.18 Multi-component: 0.16 Control/TAU: 1.0 Child Behavior Checklist, Externalizing (T Score), teen Child-only: 0.56 Parent-only: 0.38 Multi-component: 0.34 Control/TAU: 1.0
estimates
[('Eyberg Child Behaviour Inventory, Intensity Subscale', 'school', 0.65000000000000002, 0.46999999999999997, 0.42999999999999999, 0.94999999999999996), ('Eyberg Child Behaviour Inventory, Intensity Subscale', 'pre-k', 0.32000000000000001, 0.16, 0.14000000000000001, 0.94999999999999996), ('Eyberg Child Behaviour Inventory, Intensity Subscale', 'teen', 0.64000000000000001, 0.46999999999999997, 0.41999999999999998, 0.94999999999999996), ('Eyberg Child Behaviour Inventory, Problem Subscale', 'school', 0.82999999999999996, 0.79000000000000004, 0.76000000000000001, 1.0), ('Eyberg Child Behaviour Inventory, Problem Subscale', 'pre-k', 0.63, 0.42999999999999999, 0.38, 1.0), ('Eyberg Child Behaviour Inventory, Problem Subscale', 'teen', 0.81999999999999995, 0.75, 0.72999999999999998, 1.0), ('Child Behavior Checklist, Externalizing (T Score)', 'school', 0.56000000000000005, 0.35999999999999999, 0.33000000000000002, 1.0), ('Child Behavior Checklist, Externalizing (T Score)', 'pre-k', 0.27000000000000002, 0.17999999999999999, 0.16, 1.0), ('Child Behavior Checklist, Externalizing (T Score)', 'teen', 0.56000000000000005, 0.38, 0.34000000000000002, 1.0)]
estimates_filename = 'estimates%s.csv' % (bool(~pcit_as_multicomponent)*'_pcit')
np.savetxt(estimates_filename, np.array([e[2:] for e in estimates]), delimiter='\t', fmt='%1.2f')
pd.DataFrame(estimates, columns=['Instrument', 'Age group', 'Child-only',
'Parent-only', 'Multi-component', 'TAU/Control']).to_csv('outcome_proportions.csv')
Summary statistics for TAU/control
baseline.summary()
baseline: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.106 0.078 0.002 [-0.042 0.266] 0.065 0.149 0.003 [-0.235 0.363] -0.101 0.045 0.002 [-0.194 0.008] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -0.049 0.056 0.106 0.156 0.26 -0.233 -0.028 0.066 0.158 0.365 -0.198 -0.119 -0.106 -0.088 0.005
Summary statistics for intensity subscale
prek_intensity_pred.summary()
school_intensity_pred.summary()
teen_intensity_pred.summary()
prek_intensity_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -38.798 7.654 0.361 [-53.423 -23.57 ] -44.408 4.226 0.153 [-52.811 -36.223] -45.972 5.153 0.233 [-55.762 -35.684] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -53.404 -43.773 -38.972 -33.768 -23.516 -52.845 -47.118 -44.368 -41.564 -36.24 -55.535 -49.562 -46.14 -42.636 -35.276 school_intensity_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -26.622 7.46 0.358 [-41.301 -11.872] -32.232 4.316 0.195 [-40.641 -23.955] -33.796 5.125 0.243 [-44.096 -24.034] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -41.045 -31.509 -26.788 -21.873 -11.468 -40.873 -35.115 -32.206 -29.289 -24.124 -43.265 -37.468 -33.792 -30.525 -23.072 teen_intensity_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -27.139 8.64 0.374 [-45.08 -11.171] -32.749 7.31 0.372 [-47.361 -18.752] -34.313 6.714 0.31 [-47.575 -21.585] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -44.859 -32.913 -26.766 -21.232 -10.858 -47.449 -37.69 -32.592 -27.607 -18.809 -47.567 -38.791 -34.237 -29.818 -21.574
Summary statistics for problem subscale
prek_problem_pred.summary()
school_problem_pred.summary()
teen_problem_pred.summary()
prek_problem_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -8.918 1.773 0.083 [-12.294 -5.364] -10.213 0.989 0.035 [-12.219 -8.329] -10.574 1.195 0.054 [-12.787 -8.105] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -12.314 -10.077 -8.952 -7.761 -5.38 -12.193 -10.844 -10.202 -9.551 -8.292 -12.784 -11.409 -10.616 -9.801 -8.097 school_problem_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -6.106 1.728 0.082 [-9.419 -2.592] -7.402 1.008 0.045 [-9.397 -5.478] -7.763 1.188 0.056 [-9.951 -5.296] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -9.428 -7.239 -6.149 -4.999 -2.595 -9.417 -8.06 -7.393 -6.71 -5.496 -9.972 -8.609 -7.776 -6.993 -5.31 teen_problem_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -6.226 2.001 0.086 [-10.335 -2.464] -7.521 1.697 0.086 [-10.779 -4.142] -7.882 1.556 0.072 [-10.992 -4.964] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -10.347 -7.562 -6.152 -4.864 -2.47 -10.946 -8.665 -7.49 -6.325 -4.279 -10.968 -8.909 -7.856 -6.828 -4.93
Summary statistics for t scores
prek_tscore_pred.summary()
school_tscore_pred.summary()
teen_tscore_pred.summary()
prek_tscore_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -12.034 2.347 0.111 [-16.58 -7.423] -13.755 1.299 0.047 [-16.319 -11.239] -14.235 1.581 0.071 [-17.27 -11.095] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -16.512 -13.552 -12.086 -10.499 -7.343 -16.346 -14.589 -13.745 -12.876 -11.256 -17.165 -15.336 -14.288 -13.213 -10.97 school_tscore_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -8.299 2.288 0.11 [-12.828 -3.793] -10.02 1.326 0.06 [-12.634 -7.51 ] -10.5 1.572 0.074 [-13.661 -7.488] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -12.709 -9.797 -8.354 -6.847 -3.663 -12.658 -10.905 -10.014 -9.114 -7.521 -13.408 -11.635 -10.502 -9.495 -7.22 teen_tscore_pred: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -8.458 2.649 0.114 [-13.921 -3.54 ] -10.179 2.243 0.114 [-14.44 -5.657] -10.659 2.058 0.095 [-14.742 -6.796] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -13.894 -10.229 -8.343 -6.654 -3.498 -14.692 -11.692 -10.134 -8.597 -5.887 -14.715 -12.04 -10.636 -9.275 -6.738
Each summary corresponds to teh differences between baseline and age x component categories for each outcome. The order is:
intensity_diff.summary()
problem_diff.summary()
tscore_diff.summary()
intensity_diff: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -38.904 7.653 0.361 [-53.491 -23.645] -44.514 4.228 0.153 [-52.948 -36.379] -46.078 5.152 0.233 [-55.871 -35.808] -26.728 7.459 0.358 [-41.425 -11.998] -32.338 4.317 0.195 [-40.723 -24.11 ] -33.902 5.123 0.243 [-44.127 -24.158] -27.245 8.639 0.373 [-45.269 -11.412] -32.855 7.31 0.372 [-47.463 -18.871] -34.419 6.712 0.31 [-47.746 -21.821] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -53.471 -43.851 -39.075 -33.891 -23.623 -52.947 -47.222 -44.477 -41.663 -36.374 -55.632 -49.652 -46.243 -42.755 -35.423 -41.193 -31.617 -26.883 -22.009 -11.651 -41.008 -35.237 -32.328 -29.394 -24.251 -43.362 -37.581 -33.897 -30.634 -23.235 -44.983 -33.003 -26.874 -21.35 -10.999 -47.582 -37.792 -32.694 -27.721 -18.911 -47.679 -38.914 -34.335 -29.922 -21.638 problem_diff: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -8.983 1.767 0.083 [-12.351 -5.46 ] -10.278 0.976 0.035 [-12.226 -8.4 ] -10.64 1.19 0.054 [-12.901 -8.268] -6.172 1.722 0.083 [-9.565 -2.77 ] -7.467 0.997 0.045 [-9.403 -5.567] -7.828 1.183 0.056 [-10.189 -5.578] -6.291 1.995 0.086 [-10.453 -2.635] -7.586 1.688 0.086 [-10.959 -4.357] -7.947 1.55 0.072 [-11.025 -5.039] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -12.346 -10.125 -9.023 -7.826 -5.455 -12.226 -10.904 -10.27 -9.62 -8.399 -12.846 -11.465 -10.678 -9.872 -8.179 -9.512 -7.3 -6.207 -5.082 -2.69 -9.469 -8.136 -7.465 -6.787 -5.6 -10.012 -8.678 -7.827 -7.074 -5.365 -10.387 -7.62 -6.205 -4.93 -2.54 -10.987 -8.726 -7.549 -6.401 -4.367 -11.009 -8.985 -7.928 -6.909 -4.996 tscore_diff: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ -11.933 2.347 0.111 [-16.407 -7.253] -13.654 1.297 0.047 [-16.241 -11.159] -14.134 1.58 0.071 [-17.137 -10.984] -8.198 2.288 0.11 [-12.706 -3.68 ] -9.919 1.324 0.06 [-12.491 -7.395] -10.399 1.572 0.075 [-13.535 -7.41 ] -8.357 2.65 0.115 [-13.886 -3.5 ] -10.078 2.242 0.114 [-14.558 -5.788] -10.557 2.059 0.095 [-14.645 -6.693] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| -16.401 -13.451 -11.986 -10.396 -7.246 -16.241 -14.485 -13.643 -12.78 -11.157 -17.064 -15.23 -14.184 -13.114 -10.866 -12.635 -9.698 -8.246 -6.751 -3.574 -12.579 -10.808 -9.916 -9.016 -7.439 -13.301 -11.527 -10.397 -9.397 -7.127 -13.798 -10.123 -8.243 -6.549 -3.374 -14.595 -11.592 -10.028 -8.503 -5.801 -14.625 -11.936 -10.532 -9.178 -6.637