%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymc as pm
np.random.seed(20090425)
pm.__version__
/Users/fonnescj/anaconda3/lib/python3.5/site-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter. warnings.warn(self.msg_depr % (key, alt_key))
Couldn't import dot_parser, loading of dot files will not be possible.
'2.3.6'
As many of the interventions involve combinations of paradigms we identified categories that would best describe these combinations. Each intervention is described as involving the following elements, with some involving more than one.
# Import data
cognition = pd.read_csv('data/Cognition.csv',
na_values=['NR', 'nr', 'Varied see comment', 'Varied', 'see comment'],
index_col=0)
cognition_outcomes = pd.read_csv('data/Cognition_outcomes.csv', index_col=0)
# replace some values
cognition = cognition.replace({'immed': 0})
cognition.time_to_last_fup = cognition.time_to_last_fup.astype(float)
Missing diagnosis values should be zero
diagnoses = ['diagnosis_autism', 'diagnosis_pdd-nos', 'diagnosis_asperger', 'diagnosis_other']
cognition[diagnoses] = cognition[diagnoses].fillna(0)
cognition_outcomes.cognition_outcome.unique().shape
(31,)
cognition_outcomes.cognition_outcome = cognition_outcomes.cognition_outcome.replace(
['IQ', 'Early learning composite', 'Intellectual functioning',
'MDI', 'cognitive standard', 'Full scale IQ',
'DQ', 'GMDS-ER GQ', 'Early Learning composite'], 'IQ Standard Score')
cognition_outcomes.cognition_outcome = cognition_outcomes.cognition_outcome.replace(
['mental age', 'dev age', 'Developmental age'], 'IQ Age Scale')
cognition_outcomes.cognition_outcome = cognition_outcomes.cognition_outcome.replace(
['Non-Verbal IQ', 'non-verbal IQ', 'Performance IQ',
'Nonverbal IQ', 'Non-verbal', 'Non-verbal intelligence',
'non-verbal cognition (VR)', 'Visual', 'Visual-spatial IQ',
'Visual reception'], 'Non-verbal IQ')
cognition_outcomes.cognition_outcome = cognition_outcomes.cognition_outcome.replace('Fine motor',
'Motor IQ')
cognition_outcomes.cognition_outcome = cognition_outcomes.cognition_outcome.replace(
['Receptive Language', 'Receptive language', 'Expressive language'],
'Language IQ')
cognition_outcomes.cognition_outcome = cognition_outcomes.cognition_outcome.replace(
'Expressive language age', 'Language Age Scale')
Drop outcomes that cannot be categorized
cognition_outcomes = cognition_outcomes[cognition_outcomes.cognition_outcome.isin(
['Language and communication', 'Reciprocal social interaction']) ^ True]
Drop null outcomes
cognition_outcomes = cognition_outcomes[cognition_outcomes.cognition_outcome.notnull()]
Confirm replacement of values
cognition_outcomes.cognition_outcome.unique()
array(['IQ Standard Score', 'Language Age Scale', 'Language IQ', 'Non-verbal IQ', 'IQ Age Scale', 'Motor IQ'], dtype=object)
cognition_outcomes.head()
obsid | cognition_outcome | tool | baseline_mean | baseline_sd | end_tx_mean | end_tx_sd | final_mean | final_sd | change_mean | change_sd | |
---|---|---|---|---|---|---|---|---|---|---|---|
outcomeid | |||||||||||
1 | 1 | IQ Standard Score | Stanford-Binet and Bayley scale | 61.43 | 16.43 | 73.48 | 27.28 | 64.65 | 33.04 | NaN | NaN |
2 | 2 | IQ Standard Score | Stanford-Binet and Bayley scale | 63.83 | 13.98 | 61.00 | 27.30 | 61.94 | 31.09 | NaN | NaN |
3 | 3 | IQ Standard Score | Mullen | 59.60 | 6.90 | 68.50 | 7.50 | NaN | NaN | 8.9 | NaN |
4 | 4 | IQ Standard Score | Mullen | 63.20 | 6.60 | 61.40 | 9.00 | NaN | NaN | -1.8 | NaN |
5 | 5 | IQ Standard Score | BSID, SB:FE, Weschler | 51.60 | 16.90 | NaN | NaN | 66.60 | 24.80 | NaN | NaN |
Confirm numeric types throughout
cognition_outcomes.dtypes
obsid int64 cognition_outcome object tool object baseline_mean float64 baseline_sd float64 end_tx_mean float64 end_tx_sd float64 final_mean float64 final_sd float64 change_mean float64 change_sd float64 dtype: object
Counts of unique treatments (treatment names)
cognition.treatment.value_counts()
Eclectic 4 TAU 3 EIBI 3 ABA 3 control / TAU 2 Parent training 2 Clinic based\nABA 1 Low intensity behavioral 1 control 1 Intensive Eclectic 1 P-ESDM 1 Intensive Rx-\nUCLA 1 intervention manuals only 1 Parent directed 1 Parent\nmanaged\nABA 1 Preschool Autism Communication\nTrial [PACT] 1 Local services 1 UCLA 1 behavioral 1 non-intensive public early\nintervention 1 Local authority approach 1 intensive behavior\nanalytic (IBT) 1 low intensity behavioral 1 Non-IS 1 Assess & Monitor group 1 LEAP 1 Control 1 HTMW 1 EIBT-UCLA 1 Clinic directed 1 eclectic 1 Special nursery 1 Community based Rx 1 Interpersonal Synchrony (IS) 1 Portage 1 ESDM 1 Name: treatment, dtype: int64
Counts of unique tools
cognition_outcomes.tool.value_counts()
MSEL 24 Merrill-Palmer 8 Mullen 8 composite 5 PEP-R 4 Bayley, Stanford-Binet 2 Griffith mental developmental scales 2 Stanford-Binet and Bayley scale 2 BSID, SB:FE, Weschler 2 Bayley-Dutch version 2 Bayley and WPPSI-R 2 Bayley and Merrill-Palmer 2 Bayley, Merrill-Palmer 2 BSID-II 2 WPPSI-R/WISC-R/BSID 2 Bayley 2 GSID 2 Stanford Binet and BSID-II 2 MPSMT 2 Merrill Palmer 1 Name: tool, dtype: int64
Counts of outcome measures used
cognition_outcomes.cognition_outcome.value_counts()
IQ Standard Score 33 Non-verbal IQ 21 Language IQ 12 IQ Age Scale 6 Motor IQ 4 Language Age Scale 2 Name: cognition_outcome, dtype: int64
List of intervention classes
intervention_types = ['high_intensity_eibi', 'low_intensity_eibi',
'low_intensity_parent_training', 'defined_school_program',
'high_intensity_community', 'adjunctive_community_service',
'eclectic_school']
Create edges for network using all pairwise combinations of interventions
import itertools
network_edges = list(itertools.combinations(intervention_types, 2))
Initialize network
network = dict.fromkeys(network_edges, 0)
for i in intervention_types:
network[(i, i)] = 0
Create parallel network for sample size
sample_size = network.copy()
Populate network dicts
# Loop over all studies
for s in cognition.refid.unique():
# Construct pairs of comparisons
study = cognition[cognition.refid==s]
study_interventions = study[intervention_types].sum()
# Get sample sizes
studies = cognition[cognition.refid==s]
n_analysis = studies.n_analysis
# Generate pairs of interventions for this study
pairs = list(itertools.combinations(study_interventions.index[study_interventions==1].values, 2))
# Loop over pairs within study, and increment comparison count
for pair in pairs:
network[pair] += 1
sample_size[pair] += n_analysis[cognition[cognition.refid==s][pair[0]]==1].values[0]
sample_size[pair] += n_analysis[cognition[cognition.refid==s][pair[1]]==1].values[0]
# Account for comparisons within a class
within_comparisons = study_interventions[study_interventions==2].index.values
for c in within_comparisons:
network[(c, c)] += 1
sample_size[(c, c)] += n_analysis[cognition[cognition.refid==s][c]==1].values[0]
sample_size[(c, c)] += n_analysis[cognition[cognition.refid==s][c]==1].values[1]
nonzero_network = {tuple(ni.replace('_', ' ') for ni in n): network[n] for n in network if network[n]}
nonzero_samples = {tuple(ni.replace('_', ' ') for ni in n): sample_size[n] for n in sample_size if sample_size[n]}
Number of studies with direct comparisons between pairs of interventions
nonzero_network
{('adjunctive community service', 'adjunctive community service'): 1, ('adjunctive community service', 'eclectic school'): 1, ('defined school program', 'adjunctive community service'): 2, ('defined school program', 'defined school program'): 2, ('defined school program', 'eclectic school'): 3, ('eclectic school', 'eclectic school'): 1, ('high intensity community', 'eclectic school'): 1, ('high intensity eibi', 'adjunctive community service'): 3, ('high intensity eibi', 'defined school program'): 4, ('high intensity eibi', 'eclectic school'): 3, ('high intensity eibi', 'high intensity community'): 2, ('high intensity eibi', 'high intensity eibi'): 3, ('high intensity eibi', 'low intensity parent training'): 1, ('low intensity eibi', 'eclectic school'): 1, ('low intensity parent training', 'adjunctive community service'): 3, ('low intensity parent training', 'defined school program'): 1, ('low intensity parent training', 'eclectic school'): 1}
Sample sizes for each comparison
nonzero_samples
{('adjunctive community service', 'adjunctive community service'): 152, ('adjunctive community service', 'eclectic school'): 31, ('defined school program', 'adjunctive community service'): 77, ('defined school program', 'defined school program'): 112, ('defined school program', 'eclectic school'): 112, ('eclectic school', 'eclectic school'): 41, ('high intensity community', 'eclectic school'): 32, ('high intensity eibi', 'adjunctive community service'): 111, ('high intensity eibi', 'defined school program'): 184, ('high intensity eibi', 'eclectic school'): 105, ('high intensity eibi', 'high intensity community'): 90, ('high intensity eibi', 'high intensity eibi'): 90, ('high intensity eibi', 'low intensity parent training'): 28, ('low intensity eibi', 'eclectic school'): 40, ('low intensity parent training', 'adjunctive community service'): 172, ('low intensity parent training', 'defined school program'): 48, ('low intensity parent training', 'eclectic school'): 26}
nx.draw_networkx?
Object `nx.draw_networkx` not found.
nx.draw_networkx?
Object `nx.draw_networkx` not found.
import networkx as nx
G = nx.Graph()
for n in nonzero_network:
G.add_edge(n[0], n[1] ,weight=nonzero_network[n])
pos=nx.spring_layout(G)
fig, ax = plt.subplots(figsize=(12, 8))
nx.draw_networkx(G, pos=pos, ax=ax)
plt.figure(figsize=(14,8))
nx.draw_networkx(G, pos, with_labels=False, node_size=700)
nmax = float(max(nonzero_samples.values()))
for i in range(max(nonzero_network.values())):
# Generate edge with appropriate weight
edge_i = [(u,v) for (u,v,d) in G.edges(data=True) if d['weight']==(i+1)]
# Generate alpha for edges
try:
alpha = nonzero_samples[edge_i[0]] / nmax
except KeyError:
# Sometimes the pairs are backwards
alpha = nonzero_samples[edge_i[0][::-1]] / nmax
nx.draw_networkx_edges(G, pos, edgelist=edge_i, width=(i+1)*3, alpha=alpha)
pos_shifted = {p: (pos[p][0], pos[p][1]+0.05) for p in pos}
nx.draw_networkx_labels(G, pos=pos_shifted)
plt.gca().xaxis.set_major_locator(plt.NullLocator())
plt.gca().yaxis.set_major_locator(plt.NullLocator());
Merge study information and outcomes
# Columns from the cognition table that we are interested in
cols_to_keep = ['refid', 'author', 'year', 'treatment',
'high_intensity_eibi', 'low_intensity_eibi',
'low_intensity_parent_training', 'defined_school_program',
'high_intensity_community', 'adjunctive_community_service',
'eclectic_school', 'intervention_duration',
'n_analysis', 'age_enroll_mean', 'age_enroll_sd',
'age_enroll_lo', 'age_enroll_hi', 'diagnosis_autism',
'diagnosis_pdd-nos', 'diagnosis_asperger', 'diagnosis_other']
cognition_data = cognition[cols_to_keep].merge(cognition_outcomes, left_index=True, right_on='obsid')
Export and peek at merged dataset
cognition_data.to_csv('data/cognition_merged.csv')
cognition_data.head()
refid | author | year | treatment | high_intensity_eibi | low_intensity_eibi | low_intensity_parent_training | defined_school_program | high_intensity_community | adjunctive_community_service | ... | cognition_outcome | tool | baseline_mean | baseline_sd | end_tx_mean | end_tx_sd | final_mean | final_sd | change_mean | change_sd | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
outcomeid | |||||||||||||||||||||
1 | 6276_240 | Kovshoff | 2011 | EIBI | 1 | 0 | 0 | 0 | 0 | 0 | ... | IQ Standard Score | Stanford-Binet and Bayley scale | 61.43 | 16.43 | 73.48 | 27.28 | 64.65 | 33.04 | NaN | NaN |
49 | 6276_240 | Kovshoff | 2011 | EIBI | 1 | 0 | 0 | 0 | 0 | 0 | ... | IQ Age Scale | Bayley | 22.04 | 6.89 | 33.70 | 10.16 | 44.39 | 16.39 | NaN | NaN |
2 | 6276_240 | Kovshoff | 2011 | Control | 0 | 0 | 0 | 0 | 0 | 1 | ... | IQ Standard Score | Stanford-Binet and Bayley scale | 63.83 | 13.98 | 61.00 | 27.30 | 61.94 | 31.09 | NaN | NaN |
50 | 6276_240 | Kovshoff | 2011 | Control | 0 | 0 | 0 | 0 | 0 | 1 | ... | IQ Age Scale | Bayley | 23.71 | 6.00 | 29.81 | 9.89 | 38.00 | 17.44 | NaN | NaN |
3 | 6493 | Strain | 2011 | LEAP | 0 | 0 | 0 | 1 | 0 | 0 | ... | IQ Standard Score | Mullen | 59.60 | 6.90 | 68.50 | 7.50 | NaN | NaN | 8.9 | NaN |
5 rows × 32 columns
Another network to display the number of studies that contain multiple outcome measures, which allows us to relate outcomes to one another.
outcome_types = cognition_outcomes.cognition_outcome.unique()
outcome_types
array(['IQ Standard Score', 'Language Age Scale', 'Language IQ', 'Non-verbal IQ', 'IQ Age Scale', 'Motor IQ'], dtype=object)
Calculate set of edges
outcome_network_edges = list(itertools.combinations(outcome_types, 2))
Initialize network
outcome_network = dict.fromkeys(outcome_network_edges, 0)
for i in outcome_types:
outcome_network[(i, i)] = 0
The set of comparisons in each study
outcome_comparisons = cognition_data.groupby('refid')['cognition_outcome'].unique()
outcome_comparisons
refid 1117 [Non-verbal IQ] 1264 [IQ Standard Score, IQ Age Scale] 546 [IQ Standard Score, Non-verbal IQ] 5652 [IQ Standard Score] 5680 [IQ Standard Score, Non-verbal IQ] 5715 [IQ Standard Score] 588 [IQ Standard Score, Non-verbal IQ] 6245_6549 [IQ Standard Score] 6276_240 [IQ Standard Score, IQ Age Scale] 6284 [IQ Standard Score] 6288 [Language Age Scale, Language IQ] 6304_8035 [Language IQ, Non-verbal IQ] 6322 [IQ Standard Score] 6427 [IQ Standard Score] 647 [IQ Standard Score, Non-verbal IQ] 6493 [IQ Standard Score] 6537_6592 [Non-verbal IQ, Motor IQ, Language IQ] 6682 [IQ Standard Score] 734 [IQ Standard Score, Non-verbal IQ] 9006 [IQ Age Scale, Non-verbal IQ, Motor IQ, Langua... 997 [Non-verbal IQ] Name: cognition_outcome, dtype: object
# Loop over all studies
for oc in outcome_comparisons:
# Generate pairs of interventions for this study
pairs = list(itertools.combinations(oc, 2))
# Loop over pairs within study, and increment comparison count
for pair in pairs:
try:
outcome_network[pair] += 1
except KeyError:
outcome_network[pair[::-1]] += 1
G2 = nx.Graph()
for n in outcome_network:
G2.add_edge(n[0], n[1], weight=outcome_network[n])
pos2=nx.spring_layout(G2)
fig, ax = plt.subplots(figsize=(12,8))
nx.draw_networkx(G2, pos=pos2)
plt.figure(figsize=(14,8))
nx.draw_networkx(G2, pos2, node_size=700, with_labels=False)
for i in range(max(outcome_network.values())):
# Generate edge with appropriate weight
edge_i = [(u,v) for (u,v,d) in G2.edges(data=True) if d['weight']==(i+1)]
nx.draw_networkx_edges(G2, pos2, edgelist=edge_i, width=(i+1)*3)
pos_shifted2 = {p: (pos2[p][0]+0.02, pos2[p][1]-0.05) for p in pos2}
nx.draw_networkx_labels(G2, pos_shifted2, font_size=10, font_family='sans-serif')
plt.gca().xaxis.set_major_locator(plt.NullLocator())
plt.gca().yaxis.set_major_locator(plt.NullLocator());
Histogram of outcome values across outcomes
plt.figure(figsize=(9,5))
plot_axes = cognition_data.groupby('cognition_outcome')['baseline_mean'].hist(bins=25, grid=False, alpha=0.4)
plt.legend(plot_axes.index)
<matplotlib.legend.Legend at 0x10a83f400>
cognition_data['baseline_mean'].hist(by=cognition_data['cognition_outcome'], sharey=True, sharex=False, bins=10)
plt.tight_layout();
From the above, it is clear that there is little information in the age scale outcomes and in motor IQ, so we will restrict attention to the other three outcome measures.
outcomes_MA = ['IQ Standard Score', 'Language IQ', 'Non-verbal IQ']
cognition_data_MA = cognition_data[cognition_data.cognition_outcome.isin(outcomes_MA)]
Drop Zachor 2007 since there is no post-treatement value for the included outcome
cognition_data_MA = cognition_data_MA[cognition_data_MA.refid!='5652']
Eliminate change_mean
values for 6493 since it has no corresponding variance
cognition_data_MA.change_mean[cognition_data_MA.refid=='6493'] = np.nan
Drop 546 because it has no variance for treatment mean
cognition_data_MA = cognition_data_MA[cognition_data_MA.refid!='546']
cognition_data_MA[intervention_types].sum(0)
high_intensity_eibi 23 low_intensity_eibi 4 low_intensity_parent_training 6 defined_school_program 15 high_intensity_community 3 adjunctive_community_service 6 eclectic_school 11 dtype: int64
Create indicators for study, intervention, outcome
unique_studies = cognition_data_MA.refid.unique()
cognition_data_MA['study_id'] = np.squeeze([np.where(r==unique_studies) for r in cognition_data_MA.refid])
cognition_data_MA['outcome_id'] = 0
for i,o in enumerate(outcomes_MA):
if i:
cognition_data_MA.outcome_id[cognition_data_MA.cognition_outcome==o] = i
/Users/fonnescj/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Resulting indicators for all outcomes
cognition_data_MA[['study_id', 'outcome_id']].head()
study_id | outcome_id | |
---|---|---|
outcomeid | ||
1 | 0 | 0 |
2 | 0 | 0 |
3 | 1 | 0 |
4 | 1 | 0 |
5 | 2 | 0 |
Take final outcomes where available, end-of-treatment otherwise:
cognition_data_MA['after_mean'] = cognition_data_MA.apply(lambda x:
(x.final_mean, x.end_tx_mean)[np.isnan(x.final_mean)], axis=1)
cognition_data_MA['after_sd'] = cognition_data_MA.apply(lambda x: (x.final_sd, x.end_tx_sd)[np.isnan(x.final_sd)], axis=1)
Calculate difference of before and after treatment, assuming Gaussian outcomes
change_missing = cognition_data_MA.change_mean.isnull()
cognition_data_MA.loc[change_missing, 'change_mean'] = (cognition_data_MA.after_mean[change_missing] -
cognition_data_MA.baseline_mean[change_missing])
cognition_data_MA.loc[change_missing, 'change_sd'] = np.sqrt(cognition_data_MA.after_sd[change_missing]**2
+ cognition_data_MA.baseline_sd[change_missing]**2)
# Export MA data for reference
cognition_data_MA.to_csv('data/cognition_data_MA.csv')
Calculate standard error of mean change
# Local variables for data
intervention_ind = cognition_data_MA[intervention_types]
(high_intensity_eibi, low_intensity_eibi,
low_intensity_parent_training, defined_school_program,
high_intensity_community, adjunctive_community_service,
eclectic_school) = intervention_ind.values.T.astype(int)
intervention_duration = cognition_data_MA.intervention_duration
n_analysis = cognition_data_MA.n_analysis.values
(age_enroll_mean, age_enroll_sd, age_enroll_lo, age_enroll_hi) = cognition_data_MA[
['age_enroll_mean','age_enroll_sd','age_enroll_lo','age_enroll_hi']].values.T.astype(float)
(diagnosis_autism, diagnosis_pdd_nos,
diagnosis_asperger, diagnosis_other) = cognition_data_MA[['diagnosis_autism',
'diagnosis_pdd-nos',
'diagnosis_asperger',
'diagnosis_other']]
(baseline_mean, baseline_sd, change_mean, change_sd) = cognition_data_MA[['baseline_mean',
'baseline_sd',
'change_mean',
'change_sd']].values.T.astype(float)
study_id = cognition_data_MA.study_id.values
outcome_id = cognition_data_MA.outcome_id.values
cognition_data_MA.groupby('cognition_outcome')['change_mean'].plot(kind='kde')
plt.xlabel('Mean difference in outcome');
plt.legend()
<matplotlib.legend.Legend at 0x109aedef0>
The three outcome measures are modeled as a multivariate normal random variable, to account for the likely correlation among the measures. Thus, we specify the mean vector and variance-covariance matrix with diffuse normal and Wishart priors, respectively.
μ = pm.Normal('μ', 0, 0.001, value=[0]*3)
σ = pm.HalfCauchy('σ', 0, 5, value=[3]*3)
ρ = pm.Uniform('ρ', -1, 1, value=[0]*3)
@pm.deterministic
def Τ(σ=σ, ρ=ρ):
var = σ**2
S = np.eye(3)*var
S[0,1] = S[1,0] = σ[0]*σ[1]*ρ[0]
S[0,2] = S[2,0] = σ[0]*σ[2]*ρ[1]
S[2,1] = S[1,2] = σ[1]*σ[2]*ρ[2]
return np.linalg.inv(S)
Thus, each study is a draw from a multivariate normal with the above parameters.
m = [pm.MvNormal('m_{}'.format(i), μ, Τ) for i in range(len(unique_studies))]
The parameters of interest are the indicators for each intervention category:
high_intensity_eibi
low_intensity_eibi
low_intensity_parent_training
defined_school_program
high_intensity_community
adjunctive_community_service
eclectic_school
Each study arm will have one or more of these indicators contributing to the response. We might like to have interactions among these parameters, but we likely do not have enough studies to fit all the additional parameters precisely.
β = pm.Normal('β', 0, 0.001, value=[0]*7)
In addition, since it appears there is a slight negative relationship between the treatment effect and the baseline score (see plot below), we model the mean baseline score for each treatment arm as a confounder.
plot_data = cognition_data_MA[['after_mean', 'baseline_mean']].dropna()
x = plot_data.baseline_mean.values
y = (plot_data.after_mean - plot_data.baseline_mean).values
fit = np.polyfit(x, y, 1)
fit_fn = np.poly1d(fit)
xvals = np.linspace(0, 100)
plt.plot(x, y, 'ro', xvals, fit_fn(xvals), '--k')
plt.xlabel('Baseline score'); plt.ylabel('Outcome difference');
α_base = pm.Normal('α_base', 0, 0.0001)
Putting this all together, the expected value for each result is the treatment effect mean for each outcome, along with the additive effects of treatment class and the baseline score effect.
@pm.deterministic
def θ(m=m, β=β, α=α_base):
mi = [m[i][j] for i,j in zip(study_id, outcome_id)]
return(mi + np.dot(intervention_ind, β)
+ α*(baseline_mean - baseline_mean.mean())/baseline_mean.std())
Finally, the likelihood is just a normal distribution, with the observed standard error of the treatment effect as the standard deviation of the estimates.
change_se = change_sd/np.sqrt(n_analysis)
d = pm.Normal('d', θ, change_se**-2, observed=True, value=change_mean)
In order to check our model, we simulate data from the posterior predictive distribution, to compare with observed values.
d_sim = pm.Normal('d_sim', θ, change_se**-2, value=change_mean)
Indicator the best treatment, to be used to calculate the probability of best treatment
best = pm.Lambda('best', lambda b=β: (b==b.max()).astype(int))
Run the model using Markov chain Monte Carlo.
M = pm.MCMC(locals())
# M.use_step_method(pm.AdaptiveMetropolis, M.m)
# M.use_step_method(pm.AdaptiveMetropolis, M.mu)
# M.use_step_method(pm.AdaptiveMetropolis, M.beta)
/Users/fonnescj/Repositories/pymc/pymc/Node.py:403: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future self.__name__ = input['__name__']
n_iterations, n_burn = 500000, 450000
M.sample(n_iterations, n_burn)
[-----------------100%-----------------] 200000 of 200000 complete in 1140.9 sec
M.sample(n_iterations, n_burn)
[-----------------100%-----------------] 200000 of 200000 complete in 1241.2 sec
plt.figure(figsize=(10,6))
pm.Matplot.summary_plot([M.β, M.α_base], custom_labels=[
'high intensity eibi',
'low intensity eibi',
'low intensity parent training',
'defined school program',
'high intensity community',
'adjunctive community service',
'eclectic school',
'baseline score'])
plt.figure(figsize=(10,6))
pm.Matplot.summary_plot([M.beta, M.alpha_base], custom_labels=[
'high intensity eibi',
'low intensity eibi',
'low intensity parent training',
'defined school program',
'high intensity community',
'adjunctive community service',
'eclectic school',
'baseline score'])
Could not calculate Gelman-Rubin statistics. Requires multiple chains of equal length.
plt.figure(figsize=(10,6))
pm.Matplot.summary_plot([M.beta, M.alpha_base], custom_labels=[
'high intensity eibi',
'low intensity eibi',
'low intensity parent training',
'defined school program',
'high intensity community',
'adjunctive community service',
'eclectic school',
'baseline score'])
Could not calculate Gelman-Rubin statistics. Requires multiple chains of equal length.
M.β.summary()
β: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 5.958 2.183 0.18 [ 2.053 10.541] 2.05 2.967 0.247 [-4.518 7.394] -0.655 2.266 0.179 [-4.998 3.674] 2.697 1.912 0.147 [-0.752 6.636] -3.506 4.536 0.408 [-11.78 6.385] -0.54 2.647 0.214 [-5.41 4.785] -5.635 2.187 0.179 [-9.755 -1.405] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 1.821 4.5 5.851 7.474 10.342 -4.515 0.176 2.317 4.016 7.448 -5.156 -2.17 -0.671 0.921 3.618 -0.727 1.398 2.572 3.893 6.673 -12.745 -6.515 -3.48 -0.53 5.726 -5.718 -2.362 -0.419 1.188 4.598 -10.176 -7.118 -5.519 -4.038 -1.711
/Users/fonnescj/anaconda3/lib/python3.5/site-packages/numpy/core/fromnumeric.py:225: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future return reshape(newshape, order=order)
Calculate posterior probability of best intervention.
M.best.summary()
best: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.779 0.415 0.026 [ 0. 1.] 0.104 0.305 0.018 [ 0. 1.] 0.0 0.017 0.0 [ 0. 0.] 0.108 0.31 0.022 [ 0. 1.] 0.006 0.078 0.003 [ 0. 0.] 0.004 0.06 0.002 [ 0. 0.] 0.0 0.0 0.0 [ 0. 0.] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.0 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
/Users/fonnescj/anaconda3/lib/python3.5/site-packages/numpy/core/fromnumeric.py:225: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future return reshape(newshape, order=order)
pm.Matplot.plot(M.β)
Plotting β_0 Plotting β_1 Plotting β_2 Plotting β_3 Plotting β_4 Plotting β_5 Plotting β_6
pm.Matplot.summary_plot([M.μ], custom_labels=outcomes_MA)
These posterior predictive plots illustrate the fit of the model. Each frame represents a study outcome, where the red line is the value of the observed mean outcome. The blue histograms are draws from the posterior predictive distribution of the model, so we look for cases where the red line is far outside the support of the simulated data as evidence for lack of fit. Here, the fit is reasonable across all study outcomes.
pm.Matplot.gof_plot(M.d_sim.trace(), change_mean, verbose=0)
/Users/fonnescj/anaconda3/lib/python3.5/site-packages/matplotlib/pyplot.py:516: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). max_open_warning, RuntimeWarning)
def post_pred_checks(simdata, trueval, round=3):
"""
Performs posterior predictive checks, returning the quantile of the observed data relative
to simulated.
"""
if isinstance(simdata, pm.Variable):
simdata = simdata.trace()
if np.ndim(trueval) == 1 and np.ndim(simdata == 2):
# Iterate over more than one set of data
return [post_pred_checks(simdata[:,i], trueval[i]) for i in range(len(trueval))]
return (simdata > trueval).mean()
ppp = post_pred_checks(M.d_sim.trace(), change_mean)
_ = plt.hist(ppp, bins=8)