#!/usr/bin/env python # coding: utf-8 # # Pediatrics Paper Analysis # > All studies with successful outcomes reported for early-identified children who are deaf or hard of hearing have intervention provided by specialists who are trained in parent-infant intervention services.[12,90,97] Early intervention programs should develop mechanisms to ensure that early intervention professionals have special skills necessary for providing families with the highest quality of service specific to children with hearing loss. Professionals with a background in deaf education, audiology, and speech-language pathology will typically have the skills needed for providing intervention services. Professionals should be highly qualified in their respective fields and should be skilled communicators who are knowledgeable and sensitive to the importance of enhancing families' strengths and supporting their priorities. When early intervention professionals have knowledge of the principles of adult learning, it increases their success with parents and other professionals. # # TJC must be competent to provide services. # # **References**: # 1. Yoshinaga-Itano C, Sedey AL, Coulter DK, Mehl AL. Language of early- and later-identified children with hearing loss. Pediatrics.1998;102 :1161– 1171 # 2. Moeller MP. Early intervention and language development in children who are deaf and hard of hearing. Pediatrics.2000;106(3) :e43 . Available at: www.pediatrics.org/cgi/content/full/106/3/e43 # 3. Calderon R. Parental involvement in deaf children's education programs as a predictor of child's language, early reading, and social-emotional development. J Deaf Stud Deaf Educ.2000;5 :140– 155 # # # Purpose of the study is to examine the relationship between highly qualified providers, age of enrollment in intervention, and speech and language outcomes at 4 years of age in a group of children enrolled in school programs that specialize in the development of listening and spoken language. # # - Does age of enrollment in intervention impact articulation, vocabulary, and language scores? (age_int) # - Does place of enrollment in intervention impact articulation, vocabulary, and language scores? (age and age_int) # - Is there an interaction effect between the place and age? # - What other factors contribute to the variance? # - Parental involvement # - Degree of hearing loss # - Mother’s education level (mother_ed) # - Prematurity (premature_weeks) # - Gender (gender) # - Language (prim_lang) # - Number of children in the home (sib) # - Age when first amplified (age_amp) # - Additional disability (secondary_diagnosis, known_synd) # In[1]: # Import modules and set options get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib.pyplot as plt import pandas as pd import numpy as np pd.set_option('display.notebook_repr_html', False) pd.set_option('display.max_columns', 20) pd.set_option('display.max_rows', 100) # Import data # In[2]: lsl_dr = pd.read_csv('lsl_dr.csv', index_col=0) # In[3]: lsl_dr.head() # Indicator for non-profound hearing loss # In[4]: lsl_dr['deg_hl_below6'] = lsl_dr.degree_hl<6 lsl_dr.loc[lsl_dr.degree_hl.isnull(), 'deg_hl_below6'] = np.nan # Indicator for first intervention outside OPTION # In[5]: lsl_dr['int_outside_option'] = lsl_dr.age > lsl_dr.age_int lsl_dr.loc[lsl_dr.age < lsl_dr.age_int, 'int_outside_option'] = np.nan # Indicator for high school graduation of mother # In[6]: lsl_dr['mother_hs'] = lsl_dr.mother_ed > 1 lsl_dr.loc[lsl_dr.mother_ed.isnull(), 'mother_hs'] = None # Joint commission benchmark # In[7]: lsl_dr.onset_1.unique() # In[8]: lsl_dr['ident_by_3'] = ident_by_3 = lsl_dr.onset_1 <= 3 lsl_dr['program_by_6'] = program_by_6 = lsl_dr.age <= 6 lsl_dr['jcih'] = ident_by_3 & program_by_6 lsl_dr.loc[lsl_dr.onset_1.isnull() | lsl_dr.age.isnull(), 'jcih'] = None # Create age in years variable # In[9]: lsl_dr['age_years'] = lsl_dr.age/12. # Retain only 4-year-olds # In[10]: lsl_dr = lsl_dr[(lsl_dr.age_test>=48) & (lsl_dr.age_test<60)] # In[11]: lsl_dr.shape # In[12]: lsl_dr.study_id.unique().shape # ## Summary Statistics for Paper # In[13]: unique_students = lsl_dr.drop_duplicates(subset='study_id') # In[14]: unique_students.shape # In[15]: unique_students.age.describe() # In[16]: unique_students.male.mean() # In[17]: unique_students.sib.describe() # In[18]: (unique_students.degree_hl == 6).describe() # In[19]: ((unique_students.degree_hl < 6) & (unique_students.degree_hl > 1)).describe() # In[20]: unique_students.mother_hs.describe() # In[21]: unique_students.synd_or_disab.describe() # In[22]: grouped_age = unique_students.replace({'jcih':{1:'JCIH', 0:'non-JCIH'}}).groupby('jcih')['age_years'] fig, ax = plt.subplots() for k, v in grouped_age: v.hist(label=k, alpha=.75, ax=ax, grid=False) ax.legend() plt.xlabel('Age (years)'); plt.ylabel('Frequency'); # ## Receptive Language Test Score Model # In[23]: # Filter domain and English-only, and appropriate bilateral subset receptive_language_dataset = lsl_dr[(lsl_dr.domain=='Language') & (lsl_dr.test_type=='receptive') & (lsl_dr.non_english==False)] receptive_language_dataset.head() # This is the size of the resulting dataset to be used in this analysis # In[24]: receptive_language_dataset.shape # In[25]: mean_score = receptive_language_dataset.groupby('study_id')['score'].mean() # In[26]: receptive_language_dataset = receptive_language_dataset.drop_duplicates('study_id') # In[27]: receptive_language_dataset = receptive_language_dataset.set_index('study_id') # In[28]: receptive_language_dataset['mean_score'] = mean_score # Specify criterion to use: `jcih`, `ident_by_3`, `program_by_6` # In[31]: criterion_labels = {'jcih': 'JCIH', 'ident_by_3': 'Diagnosed by 3 months', 'program_by_6': 'Intervention by 6 months'} criterion = 'ident_by_3' # Proportion of missing values for each variable # In[32]: covs = ['age_years', 'school', 'score', 'male', 'family_inv', 'sib', 'deg_hl_below6', 'mother_hs', 'synd_or_disab'] + [criterion] (receptive_language_dataset[covs].isnull().sum() / receptive_language_dataset.shape[0]).round(2) # Drop individuals with missing `age`, since there is <1% of them # In[33]: receptive_language_dataset = receptive_language_dataset[receptive_language_dataset.age.notnull()] # In[34]: receptive_language_dataset.score.hist(grid=False, bins=25, figsize=(10, 6)) plt.xlabel('Standard Scores'); plt.ylabel('Frequency'); # In[35]: receptive_language_dataset.mother_ed.hist() # Final analysis dataset size # In[36]: receptive_language_dataset.shape # In[37]: missing_pct = (receptive_language_dataset[covs].isnull().mean()*100).round(1) missing_pct.sort(ascending=False) missing_pct # In[38]: receptive_language_dataset['age_amp_missing'] = receptive_language_dataset.age_amp.isnull() by_age_amp_missing = receptive_language_dataset.groupby('age_amp_missing') # In[39]: by_age_amp_missing['male', 'synd_or_disab', 'deg_hl_below6', 'mother_hs', 'non_english'].mean() # In[40]: for col in ['age', 'score', 'degree_hl', 'family_inv', 'age_int']: try: _ = by_age_amp_missing.boxplot(column=col) except: pass # In[41]: receptive_language_dataset['jcih_missing'] = receptive_language_dataset.jcih.isnull() by_jcih_missing = receptive_language_dataset.groupby('jcih_missing') # In[42]: by_jcih_missing['male', 'synd_or_disab', 'deg_hl_below6', 'mother_hs', 'non_english'].mean() # In[43]: for col in ['age', 'score', 'degree_hl', 'family_inv', 'age_int']: _ = by_jcih_missing.boxplot(column=col) # In[44]: receptive_language_dataset.age_years.mean() # In[45]: from pymc import Bernoulli, Normal, Uniform, invlogit, logit, deterministic, MCMC, Matplot, MAP from pymc import Lambda, Dirichlet, Categorical, Beta, NegativeBinomial, Exponential, T from numpy.ma import masked_values def generate_model(dataset): # Extract data to local variables age_years = dataset.age_years.values school = dataset.school.values score = dataset.score.values non_english = dataset.non_english.astype(int).values male = dataset.male.values sib = dataset.sib.values synd_or_disab = dataset.synd_or_disab.values int_outside_option = dataset.int_outside_option.values synd_or_disab = dataset.synd_or_disab.values jcih = dataset.jcih.values study_id = dataset.index.values # Transform some data age_std = age_years - age_years.mean() # Imputation of family involvement n_family_inv_cats = len(dataset.family_inv.unique()) p_family_inv = Dirichlet("p_family_inv", np.ones(n_family_inv_cats), value=[1./n_family_inv_cats]*(n_family_inv_cats-1)) family_inv_masked = masked_values(dataset.family_inv.fillna(0.5), value=0.5) x_family_inv = Categorical('x_family_inv', p_family_inv, value=family_inv_masked, observed=True) # Imputation of hearing loss p_crit = Beta("p_%s" % criterion, 1, 1, value=0.5) crit_masked = masked_values(dataset[criterion].values, value=np.nan) x_crit = Bernoulli('x_%s' % criterion, p_crit, value=crit_masked, observed=True) # Imputation of hearing loss p_hl = Beta("p_hl", 1, 1, value=0.5) hl_masked = masked_values(dataset.deg_hl_below6.values, value=np.nan) x_hl = Bernoulli('x_hl', p_hl, value=hl_masked, observed=True) # Imputation of maternal education p_mother_hs = Beta("p_mother_hs", 1, 1, value=0.5) mother_hs_masked = masked_values(dataset.mother_hs.values, value=np.nan) x_mother_hs = Bernoulli('x_mother_hs', p_mother_hs, value=mother_hs_masked, observed=True) # Imputation of previous disability p_synd_or_disab = Beta("p_synd_or_disab", 1, 1, value=0.5) synd_or_disab_masked = masked_values(dataset.synd_or_disab.values, value=np.nan) x_synd_or_disab = Bernoulli('x_synd_or_disab', p_synd_or_disab, value=synd_or_disab_masked, observed=True) # Imputation of siblings n_sib_cats = len(dataset.sib.unique()) p_sib = Dirichlet("p_sib", np.ones(n_sib_cats), value=[1./n_sib_cats]*(n_sib_cats-1)) sib_masked = masked_values(dataset.sib.fillna(0.5), value=0.5) x_sib = Categorical('x_sib', p_sib, value=sib_masked, observed=True) mean_score = Normal("mean_score", 100, 0.001, value=80) # Indices to school random effects unique_schools = np.unique(school) school_index = [list(unique_schools).index(s) for s in school] # School random effect sigma_school = Uniform("sigma_school", 0, 1000, value=10) tau_school = sigma_school**-2 alpha_school = Normal("alpha_school", mu=0, tau=tau_school, value=np.zeros(len(unique_schools))) # Indices to child random effects unique_child = np.unique(study_id) child_index = [list(unique_child).index(s) for s in study_id] # Robust individual random effect nu_child = Exponential("nu_child", 0.1, value=10) alpha_child = T("alpha_child", nu=nu_child, value=np.zeros(len(unique_child))) @deterministic def intercept(a0=mean_score, a1=alpha_school): """Random intercepts""" return a0 + a1[school_index] # Fixed effects beta = Normal("beta", 0, 0.001, value=[0]*8) @deterministic def theta(b0=intercept, b=beta, x_family_inv=x_family_inv, x_sib=x_sib, x_hl=x_hl, x_mother_hs=x_mother_hs, x_synd_or_disab=x_synd_or_disab, x_crit=x_crit, a=alpha_child): # Complete predictors X = [age_std, male] # Partially-imputed predictors X += [x_family_inv, x_sib, x_hl, x_mother_hs, x_synd_or_disab, x_crit] return b0 + np.dot(b, X) + a[child_index] sigma = Uniform("sigma", 0, 1000, value=10) tau = sigma**-2 # Data likelihood score_like = Normal("score_like", mu=theta, tau=tau, value=score, observed=True) #score_like_pred = Normal("score_like_pred", mu=theta, tau=tau, value=score) # Predictions: # Girl, average age, normal family involvement, 2 siblings, non-profound hearing loss, # mother with HS, no previous disability, JCIH x1 = [0, 0, 0, 2, 1, 1, 0, 1] # Boy, 6 months younger than average, normal family involvement, no siblings, profound hearing loss, # mother without HS diploma, previous disability, JCIH x2 = [-0.5, 1, 0, 0, 0, 0, 1, 1] # Boy, 1.5 years older than average, poor family involvement, 1 sibling, non-profound hearing loss, # mother without HS diploma, previous disability, no JCIH x3 = [1.5, 1, 2, 1, 1, 0, 1, 0] # Girl, 6 months older than average, impaired family involvement, 3 siblings, profound hearing loss, # mother with HS diploma, no previous disability, JCIH x4 = [0.5, 1, 1, 3, 0, 1, 0, 1] X = np.c_[x1, x2, x3, x4] theta_pred = Lambda('theta_pred', lambda b=beta, mu=mean_score: mu + np.dot(X.T, b)) predictions = Normal('predictions', mu=theta_pred, tau=tau) return locals() # In[46]: M_reclang = MCMC(generate_model(receptive_language_dataset), db='sqlite', name='reclang', dbmode='w') # In[47]: iterations = 100000 burn = 90000 # In[48]: M_reclang.sample(iterations, burn) # In[49]: labels = ['Age of Enrollment', 'Male', 'Reduced Family Involvement', 'Sibling Count', 'Non-profound Hearing Loss', 'Mother with HS Diploma','Additional Disability', criterion_labels[criterion]] # In[50]: Matplot.summary_plot([M_reclang.beta], rhat=False, custom_labels=labels, x_range=(-10,10), main='Receptive Language') # In[51]: Matplot.summary_plot([M_reclang.beta], rhat=False, custom_labels=labels, x_range=(-10,10), main='Receptive Language') # In[52]: M_reclang.beta.summary() # In[53]: Matplot.summary_plot([M_reclang.predictions], rhat=False, custom_labels=['Student {}'.format(i) for i in range(1,5)]) # ## Expressive Language Model # In[54]: expressive_language_dataset = lsl_dr[(lsl_dr.domain=='Language') & (lsl_dr.test_type=='expressive') & (lsl_dr.non_english==False)] expressive_language_dataset.head() # In[55]: mean_score = expressive_language_dataset.groupby('study_id')['score'].mean() expressive_language_dataset = expressive_language_dataset.drop_duplicates('study_id').set_index('study_id') expressive_language_dataset['mean_score'] = mean_score # In[56]: (expressive_language_dataset[covs].isnull().sum() / expressive_language_dataset.shape[0]).round(2) # In[57]: expressive_language_dataset = expressive_language_dataset[expressive_language_dataset.age.notnull() & expressive_language_dataset.int_outside_option.notnull()] # In[58]: expressive_language_dataset.score.hist(grid=False, bins=25, figsize=(10, 6)) plt.xlabel('Standard Scores'); plt.ylabel('Frequency'); # In[59]: M_explang = MCMC(generate_model(expressive_language_dataset), db='sqlite', name='explang', dbmode='w') # In[60]: M_explang.sample(iterations, burn) # In[61]: M_explang.beta.summary() # In[62]: Matplot.summary_plot([M_explang.beta], custom_labels=labels, rhat=False, main='Expressive Language', x_range=(-10,10)) # ### Making predictions # # **Student 1** Girl, 1 year older than average, normal family involvement, 2 siblings, non-profound hearing loss, # mother with HS, no previous disability, JCIH # # **Student 2** Boy, 6 months younger than average, normal family involvement, no siblings, profound hearing loss, # mother without HS diploma, previous disability, JCIH # # **Student 3** Boy, 1.5 years older than average, poor family involvement, 1 sibling, non-profound hearing loss, mother without HS diploma, previous disability, no JCIH # # **Student 4** Girl, 6 months older than average, impaired family involvement, 3 siblings, profound hearing loss, mother with HS diploma, no previous disability, JCIH # In[63]: M_reclang.predictions.summary() # In[64]: Matplot.summary_plot([M_reclang.predictions], rhat=False, custom_labels=['Student {}'.format(i) for i in range(1,5)]) # In[65]: M_explang.predictions.summary() # In[66]: Matplot.summary_plot([M_explang.predictions], rhat=False, custom_labels=['Student {}'.format(i) for i in range(1,5)]) # ## Articulation Model # In[67]: articulation_dataset = lsl_dr[(lsl_dr.domain=='Articulation') & (lsl_dr.non_english==False)] articulation_dataset.head() # In[68]: mean_score = articulation_dataset.groupby('study_id')['score'].mean() articulation_dataset = articulation_dataset.drop_duplicates('study_id').set_index('study_id') articulation_dataset['mean_score'] = mean_score # In[69]: (articulation_dataset[covs].isnull().sum() / articulation_dataset.shape[0]).round(2) # In[70]: articulation_dataset = articulation_dataset[articulation_dataset.age.notnull() & articulation_dataset.int_outside_option.notnull()] # In[71]: articulation_dataset.score.hist(grid=False, bins=25, figsize=(10, 6)) plt.xlabel('Standard Scores'); plt.ylabel('Frequency'); # In[72]: M_articulation = MCMC(generate_model(articulation_dataset), db='sqlite', name='articulation', dbmode='w') # In[73]: M_articulation.sample(iterations, burn) # In[74]: Matplot.summary_plot([M_articulation.beta], custom_labels=labels, rhat=False, main='Articulation', x_range=(-10,10)) # In[75]: Matplot.summary_plot([M_articulation.predictions], rhat=False, custom_labels=['Student {}'.format(i) for i in range(1,5)]) # In[76]: M_articulation.beta.summary() # ## Expressive Vocabulary Model # In[77]: expressive_vocab_dataset = lsl_dr[(lsl_dr.domain=='Expressive Vocabulary') & (lsl_dr.non_english==False)] expressive_vocab_dataset.head() # In[78]: mean_score = expressive_vocab_dataset.groupby('study_id')['score'].mean() expressive_vocab_dataset = expressive_vocab_dataset.drop_duplicates('study_id').set_index('study_id') expressive_vocab_dataset['mean_score'] = mean_score # In[79]: expressive_vocab_dataset[covs].isnull().sum() # In[80]: expressive_vocab_dataset = expressive_vocab_dataset[expressive_vocab_dataset.age.notnull() & expressive_vocab_dataset.male.notnull() & expressive_vocab_dataset.int_outside_option.notnull()] # In[81]: expressive_vocab_dataset.score.hist(grid=False, bins=25, figsize=(10, 6)) plt.xlabel('Standard Scores'); plt.ylabel('Frequency'); # In[82]: M_expressive_vocab = MCMC(generate_model(expressive_vocab_dataset), db='sqlite', name='expressive_vocab', dbmode='w') # In[83]: M_expressive_vocab.sample(iterations, burn) # In[84]: Matplot.summary_plot([M_expressive_vocab.beta], custom_labels=labels, rhat=False, main='Expressive Vocabulary', x_range=(-10,10)) # In[85]: M_expressive_vocab.beta.summary() # In[86]: Matplot.summary_plot(M_explang.alpha_school, custom_labels=[' ']*42, main='Expressive language school effects', x_range=(-25, 25)) # In[87]: Matplot.summary_plot(M_expressive_vocab.alpha_school, custom_labels=[' ']*42, main='Expressive vocabulary school effects', x_range=(-25, 25)) # ## Receptive Vocabulary Model # In[88]: receptive_vocab_dataset = lsl_dr[(lsl_dr.domain=='Receptive Vocabulary') & (lsl_dr.non_english==False)] receptive_vocab_dataset.head() # In[89]: mean_score = receptive_vocab_dataset.groupby('study_id')['score'].mean() receptive_vocab_dataset = receptive_vocab_dataset.drop_duplicates('study_id').set_index('study_id') receptive_vocab_dataset['mean_score'] = mean_score # In[90]: receptive_vocab_dataset[covs].isnull().sum() # In[91]: receptive_vocab_dataset = receptive_vocab_dataset[receptive_vocab_dataset.age.notnull() & receptive_vocab_dataset.male.notnull() & receptive_vocab_dataset.int_outside_option.notnull()] # In[92]: receptive_vocab_dataset.age_int.hist(bins=40) # In[93]: receptive_vocab_dataset.shape # In[94]: receptive_vocab_dataset.score.hist(grid=False, bins=25, figsize=(10, 6)) plt.xlabel('Standard Scores'); plt.ylabel('Frequency'); # In[95]: M_receptive_vocab = MCMC(generate_model(receptive_vocab_dataset), db='sqlite', name='receptive_vocab', dbmode='w') # In[96]: M_receptive_vocab.sample(iterations, burn) # In[97]: #order = np.argsort(M.beta.stats()['mean'])[::-1] Matplot.summary_plot([M_receptive_vocab.beta], rhat=False, custom_labels=labels, main='Receptive Vocabulary', x_range=(-10,10)) # In[98]: M_receptive_vocab.beta.summary() # ## Database queries # In[99]: import sqlite3 # In[100]: db_files = get_ipython().getoutput('ls *sqlite') # In[101]: articulation = sqlite3.connect("articulation.sqlite") # In[102]: c_artic = articulation.cursor() # In[103]: c_artic.execute('SELECT v1 FROM sigma_school WHERE trace==0') # In[104]: sigma_school_artic = np.ravel(c_artic.fetchall()) # In[105]: sigma_school_artic.mean() # In[106]: for dbname in db_files: db = sqlite3.connect(dbname) cur = db.cursor() cur.execute('SELECT v1 FROM sigma_school WHERE trace==0') trace = np.ravel(cur.fetchall()) trace.sort() print('\n'+dbname) print(np.median(trace)) print(trace[[int(len(trace)*0.025), int(len(trace)*0.975)]]) # In[107]: expressive_vocab_data = lsl_dr[lsl_dr.domain=='Expressive Vocabulary'] # In[108]: expressive_vocab_data.score.hist()