%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's 'ticks' style to the figures drawn in this notebook.
sns.set(style='ticks')

# Directory (relative to the working directory) the NCS CSV files are read from.
DATA_PATH = '../data/NCS/'

# Feeding table: cells equal to 'M' are parsed as NaN.
feeding_csv = DATA_PATH + 'ncs_teaching_feeding_v1.csv'
teaching_feeding = pd.read_csv(feeding_csv, na_values=['M'])
# Preview the first rows.
teaching_feeding.head()
CHILD_PIDX | CHILD_AGE | VISIT | BREAST_FEED | BREAST_FEED_NOW | BREAST_MILK | BREAST_STOP | FORMULA | FORMULA_AGE | |
---|---|---|---|---|---|---|---|---|---|
0 | a00058528 | 6.0 | 6 | 2.0 | NaN | NaN | NaN | 21.0 | 1.0 |
1 | a00058528 | 12.0 | 12 | NaN | NaN | 0.0 | NaN | 7.0 | 5.0 |
2 | a00103956 | 5.0 | 6 | 1.0 | 2.0 | NaN | NaN | 42.0 | 1.0 |
3 | a00104038 | 6.0 | 6 | 2.0 | NaN | NaN | NaN | 35.0 | 1.0 |
4 | a00104038 | 12.0 | 12 | NaN | NaN | 0.0 | NaN | 0.0 | 5.0 |
# Anthropometry table: cells equal to 'M' are parsed as NaN.
anthro_csv = DATA_PATH + 'ncs_teaching_anthro_v1.csv'
teaching_anthro = pd.read_csv(anthro_csv, na_values=['M'])
# Preview the last rows.
teaching_anthro.tail()
CHILD_PIDX | CHILD_ADJ_AGE | VISIT | WEIGHT | LENGTH | HEIGHT | BMI | UPPER_ARM_LENGTH | UPPER_ARM_CIRC | WAIST_CIRC | HEAD_CIRC | |
---|---|---|---|---|---|---|---|---|---|---|---|
2790 | a99545568 | 23 | 24 | 11.0 | 89.9 | NaN | NaN | 12.0 | 15.1 | 46.1 | 47.0 |
2791 | a99549215 | 30 | 24 | 12.3 | 87.0 | NaN | 16.2 | 18.5 | NaN | NaN | NaN |
2792 | a99549215 | 42 | 36 | 14.1 | NaN | 92.2 | 16.6 | 19.5 | 16.0 | 49.4 | 49.5 |
2793 | a99623267 | 41 | 36 | 16.6 | NaN | NaN | NaN | 21.0 | 18.1 | 52.8 | 50.4 |
2794 | a99772418 | 12 | 12 | 9.5 | 73.5 | NaN | NaN | 13.0 | 15.0 | 51.0 | 47.0 |
# Child table: the first CSV column becomes the index and cells equal to
# 'M' are parsed as NaN.
child_csv = DATA_PATH + 'ncs_teaching_child_v1_1.csv'
teaching_child = pd.read_csv(child_csv, index_col=0, na_values=['M'])
# Sanity check: stop here if any index label occurs more than once.
assert teaching_child.index.is_unique
# Preview the last rows.
teaching_child.tail()
MOM_PIDX | PSU_IDX | RECRUITTYPE | CHILD_SEX | CHILD_DOB_YEAR | CHILD_DOB_QTR | CHILD_RACE | CHILD_ETHNICITY | GESTATIONAL_AGE | BABY_WEIGHT | ... | POVERTY_PERC | TENURE_PERC | LING_ISO_PERC | WNH_PERC | AFAMNH_PERC | ASNH_PERC | HISP_PERC | OTHERNH_PERC | LESSTHANHS_PERC | HSPLUS_PERC | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CHILD_PIDX | |||||||||||||||||||||
a99888867 | b91579332 | 22 | 6 | 1.0 | 2013.0 | 4.0 | NaN | NaN | 4.0 | NaN | ... | 3.0 | 7.0 | 6.0 | 6.0 | 5.0 | 7.0 | 7.0 | 5.0 | 8.0 | 3.0 |
a99913745 | b82779567 | 28 | 2 | 2.0 | 2011.0 | 3.0 | 2.0 | 2.0 | 4.0 | 4.0 | ... | 9.0 | 3.0 | 4.0 | 2.0 | 10.0 | 1.0 | 1.0 | 1.0 | 8.0 | 3.0 |
a99943223 | b72295698 | 2 | 6 | 2.0 | 2013.0 | 4.0 | 3.0 | 2.0 | 4.0 | 5.0 | ... | 8.0 | 5.0 | 2.0 | 5.0 | 8.0 | 9.0 | 5.0 | 5.0 | 7.0 | 4.0 |
d61924669 | c85727084 | 15 | 5 | 2.0 | 2010.0 | 2.0 | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
d92887756 | b58983750 | 15 | 5 | 1.0 | 2010.0 | 2.0 | NaN | NaN | NaN | 5.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 37 columns
# Child-health table: cells equal to 'M' are parsed as NaN.
childhealth_csv = DATA_PATH + 'ncs_teaching_childhealth_v1.csv'
teaching_childhealth = pd.read_csv(childhealth_csv, na_values=['M'])
# Preview the first rows.
teaching_childhealth.head()
CHILD_PIDX | CHILD_AGE | VISIT | VISIT_WT | CHILD_HEALTH | GASTRO | DIARRHEA | EAR_INFECTION | EAR_INFECTION_FREQ | RESPIRATORY | FEVER | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | a00058528 | 6.7 | 6 | 18.0 | 1.0 | 2.0 | NaN | 2.0 | NaN | 2.0 | 0.0 |
1 | a00058528 | 12.2 | 12 | 20.0 | 1.0 | 1.0 | NaN | 2.0 | NaN | 2.0 | 0.0 |
2 | a00058528 | 17.4 | 18 | 21.0 | 1.0 | NaN | NaN | NaN | NaN | 2.0 | 2.0 |
3 | a00103956 | 5.9 | 6 | 14.0 | 1.0 | 2.0 | NaN | 2.0 | NaN | 2.0 | 0.0 |
4 | a00103956 | 18.6 | 18 | 24.0 | 1.0 | NaN | 2.0 | NaN | 2.0 | 2.0 | 0.0 |
# Maternal pregnancy-health table: the first CSV column becomes the index
# and cells equal to 'M' are parsed as NaN.
mompreghealth_csv = DATA_PATH + 'ncs_teaching_mompreghealth_v1.csv'
teaching_mompreghealth = pd.read_csv(
    mompreghealth_csv,
    index_col=0,
    na_values=['M'],
)
# Sanity check: stop here if any index label occurs more than once.
assert teaching_mompreghealth.index.is_unique
# Preview the first rows.
teaching_mompreghealth.head()
HEALTH | BMI | BMI_CAT | THYROID | HIGHBP_NOTPREG | ASTHMA | DIABETES | HIGHBP_PREG | PREECLAMPSIA | EARLY_LABOR | ANEMIA | KIDNEY | NAUSEA | RH_DISEASE | URINE | VAGINOSIS | GROUP_B | CIG_NOW | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOM_PIDX | ||||||||||||||||||
b00014490 | 1.0 | 22.0 | 2.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
b00028364 | 1.0 | 22.9 | 2.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
b00048093 | 2.0 | 24.8 | 2.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
b00060642 | 2.0 | 37.8 | 5.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
b00096696 | 1.0 | 28.0 | 3.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
# Sleep table: cells equal to 'M' are parsed as NaN.
sleep_csv = DATA_PATH + 'ncs_teaching_sleep_v1.csv'
teaching_sleep = pd.read_csv(sleep_csv, na_values=['M'])
# Preview the first rows.
teaching_sleep.head()
CHILD_PIDX | CHILD_AGE | VISIT | SLEEP_HRS_NIGHT | SLEEP_DIFFICULT | SLEEP_THROUGH | |
---|---|---|---|---|---|---|
0 | a00058528 | 2.0 | 3 | 10.0 | 4.0 | NaN |
1 | a00058528 | 6.0 | 6 | 9.0 | 4.0 | 2.0 |
2 | a00058528 | 17.0 | 18 | 10.0 | 3.0 | 2.0 |
3 | a00058528 | 33.0 | 30 | 10.0 | 4.0 | 2.0 |
4 | a00103956 | 2.0 | 3 | 6.0 | 5.0 | NaN |
# Pairwise scatter-plot grids for the anthropometry and sleep tables.
# The sleep plot uses alpha=0.1 so the markers are drawn mostly transparent.
# The trailing semicolons keep the notebook from echoing the returned value.
pd.plotting.scatter_matrix(
    teaching_anthro,
    figsize=(14, 14),
);
pd.plotting.scatter_matrix(
    teaching_sleep,
    figsize=(14, 14),
    alpha=0.1,
);
# Columns of the child table shown in the parallel-coordinates plot.
# 'CHILD_RACE' is the class column, so it colours the lines rather than
# appearing as an axis.
plot_cols = [
    'CHILD_RACE',
    'GESTATIONAL_AGE',
    'BABY_WEIGHT',
    'MULTIPLE',
    'SIBLINGS',
    'WITHDREW',
    'MOM_RACE',
    'MOM_ETHNICITY',
    'MOM_MARISTAT',
    'MOM_EDUCATION',
    'MOM_INSURANCE',
    'SURVEY_LANG',
    'HOUSEHOLD_INCOME',
]

# Keep only rows whose class label is present; NaNs in the other columns
# are left in place.
race_known = teaching_child[plot_cols].dropna(subset=['CHILD_RACE'])
ax = pd.plotting.parallel_coordinates(race_known, 'CHILD_RACE')

# Turn the x tick labels vertical.
plt.xticks(rotation=90);
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), <a list of 12 Text xticklabel objects>)