#!/usr/bin/env python
# coding: utf-8
"""Exploratory look at the NCS teaching datasets.

Loads six CSV tables from ``DATA_PATH`` into pandas DataFrames and draws
two scatter matrices and one parallel-coordinates plot. The script is a
Jupyter notebook export; the ``# In[n]:`` markers are the original cell
boundaries, and bare ``.head()`` / ``.tail()`` expressions only display
output when run cell-by-cell in a notebook.
"""

# In[39]:

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# ``get_ipython`` only exists inside an IPython/Jupyter session; skip the
# inline-backend magic when the export is run as a plain Python script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

sns.set(style='ticks')


# ## Data import

# In[2]:

DATA_PATH = '../data/NCS/'


def _load_table(filename: str, index_col=None) -> pd.DataFrame:
    """Read one CSV from ``DATA_PATH``, treating the literal ``'M'`` as NaN.

    Args:
        filename: File name relative to ``DATA_PATH``.
        index_col: Passed straight to ``pandas.read_csv``. When given, the
            resulting index must be unique.

    Returns:
        The loaded DataFrame.

    Raises:
        ValueError: If ``index_col`` is given and the index has duplicates.
    """
    table = pd.read_csv(
        DATA_PATH + filename, index_col=index_col, na_values=['M']
    )
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if index_col is not None and not table.index.is_unique:
        raise ValueError(f'{filename}: index column is not unique')
    return table


# In[13]:

teaching_feeding = _load_table('ncs_teaching_feeding_v1.csv')
teaching_feeding.head()


# In[24]:

teaching_anthro = _load_table('ncs_teaching_anthro_v1.csv')
teaching_anthro.tail()


# In[25]:

teaching_child = _load_table('ncs_teaching_child_v1_1.csv', index_col=0)
teaching_child.tail()


# In[16]:

teaching_childhealth = _load_table('ncs_teaching_childhealth_v1.csv')
teaching_childhealth.head()


# In[17]:

teaching_mompreghealth = _load_table(
    'ncs_teaching_mompreghealth_v1.csv', index_col=0
)
teaching_mompreghealth.head()


# In[18]:

teaching_sleep = _load_table('ncs_teaching_sleep_v1.csv')
teaching_sleep.head()


# ## Data Visualizations

# In[20]:

# Trailing semicolons suppress the repr of the returned axes in a notebook.
pd.plotting.scatter_matrix(teaching_anthro, figsize=(14, 14));


# In[45]:

pd.plotting.scatter_matrix(teaching_sleep, figsize=(14, 14), alpha=0.1);


# In[41]:

plot_cols = [
    'CHILD_RACE',
    'GESTATIONAL_AGE',
    'BABY_WEIGHT',
    'MULTIPLE',
    'SIBLINGS',
    'WITHDREW',
    'MOM_RACE',
    'MOM_ETHNICITY',
    'MOM_MARISTAT',
    'MOM_EDUCATION',
    'MOM_INSURANCE',
    'SURVEY_LANG',
    'HOUSEHOLD_INCOME',
]


# In[42]:

# Rows without a CHILD_RACE value cannot be assigned a class, so drop them
# before using that column as the class column.
ax = pd.plotting.parallel_coordinates(
    teaching_child[plot_cols].dropna(subset=['CHILD_RACE']),
    'CHILD_RACE',
)
plt.xticks(rotation=90);