#!/usr/bin/env python
# coding: utf-8

# # Introduction

# This notebook is a simple demonstration of how you may be able to predict the phase of the gait cycle from the states using a simple linear regression.
# 
# **warning** This notebook doesn't seem to run with Pandas < 0.14.0. I get an error calling `results.rsquared`.

# # Imports and Setup

# In[1]:


# Add the sibling `src` directory to the module search path. The relative
# path means this only resolves when the working directory is the one the
# notebook lives in.
# NOTE(review): presumably `utils` and `gait_landmark_settings`, imported
# further down, live in ../src -- confirm.
import sys
sys.path.append('../src')


# In[2]:


import matplotlib.pyplot as plt
from pandas import concat
import statsmodels.formula.api as smf
from gaitanalysis.gait import plot_gait_cycles


# In[3]:


import utils
from gait_landmark_settings import settings


# In[4]:


# IPython `%matplotlib inline` magic: draw figures inside the notebook output.
# `get_ipython` is not imported anywhere in this script, so it must be
# supplied by the IPython session the script is run in.
get_ipython().run_line_magic('matplotlib', 'inline')


# In[5]:


from IPython.core.pylabtools import figsize

# Default figure size (width, height) used for every plot in this notebook.
fig_width, fig_height = 14, 10
figsize(fig_width, fig_height)


# # Load First Data Set

# Load the path to the directory with the experimental data.

# In[6]:


# Read the raw experimental data directory from the project's path
# configuration.
# NOTE(review): `trials_dir` is not referenced again in this script;
# `utils.Trial` below is constructed from the trial number only.
trials_dir = utils.config_paths()['raw_data_dir']


# This is the "training" data set. We simply collect all of the gait cycles from the perturbed portion of one trial from subject "A".

# In[7]:


# Training trial. The trial number is defined once and passed to
# `utils.Trial`, so changing it here is enough to switch the data set.
trial_number = '068'
trial = utils.Trial(trial_number)
# Prepare the perturbed portion of the trial, then pull out its gait cycles.
trial.prep_data('Longitudinal Perturbation')
gait_cycles = trial.gait_data_objs['Longitudinal Perturbation'].gait_cycles


# Now create a big data frame which has a column with the percent gait cycle `phi`. It also contains columns for all of the state values.

# In[8]:


# Stack every gait cycle's data frame into one long frame. `iteritems()`
# yields (label, frame) pairs; only the frames are needed.
dfs = [cycle_frame for _, cycle_frame in gait_cycles.iteritems()]
bigdf = concat(dfs)
# Copy the index into a regular column so the regression formula can refer to
# it by name (presumably the index is the percent gait cycle -- confirm).
bigdf['phi'] = bigdf.index.values


# # Fit a model

# Now specify a patsy formula for the model. We simply want to predict `phi` using all of the states as the multivariate linear regressors.

# In[9]:


# Build the regression formula `phi ~ Q("s1") + Q("s2") + ...` with one term
# per sensor. Each name is wrapped in patsy's Q("...") quoting so that names
# which are not valid Python identifiers can still be used as regressors.
sensors, controls = utils.load_sensors_and_controls()
term_separator = '") + Q("'
model = ''.join(['phi ~ Q("', term_separator.join(sensors), '")'])
# Extra regressors that are currently disabled:
#   ' + Q("FP1.ForY") + Q("FP2.ForY")'
# Bare expression so the notebook displays the formula.
model


# In[10]:


# Ordinary least squares: build the model from the formula and the stacked
# training frame, then fit it.
ols_model = smf.ols(formula=model, data=bigdf)
results = ols_model.fit()


# The results show that we have a decent model with a high $R^2$ value.

# In[11]:


# Show the full regression summary table for the fitted model.
fit_summary = results.summary()
print(fit_summary)


# # Load validation data

# Now we load in data from the same subject but at a different speed to test the model.

# In[12]:


# Validation trial. Pass `trial_number` to `utils.Trial` so that trial 069 is
# the one actually loaded; hard-coding '068' here would reload the training
# trial and test the model against the data it was fitted on.
trial_number = '069'
trial = utils.Trial(trial_number)
# Prepare the perturbed portion of the trial, then pull out its gait cycles.
trial.prep_data('Longitudinal Perturbation')
gait_cycles = trial.gait_data_objs['Longitudinal Perturbation'].gait_cycles


# In[13]:


# Stack the validation trial's gait cycles into one long frame, replacing the
# training frame held in `bigdf`. `iteritems()` yields (label, frame) pairs;
# only the frames are needed.
dfs = [cycle_frame for _, cycle_frame in gait_cycles.iteritems()]
bigdf = concat(dfs)
# Copy the index into a regular column so it can be compared with the
# prediction (presumably the index is the percent gait cycle -- confirm).
bigdf['phi'] = bigdf.index.values


# # Test the model

# The following plot shows the actual percent gait cycle and that predicted by the linear model given the states from the new trial.

# In[14]:


# Compare predicted and measured `phi` over the first samples of the frame
# currently held in `bigdf`.
num_samples = 400
predicted_phi = results.predict(bigdf)[:num_samples]
actual_phi = bigdf['phi'].values[:num_samples]

plt.plot(predicted_phi)
plt.plot(actual_phi)
plt.legend(['Prediction', 'Actual'])
plt.ylabel('Percent Gait Cycle')


# # Footer

# In[15]:


# Print the commit hash (HEAD) of the git repository in the current working
# directory.
get_ipython().system('git rev-parse HEAD')


# In[16]:


# Print the commit hash (HEAD) of the GaitAnalysisToolKit checkout.
# NOTE(review): the checkout location is a hard-coded absolute path under
# /home/moorepants, so this command only succeeds on a machine with that
# layout.
get_ipython().system('git --git-dir=/home/moorepants/src/GaitAnalysisToolKit/.git --work-tree=/home/moorepants/src/GaitAnalysisToolKit rev-parse HEAD')


# In[17]:


# Fetch the `version_information` IPython extension from the given URL.
# NOTE(review): `%install_ext` is believed to be deprecated or removed in
# newer IPython releases -- confirm against the IPython version in use.
get_ipython().run_line_magic('install_ext', 'http://raw.github.com/jrjohansson/version_information/master/version_information.py')


# In[18]:


# Load the extension so the `%version_information` magic becomes available.
get_ipython().run_line_magic('load_ext', 'version_information')


# In[19]:


# Report the installed versions of the listed packages.
get_ipython().run_line_magic('version_information', 'gaitanalysis, dtk, numpy, scipy, pandas, matplotlib, tables, statsmodels, oct2py')


# In[20]:


# List the packages in the active conda environment (requires `conda` on the
# PATH).
get_ipython().system('conda list')


# In[21]:


# List the pip-installed packages with their pinned versions.
get_ipython().system('pip freeze')