#!/usr/bin/env python
# coding: utf-8
"""Aggregate per-run ``cpa.csv`` logs of a parameter-varying experiment.

The script (a notebook export; the ``# In[n]:`` markers are the original
cell boundaries):

1. collects every ``*_job_*/cpa.csv`` under the experiment log directory,
   tagging each row with the run number parsed from its directory name;
2. joins the rows with ``batch_params.csv`` (indexed by ``run``);
3. keeps every second row, writes it to ``entity_1_data.csv``; and
4. fits an ordinary-least-squares model of ``cpa`` on ``MS_gain`` and
   ``max_speed``.
"""

# In[1]:

import glob
import os

import pandas as pd
import statsmodels.api as sm


# In[2]:

log_dir = os.path.join(
    os.path.expanduser('~'),
    '.scrimmage',
    'experiments',
    'my_first_parameter_varying',
)


def _run_number(csv_path):
    """Return the integer after the last ``_`` in the parent directory name."""
    return int(os.path.basename(os.path.dirname(csv_path)).split('_')[-1])


# glob.glob returns paths in arbitrary order; sort numerically by run so the
# aggregated frame (and the CSV written below) is reproducible.
files = sorted(
    glob.glob(os.path.join(log_dir, '*_job_*', 'cpa.csv')),
    key=_run_number,
)
if not files:
    # Fail early with a clear message instead of a KeyError('run') in the
    # join further down.
    raise FileNotFoundError(
        "no '*_job_*/cpa.csv' files found under {!r}".format(log_dir)
    )

# Read everything first and concatenate once: concatenating inside the loop
# re-copies the accumulated frame on every iteration.
frames = [pd.read_csv(path).assign(run=_run_number(path)) for path in files]
agg = pd.concat(frames, ignore_index=True)
agg.head()


# In[3]:

params_agg = pd.read_csv(
    os.path.join(log_dir, 'batch_params.csv'), index_col='run'
)
params_agg.head()


# In[4]:

# Attach each run's parameters to its rows via the 'run' column.
data = agg.join(params_agg, on='run')
data.head()


# In[5]:

# Every second row, starting with the first.
# NOTE(review): presumably each cpa.csv holds exactly two rows per run with
# entity 1 first, so this selects entity 1 -- confirm against the log format.
entity1 = data.iloc[::2]
entity1.head()


# In[6]:

entity1.to_csv(os.path.join(log_dir, 'entity_1_data.csv'), index_label='index')


# In[7]:

# Basic statistical analysis
X = entity1[['MS_gain', 'max_speed']]
y = entity1['cpa']

# NOTE(review): no constant column is added to X, so this regression is
# fitted without an intercept. If an intercept is wanted, pass
# sm.add_constant(X) instead -- left unchanged here to preserve results.
model = sm.OLS(y, X).fit()
predictions = model.predict(X)
model.summary()


# In[ ]: