In [1]:
import glob
import pandas as pd
import os
import statsmodels.api as sm
In [2]:
log_dir = (os.path.join(os.path.expanduser('~'), '.scrimmage', 'experiments', 'my_first_parameter_varying'))
files = glob.glob(os.path.join(log_dir, '*_job_*', 'cpa.csv'))

agg = pd.DataFrame()
for file in files:
    run_num = int((os.path.basename(os.path.dirname(file))).split('_')[-1])
    frame = pd.read_csv(file)
    frame['run'] = run_num
    agg = pd.concat([agg, frame], copy=False)
agg = agg.reset_index(drop=True)
agg.head()
Out[2]:
entity cpa closest_entity time run
0 1 5.476739 2 52.3 77
1 2 5.476739 1 52.3 77
2 1 1.919017 2 5.4 86
3 2 1.919017 1 5.4 86
4 1 4.505857 2 10.4 9
In [3]:
params_agg = pd.read_csv(os.path.join(log_dir, 'batch_params.csv'), index_col='run')
params_agg.head()
Out[3]:
MS_gain max_speed
run
1 1.733505 24.813339
2 0.497387 22.070393
3 0.515396 19.717730
4 0.053652 17.737459
5 1.957513 20.121153
In [4]:
data = agg.join(params_agg, on='run')
data.head()
Out[4]:
entity cpa closest_entity time run MS_gain max_speed
0 1 5.476739 2 52.3 77 1.805475 20.063098
1 2 5.476739 1 52.3 77 1.805475 20.063098
2 1 1.919017 2 5.4 86 0.336285 24.769219
3 2 1.919017 1 5.4 86 0.336285 24.769219
4 1 4.505857 2 10.4 9 1.370620 16.866877
In [5]:
entity1 = data[::2]
entity1.head()
Out[5]:
entity cpa closest_entity time run MS_gain max_speed
0 1 5.476739 2 52.3 77 1.805475 20.063098
2 1 1.919017 2 5.4 86 0.336285 24.769219
4 1 4.505857 2 10.4 9 1.370620 16.866877
6 1 4.180133 2 23.9 84 1.346520 22.231913
8 1 2.104974 2 9.2 100 0.669359 17.816883
In [6]:
entity1.to_csv(os.path.join(log_dir, 'entity_1_data.csv'), index_label='index')
In [7]:
# Basic statistical analysis
X = entity1[["MS_gain", 'max_speed']]
y = entity1['cpa']

model = sm.OLS(y, X).fit()
predictions = model.predict(X)

model.summary()
Out[7]:
OLS Regression Results
Dep. Variable: cpa R-squared: 0.972
Model: OLS Adj. R-squared: 0.972
Method: Least Squares F-statistic: 1730.
Date: Wed, 05 Dec 2018 Prob (F-statistic): 3.65e-77
Time: 14:49:16 Log-Likelihood: -95.970
No. Observations: 100 AIC: 195.9
Df Residuals: 98 BIC: 201.2
Df Model: 2
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
MS_gain 2.6621 0.109 24.399 0.000 2.446 2.879
max_speed 0.0380 0.006 6.095 0.000 0.026 0.050
Omnibus: 136.337 Durbin-Watson: 2.117
Prob(Omnibus): 0.000 Jarque-Bera (JB): 4337.281
Skew: 4.736 Prob(JB): 0.00
Kurtosis: 33.842 Cond. No. 34.6


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [ ]: