import glob
import pandas as pd
import os
import statsmodels.api as sm
# Experiment output directory under ~/.scrimmage.
log_dir = os.path.join(
    os.path.expanduser('~'), '.scrimmage', 'experiments',
    'my_first_parameter_varying')
# Collect the cpa.csv found inside each "*_job_*" sub-directory.
files = glob.glob(os.path.join(log_dir, '*_job_*', 'cpa.csv'))

# Read every file into its own frame and concatenate once at the end;
# growing a DataFrame inside the loop re-copies all earlier rows each pass.
frames = []
for csv_path in files:
    # The run number is the last "_"-separated token of the job directory name.
    run_num = int(os.path.basename(os.path.dirname(csv_path)).split('_')[-1])
    frame = pd.read_csv(csv_path)
    frame['run'] = run_num
    frames.append(frame)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# logs were found. ignore_index=True yields a fresh 0..n-1 index.
agg = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
agg.head()
| | entity | cpa | closest_entity | time | run |
---|---|---|---|---|---|
0 | 1 | 5.476739 | 2 | 52.3 | 77 |
1 | 2 | 5.476739 | 1 | 52.3 | 77 |
2 | 1 | 1.919017 | 2 | 5.4 | 86 |
3 | 2 | 1.919017 | 1 | 5.4 | 86 |
4 | 1 | 4.505857 | 2 | 10.4 | 9 |
# Load batch_params.csv from the experiment directory, using its 'run'
# column as the row index so it can be matched against each job's run number.
params_agg = pd.read_csv(
    os.path.join(log_dir, 'batch_params.csv'),
    index_col='run',
)
params_agg.head()
| | MS_gain | max_speed |
---|---|---|
run | | |
1 | 1.733505 | 24.813339 |
2 | 0.497387 | 22.070393 |
3 | 0.515396 | 19.717730 |
4 | 0.053652 | 17.737459 |
5 | 1.957513 | 20.121153 |
# Attach each run's parameter values to its CPA rows: the 'run' column of
# agg is matched against the index of params_agg (left join, agg rows kept).
data = agg.join(params_agg, on='run', how='left')
data.head()
| | entity | cpa | closest_entity | time | run | MS_gain | max_speed |
---|---|---|---|---|---|---|---|
0 | 1 | 5.476739 | 2 | 52.3 | 77 | 1.805475 | 20.063098 |
1 | 2 | 5.476739 | 1 | 52.3 | 77 | 1.805475 | 20.063098 |
2 | 1 | 1.919017 | 2 | 5.4 | 86 | 0.336285 | 24.769219 |
3 | 2 | 1.919017 | 1 | 5.4 | 86 | 0.336285 | 24.769219 |
4 | 1 | 4.505857 | 2 | 10.4 | 9 | 1.370620 | 16.866877 |
# Keep only the rows reported for entity 1. Selecting on the column value does
# not depend on every cpa.csv listing entity 1 first with exactly two rows,
# which a positional every-other-row slice silently assumes.
entity1 = data[data['entity'] == 1]
entity1.head()
| | entity | cpa | closest_entity | time | run | MS_gain | max_speed |
---|---|---|---|---|---|---|---|
0 | 1 | 5.476739 | 2 | 52.3 | 77 | 1.805475 | 20.063098 |
2 | 1 | 1.919017 | 2 | 5.4 | 86 | 0.336285 | 24.769219 |
4 | 1 | 4.505857 | 2 | 10.4 | 9 | 1.370620 | 16.866877 |
6 | 1 | 4.180133 | 2 | 23.9 | 84 | 1.346520 | 22.231913 |
8 | 1 | 2.104974 | 2 | 9.2 | 100 | 0.669359 | 17.816883 |
# Write the entity-1 subset into the experiment directory; the row index is
# saved as a column named "index".
entity1.to_csv(
    os.path.join(log_dir, 'entity_1_data.csv'),
    index_label='index',
)
# Ordinary least-squares fit of CPA against the two varied parameters.
# NOTE(review): no constant column is added to X, so the model has no
# intercept term; use sm.add_constant(X) if an intercept is intended.
X = entity1.loc[:, ['MS_gain', 'max_speed']]
y = entity1.loc[:, 'cpa']
model = sm.OLS(endog=y, exog=X).fit()
# In-sample predictions for the same rows the model was fitted on.
predictions = model.predict(exog=X)
model.summary()
Dep. Variable: | cpa | R-squared: | 0.972 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.972 |
Method: | Least Squares | F-statistic: | 1730. |
Date: | Wed, 05 Dec 2018 | Prob (F-statistic): | 3.65e-77 |
Time: | 14:49:16 | Log-Likelihood: | -95.970 |
No. Observations: | 100 | AIC: | 195.9 |
Df Residuals: | 98 | BIC: | 201.2 |
Df Model: | 2 | ||
Covariance Type: | nonrobust |
| | coef | std err | t | P>\|t\| | [0.025 | 0.975] |
---|---|---|---|---|---|---|
MS_gain | 2.6621 | 0.109 | 24.399 | 0.000 | 2.446 | 2.879 |
max_speed | 0.0380 | 0.006 | 6.095 | 0.000 | 0.026 | 0.050 |
Omnibus: | 136.337 | Durbin-Watson: | 2.117 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 4337.281 |
Skew: | 4.736 | Prob(JB): | 0.00 |
Kurtosis: | 33.842 | Cond. No. | 34.6 |