In [1]:

import glob
import pandas as pd
import os
import statsmodels.api as sm

In [2]:

log_dir = (os.path.join(os.path.expanduser('~'), '.scrimmage', 'experiments', 'my_first_parameter_varying'))
files = glob.glob(os.path.join(log_dir, '*_job_*', 'cpa.csv'))

agg = pd.DataFrame()
for file in files:
    run_num = int((os.path.basename(os.path.dirname(file))).split('_')[-1])
    frame = pd.read_csv(file)
    frame['run'] = run_num
    agg = pd.concat([agg, frame], copy=False)
agg = agg.reset_index(drop=True)
agg.head()

Out[2]:

	entity	cpa	closest_entity	time	run
0	1	5.476739	2	52.3	77
1	2	5.476739	1	52.3	77
2	1	1.919017	2	5.4	86
3	2	1.919017	1	5.4	86
4	1	4.505857	2	10.4	9

In [3]:

params_agg = pd.read_csv(os.path.join(log_dir, 'batch_params.csv'), index_col='run')
params_agg.head()

Out[3]:

	MS_gain	max_speed
run
1	1.733505	24.813339
2	0.497387	22.070393
3	0.515396	19.717730
4	0.053652	17.737459
5	1.957513	20.121153

In [4]:

data = agg.join(params_agg, on='run')
data.head()

Out[4]:

	entity	cpa	closest_entity	time	run	MS_gain	max_speed
0	1	5.476739	2	52.3	77	1.805475	20.063098
1	2	5.476739	1	52.3	77	1.805475	20.063098
2	1	1.919017	2	5.4	86	0.336285	24.769219
3	2	1.919017	1	5.4	86	0.336285	24.769219
4	1	4.505857	2	10.4	9	1.370620	16.866877

In [5]:

entity1 = data[::2]
entity1.head()

Out[5]:

	entity	cpa	closest_entity	time	run	MS_gain	max_speed
0	1	5.476739	2	52.3	77	1.805475	20.063098
2	1	1.919017	2	5.4	86	0.336285	24.769219
4	1	4.505857	2	10.4	9	1.370620	16.866877
6	1	4.180133	2	23.9	84	1.346520	22.231913
8	1	2.104974	2	9.2	100	0.669359	17.816883

In [6]:

entity1.to_csv(os.path.join(log_dir, 'entity_1_data.csv'), index_label='index')

In [7]:

# Basic statistical analysis
X = entity1[["MS_gain", 'max_speed']]
y = entity1['cpa']

model = sm.OLS(y, X).fit()
predictions = model.predict(X)

model.summary()

Out[7]:

OLS Regression Results
Dep. Variable:	cpa	R-squared:	0.972
Model:	OLS	Adj. R-squared:	0.972
Method:	Least Squares	F-statistic:	1730.
Date:	Wed, 05 Dec 2018	Prob (F-statistic):	3.65e-77
Time:	14:49:16	Log-Likelihood:	-95.970
No. Observations:	100	AIC:	195.9
Df Residuals:	98	BIC:	201.2
Df Model:	2
Covariance Type:	nonrobust

	coef	std err	t	P>\|t\|	[0.025	0.975]
MS_gain	2.6621	0.109	24.399	0.000	2.446	2.879
max_speed	0.0380	0.006	6.095	0.000	0.026	0.050

Omnibus:	136.337	Durbin-Watson:	2.117
Prob(Omnibus):	0.000	Jarque-Bera (JB):	4337.281
Skew:	4.736	Prob(JB):	0.00
Kurtosis:	33.842	Cond. No.	34.6

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [ ]: